2011-03-08 95 views
3

如何使用c#將csv數據導入到Oracle中。要導入的數據大小爲3GB,行數爲7512263.我設法將csv數據導入到Oracle中,但時間大約需要1個小時。如何加快將csv數據導入到oracle的時間。謝謝。 這是我的代碼:如何使用c#將csv數據導入到Oracle中#

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using System.Diagnostics; 
using System.Threading; 
using System.Text.RegularExpressions; 
using System.IO; 
using FileHelpers; 
using System.Data.OracleClient; 


namespace sqlloader 
{ 
    class Program 
    { 

     static void Main(string[] args) 
     { 
      int jum; 
      int i; 
      bool isFirstLine = false; 
      FileHelperEngine engine = new FileHelperEngine(typeof(XL_XDR)); 

      //Connect To Database 
      string constr = "Data Source=(DESCRIPTION=(ADDRESS_LIST=" 
       + "(ADDRESS=(PROTOCOL=TCP)(HOST= pt-9a84825594af)(PORT=1521)))" 
       + "(CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=o11g)));" 
       + "User Id=xl;Password=rahasia;"; 
      OracleConnection con = new OracleConnection(constr); 
      con.Open(); 



      // To Read Use: 
      XL_XDR[] res = engine.ReadFile("DataOut.csv") as XL_XDR[]; 


      jum = CountLinesInFile("DataOut.csv"); 

      FileInfo f2 = new FileInfo("DataOut.csv"); 
      long s2 = f2.Length; 
      int jmlRecord = jum - 1; 

      for (i = 0; i < jum; i++) 
      { 
       ShowPercentProgress("Processing...", i, jum); 
       Thread.Sleep(100); 

       if (isFirstLine == false) 
       { 
        isFirstLine = true; 
       } 
       else 
       { 
        string sql = "INSERT INTO XL_XDR (XDR_ID, XDR_TYPE, SESSION_START_TIME, SESSION_END_TIME, SESSION_LAST_UPDATE_TIME, " + 
           "SESSION_FLAG, VERSION, CONNECTION_ROW_COUNT, ERROR_CODE, METHOD, HOST_LEN, HOST, URL_LEN, URL, CONNECTION_START_TIME, " + 
           "CONNECTION_LAST_UPDATE_TIME, CONNECTION_FLAG, CONNECTION_ID, TOTAL_EVENT_COUNT, TUNNEL_PAIR_ID, RESPONSIVENESS_TYPE, " + 
           "CLIENT_PORT, PAYLOAD_TYPE, VIRTUAL_TYPE, VID_CLIENT, VID_SERVER, CLIENT_ADDR, SERVER_ADDR, CLIENT_TUNNEL_ADDR, " + 
           "SERVER_TUNNEL_ADDR, ERROR_CODE_2, IPID, C2S_PKTS, C2S_OCTETS, S2C_PKTS, S2C_OCTETS, NUM_SUCC_TRANS, CONNECT_TIME, " + 
           "TOTAL_RESP, TIMEOUTS, RETRIES, RAI, TCP_SYNS, TCP_SYN_ACKS, TCP_SYN_RESETS, TCP_SYN_FINS, EVENT_TYPE, FLAGS, TIME_STAMP, " + 
           "EVENT_ID, EVENT_CODE) VALUES (" + 
           "'" + res[i].XDR_ID + "', '" + res[i].XDR_TYPE + "', '" + res[i].SESSION_START_TIME + "', '" + res[i].SESSION_END_TIME + "', " + 
           "'" + res[i].SESSION_LAST_UPDATE_TIME + "', '" + res[i].SESSION_FLAG + "', '" + res[i].VERSION + "', '" + res[i].CONNECTION_ROW_COUNT + "', " + 
           "'" + res[i].ERROR_CODE + "', '" + res[i].METHOD + "', '" + res[i].HOST_LEN + "', '" + res[i].HOST + "', " + 
           "'" + res[i].URL_LEN + "', '" + res[i].URL + "', '" + res[i].CONNECTION_START_TIME + "', '" + res[i].CONNECTION_LAST_UPDATE_TIME + "', " + 
           "'" + res[i].CONNECTION_FLAG + "', '" + res[i].CONNECTION_ID + "', '" + res[i].TOTAL_EVENT_COUNT + "', '" + res[i].TUNNEL_PAIR_ID + "', " + 
           "'" + res[i].RESPONSIVENESS_TYPE + "', '" + res[i].CLIENT_PORT + "', '" + res[i].PAYLOAD_TYPE + "', '" + res[i].VIRTUAL_TYPE + "', " + 
           "'" + res[i].VID_CLIENT + "', '" + res[i].VID_SERVER + "', '" + res[i].CLIENT_ADDR + "', '" + res[i].SERVER_ADDR + "', " + 
           "'" + res[i].CLIENT_TUNNEL_ADDR + "', '" + res[i].SERVER_TUNNEL_ADDR + "', '" + res[i].ERROR_CODE_2 + "', '" + res[i].IPID + "', " + 
           "'" + res[i].C2S_PKTS + "', '" + res[i].C2S_OCTETS + "', '" + res[i].S2C_PKTS + "', '" + res[i].S2C_OCTETS + "', " + 
           "'" + res[i].NUM_SUCC_TRANS + "', '" + res[i].CONNECT_TIME + "', '" + res[i].TOTAL_RESP + "', '" + res[i].TIMEOUTS + "', " + 
           "'" + res[i].RETRIES + "', '" + res[i].RAI + "', '" + res[i].TCP_SYNS + "', '" + res[i].TCP_SYN_ACKS + "', " + 
           "'" + res[i].TCP_SYN_RESETS + "', '" + res[i].TCP_SYN_FINS + "', '" + res[i].EVENT_TYPE + "', '" + res[i].FLAGS + "', " + 
           "'" + res[i].TIME_STAMP + "', '" + res[i].EVENT_ID + "', '" + res[i].EVENT_CODE + "')"; 

        OracleCommand command = new OracleCommand(sql, con); 
        command.ExecuteNonQuery(); 

       } 




      } 

      Console.WriteLine("Successfully Inserted"); 
      Console.WriteLine(); 
      Console.WriteLine("Number of Row Data: " + jmlRecord.ToString()); 
      Console.WriteLine(); 
      Console.WriteLine("The size of {0} is {1} bytes.", f2.Name, f2.Length); 
      con.Close(); 




     } 

     static void ShowPercentProgress(string message, int currElementIndex, int totalElementCount) 
     { 
      if (currElementIndex < 0 || currElementIndex >= totalElementCount) 
      { 
       throw new InvalidOperationException("currElement out of range"); 
      } 
      int percent = (100 * (currElementIndex + 1))/totalElementCount; 
      Console.Write("\r{0}{1}% complete", message, percent); 
      if (currElementIndex == totalElementCount - 1) 
      { 
       Console.WriteLine(Environment.NewLine); 
      } 
     } 

     static int CountLinesInFile(string f) 
     { 
      int count = 0; 
      using (StreamReader r = new StreamReader(f)) 
      { 
       string line; 
       while ((line = r.ReadLine()) != null) 
       { 
        count++; 
       } 
      } 
      return count; 
     } 

    } 

    [DelimitedRecord(",")] 
    public class XL_XDR 
    { 
     public string XDR_ID; 
     public string XDR_TYPE; 
     public string SESSION_START_TIME; 
     public string SESSION_END_TIME; 
     public string SESSION_LAST_UPDATE_TIME; 
     public string SESSION_FLAG; 
     public string VERSION; 
     public string CONNECTION_ROW_COUNT; 
     public string ERROR_CODE; 
     public string METHOD; 
     public string HOST_LEN; 
     public string HOST; 
     public string URL_LEN; 
     public string URL; 
     public string CONNECTION_START_TIME; 
     public string CONNECTION_LAST_UPDATE_TIME; 
     public string CONNECTION_FLAG; 
     public string CONNECTION_ID; 
     public string TOTAL_EVENT_COUNT; 
     public string TUNNEL_PAIR_ID; 
     public string RESPONSIVENESS_TYPE; 
     public string CLIENT_PORT; 
     public string PAYLOAD_TYPE; 
     public string VIRTUAL_TYPE; 
     public string VID_CLIENT; 
     public string VID_SERVER; 
     public string CLIENT_ADDR; 
     public string SERVER_ADDR; 
     public string CLIENT_TUNNEL_ADDR; 
     public string SERVER_TUNNEL_ADDR; 
     public string ERROR_CODE_2; 
     public string IPID; 
     public string C2S_PKTS; 
     public string C2S_OCTETS; 
     public string S2C_PKTS; 
     public string S2C_OCTETS; 
     public string NUM_SUCC_TRANS; 
     public string CONNECT_TIME; 
     public string TOTAL_RESP; 
     public string TIMEOUTS; 
     public string RETRIES; 
     public string RAI; 
     public string TCP_SYNS; 
     public string TCP_SYN_ACKS; 
     public string TCP_SYN_RESETS; 
     public string TCP_SYN_FINS; 
     public string EVENT_TYPE; 
     public string FLAGS; 
     public string TIME_STAMP; 
     public string EVENT_ID; 
     public string EVENT_CODE; 


    } 
} 
+4

爲什麼不使用綁定變量? Oracle可能花費大部分時間*解析*查詢。解析一次,執行很多。 – 2011-03-08 17:21:48

回答

0

只刪除調用CountLinesInFile可能會幫助,因爲在該方法中,你正在閱讀的行中的所有文件中的行,正如你說該文件是biiig ...

2

我在ODB.NET中使用批量綁定方法運氣良好。在此頁面上搜索「ArrayBindCount」。

http://dotnetslackers.com/articles/ado_net/BulkOperationsUsingOracleDataProviderForNETODPNET.aspx

這將允許您一次插入,而不是成千上萬的小刀片的一切。您目前看到的小時數將變成分鐘數。

+0

雖然這可能在理論上回答這個問題,[這將是更可取的](http://meta.stackexchange.com/q/8259)在這裏包括答案的基本部分,並提供參考鏈接。 – 2013-01-08 22:53:56

2

雖然強烈支持文森特的建議,使用綁定變量(這可能會是一個巨大的性能增益,以及防止DBA限制你粉碎共享池)和布羅斯托的建議做批量綁定,我會傾向於質疑爲什麼你會用C#編寫這類東西。使用外部表格這樣做會更有效率,因此您的應用程序會將文件放置在數據庫服務器的文件系統上,並且通過外部表定義來完成數據的解析和加載(或者甚至讓應用程序調用SQL * Loader)。這可以讓您利用Oracle已經針對此類處理進行了優化的代碼。

此外,由於您要一開始就對文件中的行數進行一次計算,以顯示進度條,您可以通過從更容易獲取的信息中獲取大致的行數來優化該行數。您大致知道文件中每行有多少個字節,並且文件的大小是一個相對容易檢索的文件屬性。這應該允許您估算文件中相對準確的行數,這對於進度條應該足夠了。