2017-07-04 96 views
0

我正在使用Microsoft DocumentFormat.OpenXml SDK從Excel文件中讀取數據。 雖然這樣做,我正在考慮如果一個單元格有空白值(如果是,也請閱讀)。使用Microsoft DocumentFormat.OpenXml SDK讀取c#中的excel文件

現在,面臨着其中workSheet.SheetDimension爲空的Excel表之一的問題,因此代碼拋出異常。使用

代碼:

類OpenXMLHelper { //一個輔助函數來打開使用的OpenXML Excel文件,並且從一個 工作表//返回包含所有的數據的數據表。 // //我們在使用OLEDB讀取Excel數據時遇到了很多問題(例如,ACE驅動程序不再存在於新服務器上, // OLEDB由於安全問題而不工作,並且公然忽略空白行工作表頂部),所以這是一個更穩定的數據讀取方法。 //

public static DataTable ExcelWorksheetToDataTable(string pathFilename) 
    { 
     try 
     { 
      DataTable dt = new DataTable(); 
      string dimensions = string.Empty; 

      using (FileStream fs = new FileStream(pathFilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) 
      { 
       using (SpreadsheetDocument document = SpreadsheetDocument.Open(fs, false)) 
       { 
        // Find the sheet with the supplied name, and then use that 
        // Sheet object to retrieve a reference to the first worksheet. 
        //Sheet theSheet = document.WorkbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == worksheetName).FirstOrDefault(); 
        //--Sheet theSheet = document.WorkbookPart.Workbook.Descendants<Sheet>().FirstOrDefault(); 

        //--if (theSheet == null) 
        //-- throw new Exception("Couldn't find the worksheet: "+ theSheet.Id); 

        // Retrieve a reference to the worksheet part. 
        //WorksheetPart wsPart = (WorksheetPart)(document.WorkbookPart.GetPartById(theSheet.Id)); 
        //--WorksheetPart wsPart = (WorksheetPart)(document.WorkbookPart.GetPartById(theSheet.Id)); 

        WorkbookPart workbookPart = document.WorkbookPart; 
        WorksheetPart wsPart = workbookPart.WorksheetParts.FirstOrDefault(); 
        Worksheet workSheet = wsPart.Worksheet; 

        dimensions = workSheet.SheetDimension.Reference.InnerText;  // Get the dimensions of this worksheet, eg "B2:F4" 

        int numOfColumns = 0; 
        int numOfRows = 0; 
        CalculateDataTableSize(dimensions, ref numOfColumns, ref numOfRows); 
        //System.Diagnostics.Trace.WriteLine(string.Format("The worksheet \"{0}\" has dimensions \"{1}\", so we need a DataTable of size {2}x{3}.", worksheetName, dimensions, numOfColumns, numOfRows)); 

        SheetData sheetData = workSheet.GetFirstChild<SheetData>(); 
        IEnumerable<Row> rows = sheetData.Descendants<Row>(); 

        string[,] cellValues = new string[numOfColumns, numOfRows]; 

        int colInx = 0; 
        int rowInx = 0; 
        string value = ""; 
        SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart; 

        // Iterate through each row of OpenXML data, and store each cell's value in the appropriate slot in our [,] string array. 
        foreach (Row row in rows) 
        { 
         for (int i = 0; i < row.Descendants<Cell>().Count(); i++) 
         { 
          // *DON'T* assume there's going to be one XML element for each column in each row... 
          Cell cell = row.Descendants<Cell>().ElementAt(i); 
          if (cell.CellValue == null || cell.CellReference == null) 
           continue;      // eg when an Excel cell contains a blank string 

          // Convert this Excel cell's CellAddress into a 0-based offset into our array (eg "G13" -> [6, 12]) 
          colInx = GetColumnIndexByName(cell.CellReference);    // eg "C" -> 2 (0-based) 
          rowInx = GetRowIndexFromCellAddress(cell.CellReference) - 1;  // Needs to be 0-based 

          // Fetch the value in this cell 
          value = cell.CellValue.InnerXml; 
          if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString) 
          { 
           value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText; 
          } 

          cellValues[colInx, rowInx] = value; 
         } 
        } 

        // Copy the array of strings into a DataTable. 
        // We don't (currently) make any attempt to work out which columns should be numeric, rather than string. 
        for (int col = 0; col < numOfColumns; col++) 
        { 
         //dt.Columns.Add("Column_" + col.ToString()); 
         dt.Columns.Add(cellValues[col, 0]); 
        } 

        //foreach (Cell cell in rows.ElementAt(0)) 
        //{ 
        // dt.Columns.Add(GetCellValue(doc, cell)); 
        //} 


        for (int row = 0; row < numOfRows; row++) 
        { 
         DataRow dataRow = dt.NewRow(); 
         for (int col = 0; col < numOfColumns; col++) 
         { 
          dataRow.SetField(col, cellValues[col, row]); 
         } 
         dt.Rows.Add(dataRow); 
        } 

        dt.Rows.RemoveAt(0); 
        //#if DEBUG 
        //    // Write out the contents of our DataTable to the Output window (for debugging) 
        //    string str = ""; 
        //    for (rowInx = 0; rowInx < maxNumOfRows; rowInx++) 
        //    { 
        //     for (colInx = 0; colInx < maxNumOfColumns; colInx++) 
        //     { 
        //      object val = dt.Rows[rowInx].ItemArray[colInx]; 
        //      str += (val == null) ? "" : val.ToString(); 
        //      str += "\t"; 
        //     } 
        //     str += "\n"; 
        //    } 
        //    System.Diagnostics.Trace.WriteLine(str); 
        //#endif 
        return dt; 
       } 
      } 
     } 
     catch (Exception ex) 
     { 
      return null; 
     } 

    } 

    public static void CalculateDataTableSize(string dimensions, ref int numOfColumns, ref int numOfRows) 
    { 
     // How many columns & rows of data does this Worksheet contain ? 
     // We'll read in the Dimensions string from the Excel file, and calculate the size based on that. 
     //  eg "B1:F4" -> we'll need 6 columns and 4 rows. 
     // 
     // (We deliberately ignore the top-left cell address, and just use the bottom-right cell address.) 
     try 
     { 
      string[] parts = dimensions.Split(':');  // eg "B1:F4" 
      if (parts.Length != 2) 
       throw new Exception("Couldn't find exactly *two* CellAddresses in the dimension"); 

      numOfColumns = 1 + GetColumnIndexByName(parts[1]);  // A=1, B=2, C=3 (1-based value), so F4 would return 6 columns 
      numOfRows = GetRowIndexFromCellAddress(parts[1]); 
     } 
     catch 
     { 
      throw new Exception("Could not calculate maximum DataTable size from the worksheet dimension: " + dimensions); 
     } 
    } 

    public static int GetRowIndexFromCellAddress(string cellAddress) 
    { 
     // Convert an Excel CellReference column into a 1-based row index 
     // eg "D42" -> 42 
     //  "F123" -> 123 
     string rowNumber = System.Text.RegularExpressions.Regex.Replace(cellAddress, "[^0-9 _]", ""); 
     return int.Parse(rowNumber); 
    } 

    public static int GetColumnIndexByName(string cellAddress) 
    { 
     // Convert an Excel CellReference column into a 0-based column index 
     // eg "D42" -> 3 
     //  "F123" -> 5 
     var columnName = System.Text.RegularExpressions.Regex.Replace(cellAddress, "[^A-Z_]", ""); 
     int number = 0, pow = 1; 
     for (int i = columnName.Length - 1; i >= 0; i--) 
     { 
      number += (columnName[i] - 'A' + 1) * pow; 
      pow *= 26; 
     } 
     return number - 1; 
    } 
}[enter image description here][1] 

回答

0

SheetDimension部分是可選的(並且爲此你不能總是依賴於它是最新的)。看到的OpenXML說明書的以下部分:

18.3.1.35尺寸(工作單尺寸)

該元素指定的工作表的使用範圍。它指定工作表中 已用單元格的行和列邊界。 這是可選的,不是必需的。 已使用的單元格包括具有公式,文本內容和單元格格式的單元格。當整列被格式化時,只有 該列中的第一個單元被認爲被使用。

因此,沒有任何SheetDimension部分的Excel文件是完全有效的,所以您不應該依賴它存在於Excel文件中。

爲此我建議簡單地解析包含在SheetData部分的所有元素,和「算」的行數(而不是閱讀SheetDimensions部分獲得的行/列數)。這樣您還可以考慮到Excel文件可能包含數據之間的完全空白行。

+0

只分析行不會獲取所需的結果。 Issue: 行1:5單元格(全部具有值) 行2:6單元格(單元格1,2爲空白)。 所以發生了什麼是放置數據時,第1行將被打印,但第2行有2個空白單元格,在插入到數據表中時會向左移動。正因爲如此,我考慮了SheetDimension。 任何建議如何解決。 代碼片段可以更清晰。 –