無法使用c＃重寫PDFTextStripper.writeString（String text，List <TextPosition> textPositions）方法？

我使用 PDFBox 的 .NET 版本從 PDF 中提取文字及其位置。在搜尋相關做法時，我發現了下面的 Java 代碼：

// Stripper that emits a "[startX - endX/y]" marker after every chunk of text it writes.
PDFTextStripper stripper = new PDFTextStripper()
{
    /**
     * Writes the chunk through the base implementation, then writes a marker built from
     * the first and last TextPosition of that chunk.
     *
     * @param text          the text chunk being written
     * @param textPositions the positions that make up the chunk
     * @throws IOException  propagated from the underlying writeString calls
     */
    @Override
    protected void writeString(String text, List<TextPosition> textPositions) throws IOException
    {
        super.writeString(text, textPositions);

        TextPosition head = textPositions.get(0);
        TextPosition tail = textPositions.get(textPositions.size() - 1);

        // Left edge of the first position, right edge (x + width) of the last one,
        // and the Y of the first position.
        float startX = head.getXDirAdj();
        float endX = tail.getXDirAdj() + tail.getWidthDirAdj();
        float y = head.getYDirAdj();

        writeString(String.format("[%s - %s/%s]", startX, endX, y));
    }
};
stripper.setSortByPosition(true);
return stripper.getText(document);

我用以下方式把它轉換成 .NET（C#）代碼：

/// <summary>
/// PDFTextStripper subclass that records per-character position details in
/// <see cref="textWithPostion"/> and appends a "[startX - endX/y]" marker after each
/// chunk of text written by <c>writeString</c>.
/// </summary>
class PDFTextLocationStripper : PDFTextStripper 
{ 
    /// <summary>Accumulated "String[x,y fs=.. xscale=.. height=.. space=.. width=..]c" entries.</summary>
    public string textWithPostion = ""; 

    /// <summary>
    /// Appends the position metrics and the character of <paramref name="text"/> to
    /// <see cref="textWithPostion"/>.
    /// </summary>
    /// <param name="text">The text position being processed.</param>
    // NOTE(review): base.processTextPosition is not called here; presumably the base class
    // then collects no text to hand to writeString - confirm this is intended.
    protected override void processTextPosition(TextPosition text) 
    { 
      textWithPostion += "String[" + text.getXDirAdj() + "," + 
      text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" + 
      text.getXScale() + " height=" + text.getHeightDir() + " space=" + 
      text.getWidthOfSpace() + " width=" + 
      text.getWidthDirAdj() + "]" + text.getCharacter(); 
    } 

    /// <summary>
    /// Writes the chunk through the base implementation, then writes a
    /// "[startX - endX/y]" marker built from the first and last position of the chunk.
    /// </summary>
    /// <param name="text">The text chunk being written.</param>
    /// <param name="textPositions">The TextPosition items that make up the chunk.</param>
    // NOTE(review): IKVM-based builds usually surface java.lang.String parameters as
    // System.String, and the two-argument writeString may not exist in older PDFBox
    // builds; either would stop this override from binding - confirm against the
    // PDFBox .NET assembly actually referenced.
    protected override void writeString(java.lang.String text, java.util.List textPositions) 
    { 
      base.writeString(text, textPositions); 

      // Nothing to describe when the chunk carries no positions.
      if (textPositions == null || textPositions.isEmpty()) 
      { 
        return; 
      } 

      TextPosition firstProsition = (TextPosition)textPositions.get(0); 
      TextPosition lastPosition = (TextPosition)textPositions.get(textPositions.size() - 1); 

      // .NET composite formatting uses {0}-style placeholders; Java's "%s" would be
      // written out literally. Invariant culture keeps '.' as the decimal separator.
      writeString(String.Format(
        System.Globalization.CultureInfo.InvariantCulture, 
        "[{0} - {1}/{2}]", 
        firstProsition.getXDirAdj(), 
        lastPosition.getXDirAdj() + lastPosition.getWidthDirAdj(), 
        firstProsition.getYDirAdj())); 
    } 

}

但是，上面的代碼出現了以下編譯錯誤：

錯誤 1：方法「writeString」沒有接受 2 個參數的多載（No overload for method 'writeString' takes 2 arguments）

錯誤 2：'PDFTextLocationStripper.writeString(java.lang.String, java.util.List)'：找不到合適的方法可以覆寫（no suitable method found to override）

那麼，我該如何覆寫 writeString 方法，才能同時提取文字及其位置？

來源

2017-01-10 V K

因爲我無法覆寫 writeString 方法，所以改用 processTextPosition 從 PDF 中提取單詞及其位置。代碼如下：

/// <summary>
/// PDFTextStripper subclass that groups the characters it is handed into
/// <see cref="PdfWord"/> instances. Words are stored per Y coordinate and, within a
/// row, keyed by the word's right edge (<c>PdfWord.Right</c>), so the next character
/// can be attached to the word whose right edge it starts at.
/// </summary>
class PDFTextLocationStripper : PDFTextStripper
    {
        // Largest distance between a word's right edge and a character's X for the
        // character to still be appended to that word.
        private const float MaxJoinGap = 1f;

        /// <summary>Kept for callers that read it; this class only initializes it to "".</summary>
        public string textWithPostion = "";

        /// <summary>Words collected so far: outer key is Y, inner key is the word's right edge.</summary>
        public Dictionary<float, Dictionary<float, PdfWord>> pdfWordsByXByY;

        /// <summary>Creates a stripper with an empty word table.</summary>
        public PDFTextLocationStripper() : base()
        {
            textWithPostion = "";
            pdfWordsByXByY = new Dictionary<float, Dictionary<float, PdfWord>>();
        }

        /// <summary>
        /// Adds the character carried by <paramref name="text"/> to the word table:
        /// appended to an existing word on the same Y whose right edge is at (or within
        /// <c>MaxJoinGap</c> of) the character's X, otherwise stored as a new word.
        /// Whitespace-only characters are ignored.
        /// </summary>
        /// <param name="text">The text position being processed.</param>
        protected override void processTextPosition(TextPosition text)
        {
            try
            {
                float textX = text.getXDirAdj();
                float textY = text.getYDirAdj();
                string character = text.getCharacter();
                if (String.IsNullOrWhiteSpace(character))
                {
                    return;
                }

                // Rows are matched on the exact Y value.
                Dictionary<float, PdfWord> wordsByX;
                if (!pdfWordsByXByY.TryGetValue(textY, out wordsByX))
                {
                    wordsByX = new Dictionary<float, PdfWord>();
                    pdfWordsByXByY.Add(textY, wordsByX);
                }

                float? joinKey = FindJoinKey(wordsByX, textX);
                if (joinKey.HasValue)
                {
                    ExtendWord(wordsByX, joinKey.Value, text, character, textX);
                }
                else
                {
                    StartWord(wordsByX, text, character, textX, textY);
                }
            }
            catch (Exception ex)
            {
                // Best effort: one bad character must not abort extraction of the whole
                // document, but the failure is reported instead of vanishing silently.
                System.Diagnostics.Debug.WriteLine(
                    "PDFTextLocationStripper.processTextPosition failed: " + ex);
            }
        }

        // Returns the key of the word the character at textX should be appended to, or
        // null when no word ends close enough.
        private static float? FindJoinKey(Dictionary<float, PdfWord> wordsByX, float textX)
        {
            if (wordsByX.ContainsKey(textX))
            {
                return textX;
            }

            // Single pass over the keys; the first key with the smallest distance wins.
            float? nearestKey = null;
            float minDiff = float.MaxValue;
            foreach (float key in wordsByX.Keys)
            {
                float diff = Math.Abs(key - textX);
                if (diff < minDiff)
                {
                    minDiff = diff;
                    nearestKey = key;
                }
            }

            if (nearestKey.HasValue && minDiff <= MaxJoinGap)
            {
                return nearestKey;
            }
            return null;
        }

        // Appends the character to the word stored under key and re-files the word under
        // its new right edge.
        private static void ExtendWord(Dictionary<float, PdfWord> wordsByX, float key,
            TextPosition text, string character, float textX)
        {
            PdfWord word = wordsByX[key];
            wordsByX.Remove(key);
            word.EndCharWidth = text.getWidthDirAdj();
            word.Height = text.getHeightDir();
            word.EndX = textX;
            word.Text += character;
            AddIfRightEdgeFree(wordsByX, word);
        }

        // Creates a one-character word and files it under its right edge.
        private static void StartWord(Dictionary<float, PdfWord> wordsByX,
            TextPosition text, string character, float textX, float textY)
        {
            PdfWord word = new PdfWord();
            word.Text = character;
            word.EndX = word.StartX = textX;
            word.Y = textY;
            word.EndCharWidth = word.StartCharWidth = text.getWidthDirAdj();
            word.Height = text.getHeightDir();
            AddIfRightEdgeFree(wordsByX, word);
        }

        // Files the word under word.Right unless another word already occupies that key,
        // in which case the existing entry is kept (same as the original behavior).
        private static void AddIfRightEdgeFree(Dictionary<float, PdfWord> wordsByX, PdfWord word)
        {
            if (!wordsByX.ContainsKey(word.Right))
            {
                wordsByX.Add(word.Right, word);
            }
        }
    }

這裏是PdfWord類。

/// <summary>
/// A run of characters collected by PDFTextLocationStripper, together with the
/// position metrics recorded for its first and last character.
/// </summary>
class PdfWord
    {
        /// <summary>Characters collected for this word so far.</summary>
        public string Text { get; set; }

        /// <summary>Y coordinate recorded when the word was created.</summary>
        public float Y { get; set; }

        /// <summary>X coordinate recorded for the word's first character.</summary>
        public float StartX { get; set; }

        /// <summary>Width recorded for the word's first character.</summary>
        public float StartCharWidth { get; set; }

        /// <summary>X coordinate recorded for the most recently added character.</summary>
        public float EndX { get; set; }

        /// <summary>Width recorded for the most recently added character.</summary>
        public float EndCharWidth { get; set; }

        /// <summary>Height recorded for the most recently added character.</summary>
        public float Height { get; set; }

        /// <summary>Right edge of the word: <see cref="EndX"/> plus <see cref="EndCharWidth"/>.</summary>
        public float Right
        {
            get
            {
                return EndX + EndCharWidth;
            }
        }
    }

來源

2017-02-09 06:44:37

無法使用c＃重寫PDFTextStripper.writeString（String text，List <TextPosition> textPositions）方法？

回答

相關問題