2017-10-20 170 views
0

我有一個函數可以獲取PDF文件的總頁數。如何使用VB獲取PDF的內容(上下文)?

' Returns the number of pages in the PDF file at PdfFile.
'
' Parameters:
'   PdfFile - full path of the PDF file to inspect.
'
' Returns:
'   The page count reported by the Acrobat PDDoc object, or 0 when the file
'   does not exist or Acrobat could not open it.
'
' Requires Acrobat (for the "AcroExch.PDDoc" COM class) and a project
' reference to Microsoft Scripting Runtime (for FileSystemObject).
Public Function GetNumPages(ByVal PdfFile As String) As Long
    Dim objTempDoc As Object
    Dim fso As FileSystemObject

    GetNumPages = 0
    Set fso = New FileSystemObject

    If fso.FileExists(PdfFile) Then
        Set objTempDoc = CreateObject("AcroExch.PDDoc")
        ' Open the file that was actually passed in, and only query the page
        ' count when Open reports success.
        If objTempDoc.Open(PdfFile) Then
            GetNumPages = objTempDoc.GetNumPages
            objTempDoc.Close
        End If
        Set objTempDoc = Nothing
    End If

    Set fso = Nothing
End Function

我想在pdf文件的最後一頁得到最後一行的上下文。

我找到了這個API,但我不知道如何使用它。 它會返回我想要的上下文嗎?

PDOCContext PDDocGetOCContext(PDDoc pdDoc)

我嘗試用下面的方式調用這個API,但是失敗了。

' Attempt from the question: create an Acrobat PDDoc COM object, open the PDF,
' then call GetOCContext on it and store the result in myPDFPage.
' NOTE(review): GetOCContext does not appear to be exposed on the
' AcroExch.PDDoc automation object, which would explain the failure - confirm
' against the Acrobat IAC reference.
Set objTempDoc = CreateObject("AcroExch.PDDoc") 
objTempDoc.Open PdfFile 
myPDFPage = objTempDoc.GetOCContext 
+1

PDOCContext不是Acrobat Interapplication Communication API中的對象。如果您想獲取PDPage對象,請對您的PDDoc對象調用AcquirePage(nPage)。 – joelgeraci

回答

0

可以調用此函數來獲取最後一頁的文本。

' Returns the text found on the last page of a PDF file.
'
' The function opens the document through the Acrobat PDDoc COM object,
' builds a rectangle that spans the whole last page (0,0 to the page size),
' creates a text selection for that rectangle and concatenates every text
' element of the selection in order.
'
' Parameters:
'   pstrPdfFilename - full path of the PDF file to read.
'
' Returns:
'   The concatenated text of the last page, or "" when the file does not
'   exist, cannot be opened, has no pages, or no text selection could be
'   created for the page.
'
' Requires project references to the Acrobat type library (Acrobat.* types)
' and Microsoft Scripting Runtime (FileSystemObject).
Public Function GetPDFText(ByVal pstrPdfFilename As String) As String

     Dim PDDoc As Object
     Dim CAcroRect As New Acrobat.AcroRect
     Dim PDPage As Acrobat.AcroPDPage
     Dim PDTxtSelect As Acrobat.AcroPDTextSelect
     Dim CArcoPoint As Acrobat.AcroPoint
     Dim lLastPage As Long
     Dim lNumText As Long
     Dim ii As Long
     Dim fso As FileSystemObject

     GetPDFText = ""

     Set fso = New FileSystemObject
     If fso.FileExists(pstrPdfFilename) Then
      Set PDDoc = CreateObject("AcroExch.PDDoc")
      ' Only touch the document when Acrobat reports it was opened.
      If PDDoc.Open(pstrPdfFilename) Then
       ' Page indexes are zero-based, so the last page is count - 1.
       lLastPage = PDDoc.GetNumPages() - 1
       If lLastPage >= 0 Then
        Set PDPage = PDDoc.AcquirePage(lLastPage)
        Set CArcoPoint = PDPage.GetSize()

        ' Selection rectangle covering the full page.
        CAcroRect.Top = CArcoPoint.y
        CAcroRect.Left = 0
        CAcroRect.Right = CArcoPoint.x
        CAcroRect.bottom = 0

        Set PDTxtSelect = PDDoc.CreateTextSelect(lLastPage, CAcroRect)
        If Not PDTxtSelect Is Nothing Then
         ' Long rather than Integer: a dense page can hold more than
         ' 32767 text elements.
         lNumText = PDTxtSelect.GetNumText
         For ii = 0 To lNumText - 1
          ' Accumulate into the function's own name so the text is
          ' actually returned to the caller.
          GetPDFText = GetPDFText & PDTxtSelect.GetText(ii)
         Next
        End If
       End If
       PDDoc.Close
      End If
     End If

    Set fso = Nothing
    Set PDDoc = Nothing
    Set CAcroRect = Nothing
    Set PDPage = Nothing
    Set PDTxtSelect = Nothing
    Set CArcoPoint = Nothing

End Function