2012-01-07 516 views
3

使用iTextSharp從PDF中刪除水印

我使用PdfStamper在PDF上添加了水印。這裏是代碼:

// Stamp a centred, semi-transparent text watermark underneath the existing
// content of every page.
// The font and the graphics state do not depend on the page, so they are
// built once here instead of once per iteration.
BaseFont watermarkFont = BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1252,
    BaseFont.NOT_EMBEDDED);
PdfGState graphicsState = new PdfGState();
graphicsState.FillOpacity = watermarkFontOpacity;

for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++)
{
    // Page size with rotation applied, used to find the centre of the page.
    iTextSharp.text.Rectangle pageRectangle = reader.GetPageSizeWithRotation(pageIndex);

    // "Under" content: the watermark is drawn beneath the page's own content.
    PdfContentByte pdfData = stamper.GetUnderContent(pageIndex);
    pdfData.SetFontAndSize(watermarkFont, watermarkFontSize);
    pdfData.SetGState(graphicsState);
    pdfData.SetColorFill(iTextSharp.text.BaseColor.BLACK);

    pdfData.BeginText();
    pdfData.ShowTextAligned(PdfContentByte.ALIGN_CENTER, "LipikaChatterjee",
        pageRectangle.Width / 2, pageRectangle.Height / 2, watermarkRotation);
    pdfData.EndText();
}

這工作正常。現在我想從我的pdf中刪除這個水印。我看着iTextSharp,但無法得到任何幫助。我甚至嘗試添加水印作爲圖層,然後刪除圖層,但無法從pdf中刪除圖層的內容。我查看了iText的圖層刪除功能,發現了一個OCGRemover類,但是我無法在iTextsharp中獲得相應的類。

+1

如果你有一個添加水印的流程,那麼最簡單的做法是:當你不想要水印時, **不要添加**。 – 2012-01-07 09:42:41

+0

我需要刪除已添加的水印 – Lipika 2012-01-07 11:18:01

+1

需要刪除水印,通常意味着您想從別人的內容中刪除水印……我錯了嗎? – 2012-01-07 11:26:27

回答

11

基於你所說的「我甚至嘗試添加水印作爲圖層」,我姑且相信你,並假設你處理的是自己創建的內容,而不是試圖去除別人內容上的水印。

PDF文件使用可選內容組(OCG)來存儲對象作爲層。如果您將水印文本添加到圖層中,稍後可以很容易地將其刪除。

下面的代碼是一個完整可運行的C# 2010 WinForms應用程序,針對iTextSharp 5.1.1.0。它基於Bruno最初的Java代碼(Bruno's original Java code found here)。代碼分爲三部分。第1部分爲我們創建一個示例PDF。第2部分基於第1部分的文件創建一個新的PDF,並將水印添加到每個頁面的單獨圖層上。第3部分基於第2部分的文件創建最終的PDF,但移除了包含水印文本的圖層。請參閱代碼註釋瞭解更多詳細信息。

當你創建一個PdfLayer對象時,可以爲它指定一個會在PDF閱讀器中顯示的名稱。不幸的是,我無法找到訪問該名稱的方法,因此下面的代碼會在圖層中查找實際的水印文本。如果你沒有使用其他PDF圖層,我建議只在內容流中查找/OC,而不必費時查找實際的水印文本。如果你找到了查找/OC組名稱的方法,請告訴我!

using System; 
using System.Windows.Forms; 
using System.IO; 
using iTextSharp.text; 
using iTextSharp.text.pdf; 

namespace WindowsFormsApplication1 {
    /// <summary>
    /// Demo form that, when loaded, writes three PDFs to the desktop: a plain
    /// sample file, a copy watermarked on a separate layer (OCG), and a copy
    /// of that with the watermark content stripped again.
    /// </summary>
    public partial class Form1 : Form {
        public Form1() {
            InitializeComponent();
        }

        /// <summary>
        /// Runs the three demo sections in order and then closes the form.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e) {
            string workingFolder = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            string startFile = Path.Combine(workingFolder, "StartFile.pdf");
            string watermarkedFile = Path.Combine(workingFolder, "Watermarked.pdf");
            string unwatermarkedFile = Path.Combine(workingFolder, "Un-watermarked.pdf");
            string watermarkText = "This is a test";

            //SECTION 1
            CreateSamplePdf(startFile, 5);

            //SECTION 2
            AddWatermarkLayer(startFile, watermarkedFile, watermarkText);

            //SECTION 3
            RemoveWatermarkLayer(watermarkedFile, unwatermarkedFile, watermarkText);

            this.Close();
        }

        /// <summary>
        /// Creates a PDF with <paramref name="pageCount"/> pages, each holding
        /// a single "This is page N" paragraph. Nothing special here.
        /// </summary>
        /// <param name="path">File to create (overwritten if it exists).</param>
        /// <param name="pageCount">Number of pages to write.</param>
        private static void CreateSamplePdf(string path, int pageCount) {
            using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None)) {
                using (Document doc = new Document(PageSize.LETTER)) {
                    using (PdfWriter writer = PdfWriter.GetInstance(doc, fs)) {
                        doc.Open();

                        for (int i = 1; i <= pageCount; i++) {
                            doc.NewPage();
                            doc.Add(new Paragraph(String.Format("This is page {0}", i)));
                        }

                        doc.Close();
                    }
                }
            }
        }

        /// <summary>
        /// Copies <paramref name="sourcePath"/> to <paramref name="targetPath"/>,
        /// drawing <paramref name="watermarkText"/> under the content of every
        /// page inside a PdfLayer (an OCG, Optional Content Group) named
        /// "WatermarkLayer".
        /// </summary>
        private static void AddWatermarkLayer(string sourcePath, string targetPath, string watermarkText) {
            PdfReader reader = new PdfReader(sourcePath);
            try {
                using (FileStream fs = new FileStream(targetPath, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    using (PdfStamper stamper = new PdfStamper(reader, fs)) {
                        int pageCount = reader.NumberOfPages;
                        //Create a new layer
                        PdfLayer layer = new PdfLayer("WatermarkLayer", stamper.Writer);
                        for (int i = 1; i <= pageCount; i++) {
                            iTextSharp.text.Rectangle rect = reader.GetPageSize(i);
                            //Get the ContentByte object
                            PdfContentByte cb = stamper.GetUnderContent(i);
                            //Tell the CB that the next commands should be "bound" to this new layer
                            cb.BeginLayer(layer);
                            cb.SetFontAndSize(BaseFont.CreateFont(BaseFont.HELVETICA, BaseFont.CP1252, BaseFont.NOT_EMBEDDED), 50);
                            PdfGState gState = new PdfGState();
                            gState.FillOpacity = 0.25f;
                            cb.SetGState(gState);
                            cb.SetColorFill(BaseColor.BLACK);
                            cb.BeginText();
                            cb.ShowTextAligned(PdfContentByte.ALIGN_CENTER, watermarkText, rect.Width / 2, rect.Height / 2, 45f);
                            cb.EndText();
                            //"Close" the layer
                            cb.EndLayer();
                        }
                    }
                }
            } finally {
                //Release the handle on the source file even if stamping fails
                reader.Close();
            }
        }

        /// <summary>
        /// Copies <paramref name="sourcePath"/> to <paramref name="targetPath"/>
        /// after emptying every page content stream that contains both the
        /// "/OC" token and <paramref name="watermarkText"/>.
        /// </summary>
        /// <remarks>
        /// Only pages whose /Contents entry is an array are inspected. The
        /// whole matching stream is emptied, so this relies on the watermark
        /// living in a content stream of its own.
        /// </remarks>
        private static void RemoveWatermarkLayer(string sourcePath, string targetPath, string watermarkText) {
            //First we bind a reader to the watermarked file, then strip out a bunch of things,
            //and finally use a simple stamper to write out the edited reader
            PdfReader reader = new PdfReader(sourcePath);
            try {
                //NOTE, removing PdfName.OCPROPERTIES from reader.Catalog would remove the OCG
                //group completely, but it destroys ALL layers in the document. Only do that
                //if the document has no additional layers.

                //Clean up the reader, optional
                reader.RemoveUnusedObjects();

                int pageCount = reader.NumberOfPages;
                for (int i = 1; i <= pageCount; i++) {
                    PdfDictionary page = reader.GetPageN(i);
                    //Get the raw content
                    PdfArray contentArray = page.GetAsArray(PdfName.CONTENTS);
                    if (contentArray == null) {
                        continue;
                    }

                    for (int j = 0; j < contentArray.Size; j++) {
                        //Get the raw byte stream
                        PRStream stream = (PRStream)contentArray.GetAsStream(j);
                        //Convert to a string. NOTE, you might need a different encoding here
                        string content = System.Text.Encoding.ASCII.GetString(PdfReader.GetStreamBytes(stream));
                        //Look for the OCG token in the stream as well as our watermarked text.
                        //Ordinal comparison: these are PDF tokens, not linguistic text.
                        bool hasLayerToken = content.IndexOf("/OC", StringComparison.Ordinal) >= 0;
                        bool hasWatermark = content.IndexOf(watermarkText, StringComparison.Ordinal) >= 0;
                        if (hasLayerToken && hasWatermark) {
                            //Remove it by giving it zero length and zero data
                            stream.Put(PdfName.LENGTH, new PdfNumber(0));
                            stream.SetData(new byte[0]);
                        }
                    }
                }

                //Write the content out; closing the (otherwise unused) stamper saves the edited reader
                using (FileStream fs = new FileStream(targetPath, FileMode.Create, FileAccess.Write, FileShare.None)) {
                    using (PdfStamper stamper = new PdfStamper(reader, fs)) {
                    }
                }
            } finally {
                //Release the handle on the source file even if the rewrite fails
                reader.Close();
            }
        }
    }
}
1

作爲對Chris's answer的擴展,本帖底部附上了一個用於移除圖層的VB.Net類,它應該更精確一些。

  1. 它遍歷PDF文件的圖層列表(存儲在文件目錄的OCProperties字典的OCGs數組中)。該數組包含對PDF文件中帶有圖層名稱的對象的間接引用
  2. 它遍歷頁面的屬性(也存儲在字典中)以查找指向圖層對象的屬性(通過間接引用)
  3. 它對內容流進行實際解析,查找符合 /OC /{PagePropertyReference} BDC {Actual Content} EMC 模式的片段,從而只刪除這些片段

然後代碼會盡可能清除所有相關引用。調用代碼大致如下:

''' <summary>
''' Writes a copy of the PDF at <paramref name="path"/> to
''' <paramref name="savePath"/> with the layer named "WatermarkLayer" removed.
''' </summary>
''' <param name="path">PDF file to read.</param>
''' <param name="savePath">File to create (overwritten if it exists).</param>
Public Shared Sub RemoveWatermark(path As String, savePath As String)
    ' One Using statement with several resources behaves like nested Using
    ' blocks: the remover is disposed first, then the stamper, the output
    ' file and finally the reader.
    Using reader As New PdfReader(path),
          output As New FileStream(savePath, FileMode.Create, FileAccess.Write, FileShare.None),
          stamper As New PdfStamper(reader, output),
          remover As New PdfLayerRemover(reader)
        remover.RemoveByName("WatermarkLayer")
    End Using
End Sub

完整的類:

Imports iTextSharp.text 
Imports iTextSharp.text.io 
Imports iTextSharp.text.pdf 
Imports iTextSharp.text.pdf.parser 

''' <summary>
''' Removes named layers (optional content groups) from the document held by a
''' <see cref="PdfReader"/>. Layer names are queued with
''' <see cref="RemoveByName"/>; the actual removal runs when the instance is
''' disposed, so it is meant to be used in a Using block.
''' </summary>
Public Class PdfLayerRemover
    Implements IDisposable

    Private _reader As PdfReader
    Private _layerNames As New List(Of String)

    ''' <param name="reader">Reader whose in-memory document will be edited.</param>
    Public Sub New(reader As PdfReader)
        _reader = reader
    End Sub

    ''' <summary>
    ''' Queues a layer for removal. Nothing is changed until Dispose is called.
    ''' </summary>
    ''' <param name="name">Layer name as stored in the layer's /Name entry.</param>
    Public Sub RemoveByName(name As String)
        _layerNames.Add(name)
    End Sub

    ''' <summary>
    ''' Removes the queued layers: cuts their marked-content sections out of the
    ''' page content streams, drops the page properties that point at them and
    ''' removes references to them from the catalog's OCProperties dictionary.
    ''' </summary>
    Private Sub RemoveLayers()
        Dim ocProps = _reader.Catalog.GetAsDict(PdfName.OCPROPERTIES)
        If ocProps Is Nothing Then Return
        Dim ocgs = ocProps.GetAsArray(PdfName.OCGS)
        If ocgs Is Nothing Then Return

        'Collect the object numbers of the layers whose name was queued.
        'Entries that are not indirect references to a dictionary are skipped.
        Dim layerRefNumbers As New HashSet(Of Integer)
        For i As Integer = 0 To ocgs.Size - 1
            Dim layerRef = ocgs.GetAsIndirectObject(i)
            Dim layerDict = ocgs.GetAsDict(i)
            If layerRef Is Nothing OrElse layerDict Is Nothing Then Continue For

            Dim layerName = layerDict.GetAsString(PdfName.NAME)
            If layerName IsNot Nothing AndAlso _layerNames.Contains(layerName.ToString) Then
                layerRefNumbers.Add(layerRef.Number)
            End If
        Next

        'Loop through the pages
        For pageNumber As Integer = 1 To _reader.NumberOfPages
            Dim page = _reader.GetPageN(pageNumber)

            'Get the page properties which reference the layers to remove.
            'A page without resources or without /Properties has nothing to do.
            Dim resources = _reader.GetPageResources(pageNumber)
            If resources Is Nothing Then Continue For
            Dim props = resources.GetAsDict(PdfName.PROPERTIES)
            If props Is Nothing Then Continue For

            Dim propsToRemove As New List(Of PdfName)
            For Each key As PdfName In props.Keys
                Dim target = props.GetAsIndirectObject(key)
                If target IsNot Nothing AndAlso layerRefNumbers.Contains(target.Number) Then
                    propsToRemove.Add(key)
                End If
            Next
            If propsToRemove.Count = 0 Then Continue For

            'Property names without the leading "/", as the tokenizer reports them
            Dim propNames As New HashSet(Of String)(From p In propsToRemove Select p.ToString.Substring(1))

            '/Contents may be an array of streams or a single stream
            Dim contentStreams As New List(Of PRStream)
            Dim contentArray = page.GetAsArray(PdfName.CONTENTS)
            If contentArray IsNot Nothing Then
                For j As Integer = 0 To contentArray.Size - 1
                    Dim part = TryCast(contentArray.GetAsStream(j), PRStream)
                    If part IsNot Nothing Then contentStreams.Add(part)
                Next
            Else
                Dim onlyStream = TryCast(page.GetAsStream(PdfName.CONTENTS), PRStream)
                If onlyStream IsNot Nothing Then contentStreams.Add(onlyStream)
            End If

            'NOTE: each stream is parsed on its own, so a section opened in one
            'stream and closed in the next one is left untouched.
            For Each stream In contentStreams
                Dim streamData = PdfReader.GetStreamBytes(stream)
                Dim newData = GetNewStream(streamData, propNames)

                'GetNewStream returns the same array when nothing was cut
                If newData IsNot streamData Then
                    stream.SetData(newData)
                    stream.Put(PdfName.LENGTH, New PdfNumber(newData.Length))
                End If
            Next

            'Remove the properties from the page data
            For Each prop In propsToRemove
                props.Remove(prop)
            Next
        Next

        'Remove references to the layer in the master catalog
        RemoveIndirectReferences(ocProps, layerRefNumbers)

        'Clean up unused objects
        _reader.RemoveUnusedObjects()
    End Sub

    ''' <summary>
    ''' Returns a copy of <paramref name="data"/> with every
    ''' "/OC /{name} BDC ... EMC" section removed whose property name is in
    ''' <paramref name="propsToRemove"/>.
    ''' </summary>
    ''' <param name="data">Decoded content stream bytes.</param>
    ''' <param name="propsToRemove">Property names without the leading "/".</param>
    ''' <returns>
    ''' The trimmed bytes, or <paramref name="data"/> itself (same instance)
    ''' when no section was removed.
    ''' </returns>
    Private Shared Function GetNewStream(data As Byte(), propsToRemove As IEnumerable(Of String)) As Byte()
        'Alternating [keepStart, keepEnd) offsets: 0, cutStart1, cutEnd1, ..., data.Length
        Dim positions As New List(Of Integer)
        positions.Add(0)

        Dim pos As Integer
        Dim inGroup As Boolean = False
        'Marked-content nesting depth inside the section being cut, so that an
        'inner BDC/BMC ... EMC pair does not end the cut too early
        Dim depth As Integer = 0
        Dim tokenizer As New PRTokeniser(New RandomAccessFileOrArray(New RandomAccessSourceFactory().CreateSource(data)))
        While tokenizer.NextToken
            If tokenizer.TokenType = PRTokeniser.TokType.NAME AndAlso tokenizer.StringValue = "OC" Then
                'Step back over the three characters of "/OC"
                pos = CInt(tokenizer.FilePointer - 3)
                If tokenizer.NextToken() AndAlso tokenizer.TokenType = PRTokeniser.TokType.NAME Then
                    If Not inGroup AndAlso propsToRemove.Contains(tokenizer.StringValue) Then
                        inGroup = True
                        depth = 0
                        positions.Add(pos)
                    End If
                End If
            ElseIf inGroup AndAlso tokenizer.TokenType = PRTokeniser.TokType.OTHER Then
                Select Case tokenizer.StringValue
                    Case "BDC", "BMC"
                        'The first BDC seen is the one that opens the section itself
                        depth += 1
                    Case "EMC"
                        depth -= 1
                        If depth <= 0 Then
                            positions.Add(CInt(tokenizer.FilePointer))
                            inGroup = False
                        End If
                End Select
            End If
        End While

        'A section that never closes in this stream is kept as it is; dropping
        'its start offset also keeps the offsets in pairs.
        If inGroup Then positions.RemoveAt(positions.Count - 1)
        positions.Add(data.Length)

        If positions.Count <= 2 Then Return data

        Dim length As Integer = 0
        For i As Integer = 0 To positions.Count - 2 Step 2
            length += positions(i + 1) - positions(i)
        Next

        'VB array declarations take the upper bound, hence "length - 1"
        Dim newData(length - 1) As Byte
        Dim offset As Integer = 0
        For i As Integer = 0 To positions.Count - 2 Step 2
            Dim count = positions(i + 1) - positions(i)
            Array.Copy(data, positions(i), newData, offset, count)
            offset += count
        Next

        Return newData
    End Function

    ''' <summary>
    ''' Walks <paramref name="dict"/> recursively and removes, from every array
    ''' found, the indirect references whose object number is in
    ''' <paramref name="refNumbers"/>.
    ''' </summary>
    Private Shared Sub RemoveIndirectReferences(dict As PdfDictionary, refNumbers As IEnumerable(Of Integer))
        For Each key In dict.Keys
            Dim childDict = dict.GetAsDict(key)
            Dim childArray = dict.GetAsArray(key)
            If childDict IsNot Nothing Then
                RemoveIndirectReferences(childDict, refNumbers)
            ElseIf childArray IsNot Nothing Then
                RemoveIndirectReferences(childArray, refNumbers)
            End If
        Next
    End Sub

    ''' <summary>
    ''' Array counterpart of the dictionary overload. Matching indirect
    ''' references are removed; direct nested arrays and dictionaries are
    ''' searched in turn; every other element is left alone.
    ''' </summary>
    Private Shared Sub RemoveIndirectReferences(arrayData As PdfArray, refNumbers As IEnumerable(Of Integer))
        Dim i As Integer = 0
        While i < arrayData.Size
            Dim indirect = arrayData.GetAsIndirectObject(i)
            If indirect IsNot Nothing Then
                If refNumbers.Contains(indirect.Number) Then
                    'Do not advance: the next element has moved into slot i
                    arrayData.Remove(i)
                    Continue While
                End If
            Else
                Dim nestedArray = arrayData.GetAsArray(i)
                Dim nestedDict = arrayData.GetAsDict(i)
                If nestedArray IsNot Nothing Then
                    RemoveIndirectReferences(nestedArray, refNumbers)
                ElseIf nestedDict IsNot Nothing Then
                    RemoveIndirectReferences(nestedDict, refNumbers)
                End If
            End If
            i += 1
        End While
    End Sub

#Region "IDisposable Support"
    Private disposedValue As Boolean ' To detect redundant calls

    ' IDisposable
    ' NOTE: disposing is what triggers the layer removal, so errors raised by
    ' RemoveLayers surface from Dispose / End Using.
    Protected Overridable Sub Dispose(disposing As Boolean)
        If Not Me.disposedValue Then
            If disposing Then
                RemoveLayers()
            End If

            ' TODO: free unmanaged resources (unmanaged objects) and override Finalize() below.
            ' TODO: set large fields to null.
        End If
        Me.disposedValue = True
    End Sub

    ' TODO: override Finalize() only if Dispose(ByVal disposing As Boolean) above has code to free unmanaged resources.
    'Protected Overrides Sub Finalize()
    ' ' Do not change this code. Put cleanup code in Dispose(ByVal disposing As Boolean) above.
    ' Dispose(False)
    ' MyBase.Finalize()
    'End Sub

    ' This code added by Visual Basic to correctly implement the disposable pattern.
    Public Sub Dispose() Implements IDisposable.Dispose
        ' Do not change this code. Put cleanup code in Dispose(ByVal disposing As Boolean) above.
        Dispose(True)
        GC.SuppressFinalize(Me)
    End Sub
#End Region

End Class