2013-11-27 96 views
3

我想使用iText將包含圖像的html文件轉換爲pdf。我在這裏提供我的源代碼。使用iText將html轉換爲pdf

這是我的HTML文件...

<html> 

<body> 
<img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAD4AAABQCAMAAAB24TZcAAAABGdBTUEAANbY1E9YMgAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAGAUExURdSmeJp2SHlbQIRoSUg2J499a8KebqeHZuGufBEVJPz7+3NWPVxGMduwhPXEktnX1mtROLq7t5WDc2VMNv3LmKB8TMSidMbFxLGlmXlhSMSddpJUL+y8i3VlVqedlOzr6gUIF2lXRLCLY4ZyXLyYaYhtUYiJhJFyU1dBLLiVZnlwZrWRY/Hx8b+2rbySaJh9YqeooDw4NygnKvvJlpyblzksIUhGRryYckc7MPjGlKODX5x8VVA8K+azgM3FvDInHK2JW2ZbUOHh4Xt2cFpaWKeAUM6kel1RRJmUjo5vSrWzrJJ1WFhLQCQmMuK1iJiMgmthWPPCkOm3hEtBOunm5LCNXnJtZquEXmNkYvG+i7Ctq+y5hrWRbKqSeaN/WqmFVYFgQh8aGOa4isWkd8mcby4vONDNy0AwI5h2U19JMxkdLzIuL1JBMjQ3P5Z6Ve6/j93c2+Xi34KAfJ5/Xvj4+O/u7sSKVJd4Wo6QjXE+IeOwfQcNJoBeQ8Gdbf/Mmf///5GX6NEAAAcrSURBVHja3JbpX9pIGMchiWkgEaOBtaGinBLEyopFBeMqtYKI4kGt2lILFsUoXa3WdZcc/dd3JheHAvaz7/Z5Ec2Q7/yeaw7Lz/9klv8rfnM+Orz5cXLjZsL+67h9eCq9Vaxvzc6v3W6+/TX85kN6ixdokkQQCaE5vrg28Qv4a2yFQcpSi/HzH6efi+/UaEAwWAtepuvv3tw/B//hqZGQqDFSmyHC7v0z8EldlZQQEgTfMgF23h8/T+gEhQGrcQYrMBKVtvfDb4qU/j3DMK3SdIKWsNs++M1iS8R8W/gULyG1771w+/stQWpTpFpzByb09MRHEwaoxUxToGtaZiBrE72cXzMyhcDiIRgCHxJPIxKt5aF23gMf0iquz8BJmAAFpUStxvG0xIA3arcHPsvrJM1wvFTDeEGQeKCewCo1jgRDwKuJrrh9C3osIfyiz+NboZFKxU0xJEYmeJbBhPoKiKyMDXfHd0mJWSETnoKiKCmgSioFDKFr4T1lbn/fgkHf+PGu+A+A12imMqdAqzNUXlFCFP+gOD41CKJBcCB4bKSnOmitB5VWSgnMrSjhCnu8D1hoS1xP/KcH1BhZdGi4c4VNAh/I5PGyRjdQqje+A6YXPIpup/DhHlMUh44f1hAJ6x77z3OwVjG/0ml7Ot4gOWnxvkfbALw+2EnPGc43ojWk3qNt7hdpiSp0ajcMukHQPB/4o3vPf8TKQgc+pqXdkpEtgGewE7THel/j66dtdBLA1XAYRXK8AGbxC/6RHvjbCuOE0Kklk8lcg/+OicaJcOhfTflTVYCHuYvX3XH7QCxcUAol9i6VursLha+VfcLPHwamZjfSAgxi6QId6oFnC5awsjdoWYjFPrOlB3QONAtJjrwsetiq2jkzgfc9nPdklJBDyXvGj+Zf+jIKe7pPoNFoOHwyoyaQKFcD9z3wzbwSGnT6fCMB9u5UmWMLYwTJQo5QC2AB6r122ukBJeVWnA6HIwlLnp/bI/w5wI3tJR3LjcZMbvVzL/xHwOG+M6s2mFeSjRm0QRyDYnyCOEv/0fOYGM/vha4N3J1S5hoZhCAcYBro/AwV63NIjafuzL4rLSjOZYKeIT45j9XUnQTs/Y7Inbqp/pABeIPBqsTystr0/pd9T9jprZIGO9CHa4gTPHairxr/eP/rwai+YdzlWQfALSHu4qTxfHxiQKVTaBINvfCjDFo1Fmzjor/zP+0BNXdgxSTdqRe5w0bT2hq+293mdWDOSJ5DWbgwd4uGpSPxXW5WGzGddhYWHsDRguqpO5x9jjq4HY3BnjtcRRGGe/Xqn38YC6SraVt84jnX
wo0FgC8kOK7s+mv91St6RhVnZ72Vqeln4EM+cFY43SHgdj584c9ormdFbx3Jbk73v9PuvNCCvx67ntPzlmG2xUvUhQpZz9roxHdwXx4e7Yb/fdXc7o81PFcUxW2ry+Wy5miM4gQkEAh0uxKfXWbdLXs1XGxZURRnXZpZrVbXegT/rUvm571itnncQPctWZso2hAdd61GIzIuf32y5zduL0VxtwQPWG2vB7QP0OKKVaejOI7L8lP4+S3r+wY+zSZfGPvGPlFlt8FQ3BCPQPYpfOjWs3QHtMVLJqmU0NLe9XVhsBpOwyER0+D1oE534t8Hsn/KctwLokxUgeunD6FwCA2xMGtAPAdhjkr55afwoaksGpHlAKTnWUK9ZIAt15k/U+mK5voSuoI9Vre/fZPOBcFQKg4+PXsXg7urVra0Stvqmud4mTp4hN/s+lAIy8ErIC7Oz8aITzqegYkUL4tawQ+ivEvudP7Gt6SPpCpewJ8BfN+pb/aq71dG2kjayLuJ3/vC+gB+EBe9Xm/8KEQs67hShMmgIRsNylFuFe9UL1IGHXHNAtr77ZYN7htNB8LxJmCnyaBZULpJ6/g4ZZQCX83FAS1u3675xnTaX/GKFdLl+gIaDZeFpU78rS9oDnzZEmHstqPJKc9n90LJPThyBUZIVRtMv8Q1v9Xx8bzxigddWo1t7yZ//zgSCwRiK6CO0PUD2OR4hMnhHfiPtYiJr4a8Jj4MbHNe7UC4RtTfc5wsd+DD6RbxxTZ8chtkrcJGIlqX41GqTVzFp3wmfmCNi5rNT74Z3nwHi2BjZW11AtdzgvxIfSBl4l/Klzr+bfLvzSNYA1u9xTfmz8f4lLmA5HWfgV8eTa7BEohxox1xeZ1F5Ef4fTrYnL4oGjb7QZ3JVgk2W4KJPMZvmWbo9KWJ27QsXKHm3DkhJT/Gs6z55lo0abV5wCSL5txL/CMa4PYPUXN+5qwTj68aXwa5MP4Efj/VDA4TW3BV3PQMp7Wlgnfg555mcPFO8RbXMbXv8Oh6pG3J7IRM8bq3Q/zKLFqUQ3GteNYvbepG1XG57O0Qt9Hmd1bOKC1qbZH/zbK78FWzYMJ2aZoXPq7kr8ZvORr+iUSjJzQb/Gpa5l8BBgBZTppAyfsf0wAAAABJRU5ErkJggg==' width='62' height='80' style='float: left; margin-right: 28px;' alt="" /> 
<!-- <img src="add.png" alt="" /> --> 
</body> 
</html> 

我想轉換此HTML文件爲pdf ...

我現在使用下面的Java代碼...

import java.io.DataOutputStream; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.StringReader; 
import java.net.URL; 
import java.nio.charset.Charset; 

import org.apache.commons.io.IOUtils; 
import org.apache.pdfbox.encoding.Encoding; 
import org.jsoup.Jsoup; 
import org.jsoup.safety.Whitelist; 
import org.jsoup.select.Elements; 
import org.w3c.tidy.Tidy; 

import com.itextpdf.text.Document; 
import com.itextpdf.text.DocumentException; 
import com.itextpdf.text.Image; 
import com.itextpdf.text.pdf.PdfWriter; 
import com.itextpdf.tool.xml.Pipeline; 
import com.itextpdf.tool.xml.XMLWorker; 
import com.itextpdf.tool.xml.XMLWorkerFontProvider; 
import com.itextpdf.tool.xml.XMLWorkerHelper; 
import com.itextpdf.tool.xml.css.CssFilesImpl; 
import com.itextpdf.tool.xml.css.StyleAttrCSSResolver; 
import com.itextpdf.tool.xml.html.CssAppliersImpl; 
import com.itextpdf.tool.xml.html.HTML; 
import com.itextpdf.tool.xml.html.TagProcessor; 
import com.itextpdf.tool.xml.html.TagProcessorFactory; 
import com.itextpdf.tool.xml.html.Tags; 
import com.itextpdf.tool.xml.parser.XMLParser; 
import com.itextpdf.tool.xml.pipeline.css.CSSResolver; 
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline; 
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline; 
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline; 
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext; 
import com.itextpdf.tool.xml.pipeline.html.ImageProvider; 
import com.pdfcrowd.Client; 

/**
 * Command-line example that feeds {@code index.html} through iText's
 * {@code XMLWorkerHelper} and writes the result to {@code pdf.pdf}.
 * Both paths are relative to the working directory.
 */
public class App 
{ 

    /**
     * Converts {@code index.html} to {@code pdf.pdf}.
     *
     * @param args command-line arguments; not used
     * @throws DocumentException if iText cannot create the PDF writer or the document
     * @throws IOException if a file cannot be opened, read, written or closed
     */
    public static void main(String[] args) throws DocumentException, IOException 
    { 
        // Open the input first so a missing index.html fails before pdf.pdf is
        // created or truncated.
        final InputStream html = new FileInputStream("index.html"); 
        try { 
            final FileOutputStream pdf = new FileOutputStream("pdf.pdf"); 
            try { 
                // step 1
                // (the former document.newPage() call was dropped: on a document
                // that has not been opened yet it does nothing)
                final Document document = new Document(); 
                // step 2
                final PdfWriter writer = PdfWriter.getInstance(document, pdf); 
                // step 3
                document.open(); 
                // step 4
                XMLWorkerHelper.getInstance().parseXHtml(writer, document, html); 
                // step 5
                document.close(); 
            } finally { 
                // Release the output file even when parsing or closing the document fails.
                pdf.close(); 
            } 
        } finally { 
            html.close(); 
        } 
        System.out.println("PDF Created!"); 
    } 
} 

我得到以下錯誤...

Exception in thread "main" ExceptionConverter: java.io.IOException: The document has no pages. 
at com.itextpdf.text.pdf.PdfPages.writePageTree(PdfPages.java:113) 
at com.itextpdf.text.pdf.PdfWriter.close(PdfWriter.java:1243) 
at com.itextpdf.text.pdf.PdfDocument.close(PdfDocument.java:849) 
at com.itextpdf.text.Document.close(Document.java:416) 
at App.main(App.java:64) 

請幫助我:如何使用iText將帶有圖像的HTML文件轉換爲pdf格式?如果HTML中沒有圖像,或者我把圖像路徑硬編碼進去,我就能夠成功轉換該HTML文件。在此先感謝

+2

可能重複的[iText - HTML到PDF - 圖像未在PDF中顯示](http://stackoverflow.com/questions/15273933/itext-html-to-pdf-image-is-not-displayed-in-pdf) – Keerthivasan

回答

0

如果您的pdf文檔中沒有任何內容,就會發生此異常。請嘗試像這樣傳入您的InputStream:

String str="<html> 

<body> 
<img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAD4AAABQCAMAAAB24TZcAAAABGdBTUEAANbY1E9YMgAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAGAUExURdSmeJp2SHlbQIRoSUg2J499a8KebqeHZuGufBEVJPz7+3NWPVxGMduwhPXEktnX1mtROLq7t5WDc2VMNv3LmKB8TMSidMbFxLGlmXlhSMSddpJUL+y8i3VlVqedlOzr6gUIF2lXRLCLY4ZyXLyYaYhtUYiJhJFyU1dBLLiVZnlwZrWRY/Hx8b+2rbySaJh9YqeooDw4NygnKvvJlpyblzksIUhGRryYckc7MPjGlKODX5x8VVA8K+azgM3FvDInHK2JW2ZbUOHh4Xt2cFpaWKeAUM6kel1RRJmUjo5vSrWzrJJ1WFhLQCQmMuK1iJiMgmthWPPCkOm3hEtBOunm5LCNXnJtZquEXmNkYvG+i7Ctq+y5hrWRbKqSeaN/WqmFVYFgQh8aGOa4isWkd8mcby4vONDNy0AwI5h2U19JMxkdLzIuL1JBMjQ3P5Z6Ve6/j93c2+Xi34KAfJ5/Xvj4+O/u7sSKVJd4Wo6QjXE+IeOwfQcNJoBeQ8Gdbf/Mmf///5GX6NEAAAcrSURBVHja3JbpX9pIGMchiWkgEaOBtaGinBLEyopFBeMqtYKI4kGt2lILFsUoXa3WdZcc/dd3JheHAvaz7/Z5Ec2Q7/yeaw7Lz/9klv8rfnM+Orz5cXLjZsL+67h9eCq9Vaxvzc6v3W6+/TX85kN6ixdokkQQCaE5vrg28Qv4a2yFQcpSi/HzH6efi+/UaEAwWAtepuvv3tw/B//hqZGQqDFSmyHC7v0z8EldlZQQEgTfMgF23h8/T+gEhQGrcQYrMBKVtvfDb4qU/j3DMK3SdIKWsNs++M1iS8R8W/gULyG1771w+/stQWpTpFpzByb09MRHEwaoxUxToGtaZiBrE72cXzMyhcDiIRgCHxJPIxKt5aF23gMf0iquz8BJmAAFpUStxvG0xIA3arcHPsvrJM1wvFTDeEGQeKCewCo1jgRDwKuJrrh9C3osIfyiz+NboZFKxU0xJEYmeJbBhPoKiKyMDXfHd0mJWSETnoKiKCmgSioFDKFr4T1lbn/fgkHf+PGu+A+A12imMqdAqzNUXlFCFP+gOD41CKJBcCB4bKSnOmitB5VWSgnMrSjhCnu8D1hoS1xP/KcH1BhZdGi4c4VNAh/I5PGyRjdQqje+A6YXPIpup/DhHlMUh44f1hAJ6x77z3OwVjG/0ml7Ot4gOWnxvkfbALw+2EnPGc43ojWk3qNt7hdpiSp0ajcMukHQPB/4o3vPf8TKQgc+pqXdkpEtgGewE7THel/j66dtdBLA1XAYRXK8AGbxC/6RHvjbCuOE0Kklk8lcg/+OicaJcOhfTflTVYCHuYvX3XH7QCxcUAol9i6VursLha+VfcLPHwamZjfSAgxi6QId6oFnC5awsjdoWYjFPrOlB3QONAtJjrwsetiq2jkzgfc9nPdklJBDyXvGj+Zf+jIKe7pPoNFoOHwyoyaQKFcD9z3wzbwSGnT6fCMB9u5UmWMLYwTJQo5QC2AB6r122ukBJeVWnA6HIwlLnp/bI/w5wI3tJR3LjcZMbvVzL/xHwOG+M6s2mFeSjRm0QRyDYnyCOEv/0fOYGM/vha4N3J1S5hoZhCAcYBro/AwV63NIjafuzL4rLSjOZYKeIT45j9XUnQTs/Y7Inbqp/pABeIPBqsTystr0/pd9T9jprZIGO9CHa4gTPHairxr/eP/rwai+YdzlWQfALSHu4qTxfHxiQKVTaBINvfCjDFo1Fmzjor/zP+0BNXdgxSTdqRe5w0bT2hq+293mdWDOSJ5DWbgwd4uGpSPxXW5WGzGddhYWHsDRguqpO5x9jjq4HY3BnjtcRRGGe/Xqn38YC6SraVt84jnX
wo0FgC8kOK7s+mv91St6RhVnZ72Vqeln4EM+cFY43SHgdj584c9ormdFbx3Jbk73v9PuvNCCvx67ntPzlmG2xUvUhQpZz9roxHdwXx4e7Yb/fdXc7o81PFcUxW2ry+Wy5miM4gQkEAh0uxKfXWbdLXs1XGxZURRnXZpZrVbXegT/rUvm571itnncQPctWZso2hAdd61GIzIuf32y5zduL0VxtwQPWG2vB7QP0OKKVaejOI7L8lP4+S3r+wY+zSZfGPvGPlFlt8FQ3BCPQPYpfOjWs3QHtMVLJqmU0NLe9XVhsBpOwyER0+D1oE534t8Hsn/KctwLokxUgeunD6FwCA2xMGtAPAdhjkr55afwoaksGpHlAKTnWUK9ZIAt15k/U+mK5voSuoI9Vre/fZPOBcFQKg4+PXsXg7urVra0Stvqmud4mTp4hN/s+lAIy8ErIC7Oz8aITzqegYkUL4tawQ+ivEvudP7Gt6SPpCpewJ8BfN+pb/aq71dG2kjayLuJ3/vC+gB+EBe9Xm/8KEQs67hShMmgIRsNylFuFe9UL1IGHXHNAtr77ZYN7htNB8LxJmCnyaBZULpJ6/g4ZZQCX83FAS1u3675xnTaX/GKFdLl+gIaDZeFpU78rS9oDnzZEmHstqPJKc9n90LJPThyBUZIVRtMv8Q1v9Xx8bzxigddWo1t7yZ//zgSCwRiK6CO0PUD2OR4hMnhHfiPtYiJr4a8Jj4MbHNe7UC4RtTfc5wsd+DD6RbxxTZ8chtkrcJGIlqX41GqTVzFp3wmfmCNi5rNT74Z3nwHi2BjZW11AtdzgvxIfSBl4l/Klzr+bfLvzSNYA1u9xTfmz8f4lLmA5HWfgV8eTa7BEohxox1xeZ1F5Ef4fTrYnL4oGjb7QZ3JVgk2W4KJPMZvmWbo9KWJ27QsXKHm3DkhJT/Gs6z55lo0abV5wCSL5txL/CMa4PYPUXN+5qwTj68aXwa5MP4Efj/VDA4TW3BV3PQMp7Wlgnfg555mcPFO8RbXMbXv8Oh6pG3J7IRM8bq3Q/zKLFqUQ3GteNYvbepG1XG57O0Qt9Hmd1bOKC1qbZH/zbK78FWzYMJ2aZoXPq7kr8ZvORr+iUSjJzQb/Gpa5l8BBgBZTppAyfsf0wAAAABJRU5ErkJggg==' width='62' height='80' style='float: left; margin-right: 28px;' alt="" /> 
<!-- <img src="add.png" alt="" /> --> 
</body> 
</html>" 

// Encode the HTML with an explicit charset instead of the platform default used by
// the no-argument getBytes(), and hand the same charset to the parser so the bytes
// are decoded the way they were encoded.
final java.nio.charset.Charset utf8 = java.nio.charset.Charset.forName("UTF-8"); 
InputStream is = new ByteArrayInputStream(str.getBytes(utf8)); 
XMLWorkerHelper.getInstance().parseXHtml(writer, document, is, utf8); 
+0

仍然得到與上述代碼相同的異常 – madas

4

你需要實現自定義圖像標籤處理器來處理嵌入在HTML中的圖像:

package com.example.itext.processor; 

import java.util.ArrayList; 
import java.util.List; 
import java.util.Map; 

import com.itextpdf.text.Chunk; 
import com.itextpdf.text.Element; 
import com.itextpdf.text.Image; 
import com.itextpdf.text.log.Level; 
import com.itextpdf.text.log.Logger; 
import com.itextpdf.text.log.LoggerFactory; 
import com.itextpdf.text.pdf.codec.Base64; 
import com.itextpdf.tool.xml.NoCustomContextException; 
import com.itextpdf.tool.xml.Tag; 
import com.itextpdf.tool.xml.WorkerContext; 
import com.itextpdf.tool.xml.exceptions.LocaleMessages; 
import com.itextpdf.tool.xml.exceptions.RuntimeWorkerException; 
import com.itextpdf.tool.xml.html.HTML; 
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext; 

/**
 * {@code <img>} tag processor that handles images embedded in the {@code src}
 * attribute as a {@code data:image/...} URI. Every other non-empty {@code src}
 * is passed on to the inherited image processor.
 */
public class ImageTagProcessor extends com.itextpdf.tool.xml.html.Image {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Builds the elements for a closing {@code <img>} tag.
     *
     * @param ctx the worker context used to look up the HTML pipeline context
     * @param tag the tag being closed; its {@code src} attribute is inspected
     * @param currentContent content collected for the tag, forwarded unchanged
     *        to the superclass when this processor does not handle the image
     * @return an empty list when {@code src} is missing or empty; a single
     *         styled chunk wrapping the decoded image when {@code src} is a
     *         decodable {@code data:image/} URI; otherwise whatever the
     *         superclass returns
     * @throws RuntimeWorkerException if the HTML pipeline context is not available
     */
    @Override
    public List<Element> end(final WorkerContext ctx, final Tag tag, final List<Element> currentContent) {
        final String src = tag.getAttributes().get(HTML.Attribute.SRC);
        if (src == null || src.length() == 0) {
            return new ArrayList<Element>(1);
        }

        final Image embedded = src.startsWith("data:image/") ? decodeDataUri(src) : null;
        if (embedded == null) {
            // Not a data URI, or its payload could not be decoded.
            return super.end(ctx, tag, currentContent);
        }

        final List<Element> result = new ArrayList<Element>(1);
        try {
            final HtmlPipelineContext pipelineContext = getHtmlPipelineContext(ctx);
            // CSS is applied twice: once to the image, then to the chunk wrapping it.
            final Image styled = (com.itextpdf.text.Image) getCssAppliers().apply(embedded, tag, pipelineContext);
            result.add(getCssAppliers().apply(new Chunk(styled, 0, 0, true), tag, pipelineContext));
        } catch (NoCustomContextException e) {
            throw new RuntimeWorkerException(e);
        }
        return result;
    }

    /**
     * Decodes the part of a data URI that follows the first comma into an image.
     *
     * @param src the complete {@code src} attribute value
     * @return the decoded image, or {@code null} when decoding fails (the
     *         failure is logged at ERROR level if that level is enabled)
     */
    private Image decodeDataUri(final String src) {
        final String payload = src.substring(src.indexOf(",") + 1);
        try {
            return Image.getInstance(Base64.decode(payload));
        } catch (Exception e) {
            if (logger.isLogging(Level.ERROR)) {
                logger.error(String.format(LocaleMessages.getInstance().getMessage(LocaleMessages.HTML_IMG_RETRIEVE_FAIL), src), e);
            }
            return null;
        }
    }
}

下面的代碼註冊自定義圖像標籤處理器,並將一個HTML文檔轉換爲PDF

/**
 * Program entry point: runs the HTML-to-PDF conversion once.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    convertHtmlToPdf();
}

/**
 * Converts {@code C:\test.html} to {@code C:\Test.pdf} using an XMLWorker
 * pipeline in which the default {@code <img>} processor is replaced by
 * {@link ImageTagProcessor}.
 *
 * Any failure is printed to standard error and not rethrown. Both file
 * streams are closed whether or not the conversion succeeds.
 */
private static void convertHtmlToPdf() {
    OutputStream file = null;
    InputStream is = null;
    try {
        file = new FileOutputStream(new File("C:\\Test.pdf"));
        final Document document = new Document();
        final PdfWriter writer = PdfWriter.getInstance(document, file);
        document.open();

        // Swap the stock <img> processor for the custom one.
        final TagProcessorFactory tagProcessorFactory = Tags.getHtmlTagProcessorFactory();
        tagProcessorFactory.removeProcessor(HTML.Tag.IMG);
        tagProcessorFactory.addProcessor(new ImageTagProcessor(), HTML.Tag.IMG);

        final CssFilesImpl cssFiles = new CssFilesImpl();
        cssFiles.add(XMLWorkerHelper.getInstance().getDefaultCSS());
        final StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver(cssFiles);
        final HtmlPipelineContext hpc = new HtmlPipelineContext(new CssAppliersImpl(new XMLWorkerFontProvider()));
        hpc.setAcceptUnknown(true).autoBookmark(true).setTagFactory(tagProcessorFactory);

        // Pipeline order: CSS resolution -> HTML processing -> PDF writing.
        final HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(document, writer));
        final Pipeline<?> pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
        final XMLWorker worker = new XMLWorker(pipeline, true);
        final Charset charset = Charset.forName("UTF-8");
        final XMLParser xmlParser = new XMLParser(true, worker, charset);

        is = new FileInputStream("C:\\test.html");
        xmlParser.parse(is, charset);

        // Kept inside the try block so that a failure while closing the document
        // is reported by the catch below instead of escaping the method.
        document.close();
    } catch (Exception e) {
        // Report and continue, as before; callers are never given an exception.
        e.printStackTrace();
    } finally {
        // Previously the streams were closed only on the success path.
        closeQuietly(is);
        closeQuietly(file);
    }
}

/** Closes {@code resource} if it is non-null, printing (not propagating) any close failure. */
private static void closeQuietly(final java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (java.io.IOException e) {
        e.printStackTrace();
    }
}