2011-08-18 184 views
15

我想讀一個字的文件在Java如何在Java中讀取Doc或Docx文件?

import org.apache.poi.poifs.filesystem.*; 
import org.apache.poi.hpsf.DocumentSummaryInformation; 
import org.apache.poi.hwpf.*; 
import org.apache.poi.hwpf.extractor.*; 
import org.apache.poi.hwpf.usermodel.HeaderStories; 

import java.io.*; 

public class ReadDocFileFromJava { 

    public static void main(String[] args) { 
     /**This is the document that you want to read using Java.**/ 
     String fileName = "C:\\Path to file\\Test.doc"; 

     /**Method call to read the document (demonstrate some useage of POI)**/ 
     readMyDocument(fileName); 

    } 
    public static void readMyDocument(String fileName){ 
     POIFSFileSystem fs = null; 
     try { 
      fs = new POIFSFileSystem(new FileInputStream(fileName)); 
      HWPFDocument doc = new HWPFDocument(fs); 

      /** Read the content **/ 
      readParagraphs(doc); 

      int pageNumber=1; 

      /** We will try reading the header for page 1**/ 
      readHeader(doc, pageNumber); 

      /** Let's try reading the footer for page 1**/ 
      readFooter(doc, pageNumber); 

      /** Read the document summary**/ 
      readDocumentSummary(doc); 

     } catch (Exception e) { 
      e.printStackTrace(); 
     } 
    } 

    public static void readParagraphs(HWPFDocument doc) throws Exception{ 
     WordExtractor we = new WordExtractor(doc); 

     /**Get the total number of paragraphs**/ 
     String[] paragraphs = we.getParagraphText(); 
     System.out.println("Total Paragraphs: "+paragraphs.length); 

     for (int i = 0; i < paragraphs.length; i++) { 

      System.out.println("Length of paragraph "+(i +1)+": "+ paragraphs[i].length()); 
      System.out.println(paragraphs[i].toString()); 

     } 

    } 

    public static void readHeader(HWPFDocument doc, int pageNumber){ 
     HeaderStories headerStore = new HeaderStories(doc); 
     String header = headerStore.getHeader(pageNumber); 
     System.out.println("Header Is: "+header); 

    } 

    public static void readFooter(HWPFDocument doc, int pageNumber){ 
     HeaderStories headerStore = new HeaderStories(doc); 
     String footer = headerStore.getFooter(pageNumber); 
     System.out.println("Footer Is: "+footer); 

    } 

    public static void readDocumentSummary(HWPFDocument doc) { 
     DocumentSummaryInformation summaryInfo=doc.getDocumentSummaryInformation(); 
     String category = summaryInfo.getCategory(); 
     String company = summaryInfo.getCompany(); 
     int lineCount=summaryInfo.getLineCount(); 
     int sectionCount=summaryInfo.getSectionCount(); 
     int slideCount=summaryInfo.getSlideCount(); 


    enter code here 
     System.out.println("---------------------------"); 
     System.out.println("Category: "+category); 
     System.out.println("Company: "+company); 
     System.out.println("Line Count: "+lineCount); 
     System.out.println("Section Count: "+sectionCount); 
     System.out.println("Slide Count: "+slideCount); 

    } 

} 

http://sanjaal.com/java/tag/java-and-docx-format/

我想讀一個DOC或DOCX文件中的Java

+1

你還沒有真的在這裏問過一個問題。沒有進一步的細節,這個問題可能會被關閉。您的目標是什麼(查看,處理,編輯,打印)?你試過什麼了?什麼不起作用?你有錯誤嗎? – OverZealous

回答

19

這裏是ReadDoc/docx.java的代碼:這將讀取dox/docx文件並將其內容打印到控制檯。您可以按自己的方式自定義它。

import java.io.*; 
import org.apache.poi.hwpf.HWPFDocument; 
import org.apache.poi.hwpf.extractor.WordExtractor; 

public class ReadDocFile 
{ 
    public static void main(String[] args) 
    { 
     File file = null; 
     WordExtractor extractor = null; 
     try 
     { 

      file = new File("c:\\New.doc"); 
      FileInputStream fis = new FileInputStream(file.getAbsolutePath()); 
      HWPFDocument document = new HWPFDocument(fis); 
      extractor = new WordExtractor(document); 
      String[] fileData = extractor.getParagraphText(); 
      for (int i = 0; i < fileData.length; i++) 
      { 
       if (fileData[i] != null) 
        System.out.println(fileData[i]); 
      } 
     } 
     catch (Exception exep) 
     { 
      exep.printStackTrace(); 
     } 
    } 
} 
+9

您可能想要對該例外進行一些操作。在這種情況下, –

+1

字提取器只能給出doc文件的文本。 它甚至沒有提及段落開始或結束的位置...... – Wolverine

+1

除了文本(圖像,條形碼,..)之外的內容如何。你能否進一步編輯代碼來讀取完整的數據? –

相關問題