2013-03-08 84 views
0
Directory directory = FSDirectory.open(indexDir); 
     IndexReader reader = DirectoryReader.open(directory); 
     IndexSearcher searcher = new IndexSearcher(reader); 
     Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41); 

     QueryParser parser = new QueryParser(Version.LUCENE_41, "contents", analyzer); 
     Query query = parser.parse(queryStr); 
     System.out.println("Searching for: " + query.toString("contents")); 
     TopDocs results = searcher.search(query, maxHits); 

     ScoreDoc[] hits = results.scoreDocs; 
     int numTotalHits = results.totalHits; 

     System.out.println("\n\n\n-----------------------Results--------------------------\n\n\n"); 
     System.out.println(numTotalHits + " total matching documents"); 


     for (int i = 0; i < hits.length; i++) { 
      int docId = hits[i].doc; 
      Document d = searcher.doc(docId); 
      System.out.println(i+":File name is"+d.get("filename")); 
     } 

     System.out.println("Found " + hits.length); 

我在搜索模塊中使用了上述代碼。現在代碼可以正常運行,但 Apache Lucene 搜索代碼打印出了 null,我得到的輸出如下:

390:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2012-12-31.txt 
391:File name isnull 
392:File name isnull 
393:File name isnull 
394:File name isnull 
395:File name isnull 
396:File name isnull 
397:File name isnull 
398:File name isnull 
399:File name isnull 
400:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-09.txt 
401:File name isnull 
402:File name isnull 
403:File name isnull 
404:File name isnull 
405:File name isnull 
406:File name isnull 
407:File name isnull 
408:File name isnull 
409:File name isnull 
410:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-10.txt 

我在這裏只想打印包含查詢字符串的文件的文件名,但我得到了太多的結果,而且大多數結果的文件名都是 null。爲什麼會這樣?

建立索引時我使用了以下代碼:

import java.io.BufferedReader; 
import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileReader; 
import java.io.IOException; 
import java.io.InputStreamReader; 
import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.standard.StandardAnalyzer; 
import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; 
import org.apache.lucene.document.LongField; 
import org.apache.lucene.document.StringField; 
import org.apache.lucene.document.TextField; 
import org.apache.lucene.index.IndexWriter; 
import org.apache.lucene.index.IndexWriterConfig; 
import org.apache.lucene.index.IndexWriterConfig.OpenMode; 
import org.apache.lucene.index.Term; 
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.FSDirectory; 
import org.apache.lucene.util.Version; 



public class SimpleFileIndexer { 

    public static void main() throws Exception { 

     File dataDir = new File("/home/maclean/Installations/apache-tomcat-7.0.21/logs"); 
     File indexDir = new File("/home/maclean/NetBeansProjects/LogSearchEngine/Result"); 

     SimpleFileIndexer indexer = new SimpleFileIndexer(); 

     int numIndex = indexer.index(indexDir, dataDir); 

     System.out.println("Total files indexed " + numIndex); 

    } 

    private int index(File indexDir, File dataDir) throws Exception { 
    // API and code to convert text into indexable/searchable tokens. 
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41); 
    //To store an index on disk 
    Directory directory = FSDirectory.open(indexDir); 
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer); 
     int numIndexed; 
     try (IndexWriter indexWriter = new IndexWriter(directory, config)) { 
      indexDirectory(indexWriter, dataDir); 
      numIndexed = indexWriter.maxDoc(); 
      indexWriter.close(); 

     } 

     return numIndexed; 


    } 

    private void indexDirectory(IndexWriter indexWriter, File dataDir) throws IOException { 

     File[] files = dataDir.listFiles(); 
     for (int i = 0; i < files.length; i++) { 
      File f = files[i]; 
      if (f.isDirectory()) { 
       indexDirectory(indexWriter, f); 
      } 
      else { 
       indexFileWithIndexWriter(indexWriter, f); 
      } 
     } 

    } 

    private void indexFileWithIndexWriter(IndexWriter indexWriter, File file) throws IOException { 

     FileInputStream fis = null; 
     if (file.isHidden() || file.isDirectory() || !file.canRead() || !file.exists()) { 
      return; 
     } 

     System.out.println("Indexing file " + file.getCanonicalPath()); 

     try { 
      fis = new FileInputStream(file); 
     } catch (FileNotFoundException fnfe) { 
      System.out.println("File Not Found"+fnfe); 

     } 

     Document doc = new Document(); 
     doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); 
     doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); 

     if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE) { 
      // New index, so we just add the document (no old document can be there): 
      System.out.println("adding " + file); 
      indexWriter.addDocument(doc); 
     } else { 
      // Existing index (an old copy of this document may have been indexed) so 
     // we use updateDocument instead to replace the old one matching the exact 
      // path, if present: 
      System.out.println("updating " + file); 
      indexWriter.updateDocument(new Term("path", file.getPath()), doc); 
      } 


     fis.close(); 




    } 

}* 
+0

我建議用 [Luke](https://code.google.com/p/luke/) 打開您的索引,它會顯示索引的內容。也許對於您查詢到的那些文檔,filename 字段沒有設置或沒有被存儲? – mindas 2013-03-08 09:45:46

+0

是否可以使用lucene打印行號以及整個行 – 2013-03-08 09:53:43

回答

1

命中數組比numTotalHits長,所以你的for循環的限制應該是numTotalHits代替hits.length。

+0

我仍然可以得到相同的結果 – 2013-03-08 10:15:11