
I'm looking for a way to do query auto-completion/suggestions in Lucene. I've searched around a bit and played with it a little, but all of the examples I've seen seem to be setting up filters in Solr. We don't use Solr and aren't planning to move to it in the near future, and Solr is obviously just a wrapper around Lucene anyway, so I imagine there must be a way to do it! How do I do query auto-completion/suggestions in Lucene?

I've looked into using EdgeNGramFilter, and I realise that I'd have to run the filter on the indexed fields, get the tokens out, and then compare them against the query that was typed in... I'm just struggling to turn the connection between the two into a bit of code, so help is much appreciated!

To be clear about what I'm looking for (I realise I wasn't being overly clear, sorry): I'm looking for a solution where, when searching for a term, it returns a list of suggested queries. When typing 'inter' into the search field, it would come back with a list of suggested queries such as 'internet', 'international', etc.
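For illustration, the EdgeNGramFilter idea above boils down to indexing every prefix of a term and then matching the typed text against those prefixes as an exact term query. A minimal, Lucene-free sketch of that step (class and method names are purely illustrative):

import java.util.ArrayList;
import java.util.List;

// Illustrative only: shows what an edge n-gram filter produces and how a
// typed prefix matches one of the generated grams. Not part of Lucene.
public class EdgeGramDemo {

    // Front edge n-grams of a word, e.g. "internet" -> "i", "in", "int", ...
    static List<String> edgeGrams(String word, int minGram, int maxGram) {
        List<String> grams = new ArrayList<String>();
        for (int len = minGram; len <= Math.min(maxGram, word.length()); len++) {
            grams.add(word.substring(0, len));
        }
        return grams;
    }

    public static void main(String[] args) {
        // At index time each term is expanded into its edge n-grams...
        List<String> grams = edgeGrams("internet", 1, 20);
        // ...and at query time the typed text is matched as an exact term.
        System.out.println(grams.contains("inter")); // prints: true
    }
}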


Lucene now has code specifically for autocomplete/suggestions. See http://stackoverflow.com/questions/24968697/how-to-implements-auto-suggest-using-lucenes-new-analyzinginfixsuggester-api/25301811#25301811 for an answer describing how to use it. – 2014-08-14 06:59:48
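For reference, a rough sketch of how that suggester can be used (written against a Lucene 4.x release; the AnalyzingInfixSuggester constructor and package layout changed between 4.x releases, and the paths and field name here are only placeholders, so treat this as an outline rather than a drop-in):

import java.io.File;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class InfixSuggestDemo {
    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);

        // The suggester keeps its own index; build it from the terms of an
        // existing index field (paths and field name are placeholders).
        AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(
                Version.LUCENE_48,
                FSDirectory.open(new File("/index/suggest")),
                analyzer);
        DirectoryReader reader = DirectoryReader.open(
                FSDirectory.open(new File("/index/live")));
        suggester.build(new LuceneDictionary(reader, "content"));

        // Look up completions for a partial term.
        List<Lookup.LookupResult> results = suggester.lookup("inter", false, 5);
        for (Lookup.LookupResult result : results) {
            System.out.println(result.key);
        }
        reader.close();
        suggester.close();
    }
}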

Answers


Based on @Alexandre Victoor's answer, I wrote a little class based on the Lucene spellchecker in the contrib package (and using the LuceneDictionary included in it) that does exactly what I want.

It allows re-indexing from a single source index with a single field, and provides suggestions for terms. Results are sorted by the number of documents in the original index that match the term, so more popular terms appear first. Seems to work pretty well :)

import java.io.IOException; 
import java.io.Reader; 
import java.util.ArrayList; 
import java.util.HashMap; 
import java.util.Iterator; 
import java.util.List; 
import java.util.Map; 

import org.apache.lucene.analysis.Analyzer; 
import org.apache.lucene.analysis.ISOLatin1AccentFilter; 
import org.apache.lucene.analysis.LowerCaseFilter; 
import org.apache.lucene.analysis.StopFilter; 
import org.apache.lucene.analysis.TokenStream; 
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; 
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side; 
import org.apache.lucene.analysis.standard.StandardFilter; 
import org.apache.lucene.analysis.standard.StandardTokenizer; 
import org.apache.lucene.document.Document; 
import org.apache.lucene.document.Field; 
import org.apache.lucene.index.CorruptIndexException; 
import org.apache.lucene.index.IndexReader; 
import org.apache.lucene.index.IndexWriter; 
import org.apache.lucene.index.Term; 
import org.apache.lucene.search.IndexSearcher; 
import org.apache.lucene.search.Query; 
import org.apache.lucene.search.ScoreDoc; 
import org.apache.lucene.search.Sort; 
import org.apache.lucene.search.TermQuery; 
import org.apache.lucene.search.TopDocs; 
import org.apache.lucene.search.spell.LuceneDictionary; 
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.FSDirectory; 

/** 
* Search term auto-completer, works for single terms (so use on the last term 
* of the query). 
* <p> 
* Returns more popular terms first. 
* 
* @author Mat Mannion, [email protected] 
*/ 
public final class Autocompleter { 

    private static final String GRAMMED_WORDS_FIELD = "words"; 

    private static final String SOURCE_WORD_FIELD = "sourceWord"; 

    private static final String COUNT_FIELD = "count"; 

    private static final String[] ENGLISH_STOP_WORDS = { 
    "a", "an", "and", "are", "as", "at", "be", "but", "by", 
    "for", "i", "if", "in", "into", "is", 
    "no", "not", "of", "on", "or", "s", "such", 
    "t", "that", "the", "their", "then", "there", "these", 
    "they", "this", "to", "was", "will", "with" 
    }; 

    private final Directory autoCompleteDirectory; 

    private IndexReader autoCompleteReader; 

    private IndexSearcher autoCompleteSearcher; 

    public Autocompleter(String autoCompleteDir) throws IOException { 
     this.autoCompleteDirectory = FSDirectory.getDirectory(autoCompleteDir, 
       null); 

     reOpenReader(); 
    } 

    public List<String> suggestTermsFor(String term) throws IOException { 
     // get the top 5 terms for query 
     Query query = new TermQuery(new Term(GRAMMED_WORDS_FIELD, term)); 
     Sort sort = new Sort(COUNT_FIELD, true); 

     TopDocs docs = autoCompleteSearcher.search(query, null, 5, sort); 
     List<String> suggestions = new ArrayList<String>(); 
     for (ScoreDoc doc : docs.scoreDocs) { 
      suggestions.add(autoCompleteReader.document(doc.doc).get(
        SOURCE_WORD_FIELD)); 
     } 

     return suggestions; 
    } 

    @SuppressWarnings("unchecked") 
    public void reIndex(Directory sourceDirectory, String fieldToAutocomplete) 
      throws CorruptIndexException, IOException { 
     // build a dictionary (from the spell package) 
     IndexReader sourceReader = IndexReader.open(sourceDirectory); 

     LuceneDictionary dict = new LuceneDictionary(sourceReader, 
       fieldToAutocomplete); 

     // code from 
     // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
     // Dictionary) 
     IndexReader.unlock(autoCompleteDirectory); 

     // use a custom analyzer so we can do EdgeNGramFiltering 
     IndexWriter writer = new IndexWriter(autoCompleteDirectory, 
     new Analyzer() { 
      public TokenStream tokenStream(String fieldName, 
        Reader reader) { 
       TokenStream result = new StandardTokenizer(reader); 

       result = new StandardFilter(result); 
       result = new LowerCaseFilter(result); 
       result = new ISOLatin1AccentFilter(result); 
       result = new StopFilter(result, 
        ENGLISH_STOP_WORDS); 
       result = new EdgeNGramTokenFilter(
        result, Side.FRONT,1, 20); 

       return result; 
      } 
     }, true); 

     writer.setMergeFactor(300); 
     writer.setMaxBufferedDocs(150); 

     // go through every word, storing the original word (incl. n-grams) 
     // and the number of times it occurs 
     Map<String, Integer> wordsMap = new HashMap<String, Integer>(); 

     Iterator<String> iter = (Iterator<String>) dict.getWordsIterator(); 
     while (iter.hasNext()) { 
      String word = iter.next(); 

      int len = word.length(); 
      if (len < 3) { 
       continue; // too short we bail but "too long" is fine... 
      } 

      if (wordsMap.containsKey(word)) { 
       throw new IllegalStateException(
         "This should never happen in Lucene 2.3.2"); 
       // wordsMap.put(word, wordsMap.get(word) + 1); 
      } else { 
       // use the number of documents this word appears in 
       wordsMap.put(word, sourceReader.docFreq(new Term(
         fieldToAutocomplete, word))); 
      } 
     } 

     for (String word : wordsMap.keySet()) { 
      // ok index the word 
      Document doc = new Document(); 
      doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, 
        Field.Index.UN_TOKENIZED)); // orig term 
      doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, 
        Field.Index.TOKENIZED)); // grammed 
      doc.add(new Field(COUNT_FIELD, 
        Integer.toString(wordsMap.get(word)), Field.Store.NO, 
        Field.Index.UN_TOKENIZED)); // count 

      writer.addDocument(doc); 
     } 

     sourceReader.close(); 

     // close writer 
     writer.optimize(); 
     writer.close(); 

     // re-open our reader 
     reOpenReader(); 
    } 

    private void reOpenReader() throws CorruptIndexException, IOException { 
     if (autoCompleteReader == null) { 
      autoCompleteReader = IndexReader.open(autoCompleteDirectory); 
     } else { 
      autoCompleteReader.reopen(); 
     } 

     autoCompleteSearcher = new IndexSearcher(autoCompleteReader); 
    } 

    public static void main(String[] args) throws Exception { 
     Autocompleter autocomplete = new Autocompleter("/index/autocomplete"); 

     // run this to re-index from the current index, shouldn't need to do 
     // this very often 
     // autocomplete.reIndex(FSDirectory.getDirectory("/index/live", null), 
     // "content"); 

     String term = "steve"; 

     System.out.println(autocomplete.suggestTermsFor(term)); 
     // prints [steve, steven, stevens, stevenson, stevenage] 
    } 

} 

Note that this was written for an older version of Lucene. In the current version (4.4.0), the abstract method to implement on the Analyzer class is createComponents(String fieldName, Reader reader). See http://lucene.apache.org/core/4_4_0/core/org/apache/lucene/analysis/Analyzer.html – Casper 2013-07-24 12:49:30
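For reference, a minimal sketch of that createComponents override against the 4.4 API (only the tokenizer, standard filter and lower-casing are shown; the remaining filters from the answer chain on in the same way):

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

// Sketch for Lucene 4.4: Analyzer subclasses override createComponents
// instead of tokenStream, and most filters take a Version argument.
final class AutocompleteAnalyzer44 extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new StandardTokenizer(Version.LUCENE_44, reader);
        TokenStream result = new StandardFilter(Version.LUCENE_44, source);
        result = new LowerCaseFilter(Version.LUCENE_44, result);
        // ...chain ASCIIFoldingFilter, StopFilter and EdgeNGramTokenFilter here
        // exactly as in the original tokenStream() implementation...
        return new TokenStreamComponents(source, result);
    }
}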


You can use the PrefixQuery class on a "dictionary" index. The LuceneDictionary class could also be helpful.

Take a look at this article. It explains how to implement the "Did you mean?" feature found in modern search engines such as Google. You may not need anything as complex as what's described there, but the article does explain how to use the Lucene spell package.

One way to build the "dictionary" index is to iterate over a LuceneDictionary.
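A rough sketch of the PrefixQuery approach against such a dictionary index (written against the later Lucene 4.x API for concreteness; the index path and the "word" field name are assumptions):

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class PrefixSuggestDemo {
    public static void main(String[] args) throws Exception {
        // "dictionary" index with one document per term, stored in a "word" field
        DirectoryReader reader = DirectoryReader.open(
                FSDirectory.open(new File("/index/dictionary")));
        IndexSearcher searcher = new IndexSearcher(reader);

        // Everything starting with "inter" -> internet, international, ...
        PrefixQuery query = new PrefixQuery(new Term("word", "inter"));
        TopDocs docs = searcher.search(query, 10);
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            System.out.println(searcher.doc(scoreDoc.doc).get("word"));
        }
        reader.close();
    }
}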

Hope it helps.


This is a textbook example of why link-only answers aren't good answers: that link has now rotted. – 2015-11-16 21:23:12


Here is a transliteration of Mat's implementation into C# for Lucene.NET, together with a snippet for wiring up a text box using jQuery's autocomplete feature.

<input id="search-input" name="query" placeholder="Search database." type="text" /> 

...the jQuery autocomplete:

// don't navigate away from the field when pressing tab on a selected item 
$("#search-input").keydown(function (event) { 
    if (event.keyCode === $.ui.keyCode.TAB && $(this).data("autocomplete").menu.active) { 
     event.preventDefault(); 
    } 
}); 

$("#search-input").autocomplete({ 
    source: '@Url.Action("SuggestTerms")', // <-- ASP.NET MVC Razor syntax 
    minLength: 2, 
    delay: 500, 
    focus: function() { 
     // prevent value inserted on focus 
     return false; 
    }, 
    select: function (event, ui) { 
     var terms = this.value.split(/\s+/); 
     terms.pop(); // remove dropdown item 
     terms.push(ui.item.value.trim()); // add completed item 
     this.value = terms.join(" "); 
     return false; 
    }, 
}); 

...and here's the ASP.NET MVC controller code:

// 
    // GET: /MyApp/SuggestTerms?term=something 
    public JsonResult SuggestTerms(string term) 
    { 
     if (string.IsNullOrWhiteSpace(term)) 
      return Json(new string[] {}); 

     term = term.Split().Last(); 

     // Fetch suggestions 
     string[] suggestions = SearchSvc.SuggestTermsFor(term).ToArray(); 

     return Json(suggestions, JsonRequestBehavior.AllowGet); 
    } 

...and here's Mat's code in C#:

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Text; 
using Lucene.Net.Store; 
using Lucene.Net.Index; 
using Lucene.Net.Search; 
using SpellChecker.Net.Search.Spell; 
using Lucene.Net.Analysis; 
using Lucene.Net.Analysis.Standard; 
using Lucene.Net.Analysis.NGram; 
using Lucene.Net.Documents; 

namespace Cipher.Services 
{ 
    /// <summary> 
    /// Search term auto-completer, works for single terms (so use on the last term of the query). 
    /// Returns more popular terms first. 
    /// <br/> 
    /// Author: Mat Mannion, [email protected] 
    /// <seealso cref="http://stackoverflow.com/questions/120180/how-to-do-query-auto-completion-suggestions-in-lucene"/> 
    /// </summary> 
    /// 
    public class SearchAutoComplete { 

     public int MaxResults { get; set; } 

     private class AutoCompleteAnalyzer : Analyzer 
     { 
      public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader) 
      { 
       TokenStream result = new StandardTokenizer(kLuceneVersion, reader); 

       result = new StandardFilter(result); 
       result = new LowerCaseFilter(result); 
       result = new ASCIIFoldingFilter(result); 
       result = new StopFilter(false, result, StopFilter.MakeStopSet(kEnglishStopWords)); 
       result = new EdgeNGramTokenFilter(
        result, Lucene.Net.Analysis.NGram.EdgeNGramTokenFilter.DEFAULT_SIDE,1, 20); 

       return result; 
      } 
     } 

     private static readonly Lucene.Net.Util.Version kLuceneVersion = Lucene.Net.Util.Version.LUCENE_29; 

     private static readonly String kGrammedWordsField = "words"; 

     private static readonly String kSourceWordField = "sourceWord"; 

     private static readonly String kCountField = "count"; 

     private static readonly String[] kEnglishStopWords = { 
      "a", "an", "and", "are", "as", "at", "be", "but", "by", 
      "for", "i", "if", "in", "into", "is", 
      "no", "not", "of", "on", "or", "s", "such", 
      "t", "that", "the", "their", "then", "there", "these", 
      "they", "this", "to", "was", "will", "with" 
     }; 

     private readonly Directory m_directory; 

     private IndexReader m_reader; 

     private IndexSearcher m_searcher; 

     public SearchAutoComplete(string autoCompleteDir) : 
      this(FSDirectory.Open(new System.IO.DirectoryInfo(autoCompleteDir))) 
     { 
     } 

     public SearchAutoComplete(Directory autoCompleteDir, int maxResults = 8) 
     { 
      this.m_directory = autoCompleteDir; 
      MaxResults = maxResults; 

      ReplaceSearcher(); 
     } 

     /// <summary> 
     /// Find terms matching the given partial word that appear in the highest number of documents.</summary> 
     /// <param name="term">A word or part of a word</param> 
     /// <returns>A list of suggested completions</returns> 
     public IEnumerable<String> SuggestTermsFor(string term) 
     { 
      if (m_searcher == null) 
       return new string[] { }; 

      // get the top terms for query 
      Query query = new TermQuery(new Term(kGrammedWordsField, term.ToLower())); 
      Sort sort = new Sort(new SortField(kCountField, SortField.INT)); 

      TopDocs docs = m_searcher.Search(query, null, MaxResults, sort); 
      string[] suggestions = docs.ScoreDocs.Select(doc => 
       m_reader.Document(doc.Doc).Get(kSourceWordField)).ToArray(); 

      return suggestions; 
     } 


     /// <summary> 
     /// Open the index in the given directory and create a new index of word frequency for the 
     /// given index.</summary> 
     /// <param name="sourceDirectory">Directory containing the index to count words in.</param> 
     /// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param> 
     public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete) 
     { 
      // build a dictionary (from the spell package) 
      using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true)) 
      { 
       LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete); 

       // code from 
       // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
       // Dictionary) 
       //IndexWriter.Unlock(m_directory); 

       // use a custom analyzer so we can do EdgeNGramFiltering 
       var analyzer = new AutoCompleteAnalyzer(); 
       using (var writer = new IndexWriter(m_directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED)) 
       { 
        writer.MergeFactor = 300; 
        writer.SetMaxBufferedDocs(150); 

        // go through every word, storing the original word (incl. n-grams) 
        // and the number of times it occurs 
        foreach (string word in dict) 
        { 
         if (word.Length < 3) 
          continue; // too short we bail but "too long" is fine... 

         // ok index the word 
         // use the number of documents this word appears in 
         int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word)); 
         var doc = MakeDocument(fieldToAutocomplete, word, freq); 

         writer.AddDocument(doc); 
        } 

        writer.Optimize(); 
       } 

      } 

      // re-open our reader 
      ReplaceSearcher(); 
     } 

     private static Document MakeDocument(String fieldToAutocomplete, string word, int frequency) 
     { 
      var doc = new Document(); 
      doc.Add(new Field(kSourceWordField, word, Field.Store.YES, 
        Field.Index.NOT_ANALYZED)); // orig term 
      doc.Add(new Field(kGrammedWordsField, word, Field.Store.YES, 
        Field.Index.ANALYZED)); // grammed 
      doc.Add(new Field(kCountField, 
        frequency.ToString(), Field.Store.NO, 
        Field.Index.NOT_ANALYZED)); // count 
      return doc; 
     } 

     private void ReplaceSearcher() 
     { 
      if (IndexReader.IndexExists(m_directory)) 
      { 
       if (m_reader == null) 
        m_reader = IndexReader.Open(m_directory, true); 
       else 
        m_reader.Reopen(); 

       m_searcher = new IndexSearcher(m_reader); 
      } 
      else 
      { 
       m_searcher = null; 
      } 
     } 


    } 
} 

Could you add a C# driver code snippet that exercises this code, as well as code to build the index? I can get your code to compile fine, but I'm struggling to figure out how to build my Directory so that it can be queried by the code above. – erik 2012-03-06 21:17:11


Does the directory need to have been indexed in a particular way beforehand? Can I run this against an index created with a Snowball analyzer, or should I use a field that isn't analyzed at all? (Asking the same question as above.) – NSjonas 2012-10-05 00:33:48


Any example using Java, Solr and jQuery or JavaScript? – Juhan 2015-04-10 10:32:08


In addition to the (much appreciated) post above on the C# conversion: if you're using .NET 3.5 you'll need to include the code for the EdgeNGramTokenFilter yourself, or at least I did with Lucene.NET 2.9.2, since this filter is missing from the .NET version as far as I can tell. I had to find the .NET 4 version online in 2.9.3 and back-port it; hopefully this makes the process less painful for someone...

Edit: also note that the array returned by the SuggestTermsFor() function is sorted by count ascending; you'll probably want to reverse it so that the most popular terms come first in your list.

using System.IO; 
using System.Collections; 
using Lucene.Net.Analysis; 
using Lucene.Net.Analysis.Tokenattributes; 
using Lucene.Net.Util; 

namespace Lucene.Net.Analysis.NGram 
{ 

/** 
* Tokenizes the given token into n-grams of given size(s). 
* <p> 
* This {@link TokenFilter} create n-grams from the beginning edge or ending edge of a input token. 
* </p> 
*/ 
public class EdgeNGramTokenFilter : TokenFilter 
{ 
    public static Side DEFAULT_SIDE = Side.FRONT; 
    public static int DEFAULT_MAX_GRAM_SIZE = 1; 
    public static int DEFAULT_MIN_GRAM_SIZE = 1; 

    // Replace this with an enum when the Java 1.5 upgrade is made, the impl will be simplified 
    /** Specifies which side of the input the n-gram should be generated from */ 
    public class Side 
    { 
     private string label; 

     /** Get the n-gram from the front of the input */ 
     public static Side FRONT = new Side("front"); 

     /** Get the n-gram from the end of the input */ 
     public static Side BACK = new Side("back"); 

     // Private ctor 
     private Side(string label) { this.label = label; } 

     public string getLabel() { return label; } 

     // Get the appropriate Side from a string 
     public static Side getSide(string sideName) 
     { 
      if (FRONT.getLabel().Equals(sideName)) 
      { 
       return FRONT; 
      } 
      else if (BACK.getLabel().Equals(sideName)) 
      { 
       return BACK; 
      } 
      return null; 
     } 
    } 

    private int minGram; 
    private int maxGram; 
    private Side side; 
    private char[] curTermBuffer; 
    private int curTermLength; 
    private int curGramSize; 
    private int tokStart; 

    private TermAttribute termAtt; 
    private OffsetAttribute offsetAtt; 

    protected EdgeNGramTokenFilter(TokenStream input) : base(input) 
    { 
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute)); 
     this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute)); 
    } 

    /** 
    * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range 
    * 
    * @param input {@link TokenStream} holding the input to be tokenized 
    * @param side the {@link Side} from which to chop off an n-gram 
    * @param minGram the smallest n-gram to generate 
    * @param maxGram the largest n-gram to generate 
    */ 
    public EdgeNGramTokenFilter(TokenStream input, Side side, int minGram, int maxGram) 
     : base(input) 
    { 

     if (side == null) 
     { 
      throw new System.ArgumentException("sideLabel must be either front or back"); 
     } 

     if (minGram < 1) 
     { 
      throw new System.ArgumentException("minGram must be greater than zero"); 
     } 

     if (minGram > maxGram) 
     { 
      throw new System.ArgumentException("minGram must not be greater than maxGram"); 
     } 

     this.minGram = minGram; 
     this.maxGram = maxGram; 
     this.side = side; 
     this.termAtt = (TermAttribute)AddAttribute(typeof(TermAttribute)); 
     this.offsetAtt = (OffsetAttribute)AddAttribute(typeof(OffsetAttribute)); 
    } 

    /** 
    * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range 
    * 
    * @param input {@link TokenStream} holding the input to be tokenized 
    * @param sideLabel the name of the {@link Side} from which to chop off an n-gram 
    * @param minGram the smallest n-gram to generate 
    * @param maxGram the largest n-gram to generate 
    */ 
    public EdgeNGramTokenFilter(TokenStream input, string sideLabel, int minGram, int maxGram) 
     : this(input, Side.getSide(sideLabel), minGram, maxGram) 
    { 

    } 

    public override bool IncrementToken() 
    { 
     while (true) 
     { 
      if (curTermBuffer == null) 
      { 
       if (!input.IncrementToken()) 
       { 
        return false; 
       } 
       else 
       { 
        curTermBuffer = (char[])termAtt.TermBuffer().Clone(); 
        curTermLength = termAtt.TermLength(); 
        curGramSize = minGram; 
        tokStart = offsetAtt.StartOffset(); 
       } 
      } 
      if (curGramSize <= maxGram) 
      { 
       if (!(curGramSize > curTermLength   // if the remaining input is too short, we can't generate any n-grams 
        || curGramSize > maxGram)) 
       {  // if we have hit the end of our n-gram size range, quit 
        // grab gramSize chars from front or back 
        int start = side == Side.FRONT ? 0 : curTermLength - curGramSize; 
        int end = start + curGramSize; 
        ClearAttributes(); 
        offsetAtt.SetOffset(tokStart + start, tokStart + end); 
        termAtt.SetTermBuffer(curTermBuffer, start, curGramSize); 
        curGramSize++; 
        return true; 
       } 
      } 
      curTermBuffer = null; 
     } 
    } 

    public override Token Next(Token reusableToken) 
    { 
     return base.Next(reusableToken); 
    } 
    public override Token Next() 
    { 
     return base.Next(); 
    } 
    public override void Reset() 
    { 
     base.Reset(); 
     curTermBuffer = null; 
    } 
} 
} 

My code is based on Lucene 4.2; it may help you.

import java.io.File; 
import java.io.IOException; 

import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; 
import org.apache.lucene.index.DirectoryReader; 
import org.apache.lucene.index.IndexWriterConfig; 
import org.apache.lucene.index.IndexWriterConfig.OpenMode; 
import org.apache.lucene.search.spell.Dictionary; 
import org.apache.lucene.search.spell.LuceneDictionary; 
import org.apache.lucene.search.spell.PlainTextDictionary; 
import org.apache.lucene.search.spell.SpellChecker; 
import org.apache.lucene.store.Directory; 
import org.apache.lucene.store.FSDirectory; 
import org.apache.lucene.store.IOContext; 
import org.apache.lucene.store.RAMDirectory; 
import org.apache.lucene.util.Version; 
import org.wltea4pinyin.analyzer.lucene.IKAnalyzer4PinYin; 


/** 
* 
* 
* @author <a href="mailto:[email protected]"></a> 
* @version 2013-11-25 11:13:59 
*/ 
public class LuceneSpellCheckerDemoService { 

private static final String INDEX_FILE = "/Users/r/Documents/jar/luke/youtui/index"; 
private static final String INDEX_FILE_SPELL = "/Users/r/Documents/jar/luke/spell"; 

private static final String INDEX_FIELD = "app_name_quanpin"; 

public static void main(String args[]) { 

    try { 
     // 
     PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new IKAnalyzer4PinYin(
       true)); 

     // read index conf 
     IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_42, wrapper); 
     conf.setOpenMode(OpenMode.CREATE_OR_APPEND); 

     // read dictionary 
     Directory directory = FSDirectory.open(new File(INDEX_FILE)); 
     RAMDirectory ramDir = new RAMDirectory(directory, IOContext.READ); 
     DirectoryReader indexReader = DirectoryReader.open(ramDir); 

     Dictionary dic = new LuceneDictionary(indexReader, INDEX_FIELD); 


     SpellChecker sc = new SpellChecker(FSDirectory.open(new File(INDEX_FILE_SPELL))); 
     //sc.indexDictionary(new PlainTextDictionary(new File("myfile.txt")), conf, false); 
     sc.indexDictionary(dic, conf, true); 
     String[] strs = sc.suggestSimilar("zhsiwusdazhanjiangshi", 10); 
     for (int i = 0; i < strs.length; i++) { 
      System.out.println(strs[i]); 
     } 
     sc.close(); 
    } catch (IOException e) { 
     e.printStackTrace(); 
    } 
} 


}