2013-03-02 60 views
3

我已經成功地創建了文檔並做了一些複雜的搜索,但是在分組某些搜索結果時遇到了問題。如何將Lucene.Net搜索的結果分組?

搜索後,書籍列表可以正常顯示。除此之外,還需要基於同一個搜索查詢,按Author(作者)對結果進行分組。

例如:

Author Name  | Count 
A    | 12 
B    | 2 

我使用的Lucene.Net 3.0.3.0不支持分組,但或許有一些變通方法。我還需要對價格範圍實現同樣的分組功能。

+0

你不應該這樣做,Lucene不是一個關係數據庫。你應該使用分面(facets):https://cwiki.apache.org/LUCENENET/simple-faceted-search.html – 2013-03-06 21:14:07

回答

2

如果您編寫自定義Collector,一切皆有可能。您所描述的是分面(facets),只要自己統計文檔的字段值就能輕鬆解決。核心部分是調用IndexSearcher.Search中接受收集器(Collector)的重載。收集器應讀取字段值(通常通過字段緩存來實現),並進行所需的計算。

這是一個簡短的演示,使用我演示項目Corelicious.Lucene中的一些類。

// Tally, per PostTypeId value, how many documents matched the query.
// Key: the PostTypeId read for a hit; value: number of hits seen with that id.
var postTypes = new Dictionary<Int32, Int32>();
searcher.Search(query, new DelegatingCollector((reader, doc, scorer) => {
    // Only hits with a strictly positive score are counted. Written as a
    // negated "> 0" so that a NaN score is skipped as well.
    if (!(scorer.Score() > 0)) {
        return;
    }

    // The value is nullable; hits for which no value is returned are ignored.
    var postType = SingleFieldCache.Default.GetInt32(reader, "PostTypeId", doc);
    if (!postType.HasValue) {
        return;
    }

    // Single dictionary lookup: TryGetValue leaves 'seen' at 0 for a new key.
    Int32 seen;
    postTypes.TryGetValue(postType.Value, out seen);
    postTypes[postType.Value] = seen + 1;
}));

完整代碼:

using System; 
using System.Collections.Generic; 
using System.IO; 
using System.Linq; 
using System.Text.RegularExpressions; 
using System.Xml; 
using Corelicious.Lucene; 
using Lucene.Net.Analysis; 
using Lucene.Net.Analysis.Standard; 
using Lucene.Net.Documents; 
using Lucene.Net.Index; 
using Lucene.Net.QueryParsers; 
using Lucene.Net.Search; 
using Lucene.Net.Store; 
using Directory = Lucene.Net.Store.Directory; 
using Version = Lucene.Net.Util.Version; 

namespace ConsoleApplication {
    /// <summary>
    /// Console demo: indexes a Stack Exchange posts.xml dump into an in-memory
    /// Lucene.Net index, runs a query against the "Body" field and prints how
    /// many hits there are per PostTypeId (a hand-rolled facet count).
    /// </summary>
    public static class Program {
        /// <summary>Location of posts.xml used when no path is supplied.</summary>
        private const string DefaultPostsXmlPath = "/Users/sisve/Downloads/Stack Exchange Data Dump - Sept 2011/Content/092011 Mathematics/posts.xml";

        /// <summary>Extracts individual tag names from a "&lt;tag1&gt;&lt;tag2&gt;" string.</summary>
        private static readonly Regex TagPattern = new Regex("<(.*?)>");

        /// <summary>
        /// Builds the index, runs the sample query and prints the per-post-type
        /// hit counts, most frequent first.
        /// </summary>
        /// <param name="args">
        /// Optional. When present, <c>args[0]</c> is used as the path to posts.xml
        /// instead of <see cref="DefaultPostsXmlPath"/>.
        /// </param>
        public static void Main(string[] args) {
            var postsXmlPath = (args != null && args.Length > 0) ? args[0] : DefaultPostsXmlPath;

            Console.WriteLine ("Creating directory...");
            var directory = new RAMDirectory();
            var analyzer = new StandardAnalyzer(Version.LUCENE_30);
            CreateIndex(directory, analyzer, postsXmlPath);

            var userQuery = "calculate pi";
            var queryParser = new QueryParser(Version.LUCENE_30, "Body", analyzer);
            var query = queryParser.Parse(userQuery);
            Console.WriteLine("Query: '{0}'", query);

            Dictionary<Int32, Int32> postTypes;

            // Dispose the reader and searcher once counting is done; the
            // original code left both open.
            using (var indexReader = IndexReader.Open(directory, readOnly: true))
            using (var searcher = new IndexSearcher(indexReader)) {
                postTypes = CountPostTypes(searcher, query);
            }

            Console.WriteLine("Post type summary");
            Console.WriteLine("Post type | Count");

            foreach (var pair in postTypes.OrderByDescending(x => x.Value)) {
                // Ids without a matching enum member are printed as their number.
                var postType = (PostType)pair.Key;
                Console.WriteLine("{0,-10} | {1}", postType, pair.Value);
            }

            Console.ReadLine();
        }

        /// <summary>Known values of the PostTypeId attribute.</summary>
        public enum PostType {
            Question = 1,
            Answer = 2,
            Tag = 4
        }

        /// <summary>
        /// Indexes the posts.xml file at the default location.
        /// </summary>
        /// <param name="directory">Directory the index is written to.</param>
        /// <param name="analyzer">Analyzer used for the analyzed fields.</param>
        public static void CreateIndex(Directory directory, Analyzer analyzer) {
            CreateIndex(directory, analyzer, DefaultPostsXmlPath);
        }

        /// <summary>
        /// Reads every <c>row</c> element of the given posts.xml file and adds one
        /// document per row with the fields "Tags" (one per tag, not analyzed),
        /// "Title" and "Body" (analyzed) and "PostTypeId" (not analyzed). The
        /// index is created from scratch, optimized and committed.
        /// </summary>
        /// <param name="directory">Directory the index is written to.</param>
        /// <param name="analyzer">Analyzer used for the analyzed fields.</param>
        /// <param name="postsXmlPath">Path to the posts.xml file to index.</param>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static void CreateIndex(Directory directory, Analyzer analyzer, string postsXmlPath) {
            if (directory == null) {
                throw new ArgumentNullException("directory");
            }
            if (analyzer == null) {
                throw new ArgumentNullException("analyzer");
            }
            if (postsXmlPath == null) {
                throw new ArgumentNullException("postsXmlPath");
            }

            using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            using (var xmlStream = File.OpenRead(postsXmlPath))
            using (var xmlReader = XmlReader.Create(xmlStream)) {
                while (xmlReader.ReadToFollowing("row")) {
                    // GetAttribute returns null for a missing attribute, and a
                    // Field cannot be created from a null value, so every
                    // attribute is defaulted or skipped.
                    var tags = xmlReader.GetAttribute("Tags") ?? String.Empty;
                    var title = xmlReader.GetAttribute("Title") ?? String.Empty;
                    var body = xmlReader.GetAttribute("Body") ?? String.Empty;
                    var postTypeId = xmlReader.GetAttribute("PostTypeId");

                    var doc = new Document();

                    // tags are stored as <tag1><tag2>
                    foreach (Match match in TagPattern.Matches(tags)) {
                        doc.Add(new Field("Tags", match.Groups[1].Value, Field.Store.NO, Field.Index.NOT_ANALYZED));
                    }

                    doc.Add(new Field("Title", title, Field.Store.NO, Field.Index.ANALYZED));
                    doc.Add(new Field("Body", body, Field.Store.NO, Field.Index.ANALYZED));

                    // A row without PostTypeId is still indexed; it just carries
                    // no post-type field.
                    if (postTypeId != null) {
                        doc.Add(new Field("PostTypeId", postTypeId, Field.Store.NO, Field.Index.NOT_ANALYZED));
                    }

                    writer.AddDocument(doc);
                }

                writer.Optimize();
                writer.Commit();
            }
        }

        /// <summary>
        /// Runs <paramref name="query"/> and counts the hits per PostTypeId value.
        /// </summary>
        /// <param name="searcher">Searcher to run the query on.</param>
        /// <param name="query">Query whose hits are counted.</param>
        /// <returns>Map from PostTypeId value to the number of counted hits.</returns>
        private static Dictionary<Int32, Int32> CountPostTypes(IndexSearcher searcher, Query query) {
            var counts = new Dictionary<Int32, Int32>();

            searcher.Search(query, new DelegatingCollector((reader, doc, scorer) => {
                // Only strictly positive scores count; the negated "> 0" also
                // skips a NaN score, exactly like the original "if (score > 0)".
                if (!(scorer.Score() > 0)) {
                    return;
                }

                // Nullable result: hits with no value returned are ignored.
                var postType = SingleFieldCache.Default.GetInt32(reader, "PostTypeId", doc);
                if (!postType.HasValue) {
                    return;
                }

                // One lookup instead of ContainsKey + indexer; 'seen' stays 0
                // when the key is new.
                Int32 seen;
                counts.TryGetValue(postType.Value, out seen);
                counts[postType.Value] = seen + 1;
            }));

            return counts;
        }
    }
}