2016-01-13 158 views
1

文章架構:如何使用whoosh搜索關鍵字

下面是我創建的文章架構。

class ArticleSchema(SchemaClass):
    # Whoosh schema describing one article document. Each class attribute
    # below declares one field of the index.

    # Title: phrase-enabled, sortable, stored, boosted 2x, stemmed.
    title = TEXT(
        phrase=True,
        sortable=True,
        stored=True,
        field_boost=2.0,
        spelling=True,
        analyzer=StemmingAnalyzer(),
    )

    # Comma-separated, lowercased keyword list, boosted 1.5x (not stored).
    keywords = KEYWORD(
        commas=True,
        field_boost=1.5,
        lowercase=True,
    )

    # Comma-separated, lowercased author list; stored with the document.
    authors = KEYWORD(
        stored=True,
        commas=True,
        lowercase=True,
    )

    # Body and summary text, both stemmed; neither is stored.
    content = TEXT(
        spelling=True,
        analyzer=StemmingAnalyzer(),
    )
    summary = TEXT(
        spelling=True,
        analyzer=StemmingAnalyzer(),
    )

    # Publication timestamp; stored and sortable.
    published_time = DATETIME(
        stored=True,
        sortable=True,
    )

    # STORED fields: presumably kept with the document but not searchable
    # -- confirm against the Whoosh field-type documentation.
    permalink = STORED
    thumbnail = STORED

    # Unique, stored identifier of the article.
    article_id = ID(
        unique=True,
        stored=True,
    )

    # Topic text; stored, uses the default TEXT analyzer (no stemming
    # analyzer is passed here, unlike title/content/summary).
    topic = TEXT(
        spelling=True,
        stored=True,
    )

    series_id = STORED

    # Comma-separated, lowercased tag list (not stored).
    tags = KEYWORD(
        commas=True,
        lowercase=True,
    )

搜索查詢

# Names of the schema fields used by the search code, kept as constants so
# the field spellings live in one place.
FIELD_TIME = "published_time"   # publication timestamp field
FIELD_TITLE = "title"           # article title field
FIELD_PUBLISHER = "authors"     # author list field
FIELD_KEYWORDS = "keywords"     # keyword list field
FIELD_CONTENT = "content"       # article body field
FIELD_TOPIC = "topic"           # topic field

def _run_search_page(ix, query, page, result_len):
    """Run ``query`` against ``ix`` and collect one page of converted hits.

    Returns ``(hits, pagecount)`` when the page holds at least one scored
    result, otherwise ``None`` so the caller can try another strategy.
    """
    with ix.searcher() as searcher:
        results = searcher.search_page(
            query,
            pagenum=page,
            pagelen=result_len,
            sortedby=[sorting_timestamp, scores],
            reverse=True,
            terms=True,
        )
        if results.scored_length() > 0:
            # Convert the hits before leaving the ``with`` block, because
            # the searcher is closed on exit.
            return [append_to(hit) for hit in results], results.pagecount
    return None


def search_query(search_term=None, page=1, result_len=10):
    """Search the index for ``search_term`` and return one page of results.

    A plain multi-field search over title, authors, keywords and topic is
    tried first. If it produces no scored results, the search is retried
    with fuzzy term matching over title, authors and topic only (keywords
    are not part of the fuzzy pass).

    Args:
        search_term: Raw query string. ``None`` or an empty string yields
            no results.
        page: Page number forwarded to ``searcher.search_page`` as
            ``pagenum``.
        result_len: Page length forwarded to ``searcher.search_page`` as
            ``pagelen``.

    Returns:
        A ``(results, pagecount)`` tuple, where ``results`` is the list of
        ``append_to(hit)`` values for the requested page. ``(None, 0)`` is
        returned when the term is empty, the index does not exist, or
        neither pass finds anything.
    """
    if not search_term:
        return None, 0
    if not index.exists_in(INDEX_DIR, indexname=INDEX_NAME):
        return None, 0
    ix = get_index()

    # First pass: ordinary term matching across all searchable fields.
    parser = qparser.MultifieldParser(
        [FIELD_TITLE, FIELD_PUBLISHER, FIELD_KEYWORDS, FIELD_TOPIC],
        ix.schema)
    # ``normalize()`` returns the normalized query instead of modifying the
    # receiver, so its result has to be kept.
    query = parser.parse(search_term).normalize()
    found = _run_search_page(ix, query, page, result_len)
    if found is not None:
        return found

    # Second pass: nothing matched, so retry with fuzzy terms.
    parser = qparser.MultifieldParser(
        [FIELD_TITLE, FIELD_PUBLISHER, FIELD_TOPIC],
        ix.schema, termclass=FuzzyTerm)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    query = parser.parse(search_term).normalize()
    found = _run_search_page(ix, query, page, result_len)
    if found is not None:
        return found
    return None, 0

當我按標題搜索時可以正常工作,但按作者和關鍵字檢索時卻無法正常工作。我不明白自己在這裏哪裏做錯了。我從 API 獲取數據,然後建立索引,這一切都運作正常。但是,當我通過 `authors` 和 `keywords` 這類 KEYWORD 字段搜索時,它不起作用。

+0

請舉例說明您在作者中存儲的內容以及您搜索的內容。 –

回答

0

`authors` 和 `keywords` 這兩個字段都是 KEYWORD 類型,該類型不支持短語搜索,這意味着你應該用準確的關鍵字或其衍生形式之一來搜索,因爲你使用了詞幹分析。我認爲你應該改用 TEXT 類型。

摘自 Whoosh 文檔:

whoosh.fields.KEYWORD

這種類型專爲以空格或逗號分隔的關鍵字而設計。該類型會被索引並可供搜索(也可選擇存儲)。爲了節省空間,它不支持短語搜索。