2012-04-16 55 views
0

我剛開始學習 Solr。我已經安裝了 Apache Tomcat 服務器和 Solr 3.5,並已成功配置 Solr,可以從 Oracle 數據庫搜索單個實體的數據。但是,當我在 data-config.xml 中加入兩個實體,並在 Solr 的 schema.xml 中添加相應的字段後,就出現了問題(在 Solr 中聲明多個實體時出錯)。我的 data-config.xml 配置如下:

<dataConfig> 
    <!-- Data Import Handler configuration: one JDBC data source (Oracle thin
         driver) and one document containing two entities, PROJECTS and PLANS. -->
    <!-- NOTE(review): the database user and password are stored here in plain
         text; consider supplying them from outside this file. -->
    <dataSource name="JdbcDataSource" 
     driver="oracle.jdbc.driver.OracleDriver" 
     url="jdbc:oracle:thin:@//192.168.1.3:1521/orcl" 
     user="SSOHANI" 
     password="Ssohani123"/> 

    <document name="doc"> 
     <!-- PROJECTS entity: reads rows from SSOHANI.PROJECTS. NVL() substitutes 0
          (or the literal string 'NULL' for COMMENTS) when a column is NULL.
          Each selected column is mapped to a projects_* field below. -->
     <entity name="PROJECTS" 
       query="select PROJECTS.ID, PROJECTS.BATCH_ID, PROJECTS.OPERATION, PROJECTS.NAME, 
       PROJECTS.DESCRIPTION, PROJECTS.ESTIMATED_COST, PROJECTS.GRANTOR_AGENCY_ID, 
       PROJECTS.GRANTEE_AGENCY_ID, PROJECTS.PROJECT_STATUS_ID, 
       PROJECTS.PROJECT_TYPE_ID, PROJECTS.START_DATE, PROJECTS.END_DATE, 
       NVL(PROJECTS.TRACS_PARENT_PROJECT_ID,0) TRACS_PARENT_PROJECT_ID, 
       NVL(PROJECTS.STATE_PARENT_PROJECT_ID,0) STATE_PARENT_PROJECT_ID, 
       NVL(PROJECTS.PLAN_ID,0) PLAN_ID, 
       NVL(PROJECTS.PLAN_ID_TYPE,0) PLAN_ID_TYPE, 
       NVL(PROJECTS.TRACS_ID,0) TRACS_ID, 
       NVL(PROJECTS.STATE_ID,0) STATE_ID, 
       PROJECTS.VALID, PROJECTS.APPLIED, 
       NVL(PROJECTS.COMMENTS,'NULL') COMMENTS, 
       PROJECTS.GENERATED_PLAN_ID, PROJECTS.TRACS_PROJECT_ID, 
       PROJECTS.STATE_PLAN_ID from SSOHANI.PROJECTS" > 

     <field column="ID" name="projects_id" /> 
     <field column="BATCH_ID" name="projects_batch_id" /> 
     <field column="OPERATION" name="projects_operation" /> 
     <field column="NAME" name="projects_name" /> 
     <field column="DESCRIPTION" name="projects_description" /> 
     <field column="ESTIMATED_COST" name="projects_estimated_cost" /> 
     <field column="GRANTOR_AGENCY_ID" name="projects_grantor_agency_id" /> 
     <field column="GRANTEE_AGENCY_ID" name="projects_grantee_agency_id" /> 
     <field column="PROJECT_STATUS_ID" name="projects_project_status_id" /> 
     <field column="PROJECT_TYPE_ID" name="projects_project_type_id" /> 
     <field column="START_DATE" name="projects_start_date" /> 
     <field column="END_DATE" name="projects_end_date" /> 
     <field column="TRACS_PARENT_PROJECT_ID" name="projects_tracs_parent_project_id" /> 
     <field column="STATE_PARENT_PROJECT_ID" name="projects_state_parent_project_id" /> 
     <field column="PLAN_ID" name="projects_plan_id" /> 
     <field column="PLAN_ID_TYPE" name="projects_plan_id_type" /> 
     <field column="TRACS_ID" name="projects_tracs_id" /> 
     <field column="STATE_ID" name="projects_state_id" /> 
     <field column="VALID" name="projects_valid" /> 
     <field column="APPLIED" name="projects_applied" /> 
     <field column="COMMENTS" name="projects_comments" /> 
     <field column="GENERATED_PLAN_ID" name="projects_generated_plan_id" /> 
     <field column="TRACS_PROJECT_ID" name="projects_tracs_project_id" /> 
     <field column="STATE_PLAN_ID" name="projects_state_plan_id" /> 

     </entity> 

    <!-- PLANS entity: declared directly under <document>, as a sibling of
         PROJECTS rather than nested inside it. Reads rows from SSOHANI.PLANS
         and maps each selected column to a plans_* field below. -->
    <!-- NOTE(review): NVL(PLANS.END_DATE,0) pairs what is presumably a DATE
         column with a numeric default; confirm that Oracle accepts this and
         that the result suits the target field. -->
    <entity name="PLANS" 
      query="select PLANS.ID, PLANS.BATCH_ID, PLANS.OPERATION, PLANS.NAME, PLANS.DESCRIPTION, 
       PLANS.CONTACT_ID, PLANS.PLAN_TYPE_ID, PLANS.AGENCY_ID, PLANS.START_DATE, 
       NVL(PLANS.END_DATE,0) END_DATE, 
       NVL(PLANS.TRACS_PARENT_PLAN_ID,0) TRACS_PARENT_PLAN_ID, 
       NVL(PLANS.STATE_PARENT_PLAN_ID,0) STATE_PARENT_PLAN_ID, 
       NVL(PLANS.TRACS_ID,0) TRACS_ID, 
       NVL(PLANS.STATE_ID,0) STATE_ID,   
       PLANS.VALID, PLANS.APPLIED, 
       NVL(PLANS.COMMENTS,'NULL') COMMENTS from SSOHANI.PLANS" > 

     <field column="ID" name="plans_id" /> 
     <field column="BATCH_ID" name="plans_batch_id" /> 
     <field column="OPERATION" name="plans_operation" /> 
     <field column="NAME" name="plans_name" /> 
     <field column="DESCRIPTION" name="plans_description" /> 
     <field column="CONTACT_ID" name="plans_contact_id" /> 
     <field column="PLAN_TYPE_ID" name="plans_plan_type_id" />  
     <field column="AGENCY_ID" name="plans_agency_id" /> 
     <field column="START_DATE" name="plans_start_date" /> 
     <field column="END_DATE" name="plans_end_date" /> 
     <field column="TRACS_PARENT_PLAN_ID" name="plans_tracs_parent_plan_id" /> 
     <field column="STATE_PARENT_PLAN_ID" name="plans_state_parent_plan_id" /> 
     <field column="TRACS_ID" name="plans_tracs_id" /> 
     <field column="STATE_ID" name="plans_state_id" /> 
     <field column="VALID" name="plans_valid" /> 
     <field column="APPLIED" name="plans_applied" /> 
     <field column="COMMENTS" name="plans_comments" />  
    </entity> 

</document> 
</dataConfig> 

我的 schema.xml 配置如下:

<schema> 
<!-- Index schema declaring one field per imported column: projects_* fields
     first, then plans_* fields. Every field is indexed, stored and declared
     required="true". -->
<!-- NOTE(review): because both the projects_* and the plans_* fields are all
     required, a document that carries only one group of fields cannot satisfy
     the schema; this matches the reported "missing required field:
     plans_applied" error for a document holding only projects_* values. -->
<fields> 

     <!-- Fields for the PROJECTS entity -->
     <field name="projects_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_batch_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_operation" type="string" indexed="true" stored="true" required="true"/> 
     <field name="projects_name" type="string" indexed="true" stored="true" required="true"/> 
     <field name="projects_description" type="string" indexed="true" stored="true" required="true"/> 
     <field name="projects_estimated_cost" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_grantor_agency_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_grantee_agency_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_project_status_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_project_type_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_start_date" type="date" indexed="true" stored="true" required="true"/> 
     <field name="projects_end_date" type="date" indexed="true" stored="true" required="true"/>  
     <field name="projects_tracs_parent_project_id" type="long" indexed="true" stored="true" required="true"/>  
     <field name="projects_state_parent_project_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_plan_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_plan_id_type" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_tracs_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_state_id" type="long" indexed="true" stored="true" required="true"/>  
     <field name="projects_valid" type="string" indexed="true" stored="true" required="true"/> 
     <field name="projects_applied" type="string" indexed="true" stored="true" required="true"/> 
     <field name="projects_comments" type="string" indexed="true" stored="true" required="true"/> 
     <field name="projects_generated_plan_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_tracs_project_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="projects_state_plan_id" type="long" indexed="true" stored="true" required="true"/> 

       <!-- Fields for the PLANS entity -->

     <field name="plans_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="plans_batch_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="plans_operation" type="string" indexed="true" stored="true" required="true"/> 
     <field name="plans_name" type="string" indexed="true" stored="true" required="true"/> 
     <field name="plans_description" type="string" indexed="true" stored="true" required="true"/> 
     <field name="plans_contact_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="plans_plan_type_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="plans_agency_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="plans_start_date" type="date" indexed="true" stored="true" required="true"/> 
     <field name="plans_end_date" type="date" indexed="true" stored="true" required="true"/> 
     <field name="plans_tracs_parent_plan_id" type="long" indexed="true" stored="true" required="true"/> 
     <!-- NOTE(review): declared as string, whereas the other *_parent_*_id
          fields in this schema are long; presumably unintentional, verify. -->
     <field name="plans_state_parent_plan_id" type="string" indexed="true" stored="true" required="true"/>  
     <field name="plans_tracs_id" type="long" indexed="true" stored="true" required="true"/>  
     <field name="plans_state_id" type="long" indexed="true" stored="true" required="true"/> 
     <field name="plans_valid" type="string" indexed="true" stored="true" required="true"/> 
     <field name="plans_applied" type="string" indexed="true" stored="true" required="true"/> 
     <field name="plans_comments" type="string" indexed="true" stored="true" required="true"/> 

    </fields> 

    <!-- NOTE(review): two <uniqueKey> elements are declared; a Solr schema is
         expected to name a single unique key field. TODO confirm which of the
         two, if either, actually takes effect. -->
    <uniqueKey>projects_id</uniqueKey> 
    <uniqueKey>plans_id</uniqueKey> 
    <!-- Queries that do not name a field are searched against projects_id. -->
    <defaultSearchField>projects_id</defaultSearchField> 
</schema> 

我的 solrconfig.xml 中的相關配置是:

<!-- Registers the DataImportHandler under the /dataimport path; its "config"
     default points at the data-config.xml file by absolute path. -->
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler"> 
     <lst name="defaults"> 
      <str name="config">/opt/solr/core0/conf/data-config.xml</str> 
     </lst> 
    </requestHandler> 

現在,當我運行完整導入(full-import)命令時,收到以下錯誤:

Apr 16, 2012 4:11:46 PM org.apache.solr.handler.dataimport.SolrWriter upload 
WARNING: Error creating document : SolrInputDocument[{projects_tracs_id=projects_tracs_id(1.0)={0}, projects_name=projects_name(1.0)={Minnesota Firearms Safety Training Program}, projects_description=projects_description(1.0)={To train 17,500 students and 425 new instructors at 45 recruiting workshops. Hold one statwide training academy. Award 2,650 recognition awards for length of service.}, projects_comments=projects_comments(1.0)={NULL}, projects_plan_id=projects_plan_id(1.0)={0}, projects_end_date=projects_end_date(1.0)={2002-12-31 00:00:00.0}, projects_tracs_parent_project_id=projects_tracs_parent_project_id(1.0)={0}, projects_plan_id_type=projects_plan_id_type(1.0)={0}, projects_project_status_id=projects_project_status_id(1.0)={4}, projects_state_plan_id=projects_state_plan_id(1.0)={1126}, projects_estimated_cost=projects_estimated_cost(1.0)={600000}, projects_valid=projects_valid(1.0)={N}, projects_grantor_agency_id=projects_grantor_agency_id(1.0)={1154}, projects_start_date=projects_start_date(1.0)={2001-12-31 00:00:00.0}, projects_applied=projects_applied(1.0)={N}, projects_state_id=projects_state_id(1.0)={0}, projects_batch_id=projects_batch_id(1.0)={1433468017}, projects_generated_plan_id=projects_generated_plan_id(1.0)={2050667163}, projects_id=projects_id(1.0)={2009553709}, projects_operation=projects_operation(1.0)={INSERT}, projects_state_parent_project_id=projects_state_parent_project_id(1.0)={0}, projects_grantee_agency_id=projects_grantee_agency_id(1.0)={1235}, projects_tracs_project_id=projects_tracs_project_id(1.0)={1123}, projects_project_type_id=projects_project_type_id(1.0)={3}}] 
org.apache.solr.common.SolrException: [doc=2009553709] missing required field: plans_applied 
    at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:346) 
    at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:60) 
    at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:115) 
    at org.apache.solr.handler.dataimport.SolrWriter.upload(SolrWriter.java:73) 
    at org.apache.solr.handler.dataimport.DataImportHandler$1.upload(DataImportHandler.java:293) 
    at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:636) 
    at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:268) 
    at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:187) 
    at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:359) 
    at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:427) 
    at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:408) 

Solr 無法讀取第二個實體的任何字段。有人可以幫我解決這個問題嗎?請告訴我,我在配置 data-config.xml 或 schema.xml(或者這兩個文件)時犯了什麼錯誤。

回答

0

雖然從堆棧跟蹤來看,似乎是您的某個計劃(plans)文檔缺少必填字段 plans_applied 的值,但我認爲首先需要注意的是:數據在 Solr 中不應該是規範化(normalized)的,而應該在寫入索引之前先扁平化(反規範化)。

因此,您應該在這兩個表之間建立一個連接(join)(直接在 data-config.xml 中完成,而不是在您的查詢中),以便連接後得到的每一行都成爲一個 Solr 文檔。

這樣,當您想要獲取有關單個項目的所有數據時,它將全部位於單個文檔中 - 無需爲此類用例進行連接。

在Solr中,您應該擁抱冗餘,而不是關係和約束。

有意義嗎?

+0

請詳細說明一下.... 在此先感謝 – 2012-04-16 13:13:20

+0

請問得具體一些,有什麼不清楚的地方? – 2012-04-16 13:22:01

+0

我該如何連接(join)這兩個表,又應該如何配置我的 schema.xml?我看了 wiki,但還是不能理解。所以,如果您能用一個小例子來解釋,會對我很有幫助。謝謝。 – 2012-04-16 13:27:53