2011-11-21 65 views
4

我想分類真正長的文本行。字符串。Weka示例,文本行的簡單分類

這裏是我得到的錯誤:

Exception in thread "main" java.lang.IllegalStateException: No input instance format defined 
at weka.filters.unsupervised.attribute.StringToWordVector.input(StringToWordVector.java:681) 
at org.berlin.weka.test.TestWeka.main(TestWeka.java:61) 

這裏是代碼,但我不斷收到例外,也許這是不正確設置。

package org.berlin.weka.test; 

import weka.classifiers.Classifier; 
import weka.classifiers.functions.SMO; 
import weka.core.Attribute; 
import weka.core.FastVector; 
import weka.core.Instance; 
import weka.core.Instances; 
import weka.filters.Filter; 
import weka.filters.unsupervised.attribute.StringToWordVector; 

public class TestWeka { 

    /* 
    * java -cp %WEKA_HOME% 
     weka.classifiers.meta.FilteredClassifier 
     -t ReutersAcq-train.arff 
     -T ReutersAcq-test.arff 
     -W "weka.classifiers.functions.SMO -N 2" 
     -F "weka.filters.unsupervised.attribute.StringToWordVector -S" 
    */ 

    public static void main(final String [] args) throws Exception { 
     System.out.println("Running"); 

     final StringToWordVector filter = new StringToWordVector(); 
     final Classifier classifier = new SMO(); 

     // Create numeric attributes. 
     final String[] keywords = { "test1", "test2"}; 
     FastVector attributes = new FastVector(keywords.length + 1); 
     for (int i = 0 ; i < keywords.length; i++) { 
      attributes.addElement(new Attribute(keywords[i])); 
     }   
     // Add class attribute. 
     final FastVector classValues = new FastVector(2); 
     classValues.addElement("miss"); 
     classValues.addElement("hit"); 

     attributes.addElement(new Attribute("Class", classValues)); 

     final Instances data = new Instances("Data1", attributes, 100); 
     data.setClassIndex(data.numAttributes() - 1); 

     /////////// 

     Instance instance = new Instance(10); 
     instance.setDataset(data); 
     // instance.setValue(testset.attribute(0),testset.attribute(0).addStringValue(obj.toString())); 
     System.out.println("==>." + data.attribute(0)); 
     instance.setValue(data.attribute(0), data.attribute(0).addStringValue("test1")); 
     instance.setDataset(data); 

     // Add class value to instance. 
     instance.setClassValue(1.0); 

     // Add instance to training data. 
     data.add(instance); 

     // Use filter. 
     filter.input(instance); 
     Instances filteredData = Filter.useFilter(data, filter); 

     // Rebuild classifier. 
     classifier.buildClassifier(filteredData);    
    } 

} // End of the class // 

回答

4

你可以嘗試通過設置輸入格式第一如下:
//rest of your code
// Use filter
filter.input(instance);
filter.setInputFormat(data); //data instances that you are going to input to the filter
Instances filteredData = Filter.useFilter(data, filter);

例如,你可以參考http://weka.wikispaces.com/Use+WEKA+in+your+Java+code 和刪除過濾器。 (注意它們在將實例傳遞給過濾器之前設置了過濾器的輸入格式)