2011-10-16 16 views
0

我想按照以下頁面的說明對數據進行 k-means 聚類: https://cwiki.apache.org/MAHOUT/clustering-of-synthetic-control-data.html 然而,當 MapReduce 作業即將開始執行時,我得到了下面的錯誤(在輸入數據上運行 k-means 算法時出錯):

11/10/16 21:05:57 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_0,  Status : FAILED 
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector 
at java.net.URLClassLoader$1.run(URLClassLoader.java:202) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:190) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:306) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:247) 
at java.lang.Class.forName0(Native Method) 
at java.lang.Class.forName(Class.java:247) 
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762) 
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71) 
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613) 
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412) 
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) 
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418) 
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620) 
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) 
at org.apache.hadoop.mapred.Child.main(Child.java:170) 

11/10/16 21:06:03 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_1, Status : FAILED 
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector 
at java.net.URLClassLoader$1.run(URLClassLoader.java:202) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:190) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:306) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:247) 
at java.lang.Class.forName0(Native Method) 
at java.lang.Class.forName(Class.java:247) 
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762) 
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71) 
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613) 
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412) 
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) 
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418) 
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620) 
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) 
at org.apache.hadoop.mapred.Child.main(Child.java:170) 

11/10/16 21:06:09 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_2,  Status : FAILED 
    Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector 
at java.net.URLClassLoader$1.run(URLClassLoader.java:202) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:190) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:306) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:247) 
at java.lang.Class.forName0(Native Method) 
at java.lang.Class.forName(Class.java:247) 
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762) 
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71) 
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613) 
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412) 
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) 
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418) 
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620) 
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) 
at org.apache.hadoop.mapred.Child.main(Child.java:170) 

11/10/16 21:06:18 INFO mapred.JobClient: Job complete: job_201110161920_0008 
11/10/16 21:06:18 INFO mapred.JobClient: Counters: 3 
11/10/16 21:06:18 INFO mapred.JobClient: Job Counters 
11/10/16 21:06:18 INFO mapred.JobClient:  Launched map tasks=4 
11/10/16 21:06:18 INFO mapred.JobClient:  Data-local map tasks=4 
11/10/16 21:06:18 INFO mapred.JobClient:  Failed map tasks=1 
Exception in thread "main" java.lang.InterruptedException: K-Means Iteration failed processing output/clusters-0/part-randomSeed 
at org.apache.mahout.clustering.kmeans.KMeansDriver.runIteration(KMeansDriver.java:363) 
at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClustersMR(KMeansDriver.java:310) 
at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClusters(KMeansDriver.java:237) 
at org.apache.mahout.clustering.kmeans.KMeansDriver.run(KMeansDriver.java:152) 
at org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.run(Job.java:149) 
at org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.main(Job.java:60) 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) 
at java.lang.reflect.Method.invoke(Method.java:597) 
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:68) 
at org.apache.hadoop.util.ProgramDriver.driver(ProgramDriver.java:139) 
at org.apache.mahout.driver.MahoutDriver.main(MahoutDriver.java:187) 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) 
at java.lang.reflect.Method.invoke(Method.java:597) 
at org.apache.hadoop.util.RunJar.main(RunJar.java:156) 

有人可以告訴我如何解決這個問題嗎?這對我真的很重要。謝謝你的時間。

回答

1

這意味着您沒有提供在 Hadoop 上運行該作業所需的所有類。您提交的 JAR 文件必須打包所有依賴項,包括 Mahout 核心(core)模塊和數學(math)模塊中的所有類。幸運的是,Mahout 已經爲你做到了這一點:請查看它在構建時生成的「job」文件,該文件位於 target/ 目錄中。

+0

你是指我的 examples/target 目錄,還是新生成的 target 目錄? – Pavan

+0

文檔上說 Mahout 的 mahout-examples-$MAHOUT_VERSION.job 會執行實際的聚類任務,因此需要先創建它。你能告訴我如何創建這個文件嗎?我非常感謝你的幫助。我想我是在這一步出錯了。 – Pavan

+0

只需執行:'mvn package' –

0

你缺少org.apache.mahout.math包:

Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector