2014-10-10 84 views
0

我試圖運行 PUMA 基準中給出的 Hadoop 字數統計(WordCount)程序

The WordCount.java file is as follows: 

/** 
* Licensed to the Apache Software Foundation (ASF) under one 
* or more contributor license agreements. See the NOTICE file 
* distributed with this work for additional information 
* regarding copyright ownership. The ASF licenses this file 
* to you under the Apache License, Version 2.0 (the 
* "License"); you may not use this file except in compliance 
* with the License. You may obtain a copy of the License at 
* 
*  http://www.apache.org/licenses/LICENSE-2.0 
* 
* Unless required by applicable law or agreed to in writing, software 
* distributed under the License is distributed on an "AS IS" BASIS, 
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and 
* limitations under the License. 
*/ 

package org.apache.hadoop.examples; 

import java.io.IOException; 
import java.util.ArrayList; 
import java.util.Date; 
import java.util.List; 
import java.util.StringTokenizer; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 

public class WordCount { 

    public static class TokenizerMapper 
    extends Mapper<Object, Text, Text, IntWritable>{ 

    private final static IntWritable one = new IntWritable(1); 
    private Text word = new Text(); 

    public void map(Object key, Text value, Context context 
    ) throws IOException, InterruptedException { 
     StringTokenizer itr = new StringTokenizer(value.toString()); 
     while (itr.hasMoreTokens()) { 
     word.set(itr.nextToken()); 
     context.write(word, one); 
     } 
    } 
    } 

    public static class IntSumReducer 
    extends Reducer<Text,IntWritable,Text,IntWritable> { 
    private IntWritable result = new IntWritable(); 

    public void reduce(Text key, Iterable<IntWritable> values, 
     Context context 
    ) throws IOException, InterruptedException { 
     int sum = 0; 
     for (IntWritable val : values) { 
     sum += val.get(); 
     } 
     result.set(sum); 
     context.write(key, result); 
    } 
    } 

    public static void main(String[] args) throws Exception { 
    Configuration conf = new Configuration(); 
    Job job = new Job(conf, "wordcount"); 
    job.setJarByClass(WordCount.class); 
    job.setMapperClass(TokenizerMapper.class); 
    job.setCombinerClass(IntSumReducer.class); 
    job.setReducerClass(IntSumReducer.class); 
    job.setOutputKeyClass(Text.class); 
    job.setOutputValueClass(IntWritable.class); 

    List<String> other_args = new ArrayList<String>(); 
    for(int i=0; i < args.length; ++i) { 
     try { 
     if ("-r".equals(args[i])) { 
      job.setNumReduceTasks(Integer.parseInt(args[++i])); 
     } else { 
      other_args.add(args[i]); 
     } 
     } catch (NumberFormatException except) { 
     System.out.println("ERROR: Integer expected instead of " + args[i]); 
     System.err.println("Usage: wordcount <numReduces> <in> <out>"); 
     System.exit(2);   
     } catch (ArrayIndexOutOfBoundsException except) { 
     System.out.println("ERROR: Required parameter missing from " + 
      args[i-1]); 
     System.err.println("Usage: wordcount <numReduces> <in> <out>"); 
     System.exit(2); 
     } 
    } 
    // Make sure there are exactly 2 parameters left. 
    if (other_args.size() != 2) { 
     System.out.println("ERROR: Wrong number of parameters: " + 
      other_args.size() + " instead of 2."); 
     System.err.println("Usage: wordcount <numReduces> <in> <out>"); 
     System.exit(2); 
    } 

    FileInputFormat.addInputPath(job, new Path(other_args.get(0))); 
    FileOutputFormat.setOutputPath(job, new Path(other_args.get(1))); 
    Date startIteration = new Date(); 
    Boolean waitforCompletion = job.waitForCompletion(true) ; 
    Date endIteration = new Date(); 
    System.out.println("The iteration took " 
     + (endIteration.getTime() - startIteration.getTime())/1000 
     + " seconds."); 
    System.exit(waitforCompletion ? 0 : 1); 
    } 
} 

我在使用下面的命令時遇到了麻煩,得到了以下結果:

#javac -cp /opt/local/share/java/hadoop-1.2.1/hadoop-core-1.2.1.jar -d wordcount_classes WordCount.java 

#jar -cvf wordcount.jar -C wordcount_classes/ . 

和輸出,我得到的是:

added manifest 
adding: org/(in = 0) (out= 0)(stored 0%) 
adding: org/apache/(in = 0) (out= 0)(stored 0%) 
adding: org/apache/hadoop/(in = 0) (out= 0)(stored 0%) 
adding: org/apache/hadoop/examples/(in = 0) (out= 0)(stored 0%) 
adding: org/apache/hadoop/examples/WordCount$IntSumReducer.class(in = 1793) (out= 750)(deflated 58%) 
adding: org/apache/hadoop/examples/WordCount$TokenizerMapper.class(in = 1790) (out= 764)(deflated 57%) 
adding: org/apache/hadoop/examples/WordCount.class(in = 3131) (out= 1682)(deflated 46%) 
adding: org/myorg/(in = 0) (out= 0)(stored 0%) 
adding: org/myorg/WordCount$IntSumReducer.class(in = 1759) (out= 745)(deflated 57%) 
adding: org/myorg/WordCount$TokenizerMapper.class(in = 1756) (out= 759)(deflated 56%) 
adding: org/myorg/WordCount.class(in = 3080) (out= 1676)(deflated 45%) 


#hadoop jar wordcount.jar WordCount ../input/file01.txt ../output/ 

我得到了以下輸出:

Exception in thread "main" java.lang.NoClassDefFoundError: WordCount (wrong name: org/apache/hadoop/examples/WordCount) 
    at java.lang.ClassLoader.defineClass1(Native Method) 
    at java.lang.ClassLoader.defineClass(ClassLoader.java:800) 
    at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) 
    at java.net.URLClassLoader.defineClass(URLClassLoader.java:449) 
    at java.net.URLClassLoader.access$100(URLClassLoader.java:71) 
    at java.net.URLClassLoader$1.run(URLClassLoader.java:361) 
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354) 
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425) 
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) 
    at java.lang.ClassLoader.loadClass(ClassLoader.java:412) 
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358) 
    at java.lang.Class.forName0(Native Method) 
    at java.lang.Class.forName(Class.java:270) 
    at org.apache.hadoop.util.RunJar.main(RunJar.java:205) 

我在此網站中應用了前面介紹的所有過程,但沒有任何工作適合我。 如果有人告訴我如何解決這個問題,我會非常感激。

+0

正如我在下面的解決方案中描述的,你可能會錯過你正在生成的jar文件內的嵌套類。 – r2d2oid 2016-01-08 00:41:03

回答

0

將包語句(package 聲明)改爲

package org.myorg; 

並運行完整的類名程序。

看着你的輸出,你似乎在不同的路徑(=包)中包含了兩次WordCount類,但是當你運行程序時,你沒有指定任何包。

+0

我更改了包org.myorg,得到了如下結果: – user3678107 2014-10-11 00:25:13

0
hadoop jar wordcount.jar org/apache/hadoop/examples/WordCount ../input/file01.txt ../output/ 

我覺得問題出在那裏,因爲你沒有使用完整的類名。

+0

我還沒有發現全部類名的部分。你能給我舉一個例子嗎? – user3678107 2014-10-12 19:00:46

+0

取代'WordCount' 我使用的是'org/apache/hadoop/examples/WordCount' 在這種情況下,您必須使用<包名稱>/,因爲您的類不在默認包中。 – proutray 2014-10-13 21:00:52

0

你的wordcount.jar有兩個Wordount類,用限定符指定你想運行哪一個類。

例如:

hadoop jar wordcount.jar org.apache.hadoop.examples.WordCount ../input/file01.txt ../output/

hadoop jar wordcount.jar org.myorg.WordCount ../input/file01.txt ../output/

0

WordCount 類本身內部已經有兩個嵌套類,即:TokenizerMapper 和 IntSumReducer。

您需要確保這些類包含在您正在生成的jar文件中。這裏是我工作的命令:

jar cvf WordCount.jar WordCount.class WordCount\$TokenizerMapper.class WordCount\$IntSumReducer.class