
MapReduce not producing any output

I'm trying to run a simple MapReduce job on a text file, but it isn't producing any output. Here is my code:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    // Mapper: emits (word, 1) for every token in each input line.
    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sums the counts for each word.
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // args[0]: input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // args[1]: output path (must not already exist)
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

I get this error while running the jar file:

17/05/07 23:10:53 WARN mapred.LocalJobRunner: job_local973452829_0001 
java.lang.Exception:  org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in localfetcher#1 
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) 
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:529) 
Caused by: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in localfetcher#1 
at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134) 
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376) 
at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319) 
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) 
at java.lang.Thread.run(Thread.java:748) 
Caused by: java.io.FileNotFoundException: /app/hadoop/tmp%20/mapred/local/localRunner/hduser/jobcache/job_local973452829_0001/attempt_local973452829_0001_m_000000_0/output/file.out.index 
at org.apache.hadoop.fs.RawLocalFileSystem.open(RawLocalFileSystem.java:193) 
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:764) 
at org.apache.hadoop.io.SecureIOUtils.openFSDataInputStream(SecureIOUtils.java:156) 
at org.apache.hadoop.mapred.SpillRecord.<init>(SpillRecord.java:70) 
at org.apache.hadoop.mapred.SpillRecord.<init>(SpillRecord.java:62) 
at org.apache.hadoop.mapred.SpillRecord.<init>(SpillRecord.java:57) 
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.copyMapOutput(LocalFetcher.java:123) 
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.doCopy(LocalFetcher.java:101) 
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.run(LocalFetcher 

What is wrong with my code? I'm on Ubuntu 14.04 with Hadoop 2.4.

Answers


This is the relevant part of your error:

Caused by: java.io.FileNotFoundException: /app/hadoop/tmp%20/mapred/local/localRunner/hduser/jobcache/job_local973452829_0001/attempt_local973452829_0001_m_000000_0/output/file.out.index

I'd guess it's a problem with the hadoop.tmp.dir property in your core-site.xml: Hadoop is unable to store the temporary output files (from the mapper) on your disk. Note the %20 (a URL-encoded space) in the path /app/hadoop/tmp%20/, which suggests the configured value contains a trailing space.

You can either remove that property, so Hadoop creates its own temporary directory for storing intermediate output, or set it to a directory with the appropriate permissions (and no stray whitespace in the value).
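
For reference, a minimal sketch of what the property could look like in core-site.xml. The directory /app/hadoop/tmp is an assumption based on the path in your stack trace, with the trailing space removed:

<property>
  <name>hadoop.tmp.dir</name>
  <!-- assumed directory: must exist, be writable by the job user, and contain no trailing whitespace -->
  <value>/app/hadoop/tmp</value>
</property>

If you keep the property, also make sure the directory exists and is owned by the user running the job (hduser, judging by the trace).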


Try writing the HDFS command like this:

hadoop jar (jar file name) (class name) (input path) (output path)
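
For example, with hypothetical jar and HDFS path names:

hadoop jar wordcount.jar WordCount /user/hduser/input /user/hduser/output

Here WordCount is the main class from the code above, while the jar name and input/output paths are placeholders; note that the output directory must not already exist, or the job will fail.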

Also, check whether you got any warnings when you exported your jar file.
