
Hadoop: cannot run MapReduce program.. java.io.IOException: error=12

I am trying to run a MapReduce program in Hadoop. It takes a text file as input, where each line is a JSON string. I am using json-simple to parse this data in my mapper, and the reducer does some further processing. I have placed the json-simple jar file in the hadoop/lib folder.
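Each input line is a JSON object along the following lines (the field names m0, m1 and m3 are taken from the mapper code below; the values here are made up):

{"m0": "some-key", "m1": "value-one", "m3": "value-three"}

Here is the code: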

package org.myorg;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ALoc
{
    // TextInputFormat delivers a LongWritable byte offset as the key, so the
    // mapper's input key type must be LongWritable (not Text); otherwise the
    // map() below is an overload rather than an override and the default
    // identity mapper runs instead.
    public static class AMapper extends Mapper<LongWritable, Text, Text, Text>
    {
        private Text kword = new Text();
        private Text vword = new Text();
        private JSONParser parser = new JSONParser();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException
        {
            try {
                // Each input line is one JSON object; emit m0 as the key
                // and "m1,m3" as the value.
                JSONObject jsonObject = (JSONObject) parser.parse(value.toString());
                String val = (String) jsonObject.get("m1") + "," + (String) jsonObject.get("m3");
                kword.set((String) jsonObject.get("m0"));
                vword.set(val);
                context.write(kword, vword);
            }
            catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }

    public static class CountryReducer
        extends Reducer<Text, Text, Text, Text>
    {
        private Text result = new Text();

        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException
        {
            // Count the occurrences of each distinct value for this key.
            // Hadoop reuses the same Text instance across the iteration,
            // so copy it before using it as a HashMap key.
            HashMap<Text, Integer> hm = new HashMap<Text, Integer>();
            for (Text val : values)
            {
                Text copy = new Text(val);
                Integer n = hm.get(copy);
                hm.put(copy, n == null ? 1 : n + 1);
            }

            // Concatenate the counts as "|valuecount|valuecount..."
            StringBuilder agr = new StringBuilder();
            for (Map.Entry<Text, Integer> me : hm.entrySet()) {
                agr.append("|").append(me.getKey()).append(me.getValue());
            }
            result.set(agr.toString());
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "ALoc");
        job.setJarByClass(ALoc.class);
        job.setMapperClass(AMapper.class);
        job.setReducerClass(CountryReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
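Side note: the -D mapred.reduce.tasks=16 flag in the run command below is silently ignored, which is what the "Use GenericOptionsParser" warning in the log is about: generic options are only picked up when the driver runs its arguments through GenericOptionsParser, typically via the Tool/ToolRunner pattern. A minimal sketch of that pattern (ALocDriver is a hypothetical name, reusing the mapper and reducer above):

package org.myorg;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ALocDriver extends Configured implements Tool
{
    @Override
    public int run(String[] args) throws Exception
    {
        // getConf() already carries any -D key=value generic options;
        // ToolRunner strips them out before passing the remaining args here.
        Job job = new Job(getConf(), "ALoc");
        job.setJarByClass(ALocDriver.class);
        job.setMapperClass(ALoc.AMapper.class);
        job.setReducerClass(ALoc.CountryReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception
    {
        System.exit(ToolRunner.run(new Configuration(), new ALocDriver(), args));
    }
}

With this driver the generic options go before the positional arguments, e.g. bin/hadoop jar ALoc.jar org.myorg.ALocDriver -D mapred.reduce.tasks=16 /user/hadoop/adata /user/hadoop/adata-op5.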

When I try to run the job, it fails with the error below. I am running this on a single-node AWS micro instance, following this tutorial: http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-single-node-cluster/

[email protected]:/$ bin/hadoop jar ALoc.jar org.myorg.ALoc /user/hadoop/adata /user/hadoop/adata-op5 -D mapred.reduce.tasks=16 
13/02/12 08:39:50 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same. 
13/02/12 08:39:50 INFO input.FileInputFormat: Total input paths to process : 1 
13/02/12 08:39:50 INFO util.NativeCodeLoader: Loaded the native-hadoop library 
13/02/12 08:39:50 WARN snappy.LoadSnappy: Snappy native library not loaded 
13/02/12 08:39:51 INFO mapred.JobClient: Running job: job_201302120714_0006 
13/02/12 08:39:52 INFO mapred.JobClient: map 0% reduce 0% 
13/02/12 08:40:10 INFO mapred.JobClient: Task Id : attempt_201302120714_0006_m_000000_0, Status : FAILED 
java.lang.RuntimeException: Error while running command to get file permissions : java.io.IOException: Cannot run program "/bin/ls": java.io.IOException: error=12, Cannot allocate memory 
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:475) 
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:200) 
    at org.apache.hadoop.util.Shell.run(Shell.java:182) 
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:375) 
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:461) 
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:444) 
    at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:710) 
    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:443) 
    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getOwner(RawLocalFileSystem.java:426) 
    at org.apache.hadoop.mapred.TaskLog.obtainLogDirOwner(TaskLog.java:267) 
    at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:124) 
    at org.apache.hadoop.mapred.Child$4.run(Child.java:260) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:416) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121) 
    at org.apache.hadoop.mapred.Child.main(Child.java:249) 
Caused by: java.io.IOException: java.io.IOException: error=12, Cannot allocate memory 
    at java.lang.UNIXProcess.<init>(UNIXProcess.java:164) 
    at java.lang.ProcessImpl.start(ProcessImpl.java:81) 
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:468) 
    ... 15 more 

    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:468) 
    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getOwner(RawLocalFileSystem.java:426) 
    at org.apache.hadoop.mapred.TaskLog.obtainLogDirOwner(TaskLog.java:267) 
    at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:124) 
    at org.apache.hadoop.mapred.Child$4.run(Child.java:260) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:416) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121) 
    at org.apache.hadoop.mapred.Child.main(Child.java:249) 

Answers


I guess you must be trying Hadoop on a micro instance, which has very little memory (~700 MB).

Try increasing the HADOOP_HEAPSIZE parameter (in hadoop/conf/hadoop-env.sh), as the underlying cause is that there is not enough memory to fork processes.
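For reference, that setting lives in conf/hadoop-env.sh and is given in MB; a sketch of the edit (the stock file documents a default of 1000, and the value below is only a placeholder, so pick one that fits your instance):

# hadoop/conf/hadoop-env.sh
# The maximum amount of heap to use, in MB. Default is 1000.
export HADOOP_HEAPSIZE=2000

On an instance this small, giving the machine some swap space is another commonly used workaround for fork failures with error=12.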
