2014-09-24 56 views
0

我是 Hadoop MapReduce 的新手。我想在 MapReduce 中實現搜索功能，因此需要讀取參數。我的輸入文件格式如下：

key1 value1,value3 
key2 value2,value6 

我想根據用戶通過命令行參數傳入的值列表來查找對應的鍵。爲此，我的主（驅動）類如下：

public static void main(String[] args) {
    // Driver for the search job (old org.apache.hadoop.mapred API):
    // configures the job and submits it synchronously via JobClient.runJob.
    JobClient client = new JobClient();
    JobConf conf = new JobConf(NameSearchJava.class);

    // Hard-coded search key for now; later this can come from args[2].
    // BUG FIX: the key name must match the one read in Mapper.configure().
    // The original set "searcKey" (typo) while configure() read "test",
    // so the mapper always saw null. Use "searchKey" consistently.
    conf.set("searchKey", "Joy");
    conf.setJobName("Search");

    // Both map and reduce emit Text keys/values in this job.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // args[0] = input dir, args[1] = output dir.
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    conf.setMapperClass(SearchMapper.class);
    conf.setReducerClass(SearchReducer.class);
    client.setConf(conf);

    try {
        // Blocks until the job completes.
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
} 

and my configure function is: 

     String item ; 
     public void configure(JobConf job) { 
      { 
      item = job.get("test"); 
      System.out.println(item); 
      System.err.println("search" + item); 
      } 

這個 configure 函數應該寫在 Mapper 裏還是 Reducer 裏？我如何在 Reducer 中用這個 item 參數做比較？這是在 Hadoop 中使用參數的正確方法嗎？

+0

你想在unix中做類似grep的事嗎? – user3484461 2014-09-24 06:27:08

+0

你想使用mapreduce在你的文件中找到一個字符串嗎? – 2014-09-24 06:47:00

+0

相似問題:http://stackoverflow.com/questions/25962454/mapreduce-old-api-passing-command-line-argument-to-map/25968934#25968934 – 2014-09-24 06:49:07

回答

1

閱讀命令行參數在Driver類如下 -

conf.set("searchKey", args[2]); 

其中 args[2] 就是作爲第三個命令行參數傳入的搜索關鍵字。

// Cached copy of the search key; populated once per task in configure().
String searchWord; 

    // Called by the framework once before any map() calls; reads the key
    // that the driver stored via conf.set("searchKey", args[2]) so map()
    // does not have to re-read the configuration for every record.
    public void configure(JobConf jc) 
    { 
     searchWord = jc.get("searchKey"); 
    } 

這樣你就可以在 map 函數中使用該搜索關鍵字了——

上面的 configure 方法應該寫在 Mapper 中。

比較可以在 Mapper 本身中按如下邏輯執行——

public void map(LongWritable key, Text value, 
      OutputCollector<Text, IntWritable> out, Reporter reporter) 
      throws IOException 
    { 
     // Split the record on single spaces and emit (token, 1) for every
     // token matching the configured search word (case-insensitive).
     final String[] tokens = value.toString().split(" ");

     for (int i = 0; i < tokens.length; i++) 
     { 
      String candidate = tokens[i];
      if (!candidate.equalsIgnoreCase(searchWord)) 
      { 
       continue;   // not the word we are looking for
      } 
      out.collect(new Text(candidate), new IntWritable(1)); 
     } 
    } 

讓我知道,如果這有助於!

+0

是的......好@Hadooper – 2014-09-24 06:56:48

+0

非常感謝@ hadooper ...它的工作.. :) – user2895589 2014-09-24 17:04:42

1

加上Hadooper的答案。

這是完整的代碼。

你可以參考Hadooper的答案來解釋。

import java.io.IOException; 
import java.util.StringTokenizer; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

/** 
* @author Unmesha sreeveni 
* @Date 23 sep 2014 
*/ 
/**
 * MapReduce job (new mapreduce API) that counts occurrences of a single
 * search word in the input. Usage: {@code <input dir> <output dir> <search word>}.
 */
public class StringSearchDriver extends Configured implements Tool {

    /** Mapper: emits (token, 1) for every whitespace token equal to the search word. */
    public static class Map extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            String searchString = conf.get("word");
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                // Exact, case-sensitive match against the configured word.
                if (token.equals(searchString)) {
                    word.set(token);
                    context.write(word, one);
                }
            }
        }
    }

    /** Reducer: sums the per-token counts produced by the mapper. */
    public static class Reduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic options (-D, -files, ...) into the
        // Configuration and hands it to run() via getConf().
        int res = ToolRunner.run(new Configuration(), new StringSearchDriver(), args);
        System.exit(res);
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code [0]} input dir, {@code [1]} output dir, {@code [2]} search word
     * @return 0 on success, non-zero on failure or bad usage
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 3) {
            System.err.printf(
                    "Usage: StringSearchDriver <input dir> <output dir> <search word>%n");
            // Return instead of System.exit so ToolRunner can report the code.
            return -1;
        }

        String source = args[0];
        String dest = args[1];
        String searchWord = args[2];

        // BUG FIX: use the Configuration supplied by ToolRunner instead of
        // creating a fresh one — the original discarded all generic -D
        // options, defeating the purpose of implementing Tool.
        Configuration conf = getConf();
        conf.set("word", searchWord);

        Job job = new Job(conf, "Search String");
        job.setJarByClass(StringSearchDriver.class);

        Path in = new Path(source);
        Path out = new Path(dest);

        // Delete any previous output so the job does not fail on rerun.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}
+0

嗨Sreeveni ..你的解決方案工作得很好,但現在寫我使用舊的API,所以我接受了Hadooper的解決方案。但你給了我很好的新API的例子..我會嘗試從現在開始使用新的API使用你的代碼作爲示例..非常感謝你.. :) ..是否有可能在這裏接受多個答案。 – user2895589 2014-09-24 17:08:21

+0

這很好:)你只能upvote。 – 2014-09-25 04:45:29