MapReduce ： Combiner的使用(以平均数为例) 并结合in-mapper design pattern 实例

Genie13

浏览: 196837 次

最近访客更多访客>>

elashu

canofy

longzhiwuing

mmhotsky

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hadoop

in-mapper design pattern Combiner hadoop apache desgin

没有使用 Combiner 和 in-mapper design pattern


import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;



/**
 * MapReduce job that computes, for every key, the arithmetic mean of the
 * integer values found in the input.
 *
 * <p>Each input line is split on {@code ':'}; the first field is used as the
 * key and the second field is parsed as an {@code int}. This variant uses
 * neither a combiner nor in-mapper combining: every record is sent to the
 * reducer as-is.
 */
public class digitaver1 {

	/**
	 * Splits each input line on {@code ':'} and emits
	 * {@code (field 0, field 1 parsed as int)}.
	 */
	public static class mapper extends Mapper<LongWritable, Text, Text, IntWritable>{

		/**
		 * @param key     input key supplied by the input format (not used)
		 * @param value   one input line
		 * @param context sink for the emitted (key, number) record
		 * @throws ArrayIndexOutOfBoundsException if the line contains no second field
		 * @throws NumberFormatException if the second field is not a valid int
		 */
		@Override
		protected void map(LongWritable key, Text value,Context context)
				throws IOException, InterruptedException {
			String[] ss = value.toString().split(":");
			context.write(new Text(ss[0]), new IntWritable(Integer.parseInt(ss[1])));
		}

	}

	/**
	 * Averages all values received for one key and emits the mean as a
	 * {@link DoubleWritable}.
	 */
	public static class reducer extends Reducer<Text, IntWritable, Text, DoubleWritable>{

		/**
		 * @param key     the grouping key
		 * @param value   all numbers emitted for {@code key}
		 * @param context sink for the emitted (key, mean) record
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> value,
				Context context) throws IOException, InterruptedException {
			// Accumulate in long so that summing many ints cannot wrap around
			// before the division.
			long sum = 0L;
			long cnt = 0L;
			// Obtain the iterator once (for-each) instead of calling
			// value.iterator() on every step, which is only correct when the
			// Iterable keeps handing back the same iterator instance.
			for (IntWritable v : value) {
				sum += v.get();
				cnt++;
			}
			context.write(key, new DoubleWritable((double) sum / (double) cnt));
		}
	}

	/**
	 * Configures and runs the job.
	 *
	 * <p>The process exit status is 0 when the job succeeds, 1 when the job
	 * fails or cannot be set up, and 2 when the arguments are missing.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
	 */
	public static void main(String[] args) {
		if (args.length < 2) {
			System.err.println("Usage: digitaver1 <input path> <output path>");
			System.exit(2);
		}

		// Stays 1 unless the job runs to completion successfully, so that a
		// set-up exception is never reported as success.
		int exitCode = 1;
		try {
			Job job = new Job();
			job.setJarByClass(digitaver1.class);
			job.setJobName("digitaver1");

			FileInputFormat.addInputPath(job, new Path(args[0]));
			FileOutputFormat.setOutputPath(job, new Path(args[1]));

			job.setMapperClass(mapper.class);
			job.setReducerClass(reducer.class);

			job.setInputFormatClass(TextInputFormat.class);
			job.setOutputFormatClass(TextOutputFormat.class);

			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(IntWritable.class);

			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(DoubleWritable.class);

			exitCode = job.waitForCompletion(true) ? 0 : 1;
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Restore the interrupt flag before reporting the failure.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		}

		System.exit(exitCode);
	}
}


使用Combiner
	/**
	 * Splits each input line on {@code ':'} and emits the first field as the
	 * key together with a {@code pair} built from the second field (parsed as
	 * an int) and the constant 1.
	 *
	 * <p>NOTE(review): {@code pair} is declared elsewhere; presumably it is a
	 * (sum, count) Writable - confirm against its definition.
	 */
	public static class mapper extends Mapper<LongWritable, Text, Text, pair>{

		/**
		 * @param key     input key supplied by the input format (not used)
		 * @param value   one input line
		 * @param context sink for the emitted (key, pair) record
		 */
		@Override
		protected void map(LongWritable key, Text value,Context context)
				throws IOException, InterruptedException {
			String line = value.toString();
			String[] fields = line.split(":");

			// A single record contributes its own number and a count of one.
			int number = Integer.parseInt(fields[1]);
			pair single = new pair(number, 1);

			Text outKey = new Text(fields[0]);
			context.write(outKey, single);
		}

	}
	
	/**
	 * Merges the {@code pair} values received for one key into a single
	 * {@code pair} holding the summed left components and the summed right
	 * components. Input and output value types are both {@code pair}.
	 *
	 * <p>NOTE(review): the driver has to register this class with
	 * {@code job.setCombinerClass(...)}; that call is not shown here.
	 */
	public static class combiner extends Reducer<Text, pair, Text, pair>{

		/**
		 * @param key     the grouping key
		 * @param value   the pairs emitted for {@code key}
		 * @param context sink for the merged (key, pair) record
		 */
		@Override
		protected void reduce(Text key, Iterable<pair> value,
				Context context)
				throws IOException, InterruptedException {
			// Kept as int because pair is constructed from two ints.
			int sum = 0;
			int cnt = 0;
			// Obtain the iterator once (for-each) instead of calling
			// value.iterator() on every step, which is only correct when the
			// Iterable keeps handing back the same iterator instance.
			for (pair p : value) {
				sum += p.getLeft().get();
				cnt += p.getRight().get();
			}
			context.write(key, new pair(sum,cnt));
		}

	}
	
	/**
	 * Adds up the left and right components of every {@code pair} received
	 * for one key and emits (total left) / (total right) as a
	 * {@link DoubleWritable}.
	 */
	public static class reducer extends Reducer<Text, pair, Text, DoubleWritable>{

		/**
		 * @param key     the grouping key
		 * @param value   the pairs emitted for {@code key}
		 * @param context sink for the emitted (key, quotient) record
		 */
		@Override
		protected void reduce(Text key, Iterable<pair> value,
				Context context) throws IOException, InterruptedException {
			// Accumulate in long so that adding many int partial sums cannot
			// wrap around before the division.
			long sum = 0L;
			long cnt = 0L;
			// Obtain the iterator once (for-each) instead of calling
			// value.iterator() on every step, which is only correct when the
			// Iterable keeps handing back the same iterator instance.
			for (pair p : value) {
				sum += p.getLeft().get();
				cnt += p.getRight().get();
			}
			context.write(key, new DoubleWritable((double) sum / (double) cnt));
		}
	}

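main 函数基本一样，但有两处需要修改：map 输出的 value 类型要改为 job.setMapOutputValueClass(pair.class)，并且要通过 job.setCombinerClass(combiner.class) 注册 Combiner（pair 为自定义的 Writable 类型，本文未给出其代码）。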


使用in-mapper design pattern
	/**
	 * Mapper that aggregates inside the map task: instead of emitting one
	 * record per input line it keeps a running (sum, count) per key in memory
	 * and emits one {@code pair} per distinct key from {@link #cleanup}.
	 *
	 * <p>Every distinct key seen by this task stays in memory until
	 * {@code cleanup} runs.
	 */
	public static class mapper extends Mapper<LongWritable, Text, Text, pair>{

		/** Per-key accumulator: index 0 is the running sum, index 1 the record count. */
		private Map<String, int[]> totals;

		/** Creates the empty accumulator table before any record is processed. */
		@Override
		protected void setup(Context context) throws IOException,
				InterruptedException {
			totals = new HashMap<String, int[]>();
		}

		// Results are handed to the reducer/combiner only after all of this
		// task's input has been processed.
		// Open questions from the author: normally map output is passed on
		// while the task is still running - does delaying it hurt efficiency?
		// There is less data, but it also starts moving later; is the
		// blocking delay smaller? Load balancing? Less total network traffic?
		/**
		 * Splits the line on {@code ':'} and folds the parsed number into the
		 * accumulator of its key. Nothing is written to {@code context} here.
		 *
		 * @param key     input key supplied by the input format (not used)
		 * @param value   one input line
		 * @param context not used by this method
		 * @throws NumberFormatException if the second field is not a valid int;
		 *         raised for the offending record itself
		 */
		@Override
		protected void map(LongWritable key, Text value,Context context)
				throws IOException, InterruptedException {
			String[] ss = value.toString().split(":");
			int number = Integer.parseInt(ss[1]);

			int[] acc = totals.get(ss[0]);
			if (acc == null) {
				// First record for this key: sum is the number itself, count is 1.
				totals.put(ss[0], new int[] { number, 1 });
			} else {
				acc[0] += number;
				acc[1] += 1;
			}
		}

		/** Emits one (key, pair(sum, count)) record per accumulated key. */
		@Override
		protected void cleanup(Context context) throws IOException,
				InterruptedException {
			for (Map.Entry<String, int[]> e : totals.entrySet()) {
				int[] acc = e.getValue();
				context.write(new Text(e.getKey()), new pair(acc[0], acc[1]));
			}
		}
	}
	
	/**
	 * Adds up the left and right components of every {@code pair} received
	 * for one key and emits (total left) / (total right) as a
	 * {@link DoubleWritable}.
	 */
	public static class reducer extends Reducer<Text, pair, Text, DoubleWritable>{

		/**
		 * @param key     the grouping key
		 * @param value   the pairs emitted for {@code key}
		 * @param context sink for the emitted (key, quotient) record
		 */
		@Override
		protected void reduce(Text key, Iterable<pair> value,
				Context context) throws IOException, InterruptedException {
			// Accumulate in long so that adding many int partial sums cannot
			// wrap around before the division.
			long sum = 0L;
			long cnt = 0L;
			// Obtain the iterator once (for-each) instead of calling
			// value.iterator() on every step, which is only correct when the
			// Iterable keeps handing back the same iterator instance.
			for (pair p : value) {
				sum += p.getLeft().get();
				cnt += p.getRight().get();
			}
			context.write(key, new DoubleWritable((double) sum / (double) cnt));
		}
	}

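in-mapper design pattern：在 mapper 内部用内存中的 Map 对单个 map 任务的输出进行聚集，并在 cleanup 中一次性输出；聚集一定会执行，但所有 key 都要保存在内存中，需要注意内存占用。

Combiner：由框架对单个 map 任务的输出进行本地聚集（并不是跨所有 mapper 的聚集）；框架不保证 Combiner 一定会被调用，也不保证调用的次数，因此 Combiner 的输入和输出类型必须一致，并且不能依赖它来保证结果的正确性。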

0
顶

0
踩

分享到：

MapReduce : 新版API 自定义InputFormat 把 ... | Hadoop in-mapper combining 实例

2012-04-10 18:51
浏览 4348
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

MapReduce ： Combiner的使用(以平均数为例) 并结合in-mapper design pattern 实例

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

MapReduce ： Combiner的使用(以平均数为例) 并结合in-mapper design pattern 实例

评论

发表评论

相关推荐

多表join的一个优化思路

好的网站

Hadoop 任务流程

Hadoop关于最大map reducer数目

java.io.IOException:Typemismatch in key from map:expected org.apache.hadoop.io

HDFS 输入文件避免切分

Hadoop 开启debug信息

Hadoop 关于0.95/1.75 * （number of nodes）误解

MapReduce ReadingList

"hadoop fs 和hadoop dfs的区别"

Hadoop 自动清除日志

DistributedCache FileNotFoundException

Cygwin 不支持native lib 不支持使用native lib 提供的压缩

Hadoop 在Window下搭建 守护进程启动问题

Cygwin ssh Connection closed by ::1

Eclipse：Run on Hadoop 没有反应

Hadoop SequcenceFile 处理多个小文件

Hadoop 自定义计数器

MapReduce : 新版API 自定义InputFormat 把整个文件作为一条记录处理

Hadoop NameNode backup

最近访客更多访客>>

Hadoop 在Window下搭建守护进程启动问题