MapReduce（2）

mingyun

浏览: 216102 次
性别:
来自: 哈尔滨

最近访客更多访客>>

lijun0349

haiyupeter

huangguangdong

cutecoot

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hadoop

hadoop

一、去除重复的内容

源文件：
192.168.234.21
192.168.234.22
192.168.234.21
192.168.234.21
192.168.234.23
192.168.234.21
192.168.234.21
192.168.234.21
192.168.234.25
192.168.234.21
192.168.234.21
192.168.234.26
192.168.234.21

1.在 HDFS 上新建目录 /distinct，并将源文件（distinct.txt）上传到该目录

2.Mapper.java

/**
 * Map side of the de-duplication job.
 *
 * <p>Each input record is written out unchanged as the output key, paired with
 * the {@link NullWritable} singleton as the value, because only the key carries
 * information in this job.
 */
public class DistinctMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

	/**
	 * Emits the incoming record as the output key with an empty value.
	 *
	 * @param key     input key supplied by the framework; not used
	 * @param value   the input record, forwarded as the output key
	 * @param context receives the (record, NullWritable) pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the write is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		final NullWritable empty = NullWritable.get();
		context.write(value, empty);
	}
}

因为无需统计任何内容，所以 value 传空即可，故可使用 NullWritable

3.Reducer.java

/**
 * Reduce side of the de-duplication job.
 *
 * <p>Writes each key it is called with exactly once and ignores the grouped
 * values, which are all {@link NullWritable}.
 */
public class DistinctReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

	/**
	 * Emits the key once with an empty value.
	 *
	 * @param key     the key being reduced, written out unchanged
	 * @param value   the values grouped under {@code key}; never iterated
	 * @param context receives the (key, NullWritable) pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the write is interrupted
	 */
	@Override
	protected void reduce(Text key, Iterable<NullWritable> value, Context context)
			throws IOException, InterruptedException {
		final NullWritable empty = NullWritable.get();
		context.write(key, empty);
	}
}

4.Driver

/**
 * Entry point that configures and submits the de-duplication job
 * ({@link DistinctMapper} + {@link DistinctReducer}).
 */
public class DistinctDriver {

	/**
	 * Builds the job, runs it to completion and exits with a status reflecting
	 * the job outcome.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if the job cannot be configured, submitted or monitored
	 */
	public static void main(String[] args) throws Exception {

		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		// Jar lookup class plus the map and reduce implementations.
		job.setJarByClass(DistinctDriver.class);
		job.setMapperClass(DistinctMapper.class);
		job.setReducerClass(DistinctReducer.class);

		// Key/value types emitted by the mapper.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(NullWritable.class);

		// Key/value types of the final job output.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(NullWritable.class);

		FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.76.131:9000/distinct/distinct.txt"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.76.131:9000/distinct/result"));

		// waitForCompletion returns false when the job fails; propagate that as a
		// non-zero exit status so scripts invoking this driver can detect failure.
		boolean succeeded = job.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);
	}

}

二、面向对象编程

源文件：
手机号码 地区 姓名 流量使用
13877779999 bj zs 2145
13766668888 sh ls 1028
13766668888 sh ls 9987
13877779999 bj zs 5678
13544445555 sz ww 10577
13877779999 sh zs 2145
13766668888 sh ls 9987

1.面向对象编程，建立对象

package com.study.flow.day01;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * One traffic record: phone number, region, subscriber name and the amount of
 * traffic used. Implements {@link Writable} so it can be passed between the map
 * and reduce phases.
 *
 * <p>Serialization is null-safe: an unset string field is written as the empty
 * string and an unset {@code flow} as {@code 0}, so a partially populated bean
 * no longer fails with a {@link NullPointerException} in {@link #write}.
 */
public class FlowBean implements Writable{

	private String phone ;
	private String addr ;
	private String name ;
	private Integer flow ;

	/** @return the phone number, or {@code null} if not set */
	public String getPhone() {
		return phone;
	}

	/** @param phone the phone number */
	public void setPhone(String phone) {
		this.phone = phone;
	}

	/** @return the region code, or {@code null} if not set */
	public String getAddr() {
		return addr;
	}

	/** @param addr the region code */
	public void setAddr(String addr) {
		this.addr = addr;
	}

	/** @return the subscriber name, or {@code null} if not set */
	public String getName() {
		return name;
	}

	/** @param name the subscriber name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the traffic amount, or {@code null} if not set */
	public Integer getFlow() {
		return flow;
	}

	/** @param flow the traffic amount */
	public void setFlow(Integer flow) {
		this.flow = flow;
	}

	/**
	 * Deserializes the bean. Fields are read in exactly the order in which
	 * {@link #write(DataOutput)} emits them: phone, addr, name, flow.
	 *
	 * @param input source of the serialized fields
	 * @throws IOException if reading from {@code input} fails
	 */
	@Override
	public void readFields(DataInput input) throws IOException {
		this.phone = input.readUTF();
		this.addr = input.readUTF();
		this.name = input.readUTF();
		this.flow = input.readInt();
	}

	/**
	 * Serializes the bean as phone, addr, name (UTF strings) followed by flow
	 * (int). Unset fields are replaced by {@code ""} / {@code 0} because
	 * {@code writeUTF(null)} and unboxing a null {@code Integer} would throw.
	 *
	 * @param output sink for the serialized fields
	 * @throws IOException if writing to {@code output} fails
	 */
	@Override
	public void write(DataOutput output) throws IOException {
		output.writeUTF(orEmpty(phone));
		output.writeUTF(orEmpty(addr));
		output.writeUTF(orEmpty(name));
		output.writeInt(flow == null ? 0 : flow);
	}

	/** Returns {@code text}, or the empty string when {@code text} is null. */
	private static String orEmpty(String text) {
		return text == null ? "" : text;
	}

	@Override
	public String toString() {
		return "FlowBean [phone=" + phone + ", addr=" + addr + ", name=" + name + ", flow=" + flow + "]";
	}

}

2.Mapper

/**
 * Parses one whitespace-separated traffic record per input line
 * ({@code phone addr name flow}) into a {@link FlowBean} and emits it keyed by
 * the subscriber name.
 *
 * <p>Lines that cannot be parsed (blank, fewer than four fields, or a
 * non-numeric flow column) are skipped and counted instead of failing the task.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

	/** Number of columns a record must have: phone, addr, name, flow. */
	private static final int FIELD_COUNT = 4;

	/** Counter group and name under which skipped lines are reported. */
	private static final String COUNTER_GROUP = "FlowMapper";
	private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

	/**
	 * Converts one input line into a (name, FlowBean) pair.
	 *
	 * @param key     input key supplied by the framework; not used
	 * @param value   one line of the input file
	 * @param context receives the (name, bean) pair and the malformed-line counter
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the write is interrupted
	 */
	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context)
			throws IOException, InterruptedException {
		// Trim and split on runs of whitespace so repeated spaces or tabs do not
		// produce empty columns.
		String line = value.toString().trim();
		String [] datas = line.split("\\s+");
		if (datas.length < FIELD_COUNT) {
			// Blank line, header line or truncated record.
			context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
			return;
		}

		int flow;
		try {
			flow = Integer.parseInt(datas[3]);
		} catch (NumberFormatException e) {
			// Flow column is not an integer; skip the record rather than kill the task.
			context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
			return;
		}

		FlowBean bean = new FlowBean();
		bean.setPhone(datas[0]);
		bean.setAddr(datas[1]);
		bean.setName(datas[2]);
		bean.setFlow(flow);

		context.write(new Text(bean.getName()), bean);
	}
}

3.Reducer


/**
 * Sums the traffic of all {@link FlowBean}s grouped under one key and emits a
 * single bean carrying that total.
 */
public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

	/**
	 * Aggregates the flow of every bean in {@code values}.
	 *
	 * <p>The phone, addr and name of the emitted bean are taken from the last
	 * bean iterated; when records under the same key differ in those fields,
	 * only the last one seen is kept.
	 *
	 * @param key     the grouping key, written out unchanged
	 * @param values  the beans grouped under {@code key}
	 * @param context receives the (key, aggregated bean) pair
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if the write is interrupted
	 * @throws ArithmeticException  if the total does not fit in an {@code int}
	 */
	@Override
	protected void reduce(Text key, Iterable<FlowBean> values, Reducer<Text, FlowBean, Text, FlowBean>.Context context)
			throws IOException, InterruptedException {

		// Primitive accumulator: avoids re-boxing an Integer on every iteration.
		int sum = 0;
		FlowBean bean = new FlowBean();
		for (FlowBean flowBean : values) {
			// addExact fails loudly instead of silently wrapping on overflow.
			sum = Math.addExact(sum, flowBean.getFlow());
			bean.setPhone(flowBean.getPhone());
			bean.setAddr(flowBean.getAddr());
			bean.setName(flowBean.getName());
		}
		bean.setFlow(sum);

		context.write(key, bean);
	}
}

4.Driver

/**
 * Entry point that configures and submits the traffic-aggregation job
 * ({@link FlowMapper} + {@link FlowReducer}).
 */
public class FlowDriver {

	/**
	 * Builds the job, runs it to completion and exits with a status reflecting
	 * the job outcome.
	 *
	 * @param args command-line arguments; not used
	 * @throws IllegalArgumentException if a configured path is invalid
	 * @throws IOException              if the job cannot be created or submitted
	 * @throws ClassNotFoundException   if a configured job class cannot be loaded
	 * @throws InterruptedException     if waiting for the job is interrupted
	 */
	public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {

		// Job configuration
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		// Jar lookup class, Mapper and Reducer
		job.setJarByClass(FlowDriver.class);
		job.setMapperClass(FlowMapper.class);
		job.setReducerClass(FlowReducer.class);
		// Key/value types emitted by the Mapper
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(FlowBean.class);
		// Key/value types emitted by the Reducer
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FlowBean.class);
		// Input and output locations
		FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.76.131:9000/flow"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.76.131:9000/flow/result"));

		// waitForCompletion returns false when the job fails; propagate that as a
		// non-zero exit status so scripts invoking this driver can detect failure.
		boolean succeeded = job.waitForCompletion(true);
		System.exit(succeeded ? 0 : 1);

	}

}

分享到：

MapReduce（3） | MapReduce

2017-06-04 18:46
浏览 554
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

MapReduce（2）

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

MapReduce（2）

评论

发表评论

相关推荐

hadoop

Hadoop2.0 HA 集群搭建步骤

MapReduce（4）

MapReduce（3）

MapReduce

Hadoop插件

HDFS命令

搭建Hadoop

最近访客更多访客>>