2.x MapReduce的测试类

username2

浏览: 746788 次
性别:
来自: 黑龙江

最近访客 更多访客>>

dsh_oliver

杭州007

loginboot

xmmdream

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hadoop学习笔记

1 wordcount

2 倒排序

3 自定义分区（不同规则输出到不同的文件）

4 自定义文件输出

5 统计文件流

1 自定义输出类

package com.wzt.mapreduce.custom;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Output format whose record writer sends each record's key to one of two
 * fixed HDFS files, chosen by the parity of the record's value.
 *
 * <p>The type parameters are plain type variables: the writer only relies on
 * {@code toString()} of the key and the value.
 *
 * @author root
 *
 * @param <K> type of the output key; its {@code toString()} form is what gets written
 * @param <V> type of the output value; its {@code toString()} form must parse as a {@code long}
 */
public class MyCustomOutputFormat<K, V> extends FileOutputFormat<K, V>{

	/**
	 * Creates the two target files and returns a writer bound to them.
	 *
	 * <p>NOTE(review): the paths are hard-coded, so every task that calls this
	 * method creates the same two files — confirm only one task writes output.
	 *
	 * @param job the task attempt whose configuration is used to obtain the file system
	 * @return a writer that owns both opened streams
	 * @throws IOException if the file system cannot be obtained or a file cannot be created
	 */
	@Override
	public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job)
			throws IOException, InterruptedException {

		Configuration conf = job.getConfiguration();
		FileSystem hdfs = FileSystem.get(conf);
		FSDataOutputStream os1 = hdfs.create(new Path("/wc/output1/file1.log"));

		// If the second file cannot be created, the first stream must not be leaked.
		boolean handedOver = false;
		try {
			FSDataOutputStream os2 = hdfs.create(new Path("/wc/output2/file2.log"));
			handedOver = true;
			return new MyRecordWriter<K, V>(os1, os2);
		} finally {
			if (!handedOver) {
				os1.close();
			}
		}
	}

	/**
	 * Writer that appends keys to one of two streams depending on whether the
	 * numeric form of the value is even or odd.
	 *
	 * @param <K> key type; written via {@code toString()}
	 * @param <V> value type; parsed via {@code Long.parseLong(toString())}
	 */
	public static class MyRecordWriter<K, V> extends RecordWriter<K, V>{
		/** Receives keys whose value is even. */
		FSDataOutputStream os1 =  null ;
		/** Receives keys whose value is odd. */
		FSDataOutputStream os2  = null;

		/**
		 * @param os1 stream for records with an even value
		 * @param os2 stream for records with an odd value
		 */
		public MyRecordWriter(FSDataOutputStream os1, FSDataOutputStream os2) {
			this.os1 = os1;
			this.os2 = os2;
		}

		/**
		 * Writes the key's text to the stream selected by the value's parity.
		 *
		 * <p>NOTE(review): keys are written back-to-back with no delimiter —
		 * confirm whether a record separator is intended.
		 *
		 * @param key    record key; its string form is written as UTF-8
		 * @param value2 record value; its string form must be a decimal {@code long}
		 * @throws IOException           if the underlying stream fails
		 * @throws NumberFormatException if the value's string form is not a {@code long}
		 */
		@Override
		public void write(K key, V value2) throws IOException,
				InterruptedException {

			long hang = Long.parseLong(value2.toString());

			// writeBytes(String) keeps only the low byte of each char and would
			// corrupt non-ASCII keys, so encode explicitly instead.
			byte[] encoded = key.toString().getBytes("UTF-8");

			FSDataOutputStream target = (hang % 2 == 0) ? os1 : os2;
			target.write(encoded);
		}

		/**
		 * Closes both streams; the second is closed even if closing the first fails.
		 *
		 * @param context the task attempt being finished (unused)
		 * @throws IOException if closing either stream fails
		 */
		@Override
		public void close(TaskAttemptContext context) throws IOException,
				InterruptedException {
			try {
				if (os1 != null) {
					os1.close();
				}
			} finally {
				if (os2 != null) {
					os2.close();
				}
			}
		}
	}
}

2 Mapper 数据整理类

package com.wzt.mapreduce.custom;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that breaks every input line into space-separated tokens and emits
 * each token paired with the key the line was delivered under.
 */
public class CIOMapper extends Mapper<LongWritable, Text , Text, LongWritable >{

	/**
	 * Emits one {@code (token, key)} pair per space-separated token of the line.
	 *
	 * @param key     key of the incoming record, passed through unchanged as the
	 *                output value (presumably the line's offset — confirm against
	 *                the job's input format)
	 * @param value   the line of text to tokenize
	 * @param context sink for the emitted pairs
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {

		final String[] tokens = StringUtils.split(value.toString(), " ");
		for (int i = 0; i < tokens.length; i++) {
			Text outKey = new Text(tokens[i]);
			context.write(outKey, key);
		}
	}
}

3 运行的主类（Map中数据直接输出所以没有使用到reducer）

package com.wzt.mapreduce.custom;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 
/**
 * Driver that configures and submits the job: {@link CIOMapper} as the mapper
 * and {@link MyCustomOutputFormat} as the output format.
 */
public class CIORunner {

	/**
	 * Builds the job, runs it to completion and exits with status 1 if it fails.
	 *
	 * @param args command-line arguments (unused; all paths are hard-coded)
	 * @throws IOException            if the job cannot be created or submitted
	 * @throws ClassNotFoundException if a job class cannot be resolved
	 * @throws InterruptedException   if waiting for the job is interrupted
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);

		job.setJarByClass(CIORunner.class);

		job.setMapperClass(CIOMapper.class);
		// No reducer class is registered for this job.

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);

		job.setOutputFormatClass(MyCustomOutputFormat.class);

		// For a local run, point both paths at local directories instead.
		FileInputFormat.setInputPaths(job, "/wc/input/xiyou.txt");
		FileOutputFormat.setOutputPath(job, new Path("/wc/outputcount"));

		// Report job failure through the process exit status instead of
		// silently discarding the result.
		boolean succeeded = job.waitForCompletion(true);
		if (!succeeded) {
			System.exit(1);
		}
	}
}

hadoop_test1.zip (65 KB)
下载次数: 1

分享到：

MapReduce中自定义Combiner | Sqoop

2016-01-27 09:58
浏览 585
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

2.x MapReduce的测试类

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

2.x MapReduce的测试类

评论

发表评论

相关推荐

storm使用示例

Hadoop2.x动态添加或删除datanode

MapReduce2中自定义排序分组

MapReduce中自定义Combiner

Sqoop

kafka使用与安装

storm 的安装使用

Hbase 的Java API 操作

Hbase 的java API 操作

Hbase集群安装

HIVE的安装与使用

HA 下执行JAVA操作hdfs

hadoop 2.x集群安装与配置

zookeeper安装

hadoop 2.x wordcount练习

Hadoop 2.x单节点部署学习。

SequenceFile和MapFile使用

重新编译Hadoop

Hadoop 中数据的序列化与反序列化

Hadoop基于文件的数据结构

最近访客更多访客>>