The Shuffle process in MapReduce:
For the underlying principle, see:
http://weixiaolu.iteye.com/blog/1474172
Data types:
1. All of these data types implement the Writable interface, so that data declared with them can be serialized for network transfer and file storage.
2. Basic data types:
BooleanWritable、ByteWritable、DoubleWritable
FloatWritable、IntWritable、LongWritable
Text: stores text in UTF-8 format
NullWritable: used when the key or the value in a <key, value> pair is empty
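Because every Writable serializes itself through write(DataOutput) and readFields(DataInput), a quick round trip shows the raw bytes Hadoop actually ships over the network. A minimal sketch (the WritableDemo class name is just for illustration, not part of any Hadoop API):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class WritableDemo {
    public static void main(String[] args) throws Exception {
        // Serialize a Text/IntWritable pair into raw bytes.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        new Text("hadoop").write(out);
        new IntWritable(42).write(out);

        // Deserialize the same bytes back into fresh Writable objects.
        DataInputStream in =
                new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
        Text key = new Text();
        IntWritable value = new IntWritable();
        key.readFields(in);
        value.readFields(in);
        System.out.println(key + " -> " + value.get()); // prints: hadoop -> 42
    }
}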
MapReduce WordCount (the MapReduce programming model is consistent with the program below):
package com.fb.hadoop.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {

    // Map class: emits <word, 1> for every token in the input line.
    public static class Mapper
            extends org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, IntWritable> {

        private Text mapOutputKey = new Text();
        private final static IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String lineValue = value.toString();
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            while (stringTokenizer.hasMoreTokens()) {
                String wordValue = stringTokenizer.nextToken();
                // set value
                mapOutputKey.set(wordValue);
                // output
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }

    // Reduce class: sums the counts for each word.
    public static class Reducer
            extends org.apache.hadoop.mapreduce.Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            // iterate over all counts for this key
            for (IntWritable value : values) {
                sum += value.get();
            }
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    // Driver
    public int run(String[] args) throws Exception {
        // get configuration
        Configuration conf = getConf();
        // create Job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        // run jar
        job.setJarByClass(this.getClass());

        // set Job: input -> map -> reduce -> output
        // 1. input
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);

        // 2. map
        job.setMapperClass(Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 3. shuffle
        /******************shuffle****************************
        // 1. partitioner
        job.setPartitionerClass(cls);
        // 2. sort
        job.setSortComparatorClass(cls);
        // 3. optional: combiner
        job.setCombinerClass(cls);
        // 4. group
        job.setGroupingComparatorClass(cls);
        ******************shuffle****************************/

        // 4. reduce
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // set reduce number
        // job.setNumReduceTasks(10);

        // 5. output
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);

        // submit job
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int status = ToolRunner.run(conf, new WordCount(), args);
        System.exit(status);
    }
}
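The commented-out shuffle block above marks where custom partitioning, sorting, combining, and grouping classes would be plugged in. As one illustration of the partitioner hook, a hypothetical FirstLetterPartitioner (not part of Hadoop, only a sketch) could route words starting with a-m to reducer 0 and all other words to reducer 1:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical partitioner for illustration: splits words by first letter.
public class FirstLetterPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        // With a single reducer (or an empty key) everything goes to partition 0.
        if (numPartitions < 2 || key.getLength() == 0) {
            return 0;
        }
        char first = Character.toLowerCase(key.toString().charAt(0));
        // Words beginning with a-m go to reducer 0, the rest to reducer 1.
        return (first >= 'a' && first <= 'm') ? 0 : 1;
    }
}

It would be wired in with job.setPartitionerClass(FirstLetterPartitioner.class) together with job.setNumReduceTasks(2). For WordCount specifically, the Reducer above can also serve as a combiner via job.setCombinerClass(Reducer.class), since summing partial counts is associative. Once packaged, the job is typically launched with something like (paths are examples):

hadoop jar wordcount.jar com.fb.hadoop.mapreduce.WordCount /input /output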