hadoop0.18.3 到 0.20.2

eryk

浏览: 582092 次
性别:
来自: 北京

最近访客更多访客>>

leileishizhutou

zhangzi

fhtwins

Ansel_qiang

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

hadoop

以前用的是0.18.3，现在改用0.20.2，结果发现mapreduce的接口变了好多，而《mapreduce 权威指南》这本书上还是0.18.3的接口，这里记录一下今天下午的探索：

最大的变化是作业配置那部分，新的版本里面不再使用JobConf，而是使用了Job，这里的Job继承自JobContext，它集成了JobConf。

Job里面还是用了相同的设置inputPath, outputPath, inputFormat, outputFormat之类的，主要的不同我认为有以下几个：

1. 初始化不一样，

前者： JobConf conf = new JobConf(getConf(), WordCount.class );

后者： Job job = new Job(conf, "word count" );

2. 执行不同：

前者: JobClient.runJob(conf)

后者：job.waitForCompletion(true )

3. 最隐含的变化：

前者：setMapperClass(class<? extends MapReduceBase implements Mapper>) 和 setReducerClass(class<? extends MapReduceBase implements Reducer>)

后者：setMapperClass(class<? extends Mapper>) 和 setReducerClass(class<? extends Reducer>)

也就是说Map类和Reduce也有所变化，并且在import的时候要注意，

前者的mapper类和reduce类不仅要extends xxxbase父类，而且要implements mapper和reduce 接口，且

import org.apache.hadoop.mapred.MapReduceBase;

import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.Reducer;

后者的mapper类和reduce类只要分别extends Mapper、Reducer父类。

具体的比较程序如下：

前者出自《mapreduce 权威指南》，是旧版本的一个程序：

Mapper类:

import java.io.IOException;
import  org.apache.hadoop.io.IntWritable;
import  org.apache.hadoop.io.LongWritable;
import  org.apache.hadoop.io.Text;
import  org.apache.hadoop.mapred.MapReduceBase;
import  org.apache.hadoop.mapred.Mapper;
import  org.apache.hadoop.mapred.OutputCollector;
import  org.apache.hadoop.mapred.Reporter;
public class MaxTemperatureMapper extends MapReduceBase
implements Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING = 9999;
public void map(LongWritable key, Text value,
OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
}
}
String line = value.toString();
String year = line.substring(15, 19);
int airTemperature;
if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
airTemperature = Integer.parseInt(line.substring(88, 92));
} else {
airTemperature = Integer.parseInt(line.substring(87, 92));
}
String quality = line.substring(92, 93);
if (airTemperature != MISSING && quality.matches("[01459]")) {
output.collect(new Text(year), new IntWritable(airTemperature));
}

Reducer类：

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class MaxTemperatureReducer extends MapReduceBase
implements Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterator<IntWritable> values,
OutputCollector<Text, IntWritable> output, Reporter reporter)
throws IOException {
}
}
int maxValue = Integer.MIN_VALUE;
while (values.hasNext()) {
maxValue = Math.max(maxValue, values.next().get());
}
output.collect(key, new IntWritable(maxValue));
}

}

主类

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
public class MaxTemperature {
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Usage: MaxTemperature <input path> <output path>");
System.exit(-1);
}
JobConf conf = new JobConf(MaxTemperature.class);
conf.setJobName("Max temperature");
FileInputFormat.addInputPath(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
conf.setMapperClass(MaxTemperatureMapper.class);
conf.setReducerClass(MaxTemperatureReducer.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
}
}
JobClient.runJob(conf);

｝

我修改后的新版本程序：

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureMapper extends Mapper<LongWritable,Text,Text,IntWritable> {

    private static final int MISSING = 9999;

    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter reporter)
            throws IOException {
        // TODO Auto-generated method stub
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if(line.charAt(87)=='+'){
            airTemperature = Integer.parseInt(line.substring(88, 92));
        }else{
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if(airTemperature!=MISSING && quality.matches("[01459]")){
            output.collect(new Text(year), new IntWritable(airTemperature));
        }
    }

}

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureReducer extends Reducer<Text,IntWritable,Text,IntWritable> {

    public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text,IntWritable> output, Reporter reporter) throws IOException {

        int maxValue = Integer.MIN_VALUE;
        while (values.hasNext()) {
            maxValue = Math.max(maxValue, values.next().get());
        }
        output.collect(key, new IntWritable(maxValue));
    }
}

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * New-API (org.apache.hadoop.mapreduce) driver for the max-temperature example.
 *
 * <p>Builds a {@code Job} from a fresh {@code Configuration}, wires in the
 * mapper, reducer, output types and paths, waits for the job to finish and
 * exits with 0 on success or 1 on failure.
 */
public class MaxTemperature {

    /**
     * Entry point.
     *
     * @param argc exactly two arguments: the input path and the output path
     * @throws IOException            propagated from job setup or execution
     * @throws InterruptedException   propagated from {@code waitForCompletion}
     * @throws ClassNotFoundException propagated from {@code waitForCompletion}
     */
    public static void main(String [] argc) throws IOException, InterruptedException, ClassNotFoundException {
        final boolean wrongArgCount = argc.length != 2;
        if (wrongArgCount) {
            System.out.println("Usage: MaxTemperature <input> <output>");
            System.exit(-1);
        }

        final Job job = new Job(new Configuration(), "Max Temperature");
        job.setJarByClass(MaxTemperature.class);

        // Map and reduce implementations.
        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);

        // Types of the job's output key/value pairs.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Where to read from and where to write to.
        final Path inputPath = new Path(argc[0]);
        final Path outputPath = new Path(argc[1]);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        final boolean succeeded = job.waitForCompletion(true);
        final int exitCode;
        if (succeeded) {
            exitCode = 0;
        } else {
            exitCode = 1;
        }
        System.exit(exitCode);
    }
}

参考： http://blog.csdn.net/amuseme_lu/archive/2010/05/13/5588545.aspx

转自：http://blog.csdn.net/JiaoYanChen/archive/2010/08/16/5816573.aspx

分享到：

Changes of Hadoop 0.20笔记 | 自定义hadoop map/reduce输入文件切割Inpu ...

2011-07-01 13:10
浏览 1798
评论(0)
分类:互联网
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论