Mapper,Reducer,Wrapper的Java模板 -

zhrglchp

浏览: 115336 次
性别:
来自: 北京

最近访客更多访客>>

linxl2011

gggfff39

7jkl

happyzhaow

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

Mapper,Reducer,Wrapper的Java模板

博客分类：

hadoop

http://www.easyigloo.org/?p=1145

很多时候想要测试hadoop上的一个想法，要求快速创建并运行任务。每个任务包含了至少3个组件。

Mapper类
Reducer类
Wrapper类

下面的代码是三个空模板，使用时只需把模板中的类名替换成自己的类名，再填入具体的业务逻辑即可。

MAPPER
-----------------------------------------------------------------------------------------------------------------------------------
MAPPER
-----------------------------------------------------------------------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;

/* In case you are using Multiple outputs */
//import org.apache.hadoop.io.NullWritable;
//import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class Mapper extends Mapper {
    private Configuration conf;
    private Text outputKey = new Text();
    private Text outputValue = new Text();
    private String line = null;

    /* In case you are using Multiple outputs */
    //private NullWritable outputValue = NullWritable.get();
    //private MultipleOutputs contextMulti = null;

    @Override
    public void setup(Mapper.Context context) {
        this.conf = context.getConfiguration();

        /* In case you are using Multiple outputs */
        //contextMulti = new MultipleOutputs(context);
    }

    @Override
    public void map(LongWritable key, Text values, Context context)
            throws IOException, InterruptedException {
    }

    @Override
    public void cleanup (Mapper.Context context)throws IOException, InterruptedException {

        /* In case you are using Multiple outputs */
        //contextMulti.close();
    }
}

-----------------------------------------------------------------------------------------------------------------------------------
REDUCER
-----------------------------------------------------------------------------------------------------------------------------------
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/* In case you are using Multiple outputs */
//import org.apache.hadoop.io.NullWritable;
//import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class Reducer extends Reducer {
    private Configuration conf;
    private Text outputKey = new Text();
    private Text outputValue = new Text();
    private String line = null;

    /* In case you are using Multiple outputs */
    //private NullWritable outputValue = NullWritable.get();
    //private MultipleOutputs contextMulti = null;

    @Override
    public void setup(Reducer.Context context) {
        this.conf = context.getConfiguration();

        /* In case you are using Multiple outputs */
        //contextMulti = new MultipleOutputs(context);
    }

    @Override
    public void reduce(Text key, Iterable values, Context context)
            throws IOException, InterruptedException {
    }

    @Override
    public void cleanup(Reducer.Context context) {
        /* In case you are using Multiple outputs */
        //contextMulti.close();
    }
}
-----------------------------------------------------------------------------------------------------------------------------------
WRAPPER
这个类依赖下面两个工具类（ExtendedFileUtil 和 StringUtil），使用前请先下载并加入工程：

https://sites.google.com/site/hadoopandhive/home/ExtendedFileUtil.java?attredirects=0&d=1

https://sites.google.com/site/hadoopandhive/home/StringUtil.java?attredirects=0&d=1

-----------------------------------------------------------------------------------------------------------------------------------

import StringUtil;

import ExtendedFileUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.text.ParseException;

public class extends Configured implements Tool, Constants {
    private Configuration conf = null;
    private Job job = null;
    private String inputDirList = null;
    private String outputDir = null;
    private String[] filesToProcess = null;
    private int totalReducers = 0;
    private int jobRes = 0;
    private ExtendedFileUtil fileUtil = new ExtendedFileUtil();

    public static void main(String[] args) throws Exception {
         ob = new ();
        int jobRes = ToolRunner.run(ob, args);
    }

    public int run(String[] args)
            throws ClassNotFoundException, IOException, InterruptedException, ParseException {
        jobRes = readCmdArgs(args);
        if (jobRes == 0) {
            jobRes = readConfig();
        }
        if (jobRes == 0) {
            jobRes = runMrJob();
        }
        return jobRes;
    }

    private int readCmdArgs(String[] args) {
        if (args.length == 2) {
            inputDirList = args[0];
            outputDir = args[1];
        } else {
            printUsage();
            System.exit(1);
        }
        return 0;
    }

    private int readConfig() throws IOException, InterruptedException, ClassNotFoundException {
        conf = new Configuration();
        //conf.set("SET_NEW_CONFIG_NAME", SET_NEW_CONFIG_VALUE);
        job = new Job(conf);
        if ((job.getJar() == null) || (job.getJar() == "")) {
            job.setJarByClass(.class);
        }
        return 0;
    }

    private int runMrJob()
            throws IOException, InterruptedException, ClassNotFoundException {
        filesToProcess = fileUtil.getFilesOnly(inputDirList, true);
        job.setJobName("");
        TextInputFormat.addInputPaths(job, StringUtil.arrayToString(filesToProcess, ","));
        TextOutputFormat.setOutputPath(job, new Path(outputDir));
        System.out.println("Input Dir: " + inputDirList);
        System.out.println("Output Dir: " + outputDir);

        job.setMapperClass(Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        totalReducers = Math.round((fileUtil.size(inputDirList) / 134217728) * 0.1F);
        totalReducers = Math.max(totalReducers, 1);
        job.setNumReduceTasks(totalReducers );
        deleteOutputDirectory(outputDir);
        jobRes = job.waitForCompletion(true) ? 0 : 1;
        deleteLogsDirectory();
        fileUtil.removeAllZeroByteFiles(outputDir);
        return 0;
    }

    private int deleteOutputDirectory(String outputDir) throws IOException {
        fileUtil.removeHdfsPath(new Path(outputDir).toString());
        return 0;
    }

    private int printUsage() {
        System.out.println("USAGE:
");
        return 0;
    }

    private int deleteLogsDirectory()
            throws IOException {
        Path outputLogPath = new Path(new Path(outputDir).toString() + "/" + "_logs");
        fileUtil.removeHdfsPath(outputLogPath.toString());
        return 0;
    }
}

分享到：

hadoop 文件常用操作 | hadoop基础知识

2012-03-02 08:24
浏览 1122
评论(0)
分类:非技术
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Mapper,Reducer,Wrapper的Java模板

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Mapper,Reducer,Wrapper的Java模板

评论

发表评论

相关推荐

mapreduce Bet

hadoop 输出格式

hadoop mapreduce 原理

hadoop搭建问题

hadoop输出文件格式

hadoop 学习

hadoop提高性能建议

hadoop例子

hadoop

Hadoop Hive与Hbase整合

hive hadoop 代码解析

Hadoop MapReduce操作MySQL

hadoop hdfs常用操作类

hdfs 操作类自己的

hadoo 文件常用操作

hadoop基础知识

hadoop 自己封装的接口

HadoopFileUtil

hadoop ExtendedFileUtil

hadoop StringUtil

最近访客更多访客>>