import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; public class ImportDataFromMongoReducer extends Reducer<Text, Text, Text, Text> { private static final Log LOG = LogFactory .getLog(ImportDataFromMongoReducer.class); private MultipleOutputs out; public void setup(Context context) { out = new MultipleOutputs(context); } private String generateFileName(Text k) { return k.toString() + "/part"; } @Override public void reduce(final Text pKey, final Iterable<Text> pValues, final Context pContext) throws IOException, InterruptedException { for (final Text value : pValues) { // pContext.write(pKey, value); out.write(NullWritable.get(), value, generateFileName(pKey)); } } protected void cleanup(Context context) throws IOException, InterruptedException { out.close(); } }
References
http://hadoop.apache.org/docs/current/api/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.html
http://www.infoq.com/articles/HadoopOutputFormat
相关推荐
"Data Analytics with Hadoop: An Introduction for Data Scientists" ISBN: 1491913703 | 2016 | PDF | 288 pages | 7 MB Ready to use statistical and machine-learning techniques across large data sets? ...
Hadoop 2.x is spreading its wings to cover a variety of application paradigms and solve a wider range of data problems. It is rapidly becoming a general-purpose cluster platform for all data ...
Hadoop: The Definitive Guide, 4th Edition Get ready to unlock the power of your data. With the fourth edition of this comprehensive guide, you’ll learn how to build and maintain reliable, scalable,...
With this digital Early Release edition of Hadoop: The Definitive Guide, you get the entire book bundle in its earliest form – the author’s raw and unedited content – so you can take advantage of ...
- **书名**:《Hadoop:The Definitive Guide》(第二版)
- **作者**:Tom White
- **前言作者**:Doug Cutting
- **出版社**:O'Reilly Media, Inc.
- **出版日期**:2010年10月
- **版权**:版权所有 © 2011 Tom...
实战Hadoop:开启通向云计算的捷径
《Hadoop:权威指南》是了解和掌握Apache Hadoop生态系统不可或缺的一本著作。这本书由Tom White撰写,全面深入地介绍了Hadoop的各个组件及其工作原理,对于初学者和专业人士来说都是一份宝贵的参考资料。 Hadoop是...
资源名称:云计算Hadoop:快速部署Hadoop集群
内容简介:近来云计算越来越热门了,云计算已经被看作IT业的新趋势。云计算可以粗略地定义为使用自己环境之外的某一服务提供的可伸缩计算资源,并按使用量付费。可以...
pdf+epub This book will teach you how to deploy large-scale datasets in deep neural networks with Hadoop for optimal...this book will then show you how to set up the Hadoop environment for deep learning.
Ideal for enterprise architects, IT managers, application architects, and data engineers, this book shows you how to overcome the many challenges that emerge during Hadoop projects. You'll explore the...
大数据处理框架:Hadoop:Hadoop生态系统概览.docx
大数据处理框架:Hadoop:Hadoop数据存储格式.docx
大数据处理框架:Hadoop:Hadoop数据处理框架Pig.docx
大数据处理框架:Hadoop:Hadoop集群部署与管理.docx
大数据处理框架:Hadoop:大数据与Hadoop简介.docx
大数据处理框架:Hadoop:Hadoop性能优化与故障排查.docx
大数据处理框架:Hadoop:Hadoop实时数据处理框架Flume.docx
大数据处理框架:Hadoop:HadoopYARN架构与资源管理.docx