Hadoop简单的Map/Reduce

a123159521

浏览: 469956 次
性别:
来自: 杭州

最近访客更多访客>>

u012363178

zhaodengfeng1989

fhtwins

wangwen135

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hadoop
Hbase

/**
 * Requirement:
 *     Count the number of users of a given string stored in the HBase database.
 *     String storage format: 1002;1003,2003,1443;1232,232
 *     Final statistics:  module   user count
 *       1    1002    X
 *       ...
 *     The final statistics are stored into the result table.
 */
public class CountUse {

	public static Logger loger = Wloger.loger;

	/**
	 * Internal Mapper to be run by Hadoop.
	 *
	 * For every input row it copies the configured columns into a
	 * {@code UdcSellHTable} bean, asks {@code CountUseUtil.getKeys} for the
	 * keys contained in the widget string and emits one
	 * {@code (UdcSellHTable, 1)} pair per key.
	 */
	public static class MapTask extends TableMapper<UdcSellHTable, IntWritable> {
		private final static IntWritable one = new IntWritable(1);

		/** Bean the current row is loaded into; also carries the date set in setup. */
		private UdcSellHTable udcSell = new UdcSellHTable();
		/** Output key instance, refilled before every write. */
		private UdcSellHTable word = new UdcSellHTable();
		private String tableName;
		private byte[] family;
		/** Column name -> column qualifier bytes of the columns to read. */
		private Map<String, byte[]> indexes;

		/**
		 * Map step: emits one count of 1 per key found in the row's widget.
		 *
		 * A row is skipped entirely when any of its columns could not be
		 * copied into the bean. The bean is reused between calls, so going on
		 * after a failed copy would emit the previous row's values again.
		 *
		 * @param rowKey  row key of the current HBase row (not used)
		 * @param result  cells of the current HBase row
		 * @param context Hadoop context the pairs are written to
		 */
		@SuppressWarnings("static-access")
		@Override
		protected void map(ImmutableBytesWritable rowKey, Result result,
				Context context) throws IOException, InterruptedException {
			// Load the database values into the bean.
			boolean populated = true;
			for (Map.Entry<String, byte[]> entity : indexes.entrySet()) {
				String column = entity.getKey();
				PropertyName property = new PropertyName(family, entity
						.getValue());
				Object type = udcSell.types.get(column);
				Object value = CountUseUtil.changeValue(new Entity(tableName,
						result), property, type);// value stored in the database
				try {
					loger.debug("debug:"+udcSell);
					loger.debug("debug:"+UdcSellHTable.filter(column));
					loger.debug("debug:"+value);
					BeanUtils.setProperty(udcSell, UdcSellHTable.filter(column),
							value);
				} catch (Exception e) {
					populated = false;
					loger.error("对象属性定义有问题，请check[" + udcSell + ":" + column
							+ "[" + value + "]]", e);
				}
			}
			if (!populated) {
				// udcSell may still hold values of an earlier row; do not emit them.
				return;
			}

			String widget = udcSell.getWidget();// widget data, e.g. 674324;321,321,312;3321
			word.setWidget(widget);
			loger.debug("debug:"+widget);
			// presumably one entry per module id in the widget string — confirm in CountUseUtil
			List<String> keys = CountUseUtil.getKeys(widget);
			if (keys == null || keys.isEmpty()) {
				return;
			}

			for (String key : keys) {
				word.setUserId(udcSell.getUserId());
				word.setDate(udcSell.getDate());
				word.setModuleId(Long.parseLong(key));
				word.setUseCount(1);
				loger.info("Map <word>" + word.toString() + "</word>");
				context.write(word, one);
			}
		}

		/**
		 * Reads the input table and family names from the job configuration
		 * and builds the list of columns that {@code map} reads.
		 */
		@SuppressWarnings("static-access")
		@Override
		protected void setup(Context context) throws IOException,
				InterruptedException {
			Configuration configuration = context.getConfiguration();
			tableName = configuration.get("input.tablename");
			String familyName = configuration.get("input.familyname");
			// The date is set before getWidgetDate() is called below.
			udcSell.setDate(CountUseUtil.yestoday());
			String[] fields = new String[] { udcSell.COLUMN_USERID,
					udcSell.getWidgetDate() };
			indexes = new HashMap<String, byte[]>();
			for (String field : fields) {
				indexes.put(field, Bytes.toBytes(field));
			}
			family = Bytes.toBytes(familyName);
		}
	}

	/**
	 * Combiner: sums the counts of one key and re-emits the key with the sum.
	 */
	public static class CombinerTask extends
			Reducer<UdcSellHTable, IntWritable, UdcSellHTable, IntWritable> {
		private IntWritable result = new IntWritable();

		/**
		 * @param key     key emitted by the mapper
		 * @param values  counts emitted for that key
		 * @param context Hadoop context the summed pair is written to
		 */
		@Override
		public void reduce(UdcSellHTable key, Iterable<IntWritable> values,
				Context context) throws IOException, InterruptedException {

			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}
			result.set(sum);
			loger.info("combiner <key>" + key + "</key>");
			loger.info("combiner <sum>" + sum + "</sum>");
			context.write(key, result);
		}
	}

	/**
	 * Reducer: sums the counts of one key and writes them to the output table.
	 *
	 * NOTE(review): the output row is "1_" + moduleId, while the key also
	 * carries userId, date and widget. How keys are grouped depends on
	 * UdcSellHTable, which is not visible here — confirm that all pairs of one
	 * module reach the same reduce call, otherwise later Puts replace earlier
	 * ones.
	 */
	public static class Reduce extends
			TableReducer<UdcSellHTable, IntWritable, ImmutableBytesWritable> {
		/** Only used to reach the column name constants in setup. */
		private UdcSellHTable udcSell = new UdcSellHTable();
		private String tableName;
		/** Output table name as the key written to the context; built once in setup. */
		private ImmutableBytesWritable outputTable;
		/** Column name -> column qualifier bytes of the columns to write. */
		private Map<String, byte[]> indexes;
		private byte[] family;

		/**
		 * Sums the counts, stores the sum in the key and writes one Put whose
		 * row is "1_" + moduleId.
		 *
		 * A column whose value cannot be read from the key is logged and left
		 * out of the Put; the Put is still written.
		 *
		 * @param key     key emitted by the mapper/combiner
		 * @param values  counts emitted for that key
		 * @param context Hadoop context the Put is written to
		 */
		@Override
		public void reduce(UdcSellHTable key, Iterable<IntWritable> values,
				Context context) throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}
			key.setUseCount(sum);
			loger.info("Reduce <key>" + key + "</key>");
			loger.info("Reduce <sum>" + sum + "</sum>");
			String keyStr = "1_" + key.getModuleId();
			Put put = new Put(Bytes.toBytes(keyStr));// module id is the row key
			for (Map.Entry<String, byte[]> entity : indexes.entrySet()) {
				try {
					put.add(family, entity.getValue(), Bytes.toBytes(BeanUtils
							.getProperty(key, entity.getKey())));
				} catch (Exception e) {
					// Report the bean that was actually read and the qualifier as text.
					loger.error("对象属性定义有问题，请check[" + key + ":"
							+ entity.getKey() + "["
							+ Bytes.toString(entity.getValue()) + "]]", e);
				}
			}
			context.write(outputTable, put);
		}

		/**
		 * Reads the output table and family names from the job configuration
		 * and builds the list of columns that {@code reduce} writes.
		 */
		@SuppressWarnings("static-access")
		@Override
		protected void setup(Context context) throws IOException,
				InterruptedException {
			Configuration configuration = context.getConfiguration();
			tableName = configuration.get("output.tablename");
			outputTable = new ImmutableBytesWritable(Bytes.toBytes(tableName));
			String familyName = configuration.get("output.familyname");
			family = Bytes.toBytes(familyName);
			String[] fields = new String[] { udcSell.COLUMN_USERID,udcSell.COLUMN_MODULEID,
					udcSell.COLUMN_USE };
			indexes = new HashMap<String, byte[]>();
			for (String field : fields) {
				indexes.put(field, Bytes.toBytes(field));
			}
		}
	}

	/**
	 * Job configuration.
	 *
	 * Table and family names are fixed here and published to the tasks through
	 * the "input.*" and "output.*" configuration keys. The job runs with one
	 * reduce task, reads through TableInputFormat and writes through
	 * MultiTableOutputFormat.
	 *
	 * @param conf configuration the job is created from; the keys are set on it
	 * @param args remaining command line arguments (currently not used)
	 * @return the configured, not yet submitted job
	 * @throws IOException if the job cannot be created
	 */
	public static Job configureJob(Configuration conf, String[] args)
			throws IOException {
		String inputTable = "udc_sell";
		String inputFamily = "s_year";
		String outputTable = "job_result";
		String outputFamily = "s_base";
		conf.set("input.tablename", inputTable);
		conf.set("input.familyname", inputFamily);
		conf.set(TableInputFormat.INPUT_TABLE, inputTable);

		conf.set("output.tablename", outputTable);
		conf.set("output.familyname", outputFamily);
		conf.set(TableOutputFormat.OUTPUT_TABLE, outputTable);
		Job job = new Job(conf, inputTable);
		job.setNumReduceTasks(1);
		job.setJarByClass(CountUse.class);
		job.setMapperClass(MapTask.class);
		job.setMapOutputKeyClass(UdcSellHTable.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setCombinerClass(CombinerTask.class);
		job.setReducerClass(Reduce.class);
		job.setInputFormatClass(TableInputFormat.class);
		job.setOutputFormatClass(MultiTableOutputFormat.class);
		return job;
	}

	/**
	 * Entry point: builds the job and exits with 0 when it completed
	 * successfully, 1 otherwise.
	 *
	 * @param args command line arguments; generic Hadoop options are consumed
	 *             by GenericOptionsParser
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		String[] otherArgs = new GenericOptionsParser(conf, args)
				.getRemainingArgs();
		loger.info("开始 job");
		Job job = configureJob(conf, otherArgs);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

分享到：

Eclipse优化2 | HadoopType

2011-11-16 10:38
浏览 3071
评论(2)
分类:编程语言
查看更多

2 楼 a123159521 2013-11-05

jianxin 写道

请问UdcSellHTable这个类哪里来的啊？

I find your name very familiar. Do you recognize me?

1 楼 jianxin 2013-11-05

请问UdcSellHTable这个类哪里来的啊？

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Hadoop简单的Map/Reduce

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Hadoop简单的Map/Reduce

评论

发表评论

相关推荐

HadoopType

把xls的数据导到Hbase

Hbase建索引分析

Hadoop WordCount进阶

Hadoop WordCount解读

hadoop环境搭建

最近访客更多访客>>