
HBase MapReduce Integration

 

Delete HBase rows example

The MRDeleteRows job can be driven either by an explicit start/stop row key, or by an application ID plus a start/stop time that are converted into row-key boundaries:

 

$hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -DstartKey="10000:1365663164575:88888:testhome" -DstopKey="10000:1365663164575:88890:testhome" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net" 

 
$hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.MRDeleteRows -Dtable="elf_log" -Dappid="10000" -DstartTime="2010-01-01-01-01" -DstopTime="2014-01-01-01-01" -Dquorum="vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net"
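
To sanity-check a range before or after running the delete job, a small client-side scan over the same boundaries shows how many rows fall inside it. This is a hypothetical sketch (not part of the job): it uses the plain HTable client API of the same HBase 0.90-era release the code below targets, with the table name, quorum and keys copied from the commands above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class CheckDeleteRange {
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum",
				"vm-15c2-3bbf.nam.nsroot.net,vm-ab1f-dd21.nam.nsroot.net,vm-cb03-2277.nam.nsroot.net");
		HTable table = new HTable(conf, "elf_log");
		Scan scan = new Scan();
		scan.setStartRow(Bytes.toBytes("10000:1365663164575:88888:testhome"));
		scan.setStopRow(Bytes.toBytes("10000:1365663164575:88890:testhome"));
		ResultScanner scanner = table.getScanner(scan);
		long count = 0;
		for (Result r : scanner) {
			count++;
		}
		scanner.close();
		table.close();
		// After a successful delete job this prints 0.
		System.out.println("rows in range: " + count);
	}
}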
 

 

import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MRDeleteRows extends Configured implements Tool {
	String startRowKey;
	String stopRowKey;
	String quorum;
	String table; 
	
	String startTime;
	String stopTime;
	String appID;
	
	public String getStartTime() {
		return startTime;
	}

	public String getStopTime() {
		return stopTime;
	}

	public String getAppID() {
		return appID;
	}

	public String getQuorum() {
		return quorum;
	}

	public String getStartRowKey() {
		return startRowKey;
	}

	public String getStopRowKey() {
		return stopRowKey;
	}
	
	public String getTable() {
		return table;
	}


	@Override
	public int run(String[] args) throws Exception {
		// ToolRunner/GenericOptionsParser has already copied the -Dname=value
		// arguments into the configuration, so they can be read back directly.
		Configuration conf = getConf();
		this.startRowKey = conf.get("startKey");
		this.stopRowKey = conf.get("stopKey");
		this.quorum = conf.get("quorum");
		this.table = conf.get("table");
		this.startTime = conf.get("startTime");
		this.stopTime = conf.get("stopTime");
		this.appID = conf.get("appid");
		return 0;
	}
	
	/**
	 * Converts an application ID plus a "yyyy-MM-dd-HH-mm" timestamp into the
	 * "appId:epochMillis" prefix used as a scan boundary.
	 */
	static String getRowKey(String appID, String time) {
		DateFormat df = new SimpleDateFormat("yyyy-MM-dd-HH-mm");
		Date date = null;
		try {
			date = df.parse(time);
		} catch (ParseException e) {
			System.out.println("Please supply dates in the format yyyy-MM-dd-HH-mm");
			System.exit(1);
		}
		return appID + ":" + date.getTime();
	}
	 

	/**
	 * Emits a Delete for every row returned by the scan. The deletes are
	 * applied back to the source table by the TableOutputFormat that
	 * initTableReducerJob installs.
	 */
	static class DeleteMapper extends
			TableMapper<ImmutableBytesWritable, Delete> {

		@Override
		public void map(ImmutableBytesWritable row, Result value,
				Context context) throws IOException {
			ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get());
			try {
				// Delete the entire row identified by the scanned row key.
				Delete delete = new Delete(row.get());
				context.write(userKey, delete);
			} catch (InterruptedException e) {
				throw new IOException(e);
			}
		}
	}
	 

	public static void main(String[] args) throws Exception {
		MRDeleteRows deleteElf = new MRDeleteRows();
		// ToolRunner parses the -D options and hands them to run(), which
		// copies them into the fields read below.
		ToolRunner.run(deleteElf, args);

		Configuration config = HBaseConfiguration.create();
		config.set("hbase.zookeeper.quorum", deleteElf.getQuorum());
		Job job = new Job(config, "DeleteHbaseRowkeys");
		job.setJarByClass(MRDeleteRows.class);

		Scan scan = new Scan();
		System.out.println("quorum: " + deleteElf.getQuorum());
		System.out.println("table: " + deleteElf.getTable());
		if (deleteElf.getStartRowKey() != null && deleteElf.getStopRowKey() != null) {
			// Mode 1: explicit start/stop row keys.
			System.out.println("startkey: " + deleteElf.getStartRowKey());
			System.out.println("stopkey: " + deleteElf.getStopRowKey());
			scan.setStartRow(deleteElf.getStartRowKey().getBytes());
			scan.setStopRow(deleteElf.getStopRowKey().getBytes());
		} else if (deleteElf.getAppID() != null && deleteElf.getStartTime() != null
				&& deleteElf.getStopTime() != null) {
			// Mode 2: application ID plus time range, converted to row-key boundaries.
			System.out.println("AppID: " + deleteElf.getAppID());
			System.out.println("start time: " + deleteElf.getStartTime());
			System.out.println("stop time: " + deleteElf.getStopTime());
			scan.setStartRow(getRowKey(deleteElf.getAppID(), deleteElf.getStartTime()).getBytes());
			scan.setStopRow(getRowKey(deleteElf.getAppID(), deleteElf.getStopTime()).getBytes());
		} else {
			// Refuse to run without a range; an unbounded scan would delete the whole table.
			System.err.println("Usage: set either -DstartKey/-DstopKey or -Dappid/-DstartTime/-DstopTime");
			System.exit(1);
		}

		scan.setCacheBlocks(false);

		// The mapper scans the range and emits Delete objects; initTableReducerJob
		// with a null reducer only installs TableOutputFormat so the deletes are
		// written back to the same table.
		TableMapReduceUtil.initTableMapperJob(deleteElf.getTable(), scan,
				DeleteMapper.class, ImmutableBytesWritable.class, Delete.class,
				job);
		TableMapReduceUtil.initTableReducerJob(deleteElf.getTable(), null, job);

		boolean b = job.waitForCompletion(true);
		if (!b) {
			throw new IOException("error with job!");
		}
	}

}
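
A note on the time-based boundaries: getRowKey() only produces the "appId:epochMillis" prefix rather than a full "appId:millis:counter:hostname" key like the ones the loader below writes. That still works as a scan start boundary because HBase compares row keys byte by byte, so the shorter prefix sorts before every full key that extends it. A hypothetical check (class name and key values are illustrative only):

import org.apache.hadoop.hbase.util.Bytes;

public class RowKeyOrderCheck {
	public static void main(String[] args) {
		// Prefix of the form produced by getRowKey() ...
		byte[] startBoundary = Bytes.toBytes("10000:1365663164575");
		// ... sorts before a full row key stored for the same appId and timestamp,
		// so a scan starting at the prefix includes that row.
		byte[] storedKey = Bytes.toBytes("10000:1365663164575:88888:testhome");
		System.out.println(Bytes.compareTo(startBoundary, storedKey) < 0); // prints true
	}
}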

 

HBase Loader MapReduce Example

import java.io.IOException;
import java.util.Calendar;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Sample Uploader MapReduce
 * <p>
 * This is EXAMPLE code. You will need to change it to work for your context.
 * <p>
 * Uses the {@link TableReducer} setup only to configure the TableOutputFormat;
 * the data is written to HBase directly from the map. Change the InputFormat
 * to suit your data. In this example each line of the input log file becomes
 * one row: the line itself is stored in the content family and the system
 * information passed on the command line is stored in the sysInfo family.
 * <p>
 * The table and the column families we insert into (sysInfo and content) must
 * preexist.
 * <p>
 * There is no reducer in this example, as one is not necessary and would add
 * significant overhead. If you need to massage the data before inserting it
 * into HBase, you can do that in the map as well.
 * <p>
 * Start the MR job as follows (the jar is whatever you package this class into):
 * 
 * <pre>
 * ./bin/hadoop jar yourjar.jar BulkLoaderToHbase <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>
 * </pre>
 * <p>
 * This code was written against HBase 0.21 trunk.
 * 
 * Before running this job, make sure HADOOP_CLASSPATH is set; it must include
 * zookeeper.jar and hbase-0.90.4-cdh3u3.jar.
 */
public class BulkLoaderToHbase {

	private static final String NAME = "BulkLoaderToHbase";
	private static byte[] SYSINFO;
	private static byte[] CONTENT;
	private static byte[] APP_ID;
	private static byte[] ENV;
	private static byte[] HOSTNAME;
	private static byte[] BODY;
	private static byte[] LOG_FILE_NAME;
	private static byte[] LOG_TYPE;
	private static byte[] LOG_FILE_PATH;
	
	private static byte[] appId_v;
	private static byte[] env_v;
	private static byte[] hostname_v;
	private static byte[] logPath_v;
	private static byte[] logFileName_v;
	private static byte[] logType_v;
	
	// Counter appended to the row key so that lines written within the same
	// millisecond on the same host do not collide.
	private static long nano = 0;
	
	static class Uploader extends
			Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

		private long checkpoint = 100;
		private long count = 0;

		@Override
		public void map(LongWritable key, Text line, Context context)
				throws IOException {

			// Row key format: appId:epochMillis:counter:hostname, matching the keys
			// the MRDeleteRows job above scans for.
			Calendar cal = Calendar.getInstance();
			String rowkey = Bytes.toString(appId_v) + ":" + cal.getTimeInMillis() + ":" + (nano++) + ":" + Bytes.toString(hostname_v);
			byte[] rowKeyValue = Bytes.toBytes(rowkey);
			Put put = new Put(rowKeyValue);

			put.add(SYSINFO, APP_ID, appId_v);
			put.add(SYSINFO, ENV, env_v);
			put.add(SYSINFO, HOSTNAME, hostname_v);
			// Text.getBytes() exposes the backing buffer, which may be longer than
			// the valid data, so copy the exact line contents instead.
			put.add(CONTENT, BODY, Bytes.toBytes(line.toString()));
			put.add(CONTENT, LOG_FILE_PATH, logPath_v);
			put.add(CONTENT, LOG_FILE_NAME, logFileName_v);
			put.add(CONTENT, LOG_TYPE, logType_v);

			// Uncomment below to disable WAL. This will improve performance but
			// means
			// you will experience data loss in the case of a RegionServer
			// crash.
			// put.setWriteToWAL(false);

			try {
				context.write(new ImmutableBytesWritable(rowKeyValue), put);
			} catch (InterruptedException e) {
				e.printStackTrace();
			}

			// Set status every checkpoint lines
			if (++count % checkpoint == 0) {
				context.setStatus("Emitting Put " + count);
			}
		}
	}

	/**
	 * Job configuration.
	 */
	public static Job configureJob(Configuration conf, String[] args)
			throws IOException {
		
	    SYSINFO = Bytes.toBytes("sysInfo");
	    CONTENT = Bytes.toBytes("content");
	    APP_ID = Bytes.toBytes("appId");
	    ENV = Bytes.toBytes("env");
	    HOSTNAME = Bytes.toBytes("hostName");
	    BODY = Bytes.toBytes("body");
	    LOG_FILE_PATH = Bytes.toBytes("logFilePath");
	    LOG_FILE_NAME = Bytes.toBytes("logFileName");
	    LOG_TYPE = Bytes.toBytes("logType");
		
		Path inputPath = new Path(args[0]);
		String tableName = args[1];
		appId_v = Bytes.toBytes(args[2]);
		env_v = Bytes.toBytes(args[3]);
		hostname_v = Bytes.toBytes(args[4]);
		logPath_v = Bytes.toBytes(args[5]);
		logFileName_v = Bytes.toBytes(args[6]);
		logType_v = Bytes.toBytes(args[7]);
		
		
		Job job = new Job(conf, NAME + "_" + tableName);
		job.setJarByClass(Uploader.class);
		FileInputFormat.setInputPaths(job, inputPath);
		job.setInputFormatClass(TextInputFormat.class);
		job.setMapperClass(Uploader.class);
		// No reducers. Just write straight to table. Call initTableReducerJob
		// because it sets up the TableOutputFormat.
		TableMapReduceUtil.initTableReducerJob(tableName, null, job);
		job.setNumReduceTasks(0);
		return job;
	}

	/**
	 * Main entry point.
	 * 
	 * @param args
	 *            The command line parameters.
	 * @throws Exception
	 *             When running the job fails.
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		String[] otherArgs = new GenericOptionsParser(conf, args)
				.getRemainingArgs();
		if (otherArgs.length != 8) {
			System.err
					.println("Wrong number of arguments: " + otherArgs.length);
			System.err.println("Usage: " + NAME + " <input> <tablename> <appId> <env> <hostname> <logpath> <logFileName> <logType>");
			System.exit(-1);
		}
		Job job = configureJob(conf, otherArgs);
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
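
For consistency with the delete examples above, a hypothetical invocation might look like the following; the jar and package name are assumed to match the sponge-hserver build used earlier, and the argument values after the table name are placeholders. Note that, unlike the delete job, this loader does not take a -Dquorum option: it picks up the ZooKeeper quorum from the hbase-site.xml on the classpath via HBaseConfiguration.create().

$hadoop jar ./sponge-hserver.jar com.citi.sponge.mapreduce.BulkLoaderToHbase /tmp/app.log elf_log 10000 UAT testhome /var/log/app app.log server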

 
