基于MapReduce的HBase开发（续）

tenght

浏览: 52071 次

最近访客更多访客>>

jxqc_job

汽车城路

极品拖拉机

aubdiy

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

示例

代码：

importjava.io.ByteArrayOutputStream;
importjava.io.DataOutputStream;
importjava.io.IOException;
importjava.util.HashMap;

importorg.apache.hadoop.conf.Configuration;
importorg.apache.hadoop.hbase.HBaseConfiguration;
importorg.apache.hadoop.hbase.HColumnDescriptor;
importorg.apache.hadoop.hbase.HTableDescriptor;
importorg.apache.hadoop.hbase.client.HBaseAdmin;
importorg.apache.hadoop.hbase.client.HTable;
importorg.apache.hadoop.hbase.client.Put;
importorg.apache.hadoop.hbase.client.Result;
importorg.apache.hadoop.hbase.client.Scan;
importorg.apache.hadoop.hbase.io.ImmutableBytesWritable;
importorg.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
importorg.apache.hadoop.hbase.mapreduce.TableInputFormat;
importorg.apache.hadoop.hbase.util.Base64;
importorg.apache.hadoop.hbase.util.Bytes;
importorg.apache.hadoop.io.Writable;
importorg.apache.hadoop.mapreduce.Job;
importorg.apache.hadoop.mapreduce.Mapper;

publicclassIndexBuilder{

 //索引表唯一的一列为 INDEX_ROW，其中 INDEX 为列族
 privatestaticfinalbyte[]INDEX_COLUMN=Bytes.toBytes("INDEX");
 privatestaticfinalbyte[]INDEX_QUALIFIER=Bytes.toBytes("ROW");

 //实现 Map 类
 publicstaticclassMapextends
  Mapper<ImmutableBytesWritable,Result,ImmutableBytesWritable,Writable>{

  //存储了“列名”到“表名——列名”的映射
  //前者用于获取某列的值，并作为索引表的键值；后者用户作为索引表的表名
  privateHashMap<byte[],ImmutableBytesWritable>indexes;
  privatebyte[]family;

  //实现 map 函数
  publicvoidmap(ImmutableBytesWritablekey,Resultvalue,
    Contextcontext)throwsIOException,InterruptedException{
   for(java.util.Map.Entry<byte[],ImmutableBytesWritable>index:indexes
     .entrySet()){
    //获取列名
    byte[]qualifier=index.getKey();
    //索引表的表名
    ImmutableBytesWritabletableName=index.getValue();
    //根据“列族：列名”获得元素值
    byte[]newValue=value.getValue(family,qualifier);

    if(newValue!=null){
     //以列值作为行健，在列“INDEX：ROW”中插入行健
     Putput=newPut(newValue);
     put.add(INDEX_COLUMN,INDEX_QUALIFIER,key.get());

     //在 tableName 表上执行 put
     //操作使用 MultipleOutputFormat 时，
     //第二个参数必须是 Put 和 Delete 类型
     context.write(tableName,put);
    }
   }
  }

  //setup为Mapper中的方法，该方法只在任务初始化时执行一次
  protectedvoidsetup(Contextcontext)throwsIOException,
    InterruptedException{
   Configurationconf=context.getConfiguration();

   //通过 Configuration.set()方法传递参数
   StringtableName=conf.get("index.tablename");
   String[]fields=conf.getStrings("index.fields");

   //fields 内为需要做索引的列名
   StringfamilyName=conf.get("index.familyname");
   family=Bytes.toBytes(familyName);

   //初始化 indexes 方法
   indexes=newHashMap<byte[],ImmutableBytesWritable>();

   for(Stringfield:fields){
    //如果给 name 做索引，则索引表的名称为“heroes‐name”
    indexes.put(Bytes.toBytes(field),
newImmutableBytesWritable(
      Bytes.toBytes(tableName+"‐"+field)));
   }
  }
 }

 //初始化示例数据表——“heroes”
 publicstaticvoidinitHBaseTable(Configurationconf,StringtableName)
   throwsIOException{
  //创建表描述
  HTableDescriptorhtd=newHTableDescriptor(tableName);
  //创建列族描述
  HColumnDescriptorcol=newHColumnDescriptor("info");

  htd.addFamily(col);

  HBaseAdminhAdmin=newHBaseAdmin(conf);

  if(hAdmin.tableExists(tableName)){
   System.out.println("该数据表已经存在，正在重新创建。");
   hAdmin.disableTable(tableName);
   hAdmin.deleteTable(tableName);
  }

  System.out.println("创建表："+tableName);
  //创建表
  hAdmin.createTable(htd);
  HTabletable=newHTable(conf,tableName);
  System.out.println("向表中插入数据");
  //添加数据
  addRow(table,"1","info","name","peter");
  addRow(table,"1","info","email","peter@heroes.com");
  addRow(table,"1","info","power","absorbabilities");

  addRow(table,"2","info","name","hiro");
  addRow(table,"2","info","email","hiro@heroes.com");
  addRow(table,"2","info","power","bendtimeandspace");

  addRow(table,"3","info","name","sylar");
  addRow(table,"3","info","email","sylar@heroes.com");
  addRow(table,"3","info","power","hnowhowthingswork");

  addRow(table,"4","info","name","claire");
  addRow(table,"4","info","email","claire@heroes.com");
  addRow(table,"4","info","power","heal");

  addRow(table,"5","info","name","noah");
  addRow(table,"5","info","email","noah@heroes.com");
  addRow(table,"5","info","power","caththepeoplewithablities");
 }

 //添加一条数据
 privatestaticvoidaddRow(HTabletable,Stringrow,
StringcolumnFamily,Stringcolumn,Stringvalue)throwsIOException{
  Putput=newPut(Bytes.toBytes(row));
  //参数出分别：列族、列、值
  put.add(Bytes.toBytes(columnFamily),Bytes.toBytes(column),
    Bytes.toBytes(value));
  table.put(put);
 }

 //创建数据库表
 publicstaticvoidcreateIndexTable(Configurationconf,
StringtableName)throwsException{
  //新建一个数据库管理员
  HBaseAdminhAdmin=newHBaseAdmin(conf);

  if(hAdmin.tableExists(tableName)){
   System.out.println("该数据表已经存在，正在重新创建。");
   hAdmin.disableTable(tableName);
   hAdmin.deleteTable(tableName);
  }

  //新建一个表的描述
  HTableDescriptortableDesc=newHTableDescriptor(tableName);
  //在描述里添加列族
  tableDesc.addFamily(newHColumnDescriptor(INDEX_COLUMN));

  //根据配置好的描述建表
  hAdmin.createTable(tableDesc);
  System.out.println("创建"+tableName+"表成功");
 }

 publicstaticJobconfigureJob(Configurationconf,StringjobName)
   throwsIOException{
  Jobjob=newJob(conf,jobName);
  job.setJarByClass(IndexBuilder.class);

  //设置 Map 处理类
  job.setMapperClass(Map.class);

  //设置 Reduce 个数
  job.setNumReduceTasks(0);

  //设置输入和输出格式
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);

  returnjob;
 }

 privatestaticStringconvertScanToString(Scanscan)
throwsIOException{
  ByteArrayOutputStreamout=newByteArrayOutputStream();
  DataOutputStreamdos=newDataOutputStream(out);
  scan.write(dos);
  returnBase64.encodeBytes(out.toByteArray());
 }

 publicstaticvoidmain(String[]args)throwsException{
  Configurationconf=HBaseConfiguration.create();
  conf.set("hbase.zookeeper.quorum","master");
  conf.set("hbase.zookeeper.property.clientPort","2181");

  StringtableName="heroes";
  StringcolumnFamily="info";
  String[]fields={"name","power"};
  //第一步：初始化数据库表
  IndexBuilder.initHBaseTable(conf,tableName);

  //第二步：创建索引表
  for(Stringfield:fields){
   IndexBuilder.createIndexTable(conf,tableName+"‐"+field);
  }

  //第三步：进行 MapReduce 处理
  conf.set("mapred.job.tracker","master:9001");
  conf.set(TableInputFormat.SCAN,convertScanToString(newScan()));
  conf.set(TableInputFormat.INPUT_TABLE,tableName);
 //设置传递属性值
  conf.set("index.tablename",tableName);
  conf.set("index.familyname",columnFamily);
  conf.setStrings("index.fields",fields);

  Jobjob=IndexBuilder.configureJob(conf,"IndexBuilder");

  System.exit(job.waitForCompletion(true)?0:1);
 }
}

程序编译并运行完成后，可在 hbase shell 下执行 list 命令查看所创建的表，也可以使用其他 shell 命令对这些表进行操作，在此不再赘述。

分享到：