记录中的最大行max_row

yugouai

浏览: 500138 次
性别:
来自: 深圳

最近访客更多访客>>

淡定情绪

spaceandroid

fengbin2005

hundun

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hive分享

import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

@Description(name = "maxrow", value = "_FUNC_(expr) - Returns the maximum value of expr and values of associated columns as a struct")
public class GenericUDAFMaxRow extends AbstractGenericUDAFResolver {

  static final Log LOG = LogFactory.getLog(GenericUDAFMaxRow.class.getName());

  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    // Verify that the first parameter supports comparisons.
    ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    if (!ObjectInspectorUtils.compareSupported(oi)) {
      throw new UDFArgumentTypeException(0, "Cannot support comparison of map<> type or complex type containing map<>.");
    }
    return new GenericUDAFMaxRowEvaluator();
  }

  // @UDFType(distinctLike=true)
  public static class GenericUDAFMaxRowEvaluator extends GenericUDAFEvaluator {

    ObjectInspector[] inputOIs;
    ObjectInspector[] outputOIs;
    ObjectInspector structOI;

    @Override
    public ObjectInspector init(Mode mode, ObjectInspector[] parameters) throws HiveException {
      super.init(mode, parameters);

      int length = parameters.length;
      if (length > 1 || !(parameters[0] instanceof StructObjectInspector)) {
        assert(mode == Mode.COMPLETE || mode == Mode.FINAL);
        initMapSide(parameters);

      } else {
        assert(mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2);
        assert(parameters.length == 1 && parameters[0] instanceof StructObjectInspector);
        initReduceSide((StructObjectInspector) parameters[0]);
      }

      return structOI;
    }

    /* Initialize the UDAF on the map side. */
    private void initMapSide(ObjectInspector[] parameters) throws HiveException {
      int length = parameters.length;
      outputOIs = new ObjectInspector[length];
      List<String> fieldNames = new ArrayList<String>(length);
      List<ObjectInspector> fieldOIs = Arrays.asList(outputOIs);

      for (int i = 0; i < length; i++) {
        fieldNames.add("col" + i); // field names are not made available! :(
        outputOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(parameters[i]);
      }

      inputOIs = parameters;
      structOI = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /* Initialize the UDAF on the reduce side (or the map side in some cases). */
    private void initReduceSide(StructObjectInspector inputStructOI) throws HiveException {
      List<? extends StructField> fields = inputStructOI.getAllStructFieldRefs();
      int length = fields.size();
      inputOIs = new ObjectInspector[length];
      outputOIs = new ObjectInspector[length];
      for (int i = 0; i < length; i++) {
        StructField field = fields.get(i);
        inputOIs[i] = field.getFieldObjectInspector();
        outputOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(inputOIs[i]);
      }
      structOI = ObjectInspectorUtils.getStandardObjectInspector(inputStructOI);
    }

    static class MaxAgg implements AggregationBuffer {
      Object[] objects;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MaxAgg result = new MaxAgg();
      return result;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      MaxAgg maxagg = (MaxAgg) agg;
      maxagg.objects = null;
    }

    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      merge(agg, parameters);
    }

    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      return terminate(agg);
    }

    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial != null) {
        MaxAgg maxagg = (MaxAgg) agg;
        List<Object> objects;
        if (partial instanceof Object[]) {
          objects = Arrays.asList((Object[]) partial);
        } else if (partial instanceof LazyBinaryStruct) {
          objects = ((LazyBinaryStruct) partial).getFieldsAsList();
        } else {
          throw new HiveException("Invalid type: " + partial.getClass().getName());
        }

        boolean isMax = false;
        if (maxagg.objects == null) {
          isMax = true;
        } else {
          int cmp = ObjectInspectorUtils.compare(maxagg.objects[0], outputOIs[0], objects.get(0), inputOIs[0]);
          if (cmp < 0) {
            isMax = true;
          }
        }

        if (isMax) {
          int length = objects.size();
          maxagg.objects = new Object[length];
          for (int i = 0; i < length; i++) {
            maxagg.objects[i] = ObjectInspectorUtils.copyToStandardObject(objects.get(i), inputOIs[i]);
          }
        }
      }
    }

    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      MaxAgg maxagg = (MaxAgg) agg;
      return Arrays.asList(maxagg.objects);
    }
  }
}

maxrow(compare_col,col1,col2,col3....)根据输入的compare列进行比较，返回最大行，包含值compare-col，col1，col2...返回结构是struct，需要根据struct结构取值

分享到：

DBUnit与H2内存数据库结合(单元测试) | 再谈GenericUDAF（以collect_set源码分析 ...

2013-05-26 11:52
浏览 2214
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

记录中的最大行max_row

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

记录中的最大行max_row

评论

发表评论

相关推荐

HIVE窗口及分析函数simple及说明

HIVE 窗口及分析函数 应用场景

Hive0.11.0版本新特征

HIVE GenericeUDF------row_number

HIVE MAP排序 GenericUDF

再谈GenericUDAF（以collect_set源码分析）

HIVE调优的两个好用参数

Hive授权（Security配置）

Hive索引Demo

Hive索引

Hive文件存储格式的测试比较

Hive文件存储格式

hive的hive.exec.parallel参数说明

Hive决定reducer个数的标准

Hive中UDTF的编写与使用

hive udaf开发入门和运行过程详解

Hive自定义函数（generic）

Hive自定义函数（simple）

Hive锁与并发模型

HIVE Explain

最近访客更多访客>>

HIVE 窗口及分析函数应用场景