- 浏览: 351842 次
- 性别:
- 来自: 杭州
-
最新评论
-
lvyuan1234:
你好,你那个sample.txt文件可以分享给我吗
hive insert overwrite into -
107x:
不错,谢谢!
hive 表的一些默认值 -
on_way_:
赞
Hadoop相关书籍 -
bupt04406:
dengkanghua 写道出来这个问题该怎么解决?hbase ...
Unexpected state导致HMaster abort -
dengkanghua:
出来这个问题该怎么解决?hbase master启动不起来。
Unexpected state导致HMaster abort
文章列表
Hive 的 OutputCommitter
public class ExecDriver extends Task<MapredWork> implements Serializable {
public int execute(DriverContext driverContext) {
ShimLoader.getHadoopShims().setNullOutputFormat(job);
}
}
public class Hadoop20Shims implements HadoopShims {
...
hive LATERAL VIEW 行转列
- 博客分类:
- Hive
drop table lateralview;
create table lateralview (col1 string, col2 string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INPATH '/home/tianzhao/book/lateralview.txt'
OVERWRITE INTO TABLE lateralview;
lateralview.txt 中的数据是
r1 a,b
r2 d,e
select col1, col2 from lateralview;
r1 a,b
...
hive complex type
- 博客分类:
- Hive
数据:
1,100|3,20|2,70|5,100
建表:
CREATE TABLE complex (
col1 MAP<INT, INT>
)
ROW FORMAT DELIMITED
COLLECTION ITEMS TERMINATED BY '|'
MAP KEYS TERMINATED BY ','
STORED AS TEXTFILE;
;
LOAD DATA LOCAL INPATH '/home/tianzhao/book/complex.txt'
OVERWRITE INTO TABLE complex;
SELECT col1[1] FROM comp ...
CREATE TABLE escape (id STRING, name STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '"';
LOAD DATA LOCAL INPATH '/home/tianzhao/book/escape.txt'
OVERWRITE INTO TABLE escape;
escape.txt的内容是:
Joe"2\"3333
Hank"2\"3333
表没有指定转义符,那么两个 " 都会被当作字段分隔符,查询结果是:
select * from escape;
Joe 2 ...
hive 两个不同类型的columns进行比较
- 博客分类:
- Hive
select case when "ab1234"<>"123" then 1 else 0 end as flag from src limit 1;
1
select case when "ab1234"<>123 then 1 else 0 end as flag from src limit 1;
0
select case when "ab1234"<> cast(123 as bigint) then 1 else 0 end as flag from src li ...
(1)startGroup:清空各个表的RowContainer
(2)processOp:根据tag,把row add到表对应的RowContainer中。
(3)endGroup:对RowContainer中的数据进行join并输出。
RowContainer添加数据时:先判断内存中的数据条数是否达到了blockSize(默认是25000),如果达到了,则写入一个临时文件;如果没有达到,则放入内存的数组(currentWriteBlock)中。所以数据可能文件中和内存中都有,也可能只有内存中有。
读取时:
(1)first
如果文件中有,则读取文件中的数据,返回第一条,如果没有就读取内存中的值,返回第 ...
Hadoop.The.Definitive.Guide.2nd.Edition 79页
hadoop默认的压缩算法。
DEFLATE org.apache.hadoop.io.compress.DefaultCodec
结果数据压缩是否开启,下面的配置为true,所以开启。
这个是最终的结果数据:
<property>
<name>hive.exec.compress.output</name>
<value>true</value>
<description> This controls whether ...
hive ColumnPruner
- 博客分类:
- Hive
Optimizer
public void initialize(HiveConf hiveConf) {
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCP)) {
transformations.add(new ColumnPruner());
}
}
create table tab1 (col1 string, col2 string, col3 int, col4 string, col5 string, col6 string, col7 string);
explain ...
接http://bupt04406.iteye.com/blog/1151545
create table tablePartition(s string) partitioned by(pt string);
alter table tablePartition add if not exists partition(pt='1');
alter table tablePartition set TBLPROPERTIES ('EXTERNAL'='TRUE'); //内部表转外部表
alter table tablePartition set TBLPROPERTIES ('EXTE ...
Driver:
public int compile(String command) {
ctx = new Context(conf); //
}
public Context(Configuration conf) throws IOException {
this(conf, generateExecutionId());
}
/**
* Generate a unique executionId. An executionId, together with user name and
* the con ...
CREATE TABLE records (year STRING, temperature INT, quality INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INPATH '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records;
hive> explain
> LOAD DATA LOCAL INPATH '/home/tianzhao ...
vi
vi esc 的状态下:
:set nu
:set number
:n // 定位某一行
G // 文件末尾
dd // 删除一行
d // 删除
u // 误修改,需要恢复之前的状态。
gg // 文件开头
10j // 往下10行
10k // 往上10 ...
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-Create%2FDropTable
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comme ...
还原建表语句见:http://www.tbdata.org/archives/939
tianzhao@ubuntu:~$ mysql -uhive -p123456
mysql> use hive
mysql> show tables;
+--------------------+
| Tables_in_hive |
+--------------------+
| BUCKETING_COLS |
| COLUMNS |
| DBS |
| PARTITIONS |
| PARTITION_K ...
见 Hadoop.The.Definitive.Guide.2nd.Edition P388
Thus, the statement:
CREATE TABLE ...;
is identical to the more explicit:
CREATE TABLE ...
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
...