- 浏览: 156649 次
- 性别:
- 来自: 深圳
文章分类
最新评论
-
lyaqys:
lz实现的OptimisticExclusiveLock有点问 ...
java park/unpark 【java并发】基于JUC CAS原理,自己实现简单独占锁
insert overwrite table AD_MATE_FT PARTITION (pt='2012-03-05 10')
select
temp0.mrid as material_id ,
temp9.name as material_name,
'pv' as AD_EFT_MATE_CAL_TYPE,
temp0.uid as temp0,
temp0.bussiness_id as bussiness_id ,
'HOURLY' as time_type,
'2012-03-05 10' as date_desc,
temp0.effect_sum as ad_eft_mate_cnt,
temp1.EXPOSE_sum as ad_show_cnt,
temp2.click_sum as ad_click_cnt,
temp3.ad_percent as ad_click_rate,
temp4.ad_browse as pageview_cnt,
temp5.ad_clibro as pvcnt_div_adclick,
temp7.ad_step as ad_bounce_rate,
temp6.ad_percon as ad_conv_rate,
temp8.ad_avgtime as avg_stay_time,
to_date(unix_timestamp())
from (select * from sum0_effect_hour_temp where pt='2012-03-05 10')temp0
join (select * from sum0_expose_hour_temp where pt='2012-03-05 10')temp1
on (temp0.mrid=temp1.mrid and temp1.pt=temp0.pt and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
join(select * from sum0_click_hour_temp where pt='2012-03-05 10')temp2
on(temp2.mrid=temp1.mrid and temp1.pt=temp2.pt and temp2.uid=temp1.uid and temp2.bussiness_id=temp1.bussiness_id)
join(select * from sum0_click_percent_hour_temp where pt='2012-03-05 10')temp3
on(temp2.mrid=temp3.mrid and temp3.pt=temp2.pt and temp2.uid=temp3.uid and temp2.bussiness_id=temp3.bussiness_id)
join(select * from sum0_click_browse_hour_temp where pt='2012-03-05 10')temp4
on(temp4.mrid=temp3.mrid and temp3.pt=temp4.pt and temp4.uid=temp3.uid and temp4.bussiness_id=temp3.bussiness_id)
join(select * from sum0_clibro_hour_rate_temp where pt='2012-03-05 10')temp5
on(temp4.mrid=temp5.mrid and temp5.pt=temp4.pt and temp4.uid=temp5.uid and temp4.bussiness_id=temp5.bussiness_id)
join(select * from sum0_per_con_hour_temp where pt='2012-03-05 10')temp6
on(temp6.mrid=temp5.mrid and temp5.pt=temp6.pt and temp6.uid=temp5.uid and temp6.bussiness_id=temp5.bussiness_id)
join(select * from sum0_steprate_hour_temp where pt='2012-03-05 10')temp7
on(temp6.mrid=temp7.mrid and temp7.pt=temp6.pt and temp6.uid=temp7.uid and temp6.bussiness_id=temp7.bussiness_id)
join(select * from avg0_time_hour_temp where pt='2012-03-05 10')temp8
on(temp8.mrid=temp7.mrid and temp7.pt=temp8.pt and temp8.uid=temp7.uid and temp8.bussiness_id=temp7.bussiness_id)
join(select * from AD_MATERIAL_DIM)temp9
on(temp8.mrid=temp9.id and temp8.uid=temp9.uid )
where temp0.pt=temp1.pt =temp2.pt=temp3.pt=temp4.pt=temp5.pt=temp6.pt=temp7.pt=temp8.pt;
按小时计算效果表的总记录
create table if not exists sum0_effect_hour_temp
(
effect_sum int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_effect_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(effect_id) as effect_sum ,
temp0.mrid as mrid,
temp0.bussiness_id as bussiness_id,
temp0.uid as uid
from
(
select * from ad_PATH where pt='2012-03-05 10'
) temp0
left outer join
(
select effect_id AS effect_id ,PAGE_ID AS PAGE_ID ,pt as pt
from ad_effect
where pt='2012-03-05 10'
) temp1
on(temp1.PAGE_ID=temp0.PAGE_ID)
where temp0.pt=temp1.pt and bussiness_id is not NULL
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
按小时计算点击表的曝光数
create table if not exists sum0_expose_hour_temp
(
EXPOSE_sum int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_expose_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(temp1.PAGE_ID) as EXPOSE_sum,
temp0.mrid as mrid,
temp0.bussiness_id as bussiness_id,
temp0.uid as uid
from (select * from ad_PATH where pt='2012-03-05 10' and BUSSINESS_ID is not NULL ) temp0
left outer join(
select PAGE_ID ,pt from ad_expose_click WHERE pt='2012-03-05 10' AND action='expose'
) temp1
on(temp1.PAGE_ID=temp0.PAGE_ID)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
按小时计算点击表的点击数
create table if not exists sum0_click_hour_temp
(
click_sum int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_click_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(temp1.PAGE_ID) as click_sum,
temp0.mrid as mrid,
temp0.bussiness_id as bussiness_id,
temp0.uid as uid
from (select * from ad_PATH where pt='2012-03-05 10' and BUSSINESS_ID is not NULL ) temp0
left outer join(
select PAGE_ID ,pt from ad_expose_click WHERE pt='2012-03-05 10' AND action='click'
) temp1
on(temp1.PAGE_ID=temp0.PAGE_ID)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
按小时的点击率
create table if not exists sum0_click_percent_hour_temp
(
ad_percent double,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_click_percent_hour_temp PARTITION (pt='2012-03-05 10')
select
click.click_sum/expose.EXPOSE_sum as ad_percent,
click.mrid as mrid,
click.uid as uid,
click.bussiness_id as bussiness_id
from
(select * from sum0_click_hour_temp where pt='2012-03-05 10') click
JOIN(select pt as pt ,uid as uid,bussiness_id as bussiness_id ,mrid as mrid ,EXPOSE_sum as EXPOSE_sum
from sum0_expose_hour_temp WHERE pt ='2012-03-05 10' )expose
on (click.MRID=expose.mrid and click.uid=expose.uid and click.bussiness_id=expose.bussiness_id)
where expose.pt=click.pt;
小时级页面浏览数
create table if not exists sum0_click_browse_hour_temp
(
ad_browse int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_click_browse_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(distinct(tmp2.page_id)) as ad_browse,
tmp2.mrid as mrid,
tmp2.uid as uid,
tmp2.bussiness_id as bussiness_id
from (select * from ad_effect where pt ='2012-03-05 10') tmp1
join (select page_id, pt, SESSION_ID, bussiness_id, mrid ,uid from ad_PATH where pt='2012-03-05 10'
and bussiness_id is not NULL) tmp2
on (tmp1.page_id = tmp2.page_id)
join (select pt as pt,SESSION_ID as SESSION_ID, bussiness_id, uid from ad_SESSION
where SOURCE_TYPE ='Direct' and pt = '2012-03-05 10') tmp3
on (tmp2.SESSION_ID=tmp3.SESSION_ID and tmp2.uid=tmp3.uid and tmp2.bussiness_id=tmp3.bussiness_id)
where tmp1.pt=tmp2.pt=tmp3.pt
GROUP BY tmp2.mrid,tmp2.uid,tmp2.bussiness_id;
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
安小时网页浏览/广告点击
create table if not exists sum0_clibro_hour_rate_temp
(
ad_clibro double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_clibro_hour_rate_temp PARTITION (pt='2012-03-05 10')
select
temp0.ad_browse/temp1.click_sum as ad_clibro,
temp0.mrid as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from sum0_click_browse_hour_temp where pt = '2012-03-05 10')temp0
join(select bussiness_id,uid,pt,click_sum as click_sum, mrid as mrid from sum0_click_hour_temp where pt='2012-03-05 10') temp1
on(temp0.mrid=temp1.mrid and temp1.pt=temp0.pt and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id )
where temp0.pt=temp1.pt;
转化率
create table if not exists sum0_per_con_hour_temp
(
ad_percon double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_per_con_hour_temp PARTITION (pt='2012-03-05 10')
select
temp0.effect_sum/temp1.click_sum as ad_clibro,
temp0.mrid as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from
(select * from sum0_effect_hour_temp where pt ='2012-03-05 10' ) temp0
join(select * from sum0_click_hour_temp where pt ='2012-03-05 10') temp1
on(temp0.mrid=temp1.mrid and temp1.pt=temp0.pt and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id );
where temp0.pt=temp1.pt;
跳出率:
进入1步的离开数量
create table if not exists sum0_step1_hour_temp
(
ad_step1 int ,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_step1_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(distinct(temp0.session_id)) as ad_step1,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from AD_PATH where pt ='2012-03-05 10' and session_step='1') temp0
join ( select * from ad_session where pt ='2012-03-05 10' and source_type='3') temp1
on(temp0.SESSION_ID=temp1.SESSION_ID and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
进入所有步的离开数量
create table if not exists sum0_stepn_hour_temp
(
ad_stepn int ,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_stepn_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(distinct(temp0.session_id)) as ad_stepn,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from AD_PATH where pt ='2012-03-05 10' ) temp0
join ( select * from ad_session where pt ='2012-03-05 10' and source_type='3') temp1
on(temp0.SESSION_ID=temp1.SESSION_ID and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
// 计算跳出率
create table if not exists sum0_steprate_hour_temp
(
ad_step double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_steprate_hour_temp PARTITION (pt='2012-03-05 10')
select
temp0.ad_step1/temp1.ad_stepn as ad_step,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from sum0_step1_hour_temp where pt ='2012-03-05 10' ) temp0
join ( select * from sum0_stepn_hour_temp where pt ='2012-03-05 10' )temp1
on(
temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id and temp0.mrid =temp0.mrid
)
where temp0.pt=temp1.pt;
平均停留时间:
create table if not exists avg0_time_hour_temp
(
ad_avgtime double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table avg0_time_hour_temp PARTITION (pt='2012-03-05 10')
select
avg(temp1.session_stay_seconds) as ad_avgtime,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from ad_path where pt ='2012-03-05 10') temp0
join
(select * from ad_session where pt ='2012-03-05 10') temp1
on (temp0.SESSION_ID=temp1.SESSION_ID and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
select
temp0.mrid as material_id ,
temp9.name as material_name,
'pv' as AD_EFT_MATE_CAL_TYPE,
temp0.uid as temp0,
temp0.bussiness_id as bussiness_id ,
'HOURLY' as time_type,
'2012-03-05 10' as date_desc,
temp0.effect_sum as ad_eft_mate_cnt,
temp1.EXPOSE_sum as ad_show_cnt,
temp2.click_sum as ad_click_cnt,
temp3.ad_percent as ad_click_rate,
temp4.ad_browse as pageview_cnt,
temp5.ad_clibro as pvcnt_div_adclick,
temp7.ad_step as ad_bounce_rate,
temp6.ad_percon as ad_conv_rate,
temp8.ad_avgtime as avg_stay_time,
to_date(unix_timestamp())
from (select * from sum0_effect_hour_temp where pt='2012-03-05 10')temp0
join (select * from sum0_expose_hour_temp where pt='2012-03-05 10')temp1
on (temp0.mrid=temp1.mrid and temp1.pt=temp0.pt and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
join(select * from sum0_click_hour_temp where pt='2012-03-05 10')temp2
on(temp2.mrid=temp1.mrid and temp1.pt=temp2.pt and temp2.uid=temp1.uid and temp2.bussiness_id=temp1.bussiness_id)
join(select * from sum0_click_percent_hour_temp where pt='2012-03-05 10')temp3
on(temp2.mrid=temp3.mrid and temp3.pt=temp2.pt and temp2.uid=temp3.uid and temp2.bussiness_id=temp3.bussiness_id)
join(select * from sum0_click_browse_hour_temp where pt='2012-03-05 10')temp4
on(temp4.mrid=temp3.mrid and temp3.pt=temp4.pt and temp4.uid=temp3.uid and temp4.bussiness_id=temp3.bussiness_id)
join(select * from sum0_clibro_hour_rate_temp where pt='2012-03-05 10')temp5
on(temp4.mrid=temp5.mrid and temp5.pt=temp4.pt and temp4.uid=temp5.uid and temp4.bussiness_id=temp5.bussiness_id)
join(select * from sum0_per_con_hour_temp where pt='2012-03-05 10')temp6
on(temp6.mrid=temp5.mrid and temp5.pt=temp6.pt and temp6.uid=temp5.uid and temp6.bussiness_id=temp5.bussiness_id)
join(select * from sum0_steprate_hour_temp where pt='2012-03-05 10')temp7
on(temp6.mrid=temp7.mrid and temp7.pt=temp6.pt and temp6.uid=temp7.uid and temp6.bussiness_id=temp7.bussiness_id)
join(select * from avg0_time_hour_temp where pt='2012-03-05 10')temp8
on(temp8.mrid=temp7.mrid and temp7.pt=temp8.pt and temp8.uid=temp7.uid and temp8.bussiness_id=temp7.bussiness_id)
join(select * from AD_MATERIAL_DIM)temp9
on(temp8.mrid=temp9.id and temp8.uid=temp9.uid )
where temp0.pt=temp1.pt =temp2.pt=temp3.pt=temp4.pt=temp5.pt=temp6.pt=temp7.pt=temp8.pt;
按小时计算效果表的总记录
create table if not exists sum0_effect_hour_temp
(
effect_sum int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_effect_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(effect_id) as effect_sum ,
temp0.mrid as mrid,
temp0.bussiness_id as bussiness_id,
temp0.uid as uid
from
(
select * from ad_PATH where pt='2012-03-05 10'
) temp0
left outer join
(
select effect_id AS effect_id ,PAGE_ID AS PAGE_ID ,pt as pt
from ad_effect
where pt='2012-03-05 10'
) temp1
on(temp1.PAGE_ID=temp0.PAGE_ID)
where temp0.pt=temp1.pt and bussiness_id is not NULL
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
按小时计算点击表的曝光数
create table if not exists sum0_expose_hour_temp
(
EXPOSE_sum int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_expose_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(temp1.PAGE_ID) as EXPOSE_sum,
temp0.mrid as mrid,
temp0.bussiness_id as bussiness_id,
temp0.uid as uid
from (select * from ad_PATH where pt='2012-03-05 10' and BUSSINESS_ID is not NULL ) temp0
left outer join(
select PAGE_ID ,pt from ad_expose_click WHERE pt='2012-03-05 10' AND action='expose'
) temp1
on(temp1.PAGE_ID=temp0.PAGE_ID)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
按小时计算点击表的点击数
create table if not exists sum0_click_hour_temp
(
click_sum int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_click_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(temp1.PAGE_ID) as click_sum,
temp0.mrid as mrid,
temp0.bussiness_id as bussiness_id,
temp0.uid as uid
from (select * from ad_PATH where pt='2012-03-05 10' and BUSSINESS_ID is not NULL ) temp0
left outer join(
select PAGE_ID ,pt from ad_expose_click WHERE pt='2012-03-05 10' AND action='click'
) temp1
on(temp1.PAGE_ID=temp0.PAGE_ID)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
按小时的点击率
create table if not exists sum0_click_percent_hour_temp
(
ad_percent double,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_click_percent_hour_temp PARTITION (pt='2012-03-05 10')
select
click.click_sum/expose.EXPOSE_sum as ad_percent,
click.mrid as mrid,
click.uid as uid,
click.bussiness_id as bussiness_id
from
(select * from sum0_click_hour_temp where pt='2012-03-05 10') click
JOIN(select pt as pt ,uid as uid,bussiness_id as bussiness_id ,mrid as mrid ,EXPOSE_sum as EXPOSE_sum
from sum0_expose_hour_temp WHERE pt ='2012-03-05 10' )expose
on (click.MRID=expose.mrid and click.uid=expose.uid and click.bussiness_id=expose.bussiness_id)
where expose.pt=click.pt;
小时级页面浏览数
create table if not exists sum0_click_browse_hour_temp
(
ad_browse int,
MRID string,
uid string,
bussiness_id string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_click_browse_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(distinct(tmp2.page_id)) as ad_browse,
tmp2.mrid as mrid,
tmp2.uid as uid,
tmp2.bussiness_id as bussiness_id
from (select * from ad_effect where pt ='2012-03-05 10') tmp1
join (select page_id, pt, SESSION_ID, bussiness_id, mrid ,uid from ad_PATH where pt='2012-03-05 10'
and bussiness_id is not NULL) tmp2
on (tmp1.page_id = tmp2.page_id)
join (select pt as pt,SESSION_ID as SESSION_ID, bussiness_id, uid from ad_SESSION
where SOURCE_TYPE ='Direct' and pt = '2012-03-05 10') tmp3
on (tmp2.SESSION_ID=tmp3.SESSION_ID and tmp2.uid=tmp3.uid and tmp2.bussiness_id=tmp3.bussiness_id)
where tmp1.pt=tmp2.pt=tmp3.pt
GROUP BY tmp2.mrid,tmp2.uid,tmp2.bussiness_id;
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
安小时网页浏览/广告点击
create table if not exists sum0_clibro_hour_rate_temp
(
ad_clibro double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_clibro_hour_rate_temp PARTITION (pt='2012-03-05 10')
select
temp0.ad_browse/temp1.click_sum as ad_clibro,
temp0.mrid as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from sum0_click_browse_hour_temp where pt = '2012-03-05 10')temp0
join(select bussiness_id,uid,pt,click_sum as click_sum, mrid as mrid from sum0_click_hour_temp where pt='2012-03-05 10') temp1
on(temp0.mrid=temp1.mrid and temp1.pt=temp0.pt and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id )
where temp0.pt=temp1.pt;
转化率
create table if not exists sum0_per_con_hour_temp
(
ad_percon double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_per_con_hour_temp PARTITION (pt='2012-03-05 10')
select
temp0.effect_sum/temp1.click_sum as ad_clibro,
temp0.mrid as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from
(select * from sum0_effect_hour_temp where pt ='2012-03-05 10' ) temp0
join(select * from sum0_click_hour_temp where pt ='2012-03-05 10') temp1
on(temp0.mrid=temp1.mrid and temp1.pt=temp0.pt and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id );
where temp0.pt=temp1.pt;
跳出率:
进入1步的离开数量
create table if not exists sum0_step1_hour_temp
(
ad_step1 int ,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_step1_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(distinct(temp0.session_id)) as ad_step1,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from AD_PATH where pt ='2012-03-05 10' and session_step='1') temp0
join ( select * from ad_session where pt ='2012-03-05 10' and source_type='3') temp1
on(temp0.SESSION_ID=temp1.SESSION_ID and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
进入所有步的离开数量
create table if not exists sum0_stepn_hour_temp
(
ad_stepn int ,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_stepn_hour_temp PARTITION (pt='2012-03-05 10')
select
COUNT(distinct(temp0.session_id)) as ad_stepn,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from AD_PATH where pt ='2012-03-05 10' ) temp0
join ( select * from ad_session where pt ='2012-03-05 10' and source_type='3') temp1
on(temp0.SESSION_ID=temp1.SESSION_ID and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
// 计算跳出率
create table if not exists sum0_steprate_hour_temp
(
ad_step double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table sum0_steprate_hour_temp PARTITION (pt='2012-03-05 10')
select
temp0.ad_step1/temp1.ad_stepn as ad_step,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from sum0_step1_hour_temp where pt ='2012-03-05 10' ) temp0
join ( select * from sum0_stepn_hour_temp where pt ='2012-03-05 10' )temp1
on(
temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id and temp0.mrid =temp0.mrid
)
where temp0.pt=temp1.pt;
平均停留时间:
create table if not exists avg0_time_hour_temp
(
ad_avgtime double,
MRID string,
uid string,
bussiness_id string
)PARTITIONED BY (pt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS RCFILE;
insert overwrite table avg0_time_hour_temp PARTITION (pt='2012-03-05 10')
select
avg(temp1.session_stay_seconds) as ad_avgtime,
temp0.MRID as MRID,
temp0.uid as uid,
temp0.bussiness_id as bussiness_id
from (select * from ad_path where pt ='2012-03-05 10') temp0
join
(select * from ad_session where pt ='2012-03-05 10') temp1
on (temp0.SESSION_ID=temp1.SESSION_ID and temp0.uid=temp1.uid and temp0.bussiness_id=temp1.bussiness_id)
where temp0.pt=temp1.pt
GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;
相关推荐
在本课程设计中,主题是“基于Antlr4的Hive SQL解析”,这涉及到了两个主要的技术领域:Antlr4和Hive SQL。Antlr4是一个强大的解析工具,用于生成解析器和词法分析器,它能处理各种语言的语法,包括SQL。而Hive SQL...
本文将深入探讨Hive SQL如何被编译成MapReduce任务,以及在这个过程中涉及到的关键原理。 1. MapReduce实现基本SQL操作的原理: - **Join的实现原理**:在Hive中,Join操作通常通过MapReduce来实现。例如,在一个...
HiveSQL 实战题目 HiveSQL 是一种基于 Hadoop 的数据仓库工具,用于存储、查询和分析大规模数据。本文档提供了 HiveSQL 实战题目,旨在帮助读者熟悉 HiveSQL 的基本概念和应用场景。 一、HiveSQL 基础 HiveSQL 是...
《最强HiveSQL开发指南》是一本专注于Hive性能调优和实战操作的教程,旨在帮助读者深入理解和熟练运用Hive进行大数据处理。Hive作为Apache Hadoop生态系统中的一个组件,主要用于处理和分析大规模分布式存储的数据。...
1.上传tar包 2.解压 tar -zxvf hive-1.2.1.tar.gz 3.安装mysql数据库 推荐yum 在线安装 4.配置hive (a)配置HIVE_HOME环境变量 vi conf/hive-env.sh ... hive -e ‘sql’ bin/hive -e 'select * from t_test'
HiveSQL编译原理是大数据处理领域中的一个重要概念,它主要涉及到如何将用户提交的SQL语句转换为可执行的MapReduce或者Tez任务。在Hadoop生态系统中,Hive作为一个基于HDFS的数据仓库工具,提供了对大规模数据集进行...
### Hive SQL练习题库知识点详解 #### 一、Hive SQL概述 Hive 是一个基于 Hadoop 的数据仓库工具,可以将结构化的数据文件映射为一张数据库表,并提供简单的 SQL 查询功能,使不熟悉 MapReduce 的用户可以利用 SQL...
Hive SQL详解经典 在大数据处理领域,Hive是一个基于Hadoop的数据仓库工具,它允许用户使用SQL(称为HiveQL)查询存储在Hadoop分布式文件系统(HDFS)中的大型数据集。这篇博客深入探讨了Hive SQL的使用,帮助用户...
Apache Hive 是一个基于 Hadoop 的数据仓库工具,可以将结构化的数据文件映射为一张数据库表,并提供简单的 SQL 查询功能,使不熟悉 MapReduce 的开发人员也能用 SQL 语言进行数据查询。Hive 在内部将 SQL 语句转换...
### HiveSQL解析原理详解 #### 一、引言 Hive作为一款建立在Hadoop之上的数据仓库系统,被广泛应用于大数据处理场景中。对于众多企业和组织而言,它不仅是存储和管理海量数据的有效手段,同时也是进行数据分析的...
// 具有子查询的sql String hql = "select id,name from (select id from table_1 where id={p0}) t1 inner join (select name --this is name\n from table_2) t2"; // 获取id字段的血缘 LineageNode idNode = ...
hivesql计算访问者年龄的SQL
HiveSQL详细和优化,以及部分个人点评 This is a brief tutorial that provides an introduction on how to use Apache Hive HiveQL with Hadoop Distributed File System. This tutorial can be your first step ...
### HiveSQL优化手册 #### 数据倾斜优化 **数据倾斜**是指在执行Hive查询时,由于数据分布不均,导致一部分任务处理的数据量远大于其他任务的现象,这将严重影响整个查询的性能。针对数据倾斜问题,可以从以下几个...
【秘密】Hive SQL 开发规范 Hive作为Hadoop生态系统中的重要组件,为企业提供了对大规模数据集的高效处理和分析能力。它允许用户通过类似于SQL的查询语言(HiveQL)来操作分布式存储的数据,简化了MapReduce编程...
hivesql解析 支持版本<!--<hive.version>1.0.0</hive.version> <hadoop.version>2.4.1</hadoop.version> --> <hive.version>2.0.1</hive.version> <hadoop.version>2.7.2</hadoop.version>