码迷,mamicode.com
首页 > 数据库 > 详细

hive分析脚本p_fact_bi_browser_t.sql

时间:2018-12-14 19:59:21      阅读:131      评论:0      收藏:0      [点我收藏+]

标签:sql   not   interval   browser   min   head   table   random   day   

 


-- Session configuration for this job. The settings are independent of each
-- other and are grouped here by purpose.

-- CLI output: print column headers.
set hive.cli.print.header=true;

-- Input split sizing.
set mapred.max.split.size=256000000;
set mapred.min.split.size.per.node=256000000;
set mapred.min.split.size.per.rack=256000000;

-- Reducer count and per-reducer input volume.
set hive.exec.reducers.bytes.per.reducer=1000000000;
set hive.exec.reducers.max=200;

-- Compression: intermediate data compressed, final output left uncompressed.
set hive.exec.compress.intermediate=true;
set hive.exec.compress.output=false;

-- Run independent stages of a query in parallel.
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;

-- Group-by behaviour: map-side aggregation and skew handling.
set hive.map.aggr=true;
set hive.groupby.mapaggr.checkinterval=100000;
set hive.groupby.skewindata=true;

-- Join behaviour: skew-join optimisation on, automatic map-join conversion off.
set hive.optimize.skewjoin=true;
set hive.skewjoin.key=100000;
set hive.auto.convert.join=false;

-- Miscellaneous execution switches.
set hive.vectorized.execution.enabled=false;
set hive.warehouse.subdir.inherit.perms=false;

-- Unqualified table names below resolve against this database.
use parsedb;

-- Stage 1: extract page-view fields from the raw JSON rows of
-- ods_db.ber_o2o_pv and overwrite the staging table FACT_BI_BROWSER_T_00.
--
-- A row is kept only when all of the following hold:
--   * $.session_id is present and not an empty string
--   * $.extend is absent (get_json_object returns NULL)
--   * $.log_time lies in the half-open window [v_fm_tm, v_to_tm)
--
-- String literals use plain ASCII single quotes. The typographic quote
-- characters previously used here are not valid HiveQL string delimiters.
--
-- NOTE(review): log_time is compared as a string, which presumably relies on
-- a lexicographically sortable timestamp format. Confirm against the source.
-- NOTE(review): the app_id column is populated from $.user_id. Confirm that
-- this mapping is intended.
insert overwrite table FACT_BI_BROWSER_T_00
select
    'o2o' as channel,
    get_json_object(t.json, '$.session_id') as session_id,
    get_json_object(t.json, '$.user_id') as app_id,
    get_json_object(t.json, '$.ip') as ip,
    get_json_object(t.json, '$.ref_url') as ref_url,
    get_json_object(t.json, '$.url') as url,
    get_json_object(t.json, '$.stay_time') as stay_time,
    get_json_object(t.json, '$.log_time') as log_time,
    get_json_object(t.json, '$.browser') as browser
from ods_db.ber_o2o_pv t
where (
        get_json_object(t.json, '$.session_id') is not null
    and get_json_object(t.json, '$.session_id') <> ''
)
and get_json_object(t.json, '$.extend') is null
and get_json_object(t.json, '$.log_time') >= '${v_fm_tm}'
and get_json_object(t.json, '$.log_time') < '${v_to_tm}';


--insert overwrite table FACT_BI_BROWSER_T_00
--select
--t.channel,
--t.session_id,
--t.app_id,
--t.ip,
--t.ref_url,
--t.url,
--t.stay_time,
--t.log_time,
--t.browser
--from FACT_BI_BROWSER_T_00 t;

-- 2. Page views (PV)
-- Overwrites FACT_BI_BROWSER_T_01 with one row per channel, browser and
-- calendar day of log_time. BI_VALUE is count(url), so rows whose url is
-- NULL do not contribute to the count.
insert overwrite table FACT_BI_BROWSER_T_01
select
    stg.browser           as BR_NAME,
    count(stg.url)        as BI_VALUE,
    to_date(stg.log_time) as BI_DATETIME,
    stg.channel           as BI_CHANNEL
from FACT_BI_BROWSER_T_00 stg
group by
    stg.channel,
    stg.browser,
    to_date(stg.log_time);

-- Stage 3: publish the aggregated rows from FACT_BI_BROWSER_T_01 into the
-- partitioned fact table FACT_BI_BROWSER_T, overwriting the partition named
-- by the year / month / day substitution variables.
--
--   ID          random java.util.UUID with the dashes removed
--   BI_TYPE     constant 1 (presumably the code for the PV metric, confirm)
--   BI_DATETIME the aggregation date with a midnight time part appended
--
-- String literals use plain ASCII single quotes. The typographic quote
-- characters previously used here are not valid HiveQL string delimiters.
--
-- NOTE(review): the target partition comes from the variables, not from
-- BI_DATETIME. If the extraction window spans more than one day, every day
-- lands in the same partition. Confirm that the caller passes a one-day window.
insert overwrite table FACT_BI_BROWSER_T partition (year='${year}', month='${month}', day='${day}')
select
    regexp_replace(reflect("java.util.UUID", "randomUUID"), "-", "") as ID,
    1 as BI_TYPE,
    t.BR_NAME,
    t.BI_VALUE,
    CONCAT(t.BI_DATETIME, ' 00:00:00') AS BI_DATETIME,
    t.BI_CHANNEL
from FACT_BI_BROWSER_T_01 t;

 

hive分析脚本p_fact_bi_browser_t.sql

标签:sql   not   interval   browser   min   head   table   random   day   

原文地址:https://www.cnblogs.com/heguoxiu/p/10120644.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!