码迷,mamicode.com
首页 > 数据库 > 详细

hive-sql

时间:2019-05-09 10:56:16      阅读:146      评论:0      收藏:0      [点我收藏+]

标签:ems   split   row   exp   字符串   建表   exists   group by   示例   

#原始示例数据
#1,k1|k2|k3|k1
#2,k1|k1|k5|k3|k6
#3,k3|k6|k7|k8

#建表语句
CREATE TABLE IF NOT EXISTS words(id INT,kw STRING)
ROW FORMAT delimited
FIELDS TERMINATED BY ‘,‘
collection items TERMINATED BY ‘|‘;

#加载数据(local从本地,否则从hdfs;overwrite覆盖原有数据)
load data [local] inpath ‘J:/test.txt‘ [overwrite] into table words;

#字段转行(拼接成1个字段),结果如下
1 k1
1 k2
1 k3
1 k1
2 k1
2 k1
2 k5
2 k3
2 k6
3 k3
3 k6
3 k7
3 k8
SELECT t1.id,CONCAT_WS(‘:‘,t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,‘\\|‘)) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w;


#字段转行
SELECT t1.id ,t1.k_w ,COUNT(1) FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,‘\\|‘)) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w;


#相同id数据分组后,将多行转1个字段类型为集合字符串,结果如下
1 k1:2,k2:1,k3:1
2 k1:2,k3:1,k5:1,k6:1
3 k3:1,k6:1,k7:1,k8:1
SELECT t2.id AS id,CONCAT_WS(‘,‘,collect_set(t2.kw_time)) AS kw_time_set FROM (
SELECT t1.id AS id,CONCAT_WS(‘:‘,t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,‘\\|‘)) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w
) AS t2
GROUP BY t2.id;

#相同id数据分组后,将多行转1个字段类型为集合,结果如下
1 ["k1:2","k2:1","k3:1"]
2 ["k1:2","k3:1","k5:1","k6:1"]
3 ["k3:1","k6:1","k7:1","k8:1"]
SELECT t2.id AS id,collect_set(t2.kw_time) AS kw_time_set FROM (
SELECT t1.id AS id,CONCAT_WS(‘:‘,t1.k_w,CAST(COUNT(1) AS STRING)) AS kw_time FROM (
SELECT id,k_w FROM words lateral VIEW explode(split(kw,‘\\|‘)) f AS k_w
) AS t1
GROUP BY t1.id,t1.k_w
) AS t2
GROUP BY t2.id;

hive-sql

标签:ems   split   row   exp   字符串   建表   exists   group by   示例   

原文地址:https://www.cnblogs.com/zzq-include/p/10836877.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!