首页 > 其他 > 详细

Spark项目实战从0到1之(28)数仓项目(九)数仓搭建-DWT 层

时间:2020-09-27 16:06:26      阅读:30      评论:0      收藏:0      [点我收藏+]

2.1 设备主题宽表


技术分享图片

1)建表语句

drop table if exists dwt_uv_topic;
create external table dwt_uv_topic
(
`mid_id` string COMMENT 设备唯一标识,
`user_id` string COMMENT 用户标识,
`version_code` string COMMENT 程序版本号,
`version_name` string COMMENT 程序版本名,
`lang` string COMMENT 系统语言,
`source` string COMMENT 渠道号,
`os` string COMMENT 安卓系统版本,
`area` string COMMENT 区域,
`model` string COMMENT 手机型号,
`brand` string COMMENT 手机品牌,
`sdk_version` string COMMENT sdkVersion,
`gmail` string COMMENT gmail,
`height_width` string COMMENT 屏幕宽高,
`app_time` string COMMENT 客户端日志产生时的时间,
`network` string COMMENT 网络模式,
`lng` string COMMENT 经度,
`lat` string COMMENT 纬度,
`login_date_first` string comment 首次活跃时间,
`login_date_last` string comment 末次活跃时间,
`login_day_count` bigint comment 当日活跃次数,
`login_count` bigint comment 累积活跃天数
)
stored as parquet
location /warehouse/gmall/dwt/dwt_uv_topic;

2)数据装载

insert overwrite table dwt_uv_topic
select
nvl(new.mid_id,old.mid_id),
nvl(new.user_id,old.user_id),
nvl(new.version_code,old.version_code),
nvl(new.version_name,old.version_name),
nvl(new.lang,old.lang),
nvl(new.source,old.source),
nvl(new.os,old.os),
nvl(new.area,old.area),
nvl(new.model,old.model),
nvl(new.brand,old.brand),
nvl(new.sdk_version,old.sdk_version),
nvl(new.gmail,old.gmail),
nvl(new.height_width,old.height_width),
nvl(new.app_time,old.app_time),
nvl(new.network,old.network),
nvl(new.lng,old.lng),
nvl(new.lat,old.lat),
if(old.mid_id is null,2020-03-10,old.login_date_first),
if(new.mid_id is not null,2020-03-10,old.login_date_last),
if(new.mid_id is not null, new.login_count,0),
nvl(old.login_count,0)+if(new.login_count>0,1,0)
from
(
select
*
from dwt_uv_topic
)old
full outer join
(
select
*
from dws_uv_detail_daycount
where dt=2020-03-10
)new
on old.mid_id=new.mid_id;

3)查询加载结果

select * from dwt_uv_topic limit 5;

2.2 会员主题宽表
宽表字段怎么来?维度关联的事实表度量值+开头、结尾+累积+累积一个时间段



技术分享图片


1)建表语句

drop table if exists dwt_user_topic;
create external table dwt_user_topic
(
user_id string comment 用户 id,
login_date_first string comment 首次登录时间,
login_date_last string comment 末次登录时间,
login_count bigint comment 累积登录天数,
login_last_30d_count bigint comment 最近 30 日登录天数,
order_date_first string comment 首次下单时间,
order_date_last string comment 末次下单时间,
order_count bigint comment 累积下单次数,
order_amount decimal(16,2) comment 累积下单金额,
order_last_30d_count bigint comment 最近 30 日下单次数,
order_last_30d_amount bigint comment 最近 30 日下单金额,
payment_date_first string comment 首次支付时间,
payment_date_last string comment 末次支付时间,
payment_count decimal(16,2) comment 累积支付次数,
payment_amount decimal(16,2) comment 累积支付金额,
payment_last_30d_count decimal(16,2) comment 最近 30 日支付次数,
payment_last_30d_amount decimal(16,2) comment 最近 30 日支付金额
)COMMENT 用户主题宽表
stored as parquet
location /warehouse/gmall/dwt/dwt_user_topic/
tblproperties ("parquet.compression"="lzo");

3)查询加载结果

select * from dwt_user_topic limit 5;

2.3 商品主题宽表
1)建表语句

create external table dwt_sku_topic
(
sku_id string comment sku_id,
spu_id string comment spu_id,
order_last_30d_count bigint comment 最近 30 日被下单次数,
order_last_30d_num bigint comment 最近 30 日被下单件数,
order_last_30d_amount decimal(16,2) comment 最近 30 日被下单金额,
order_count bigint comment 累积被下单次数,
order_num bigint comment 累积被下单件数,
order_amount decimal(16,2) comment 累积被下单金额,
payment_last_30d_count bigint comment 最近 30 日被支付次数,
payment_last_30d_num bigint comment 最近 30 日被支付件数,
payment_last_30d_amount decimal(16,2) comment 最近 30 日被支付金额,
payment_count bigint comment 累积被支付次数,
payment_num bigint comment 累积被支付件数,
payment_amount decimal(16,2) comment 累积被支付金额,
refund_last_30d_count bigint comment 最近三十日退款次数,
refund_last_30d_num bigint comment 最近三十日退款件数,
refund_last_30d_amount decimal(10,2) comment 最近三十日退款金额,
refund_count bigint comment 累积退款次数,
refund_num bigint comment 累积退款件数,
refund_amount decimal(10,2) comment 累积退款金额,
cart_last_30d_count bigint comment 最近 30 日被加入购物车次数,
cart_last_30d_num bigint comment 最近 30 日被加入购物车件数,
cart_count bigint comment 累积被加入购物车次数,
cart_num bigint comment 累积被加入购物车件数,
favor_last_30d_count bigint comment 最近 30 日被收藏次数,
favor_count bigint comment 累积被收藏次数,
appraise_last_30d_good_count bigint comment 最近 30 日好评数,
appraise_last_30d_mid_count bigint comment 最近 30 日中评数,
appraise_last_30d_bad_count bigint comment 最近 30 日差评数,
appraise_last_30d_default_count bigint comment 最近 30 日默认评价数,
appraise_good_count bigint comment 累积好评数,
appraise_mid_count bigint comment 累积中评数,
appraise_bad_count bigint comment 累积差评数,
appraise_default_count bigint comment 累积默认评价数
)COMMENT 商品主题宽表
stored as parquet
location /warehouse/gmall/dwt/dwt_sku_topic/
tblproperties ("parquet.compression"="lzo");

2)数据装载

insert overwrite table dwt_sku_topic
select
nvl(new.sku_id,old.sku_id), sku_info.spu_id,
nvl(new.order_count30,0),
nvl(new.order_num30,0),
nvl(new.order_amount30,0),
nvl(old.order_count,0) + nvl(new.order_count,0),
nvl(old.order_num,0) + nvl(new.order_num,0),
nvl(old.order_amount,0) + nvl(new.order_amount,0),
nvl(new.payment_count30,0),
nvl(new.payment_num30,0),
nvl(new.payment_amount30,0),
nvl(old.payment_count,0) + nvl(new.payment_count,0),
nvl(old.payment_num,0) + nvl(new.payment_count,0),
nvl(old.payment_amount,0) + nvl(new.payment_count,0),
nvl(new.refund_count30,0),
nvl(new.refund_num30,0),
nvl(new.refund_amount30,0),
nvl(old.refund_count,0) + nvl(new.refund_count,0),
nvl(old.refund_num,0) + nvl(new.refund_num,0),
nvl(old.refund_amount,0) + nvl(new.refund_amount,0),
nvl(new.cart_count30,0),
nvl(new.cart_num30,0),
nvl(old.cart_count,0) + nvl(new.cart_count,0),
nvl(old.cart_num,0) + nvl(new.cart_num,0),
nvl(new.favor_count30,0),
nvl(old.favor_count,0) + nvl(new.favor_count,0),
nvl(new.appraise_good_count30,0),
nvl(new.appraise_mid_count30,0),
nvl(new.appraise_bad_count30,0),
nvl(new.appraise_default_count30,0) ,
nvl(old.appraise_good_count,0) + nvl(new.appraise_good_count,0),
nvl(old.appraise_mid_count,0) + nvl(new.appraise_mid_count,0),
nvl(old.appraise_bad_count,0) + nvl(new.appraise_bad_count,0),
nvl(old.appraise_default_count,0) + nvl(new.appraise_default_count,0)
from
(
select
sku_id,
spu_id,
order_last_30d_count,
order_last_30d_num,
order_last_30d_amount,
order_count,
order_num,
order_amount ,
payment_last_30d_count,
payment_last_30d_num,
payment_last_30d_amount,
payment_count,
payment_num,
payment_amount,
refund_last_30d_count,
refund_last_30d_num,
refund_last_30d_amount,
refund_count,
refund_num,
refund_amount,
cart_last_30d_count,
cart_last_30d_num,
cart_count,
cart_num,
favor_last_30d_count,
favor_count,
appraise_last_30d_good_count,
appraise_last_30d_mid_count,
appraise_last_30d_bad_count,
appraise_last_30d_default_count,
appraise_good_count,
appraise_mid_count,
appraise_bad_count,
appraise_default_count
from dwt_sku_topic
)old
full outer join
(
select
sku_id,
sum(if(dt=2020-03-10, order_count,0 )) order_count,
sum(if(dt=2020-03-10,order_num ,0 )) order_num,
sum(if(dt=2020-03-10,order_amount,0 )) order_amount ,
sum(if(dt=2020-03-10,payment_count,0 )) payment_count,
sum(if(dt=2020-03-10,payment_num,0 )) payment_num,
sum(if(dt=2020-03-10,payment_amount,0 )) payment_amount,
sum(if(dt=2020-03-10,refund_count,0 )) refund_count,
sum(if(dt=2020-03-10,refund_num,0 )) refund_num,
sum(if(dt=2020-03-10,refund_amount,0 )) refund_amount,
sum(if(dt=2020-03-10,cart_count,0 )) cart_count,
sum(if(dt=2020-03-10,cart_num,0 )) cart_num,
sum(if(dt=2020-03-10,favor_count,0 )) favor_count,
sum(if(dt=2020-03-10,appraise_good_count,0 )) appraise_good_count,
sum(if(dt=2020-03-10,appraise_mid_count,0 ) ) appraise_mid_count ,
sum(if(dt=2020-03-10,appraise_bad_count,0 )) appraise_bad_count,
sum(if(dt=2020-03-10,appraise_default_count,0 )) appraise_default_count,
sum(order_count) order_count30 ,
sum(order_num) order_num30,
sum(order_amount) order_amount30,
sum(payment_count) payment_count30,
sum(payment_num) payment_num30,
sum(payment_amount) payment_amount30,
sum(refund_count) refund_count30,
sum(refund_num) refund_num30,
sum(refund_amount) refund_amount30,
sum(cart_count) cart_count30,
sum(cart_num) cart_num30,
sum(favor_count) favor_count30,
sum(appraise_good_count) appraise_good_count30,
sum(appraise_mid_count) appraise_mid_count30,
sum(appraise_bad_count) appraise_bad_count30,
sum(appraise_default_count) appraise_default_count30
from dws_sku_action_daycount
where dt >= date_add (2020-03-10, -30)
group by sku_id
)new
on new.sku_id = old.sku_id
left join
(select * from dwd_dim_sku_info where dt=2020-03-10) sku_info
on nvl(new.sku_id,old.sku_id)= sku_info.id;

3)查询加载结果

select * from dwt_sku_topic limit 5;

2.4 DWT 层数据导入脚本
1)vim dws_to_dwt.sh
在脚本中填写如下内容

#!/bin/bash
APP=gmall
hive=/opt/modules/hive/bin/hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
insert overwrite table ${APP}.dwt_uv_topic
select
nvl(new.mid_id,old.mid_id),
nvl(new.user_id,old.user_id),
nvl(new.version_code,old.version_code),
nvl(new.version_name,old.version_name),
nvl(new.lang,old.lang),
nvl(new.source,old.source),
nvl(new.os,old.os),
nvl(new.area,old.area),
nvl(new.model,old.model),
nvl(new.brand,old.brand),
nvl(new.sdk_version,old.sdk_version),
nvl(new.gmail,old.gmail),
nvl(new.height_width,old.height_width),
nvl(new.app_time,old.app_time),
nvl(new.network,old.network),
nvl(new.lng,old.lng),
nvl(new.lat,old.lat),
nvl(old.login_date_first,$do_date),
if(new.login_count>0,$do_date,old.login_date_last),
nvl(new.login_count,0),
nvl(new.login_count,0)+nvl(old.login_count,0)
from
(
select
*
from ${APP}.dwt_uv_topic
)old
full outer join
(
select
*
from ${APP}.dws_uv_detail_daycount
where dt=$do_date
)new
on old.mid_id=new.mid_id;


insert overwrite table ${APP}.dwt_user_topic
select
nvl(new.user_id,old.user_id),
if(old.login_date_first is null and
new.login_count>0,$do_date,old.login_date_first),
if(new.login_count>0,$do_date,old.login_date_last),
nvl(old.login_count,0)+if(new.login_count>0,1,0),
nvl(new.login_last_30d_count,0),
if(old.order_date_first is null and
new.order_count>0,$do_date,old.order_date_first),
if(new.order_count>0,$do_date,old.order_date_last),
nvl(old.order_count,0)+nvl(new.order_count,0),
nvl(old.order_amount,0)+nvl(new.order_amount,0),
nvl(new.order_last_30d_count,0),
nvl(new.order_last_30d_amount,0),
if(old.payment_date_first is null and
new.payment_count>0,$do_date,old.payment_date_first),
if(new.payment_count>0,$do_date,old.payment_date_last),
nvl(old.payment_count,0)+nvl(new.payment_count,0),
nvl(old.payment_amount,0)+nvl(new.payment_amount,0),
nvl(new.payment_last_30d_count,0),
nvl(new.payment_last_30d_amount,0)
from
(
select
*
from ${APP}.dwt_user_topic
)old
full outer join
(
select
user_id,
sum(if(dt=$do_date,login_count,0)) login_count,
sum(if(dt=$do_date,order_count,0)) order_count,
sum(if(dt=$do_date,order_amount,0)) order_amount,
sum(if(dt=$do_date,payment_count,0)) payment_count,
sum(if(dt=$do_date,payment_amount,0)) payment_amount,
sum(if(order_count>0,1,0)) login_last_30d_count,
sum(order_count) order_last_30d_count,
sum(order_amount) order_last_30d_amount,
sum(payment_count) payment_last_30d_count,
sum(payment_amount) payment_last_30d_amount
from ${APP}.dws_user_action_daycount
where dt>=date_add( $do_date,-30)
group by user_id
)new
on old.user_id=new.user_id;
with
sku_act as
(
select
sku_id,
sum(if(dt=$do_date, order_count,0 )) order_count,
sum(if(dt=$do_date,order_num ,0 )) order_num,
sum(if(dt=$do_date,order_amount,0 )) order_amount ,
sum(if(dt=$do_date,payment_count,0 )) payment_count,
sum(if(dt=$do_date,payment_num,0 )) payment_num,
sum(if(dt=$do_date,payment_amount,0 )) payment_amount,
sum(if(dt=$do_date,refund_count,0 )) refund_count,
sum(if(dt=$do_date,refund_num,0 )) refund_num,
sum(if(dt=$do_date,refund_amount,0 )) refund_amount,
sum(if(dt=$do_date,cart_count,0 )) cart_count,
sum(if(dt=$do_date,cart_num,0 )) cart_num,
sum(if(dt=$do_date,favor_count,0 )) favor_count,
sum(if(dt=$do_date,appraise_good_count,0 )) appraise_good_count,
sum(if(dt=$do_date,appraise_mid_count,0 ) ) appraise_mid_count ,
sum(if(dt=$do_date,appraise_bad_count,0 )) appraise_bad_count,
sum(if(dt=$do_date,appraise_default_count,0 )) appraise_default_count,
sum( order_count ) order_count30 ,
sum( order_num ) order_num30,
sum(order_amount ) order_amount30,
sum(payment_count ) payment_count30,
sum(payment_num ) payment_num30,
sum(payment_amount ) payment_amount30,
sum(refund_count ) refund_count30,
sum(refund_num ) refund_num30,
sum(refund_amount ) refund_amount30,
sum(cart_count ) cart_count30,
sum(cart_num ) cart_num30,
sum(favor_count ) favor_count30,
sum(appraise_good_count ) appraise_good_count30,
sum(appraise_mid_count ) appraise_mid_count30,
sum(appraise_bad_count ) appraise_bad_count30,
sum(appraise_default_count ) appraise_default_count30
from ${APP}.dws_sku_action_daycount
where dt>=date_add ( $do_date,-30)
group by sku_id
),
sku_topic
as
(
select
sku_id,
spu_id,
order_last_30d_count,
order_last_30d_num,
order_last_30d_amount,
order_count,
order_num,
order_amount ,
payment_last_30d_count,
payment_last_30d_num,
payment_last_30d_amount,
payment_count,
payment_num,
payment_amount,
refund_last_30d_count,
refund_last_30d_num,
refund_last_30d_amount ,
refund_count ,
refund_num ,
refund_amount ,
cart_last_30d_count ,
cart_last_30d_num ,
cart_count ,
cart_num ,
favor_last_30d_count ,
favor_count ,
appraise_last_30d_good_count ,
appraise_last_30d_mid_count ,
appraise_last_30d_bad_count ,
appraise_last_30d_default_count ,
appraise_good_count ,
appraise_mid_count ,
appraise_bad_count ,
appraise_default_count
from ${APP}.dwt_sku_topic
)


insert overwrite table ${APP}.dwt_sku_topic
select
nvl(sku_act.sku_id,sku_topic.sku_id) ,
sku_info.spu_id,
nvl (sku_act.order_count30,0) ,
nvl (sku_act.order_num30,0) ,
nvl (sku_act.order_amount30,0) ,
nvl(sku_topic.order_count,0)+ nvl (sku_act.order_count,0) ,
nvl(sku_topic.order_num,0)+ nvl (sku_act.order_num,0) ,
nvl(sku_topic.order_amount,0)+ nvl (sku_act.order_amount,0),
nvl (sku_act.payment_count30,0),
nvl (sku_act.payment_num30,0),
nvl (sku_act.payment_amount30,0),
nvl(sku_topic.payment_count,0)+ nvl (sku_act.payment_count,0) ,
nvl(sku_topic.payment_num,0)+ nvl (sku_act.payment_count,0) ,
nvl(sku_topic.payment_amount,0)+ nvl (sku_act.payment_count,0) ,
nvl (refund_count30,0),
nvl (sku_act.refund_num30,0),
nvl (sku_act.refund_amount30,0),
nvl(sku_topic.refund_count,0)+ nvl (sku_act.refund_count,0),
nvl(sku_topic.refund_num,0)+ nvl (sku_act.refund_num,0),
nvl(sku_topic.refund_amount,0)+ nvl (sku_act.refund_amount,0),
nvl(sku_act.cart_count30,0) ,
nvl(sku_act.cart_num30,0) ,
nvl(sku_topic.cart_count ,0)+ nvl (sku_act.cart_count,0),
nvl( sku_topic.cart_num ,0)+ nvl (sku_act.cart_num,0),
nvl(sku_act.favor_count30 ,0) ,
nvl (sku_topic.favor_count ,0)+ nvl (sku_act.favor_count,0),
nvl (sku_act.appraise_good_count30 ,0) ,
nvl (sku_act.appraise_mid_count30 ,0) ,
nvl (sku_act.appraise_bad_count30 ,0) ,
nvl (sku_act.appraise_default_count30 ,0) ,
nvl (sku_topic.appraise_good_count ,0)+ nvl
(sku_act.appraise_good_count,0) ,
nvl (sku_topic.appraise_mid_count ,0)+ nvl
(sku_act.appraise_mid_count,0) ,
nvl (sku_topic.appraise_bad_count ,0)+ nvl
(sku_act.appraise_bad_count,0) ,
nvl (sku_topic.appraise_default_count ,0)+ nvl
(sku_act.appraise_default_count,0)
from sku_act
full outer join sku_topic
on sku_act.sku_id =sku_topic.sku_id
left join
(select * from ${APP}.dwd_dim_sku_info where dt=$do_date) sku_info
on nvl(sku_topic.sku_id,sku_act.sku_id)= sku_info.id;
"
$hive -e "$sql"

2)增加脚本执行权限

chmod 770 dws_to_dwt.sh

3)执行脚本导入数据

dws_to_dwt.sh 2020-03-11

4)查看导入数据

select * from dwt_uv_topic limit 5;
select * from dwt_user_topic limit 5;
select * from dwt_sku_topic limit 5;

 

Spark项目实战从0到1之(28)数仓项目(九)数仓搭建-DWT 层

原文:https://www.cnblogs.com/huanghanyu/p/13738989.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!