背景: 有些报表除了需要每天的数据走势图,有时还需要每周的平均和每月的平均,再和上周或月作比较,计算差值,然后分别形成周平均报表和月平均报表。
日报表:每天计算报表数据
周报表:每周某个时间只计算一次
月报表:每月每个时间只计算一次
以下设计可以作为比较通用的周月差值计算方式。
考虑到查询条件的跨年,所以设计表的时候有一个日期是标志某个周期内的数据,这样查询就能通过这个日期连续的查询。etldate
结果展示:
以下是详细代码,包括设计的表和思路:
#!/usr/bin/env bash # ************************************************************************ # Useage: dwd_base_sall_week_month.sh[data_date] # Author: author # Version: V14.00.001 # Func Desc : XXX报表计算周报 月报 # modify describe : # yyyymmdd version author modified VERSION="V14.00.001" #初始版本为001,后面修改为递增1 if [ $# -gt 1 ];then # 参数个数,需视具体参数修改 echo "Params error." echo "Useage: dwd_base_sall_week_month.sh [data_date]" exit 1 fi logfile=$BIPROG_ROOT/logs/`basename $0`.log #定义写日志文件名 #写日志函数 writelog() { echo `date "+%Y-%m-%d %H:%M:%S"`" $1"|tee -a $logfile } #执行shell 命令 EXESHELL() { vIniNum=`expr ${vIniNum} + 1` if [ ${vIniNum} -lt ${vCurNum} ] then writelog "STEP ${vIniNum}: < ${vCurNum} SKIP ..." return 0 fi writelog "STEP ${vIniNum}: $CMD " eval $CMD if [ $? -ne 0 ] then writelog "SH Command failed! exit..." exit 1 fi } vDay=${1:-"`lastday YYYY-MM-DD`"} #如果没有传日期的参数,默认取昨天 dtstr=`date -d "0 day ago " +%Y-%m-%d" "%H:%M:%S` serven_day_before_etldate=`date -d "7 day ago ${vDay}" +"%Y-%m-%d"` # 获取该日期在一年中的第几周 third_week=`date -d "0 day ago $vDay" +%U` # 获取该日期是星期几 current_day_week=`date -d "0 day ago $vDay" +%w` # 该日期的的年份 current_year=`date -d "0 day ago $vDay" +%Y` # 该日期的月份 current_month=`date -d "0 day ago $vDay " +%m` # 该日期的7天前的月份 serven_day_before_month=`date -d "7 day ago $vDay" +%m` # 该日期的 日 current_day=`date -d "0 day ago $vDay" +%d` # 该日期7天前的日 serven_day_before=`date -d "7 day ago $vDay" +%d` # 定义变量 判断是否 执行周或月 count_num=0 # 周 月报 # drop table biuser.temp_period_order_cnt ; # create table biuser.temp_period_order_cnt ( # etldate string comment '日期', # year_val string comment '年份', # week_third string comment '第几周', # etldate_tag string comment '范围标签 01.04-01.10', # current_period_avg string comment '当前周/月平均值', # last_period_avg string comment '上周/月平均值', # difference_val string comment '差值' , # hb_val string comment '环比值', # data_type string comment '数据类型', # time_dim string comment '周/月', # addtimes string comment '加载时间' # ) comment 'XXX报表周/月报表' # partitioned by (dt string,dataType string) # row format delimited # fields terminated by '|' # lines terminated by '\n' stored as textfile ; # data_type =1 pcXXX匹配率 # data_type =2 pc在线匹配率 # data_type =3 PCXXX搜索网络失败率 # data_type =4 PC本地XXX使用占比 # data_type =5 PCXXX手动搜索占比 # 开始计算周报 每周跑一次 每周日起跑 # 判断日期 SQL=" INSERT OVERWRITE TABLE biuser.temp_period_order_cnt PARTITION(DT='${vDay}',dataType='1') select '${vDay}' etldate , '$current_year' year_val, '$third_week' as week_third, '$serven_day_before_month.$serven_day_before-$current_month.$current_day' as etldate_tag, a.current_period_avg as current_period_avg, b.current_period_avg as last_period_avg, round((b.current_period_avg - a.current_period_avg),2) as difference_val, round((b.current_period_avg - a.current_period_avg)*100/b.current_period_avg,2) hb_val, a.data_type, a.time_dim, '${dtstr}' as addtimes from (select '1' as data_type, '周' as time_dim, round(avg((cast(local_go_net as bigint) +cast(network_go_net as bigint))*100/(cast(local_go_net as bigint) +cast(network_go_net as bigint)+cast(matche_order_fail_cnt as bigint))),2) current_period_avg from biuser.dwd_base_sall where dt>='${serven_day_before_etldate}' and dt<'${vDay}' )a left outer join (select etldate, etldate_tag, current_period_avg, last_period_avg, difference_val, hb_val, data_type, time_dim from biuser.temp_period_order_cnt where dt='${serven_day_before_etldate}' and datatype='1' and time_dim='周' ) b on a.data_type=b.data_type and a.time_dim=b.time_dim; " if [ $current_day_week = 0 ] then echo '周末' EXE_HIVE "${SQL}" count_num=1 else echo '非周末' fi #开始计算月报 每月月初 计算上个月的月报 每个月的月初01 计算上个月的 # 年 y_report=`date -d ''$vDay' -1 days' +%Y` # 月 m_report=`date -d ''$vDay' -1 days' +%m` m_Last_report_day=`date -d "${vDay} 1 month ago" +%Y-%m-%d` # 日 d_report=`date -d ''$vDay' -1 days' +%d` echo '年'$y_report echo '月'$m_report echo '一个月前的日期'$m_Last_report_day echo '日'$d_report #是周末 也是月初 SQL=" INSERT into TABLE biuser.temp_period_order_cnt PARTITION(DT='${vDay}',dataType='1') select '${vDay}' etldate , '$current_year' year_val , '$current_month' week_third, ' ' etldate_tag, a.current_period_avg as current_period_avg, b.current_period_avg as last_period_avg, round((b.current_period_avg - a.current_period_avg),2) as difference_val, round((b.current_period_avg - a.current_period_avg)*100/b.current_period_avg,2) hb_val, a.data_type, a.time_dim, '${dtstr}' as addtimes from (select '1' as data_type, '月' as time_dim, round(avg((cast(local_go_net as bigint) +cast(network_go_net as bigint))*100/(cast(local_go_net as bigint) +cast(network_go_net as bigint)+cast(matche_order_fail_cnt as bigint))),2) current_period_avg from biuser.dwd_base_sall where dt>='${m_Last_report_day}' and dt<'${vDay}' )a left outer join (select etldate, etldate_tag, current_period_avg, last_period_avg, difference_val, hb_val, data_type, time_dim from biuser.temp_period_order_cnt where dt='${m_Last_report_day}' and datatype='1' and time_dim='月' ) b on a.data_type=b.data_type and a.time_dim=b.time_dim; " # 如果是月初 不是周末 SQL2=" INSERT OVERWRITE TABLE biuser.temp_period_order_cnt PARTITION(DT='${vDay}',dataType='1') select '${vDay}' etldate , '$current_year' year_val , '$current_month' week_third, ' ' etldate_tag, a.current_period_avg as current_period_avg, b.current_period_avg as last_period_avg, round((b.current_period_avg - a.current_period_avg),4) as difference_val, round((b.current_period_avg - a.current_period_avg)/b.current_period_avg,4) hb_val, a.data_type, a.time_dim, '${dtstr}' as addtimes from (select '1' as data_type, '月' as time_dim, round(avg((cast(local_go_net as bigint) +cast(network_go_net as bigint))*100/(cast(local_go_net as bigint) +cast(network_go_net as bigint)+cast(matche_order_fail_cnt as bigint))),2) current_period_avg from biuser.dwd_base_sall where dt>='${m_Last_report_day}' and dt<'${vDay}' )a left outer join (select etldate, etldate_tag, current_period_avg, last_period_avg, difference_val, hb_val, data_type, time_dim from biuser.temp_period_order_cnt where dt='${m_Last_report_day}' and datatype='1' and time_dim='月' ) b on a.data_type=b.data_type and a.time_dim=b.time_dim; " if [ $(date -d ''$vDay' -1 days' +%d) -eq $(cal $m_report $y_report |xargs|awk '{print $NF}') ] then echo '月初' if [ $count_num = 1 ] then echo '是周末,是月初' EXE_HIVE "${SQL}" else EXE_HIVE "${SQL2}" fi count_num=2 else echo '非月初' false fi echo $count_num if [ $count_num = 0 ] then echo '不是周末,也不是月初' else ## CMD="/usr/local/mysql/bin/mysql -h 192.168.119.132 -uroot -pbi_hive -e \"delete from biuser.temp_period_order_cnt where etldate='${vDay}' AND data_type='1';\"" EXESHELL # 同步数据到 mysql CMD="/home/hadoop/hadoop/sqoop-1.4.4/bin/sqoop export --connect jdbc:mysql://192.168.119.132:3306/bidata?characterEncoding=utf8 --username root --password xxxx --table temp_period_order_cnt --export-dir /user/hive/warehouse/biuser.db/temp_period_order_cnt/dt=${vDay}/datatype=1 --input-fields-terminated-by '|' --null-non-string '0' --null-string '0';" EXESHELL fi
原文:http://blog.csdn.net/lili72/article/details/42882743