2.1 数据准备
(1)数据集下载与查看
# Create a working directory for the case study under /usr/local.
cd /usr/local
ls
sudo mkdir bigdatacase
# Hand the directory to user/group hadoop; the mkdir below then runs without sudo.
sudo chown -R hadoop:hadoop ./bigdatacase
cd bigdatacase
mkdir dataset
# Extract the archive from ~/下载 (the Downloads folder) into the dataset directory.
cd ~/下载
unzip user_table.zip -d /usr/local/bigdatacase/dataset
# List the dataset directory to check what was extracted.
cd /usr/local/bigdatacase/dataset
ls
(2)数据集预处理
# Delete the first line of raw_user.csv in place (presumably the CSV header row — confirm).
# The sed script must be wrapped in plain ASCII single quotes; typographic quotes
# are passed to sed literally and make the command fail.
sed -i '1d' raw_user.csv
# Show the first five remaining lines to check the result.
head -5 raw_user.csv
# Create/edit the preprocessing script; its contents are not shown in these notes.
gedit pre_deal.sh
# Run the script; presumably raw_user.csv is the input and user_table.txt the output — confirm against pre_deal.sh.
bash ./pre_deal.sh raw_user.csv user_table.txt
ls
# Inspect the first ten lines of the generated file.
head -10 user_table.txt
(3)把数据集导入HDFS中
# Start the Hadoop daemons. The launcher script shipped with Hadoop is named
# start-all.sh (hyphen, not underscore); it must be on PATH or run from $HADOOP_HOME/sbin.
start-all.sh
# List the running Java processes to check that the daemons came up.
jps
# Create the target directory in HDFS (-p also creates missing parents).
hdfs dfs -mkdir -p /bigdatacase/dataset
# Upload the preprocessed file from the local filesystem into HDFS.
hdfs dfs -put /usr/local/bigdatacase/dataset/user_table.txt /bigdatacase/dataset
# Verify the upload: list the directory and print the first ten lines of the file.
hdfs dfs -ls /bigdatacase/dataset/
hdfs dfs -cat /bigdatacase/dataset/user_table.txt | head -10
(4)在Hive上创建数据库
hive
-- Create the database if it is missing and make it the current one.
-- IF NOT EXISTS keeps these steps re-runnable.
CREATE DATABASE IF NOT EXISTS dblab;
USE dblab;
-- External table over the HDFS directory that user_table.txt was uploaded to.
-- Fields are tab-separated plain text. String literals must use plain ASCII
-- single quotes; typographic quotes are not valid HiveQL.
CREATE EXTERNAL TABLE IF NOT EXISTS dblab.big_user (
    id            INT,
    uid           STRING,
    item_id       STRING,
    behavior_type INT,
    item_category STRING,
    visit_date    DATE,
    province      STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/bigdatacase/dataset';
-- Preview ten rows to confirm the files are parsed into the expected columns.
SELECT * FROM big_user LIMIT 10;
2.2 Hive数据分析
(1)用户行为分析需求:2014-12-11~12号有多少条购买商品的记录
分析步骤
(2)用户行为分析需求:分析每月1-31号购买情况
(3)自定义需求:2014-12-12号当天按省份统计购买数量
原文:https://www.cnblogs.com/fbs1/p/14197385.html