grunt> ls hdfs://mycluster/user/root/.staging <dir> hdfs://mycluster/user/root/employee <dir> hdfs://mycluster/user/root/people <dir> grunt> cd .. grunt> ls hdfs://mycluster/user/cloudera <dir> hdfs://mycluster/user/history <dir> hdfs://mycluster/user/hive <dir> hdfs://mycluster/user/root <dir> hdfs://mycluster/user/test3 <dir> hdfs://mycluster/user/test_hive <dir>
yum install pig
export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
[root@host1 impala]# source /etc/profile [root@host1 impala]# echo $HADOOP_MAPRED_HOME /usr/lib/hadoop-mapreduce
$ pig
, use fs.defaultFS
2015-02-02 08:29:03,302 [main] INFO org.apache.hadoop.conf.Configuration.deprecation - fs.default.name is deprecated. Instead, use fs.defaultFS
grunt>
试着运行 ls 命令,可以看到当前用户目录下的文件:
grunt> ls
hdfs://mycluster/user/root/.staging <dir>
hdfs://mycluster/user/root/employee <dir>
hdfs://mycluster/user/root/people <dir>
还可以 cd 到上一级,再 ls:
grunt> cd ..
grunt> ls
hdfs://mycluster/user/cloudera <dir>
hdfs://mycluster/user/history <dir>
hdfs://mycluster/user/hive <dir>
hdfs://mycluster/user/root <dir>
hdfs://mycluster/user/test3 <dir>
hdfs://mycluster/user/test_hive <dir>
怎么样?是不是比直接输入 hdfs dfs -ls / 这一大串命令爽多了?
(Dec 10 01:22:11 NetworkManager: <INFO> hello world [start] (Dec 10 03:56:43 NetworkManager: <WARN> Oops! There is an error! (Dec 10 04:10:18 NetworkManager: <WARN> Please check the database ... (Dec 10 05:22:11 NetworkManager: <INFO> hello world [end]
grunt> cd hdfs://mycluster/
grunt> cd user
grunt> mkdir pig
grunt> cd pig
grunt> copyFromLocal /root/logs logs
hdfs dfs -put /root/logs /user/pig
grunt> messages = LOAD '/user/pig/logs'; grunt> warns = FILTER messages BY $0 MATCHES '.*WARN+.*'; grunt> DUMP warns;
((Dec 10 03:56:43 NetworkManager: <WARN> Oops! There is an error!) ((Dec 10 04:10:18 NetworkManager: <WARN> Please check the database ...)
messages = LOAD '/user/pig/logs';
warns = FILTER messages BY $0 MATCHES '.*WARN+.*';
DUMP warns;
4000001,Kristina,Chung,55,Pilot 4000002,Paige,Chen,74,Teacher 4000003,Sherri,Melton,34,Firefighter 4000004,Gretchen,Hill,66,Computer hardware engineer 4000005,Karen,Puckett,74,Lawyer 4000006,Patrick,Song,42,Veterinarian 4000007,Elsie,Hamilton,43,Pilot 4000008,Hazel,Bender,63,Carpenter
grunt> cd /user/pig
grunt> copyFromLocal /root/customers ./customers
hbase(main):001:0> create 'customers', 'customers_data'
raw_data = LOAD 'hdfs:/user/pig/customers' USING PigStorage(',') AS ( custno:chararray, firstname:chararray, lastname:chararray, age:int, profession:chararray ); STORE raw_data INTO 'hbase://customers' USING org.apache.pig.backend.hadoop.hbase.HBaseStorage( 'customers_data:firstname customers_data:lastname customers_data:age customers_data:profession' );
$ PIG_CLASSPATH=/usr/lib/hbase/hbase-client-0.98.6-cdh5.2.1.jar:/usr/lib/zookeeper/zookeeper-3.4.5-cdh5.3.0.jar /usr/bin/pig /root/Load_HBase_Customers.pig
这里的 jar 包路径和版本号请根据你们的实际环境修改。
hbase(main):001:0> scan 'customers'
hive > CREATE TABLE occupations(code STRING, description STRING,salary INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054';
11-0000,Management occupations,96150 11-1011,Chief executives,151370 11-1021,General and operations managers,103780 11-1031,Legislators,33880 11-2011,Advertising and promotions managers,91100
hive> LOAD DATA LOCAL INPATH '/root/occupations.txt' INTO TABLE occupations;
occ_data = LOAD 'occupations' USING org.apache.hcatalog.pig.HCatLoader(); salaries = GROUP occ_data ALL; out = FOREACH salaries GENERATE AVG(occ_data.salary); DUMP out;
SELECT AVG(salary) FROM occupations;
这样看起来 Pig 反而比 Hive 还麻烦了?其实不是的,我只是用这个例子来说明 Pig 跟 Hive 之间的交互,并没有任何的比较性。
Alex 的 Hadoop 菜鸟教程: 第16课 Pig 安装使用教程
原文:http://blog.csdn.net/nsrainbow/article/details/43426061