$ sudo yum install impala # Binaries for daemons $ sudo yum install impala-server # Service start/stop script $ sudo yum install impala-state-store # Service start/stop script $ sudo yum install impala-catalog # Service start/stop script
$ sudo yum install impala # Binaries for daemons $ sudo yum install impala-server # Service start/stop script
--> Finished Dependency Resolution Error: Package: hadoop-libhdfs-2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6.x86_64 (cloudera-cdh5) Requires: hadoop-hdfs = 2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6 Installed: hadoop-hdfs-2.5.0+cdh5.3.0+781-1.cdh5.3.0.p0.54.el6.x86_64 (@cloudera-cdh5) hadoop-hdfs = 2.5.0+cdh5.3.0+781-1.cdh5.3.0.p0.54.el6 Available: hadoop-hdfs-2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6.x86_64 (cloudera-cdh5) hadoop-hdfs = 2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6 You could try using --skip-broken to work around the problem You could try running: rpm -Va --nofiles --nodigest
<property> <name>dfs.client.read.shortcircuit</name> <value>true</value> </property> <property> <name>dfs.domain.socket.path</name> <value>/var/run/hdfs-sockets/dn._PORT</value> </property> <property> <name>dfs.client.file-block-storage-locations.timeout.millis</name> <value>10000</value> </property>
[root@host1 run]# mkdir /var/run/hdfs-sockets/ [root@host1 run]# chown -R hdfs.hdfs /var/run/hdfs-sockets/
usermod -a -G hadoop impala usermod -a -G hdfs impala
<property> <name>dfs.datanode.hdfs-blocks-metadata.enabled</name> <value>true</value> </property>
$ sudo service impala-state-store start $ sudo service impala-catalog start $ sudo service impala-server start
E0202 08:01:24.944171 29251 cpu-info.cc:135] CPU does not support the Supplemental SSE3 (SSSE3) instruction set, which is required. Exiting if Supplemental SSE3 is not functional...
sudo yum install impala-shell
# Default settings for the Impala daemons.
# NOTE(review): presumably the contents of /etc/default/impala, sourced by the
# impala-* service scripts -- confirm against the installed package.

# Hosts and ports of the catalog service and the statestore.
# (The original snippet assigned IMPALA_CATALOG_SERVICE_HOST twice, the first
# time wrapped in stray HTML markup; a single clean assignment is kept.)
IMPALA_CATALOG_SERVICE_HOST=host1
IMPALA_STATE_STORE_HOST=host1
IMPALA_STATE_STORE_PORT=24000
IMPALA_BACKEND_PORT=22000
IMPALA_LOG_DIR=/var/log/impala

# Keep a value already present in the environment; otherwise build the
# statestore arguments from the settings above.
export IMPALA_STATE_STORE_ARGS="${IMPALA_STATE_STORE_ARGS:- -log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT}}"

# Arguments for the impala-server daemon. Continuation lines start with a
# single space so the resulting string is identical to the one-line form.
IMPALA_SERVER_ARGS=" -log_dir=${IMPALA_LOG_DIR}\
 -catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST}\
 -state_store_port=${IMPALA_STATE_STORE_PORT}\
 -use_statestore\
 -state_store_host=${IMPALA_STATE_STORE_HOST}\
 -be_port=${IMPALA_BACKEND_PORT}"

# NOTE(review): the variable read here is ENABLE_COREDUMPS (no underscore
# between CORE and DUMPS) while the one exported is ENABLE_CORE_DUMPS; kept
# exactly as in the original -- confirm this matches what the scripts expect.
export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}
hdfs dfs -mkdir -p /user/cloudera/sample_data/tab1 /user/cloudera/sample_data/tab2
在本地建立文本 tab1.csv
1,true,123.123,2012-10-24 08:55:00 2,false,1243.5,2012-10-25 13:40:00 3,false,24453.325,2008-08-22 09:33:21.123 4,false,243423.325,2007-05-12 22:32:21.33454 5,true,243.325,1953-04-22 09:11:33
tab2.csv
1,true,12789.123 2,false,1243.5 3,false,24453.325 4,false,2423.3254 5,true,243.325 60,false,243565423.325 70,true,243.325 80,false,243423.325 90,true,243.325
$ hdfs dfs -put tab1.csv /user/cloudera/sample_data/tab1 $ hdfs dfs -ls /user/cloudera/sample_data/tab1 Found 1 items -rw-r--r-- 1 cloudera cloudera 192 2013-04-02 20:08 /user/cloudera/sample_data/tab1/tab1.csv $ hdfs dfs -put tab2.csv /user/cloudera/sample_data/tab2 $ hdfs dfs -ls /user/cloudera/sample_data/tab2 Found 1 items -rw-r--r-- 1 cloudera cloudera 158 2013-04-02 20:09 /user/cloudera/sample_data/tab2/tab2.csv
-- Creates the tutorial tables: tab1 and tab2 are EXTERNAL tables over the
-- CSV files under /user/cloudera/sample_data; student is a regular table
-- with no LOCATION clause. Each table is dropped first so the script can be
-- re-run.

DROP TABLE IF EXISTS tab1;
-- The EXTERNAL clause means the data is located outside the central location
-- for Impala data files and is preserved when the associated Impala table is dropped.
-- We expect the data to already exist in the directory specified by the LOCATION clause.
CREATE EXTERNAL TABLE tab1
(
   id INT,
   col_1 BOOLEAN,
   col_2 DOUBLE,
   col_3 TIMESTAMP
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab1';

DROP TABLE IF EXISTS tab2;
-- TAB2 is an external table, similar to TAB1.
CREATE EXTERNAL TABLE tab2
(
   id INT,
   col_1 BOOLEAN,
   col_2 DOUBLE
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab2';

DROP TABLE IF EXISTS student;
-- STUDENT has no EXTERNAL keyword and no LOCATION clause.
CREATE TABLE student
(
   id INT,
   name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
[xmseapp03:21000] > select * from tab1; Query: select * from tab1 +----+-------+------------+-------------------------------+ | id | col_1 | col_2 | col_3 | +----+-------+------------+-------------------------------+ | 1 | true | 123.123 | 2012-10-24 08:55:00 | | 2 | false | 1243.5 | 2012-10-25 13:40:00 | | 3 | false | 24453.325 | 2008-08-22 09:33:21.123000000 | | 4 | false | 243423.325 | 2007-05-12 22:32:21.334540000 | | 5 | true | 243.325 | 1953-04-22 09:11:33 | +----+-------+------------+-------------------------------+ Fetched 5 row(s) in 6.91s
1|AAAAAAAABAAAAAAA|980124|7135|32946|2452238|2452208|Mr.|Javier|Lewis|Y|9|12|1936|CHILE||Javier.Lewis@VFAxlnZEvOx.org|2452508|
2|AAAAAAAACAAAAAAA|819667|1461|31655|2452318|2452288|Dr.|Amy|Moses|Y|9|4|1966|TOGO||Amy.Moses@Ovk9KjHH.com|2452318|
3|AAAAAAAADAAAAAAA|1473522|6247|48572|2449130|2449100|Miss|Latisha|Hamilton|N|18|9|1979|NIUE||Latisha.Hamilton@V.com|2452313|
4|AAAAAAAAEAAAAAAA|1703214|3986|39558|2450030|2450000|Dr.|Michael|White|N|7|6|1983|MEXICO||Michael.White@i.org|2452361|
5|AAAAAAAAFAAAAAAA|953372|4470|36368|2449438|2449408|Sir|Robert|Moran|N|8|5|1956|FIJI||Robert.Moran@Hh.edu|2452469|
然后上传到 hdfs 上
hdfs dfs -put customer.dat /user/hive/tpcds/customer/
--
-- store_sales fact table and surrounding dimension tables only
--
-- Creates the tpcds database and an EXTERNAL customer table over
-- pipe-delimited data files stored in HDFS.

-- IF NOT EXISTS keeps the script re-runnable, matching the
-- "drop table if exists" used for the table below.
create database if not exists tpcds;
use tpcds;

drop table if exists customer;
create external table customer
(
    c_customer_sk             int,
    c_customer_id             string,
    c_current_cdemo_sk        int,
    c_current_hdemo_sk        int,
    c_current_addr_sk         int,
    c_first_shipto_date_sk    int,
    c_first_sales_date_sk     int,
    c_salutation              string,
    c_first_name              string,
    c_last_name               string,
    c_preferred_cust_flag     string,
    c_birth_day               int,
    c_birth_month             int,
    c_birth_year              int,
    c_birth_country           string,
    c_login                   string,
    c_email_address           string,
    c_last_review_date        string
)
row format delimited fields terminated by '|'
-- LOCATION names the HDFS directory that holds the data files, i.e. the
-- directory the data file is uploaded into, not the data file itself.
location '/user/hive/tpcds/customer';
impala-shell -i localhost -f customer_setup.sql
$ hdfs dfs -mkdir -p /user/impala/data/logs/year=2015/month=01/day=01/host=host1
$ hdfs dfs -mkdir -p /user/impala/data/logs/year=2015/month=02/day=22/host=host2
并上传文本文件 a.txt
1,jack 2,michael
3,sara 4,john
hdfs dfs -put /root/a.txt /user/impala/data/logs/year=2015/month=01/day=01/host=host1 hdfs dfs -put /root/b.txt /user/impala/data/logs/year=2015/month=02/day=22/host=host2
create external table logs (id int, name string) partitioned by (year string, month string, day string, host string) row format delimited fields terminated by ',' location '/user/impala/data/logs';
alter table logs add partition (year="2015",month="01",day="01",host="host1"); alter table logs add partition (year="2015",month="02",day="22",host="host2");
select * from logs
insert into logs partition (year="2015", month="01", day="01", host="host1") values (6,"ted");
再查一下
select * from logs;
<dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-jdbc</artifactId> <version>0.14.0</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.2.0</version> </dependency>
Connection con = DriverManager.getConnection("jdbc:hive2://host1:10000/default", "hive", "");
改成
Connection con = DriverManager.getConnection("jdbc:hive2://host1:21050/;auth=noSasl", "", "");
为了简化例子,我把 ImpalaJdbcClient 简化成只有查询部分了
package org.crazycake.play_hive;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Minimal JDBC client that runs a single query against Impala through the
 * Hive JDBC driver and prints the first two columns of every row.
 */
public class ImpalaJdbcClient {

    /**
     * Note: the driver used for hive-server2 lives under org.apache.hive.*,
     * whereas the one for hive-server lives under org.apache.hadoop.hive.*
     */
    private static final String driverName = "org.apache.hive.jdbc.HiveDriver";

    /**
     * Loads the JDBC driver, runs {@code select * from logs} and prints each
     * row as "id&lt;TAB&gt;name".
     *
     * @param args unused
     * @throws SQLException if connecting, executing the query or reading the
     *                      result fails
     */
    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            // Without the driver on the classpath nothing else can work.
            e.printStackTrace();
            System.exit(1);
        }

        // Impala's default port is 21050.
        // try-with-resources closes the result set, statement and connection
        // (in that order) even when the query or row processing throws.
        try (Connection con = DriverManager.getConnection(
                     "jdbc:hive2://xmseapp03:21050/;auth=noSasl", "", "");
             Statement stmt = con.createStatement()) {

            // select * query
            String sql = "select * from logs";
            System.out.println("Running: " + sql);

            try (ResultSet res = stmt.executeQuery(sql)) {
                while (res.next()) {
                    System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
                }
            }
        }
    }
}
Running: select * from logs 3 sara 4 john 6 ted 1 jack 2 michael
Alex 的 Hadoop 菜鸟教程: 第15课 Impala 安装使用教程