hiveql basic

时间：2015-03-24 08:02:26 阅读：231 评论：0 收藏：0 [点我收藏+]

set hive.cli.print.current.db=true;
set hive.mapred.mode=strict;
set hive.mapred.mode=nonstrict;
SHOW PARTITIONS tablename;

--Dynamic Partition Inserts --by position not by names

INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;

SET hive.map.aggr=true;

----with this way , we can not generate the temporary table

FROM (
SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;

--When Hive Can Avoid MapReduce

set hive.exec.mode.local.auto=true;

--Hive supports the classic SQL JOINstatement, but only equi-joinsare supported.
--Hive also assumes that the lasttable in the query is the largest
--It attempts to buffer the other tables and then stream the last table through
-- you should structure your join queries so the largest table is last.

SELECT /*+ STREAMTABLE(s) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = ‘AAPL‘;

set hive.auto.convert.join=true;
hive.mapjoin.smalltable.filesize=25000000;--table size less than this can use in map phase
SELECT /*+ MAPJOIN(d) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = ‘AAPL‘;

set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;

--Using DISTRIBUTE BY ... SORT BYor the shorthand CLUSTER BYclauses is a way to exploit
--the parallelism of SORT BY, yet achieve a total ordering across the output files.
--this method is better than use order by (just one reducer);

--Queries that Sample Data

SELECT * from numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;
SELECT * FROM numbersflat TABLESAMPLE(0.1 PERCENT) s;--block sampling

--index

CREATE INDEX employees_index
ON TABLE employees (country)
AS ‘org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler‘
WITH DEFERRED REBUILD;

hiveql basic

原文：http://www.cnblogs.com/huaxiaoyao/p/4361736.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)