create table sospdm.tmp_yinfei_test_01 ( id string ) partitioned by (statis_date string) ; create table sospdm.tmp_yinfei_test_02 ( id string ) partitioned by (statis_date string) ; select t1.* from tmp_yinfei_test_01 t1 left join tmp_yinfei_test_02 t2 on t1.id=t2.id where t1.statis_date=‘20190408‘ and t2.statis_date=‘20190408‘ ; select t1.* from tmp_yinfei_test_01 t1 left join tmp_yinfei_test_02 t2 on t1.id=t2.id and t1.statis_date=‘20190408‘ and t2.statis_date=‘20190408‘ ; select t1.* from ( select * from tmp_yinfei_test_01 where statis_date=‘20190408‘ ) t1 left join ( select * from tmp_yinfei_test_02 where statis_date=‘20190408‘ ) t2 on t1.id=t2.id ; =========================test1===================================== explain select t1.* from tmp_yinfei_test_01 t1 left join tmp_yinfei_test_02 t2 on t1.id=t2.id where t1.statis_date=‘20190408‘ and t2.statis_date=‘20190408‘ ; hive> explain select t1.* > from tmp_yinfei_test_01 t1 > left join tmp_yinfei_test_02 t2 > on t1.id=t2.id > where t1.statis_date=‘20190408‘ and t2.statis_date=‘20190408‘ > ; OK STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (statis_date = ‘20190408‘) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: id (type: string) sort order: + Map-reduce partition columns: id (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TableScan alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: id (type: string) sort order: + Map-reduce partition columns: id (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: statis_date (type: string) Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 id (type: string) 1 id (type: string) outputColumnNames: _col0, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (_col6 = ‘20190408‘) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: string), ‘20190408‘ (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink Time taken: 0.399 seconds, Fetched: 58 row(s) 结论:t2表会扫全表 =========================test2===================================== explain select t1.* from tmp_yinfei_test_01 t1 left join tmp_yinfei_test_02 t2 on t1.id=t2.id and t1.statis_date=‘20190408‘ and t2.statis_date=‘20190408‘ ; STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: id (type: string) sort order: + Map-reduce partition columns: id (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: statis_date (type: string) TableScan alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (statis_date = ‘20190408‘) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: id (type: string) sort order: + Map-reduce partition columns: id (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 filter predicates: 0 {(VALUE._col0 = ‘20190408‘)} 1 keys: 0 id (type: string) 1 id (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink 结论:t1表会扫全表 =========================test3===================================== explain select t1.* from ( select * from tmp_yinfei_test_01 where statis_date=‘20190408‘ ) t1 left join ( select * from tmp_yinfei_test_02 where statis_date=‘20190408‘ ) t2 on t1.id=t2.id ; STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: tmp_yinfei_test_01 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (statis_date = ‘20190408‘) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: id (type: string), ‘20190408‘ (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string) TableScan alias: tmp_yinfei_test_02 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (statis_date = ‘20190408‘) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: id (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink
原文:https://www.cnblogs.com/yin-fei/p/10751608.html