1. The original SQL in Oracle
select count(1)
  from a_V_PWYZL_CUSTACCT_PSMIS t
 where not exists (select 1
                     from tb_show_multi_question q
                    where q.dqmp_rule_code = '仅比对系统有'
                      and q.dqmp_role_id = '105754659'
                      and q.DQMP_target_id = t.dqmp_mrid)
   and not exists (select /*+ index(s) */ 1
                     from a_V_PWYZL_CUSTACCT_GIS s
                    where s.dqmp_cpk = t.dqmp_cpk)
   and t.is_repeat = '0';
2. Hive/Shark version

Each NOT EXISTS subquery becomes a LEFT OUTER JOIN, and the joined columns (f1, f2) are tested for NULL in the outer query:
select count(1)
  from (select t.*,
               q.dqmp_question_id as f1,                 -- column index q.0
               s.gis_mrid as f2                          -- column index s.0
          from (select *
                  from a_V_PWYZL_CUSTACCT_PSMIS t
                 where t.is_repeat = '0') t              -- column index t.19
          left outer join
               (select *
                  from tb_show_multi_question q
                 where q.dqmp_rule_code = '仅比对系统有'    -- column index q.26
                   and q.dqmp_role_id = '105754659') q   -- column index q.31
            on q.DQMP_target_id = t.dqmp_mrid            -- column indexes q.13, t.32
          left outer join a_V_PWYZL_CUSTACCT_GIS s
            on s.dqmp_cpk = t.dqmp_cpk                   -- column indexes s.31, t.31
       ) tv
 where tv.f1 is null and tv.f2 is null;
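The "column index" comments are not Hive syntax; they record each referenced column's zero-based position in the '|'-delimited export files, which is how the Spark version below addresses fields. A throwaway Scala sketch (the record content here is made up) shows the correspondence:

object ColumnIndexSketch {
  def main(args: Array[String]) {
    // A made-up two-column record; the real export lines carry a whole
    // table row, so is_repeat lands at index 19, dqmp_cpk at 31, and so on.
    val line = "105754659|仅比对系统有"
    val cols = line.split('|')
    println(cols(0)) // "105754659"   -> "column index 0"
    println(cols(1)) // "仅比对系统有"  -> "column index 1"
  }
}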
3. Spark implementation

The same plan expressed with RDD operations: filter the base table, apply two leftOuterJoins, and keep the rows where both optional join sides are None.
package cn.com.gzkit.spark

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

object test2 {
  def main(args: Array[String]) {
    val master = "spark://kit-b1:7077"
    val sc = new SparkContext(master, "HdfsTest",
      System.getenv("SPARK_HOME"), SparkContext.jarOfClass(this.getClass))

    // The three tables, exported as '|'-delimited text files.
    val file1 = sc.textFile("hdfs://kit-b1/demodata/utf8_a_v_pwyzl_custacct_psmis.txt")
      .map(_.split('|'))
    val file2 = sc.textFile("hdfs://kit-b1/demodata/utf8_a_v_pwyzl_custacct_gis.txt")
      .map(_.split('|')).map(m => (m(31), m(0)))               // (dqmp_cpk, gis_mrid)
    val file3 = sc.textFile("hdfs://kit-b1/demodata/utf8_tb_show_multi_question.txt")
      .map(_.split('|')).map(m => (m(13), m(31), m(26), m(0))) // (DQMP_target_id, dqmp_role_id, dqmp_rule_code, dqmp_question_id)
    file1.cache(); file2.cache(); file3.cache()
    // file1.count(); file2.count(); file3.count()

    System.out.println("-----------begin-----------------")
    val t  = file1.filter(_(19) == "0")            // t.is_repeat = '0'
    val t1 = t.map(t => (t(32), t))                // key by dqmp_mrid
    val q  = file3.filter(_._2 == "105754659")     // q.dqmp_role_id
                  .filter(_._3 == "仅比对系统有")     // q.dqmp_rule_code
    val q1 = q.map(q => (q._1, q))                 // key by DQMP_target_id
    val tq = t1.leftOuterJoin(q1)
    System.out.println("-----------end-----------------")

    System.out.println("-----------begin2-----------------")
    val t2 = tq.map(m => (m._2._1(31), m._2))      // re-key by dqmp_cpk
    val s  = file2
    val ts = t2.leftOuterJoin(s)
    // Both join sides must be unmatched (None), i.e. the two NOT EXISTS.
    val rs = ts.filter(_._2._2 == None).filter(_._2._1._2 == None)
    System.out.println("save file")
    rs.saveAsTextFile("hdfs://kit-b1/demodata/test/02")
    System.out.println("save file end")
    val v2 = rs.count()
    System.out.println("v2=" + v2)
    System.out.println("-----------end2-----------------")
    sc.stop()
  }
}
Original article: http://www.cnblogs.com/lam99v/p/3793031.html