package db.insert;

import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

public class SparkInsertData {
    public static void main(String[] args) {
        // Initialize the SparkContext
        SparkConf sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local[2]");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);
        final HBaseDBDao hb = new HBaseDBDao();
        try {
            final String tableName = "mapCar";
            hb.deleteTable(tableName);
            // Step 1: create the HBase table "mapCar"
            String[] columnFamilys = { "cids", "gis", "times" };
            if (!hb.isExist(tableName)) {
                hb.createTable(tableName, columnFamilys);
            }
            hb.initHTable(tableName);
            // Step 2: insert data into the table
            // (load all vehicle records and add them to the table)
            JavaRDD<String> fcar = sc.textFile(
                    "/usr/local/myjar/mongo/地图数据/mongo/MongoDB/mapCar.txt", 10);
            fcar.foreachPartition(new VoidFunction<Iterator<String>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void call(Iterator<String> iter) throws Exception {
                    while (iter.hasNext()) {
                        String s = iter.next();
                        // Each line holds: vehicle id, latitude, longitude, timestamp
                        StringTokenizer stk = new StringTokenizer(s);
                        String cid = stk.nextToken();
                        String lat = stk.nextToken();
                        String lon = stk.nextToken();
                        String time = stk.nextToken();
                        // Left-pad the timestamp with zeros to 13 digits so that
                        // row keys sort chronologically within each vehicle
                        int n = 13 - time.length();
                        StringBuilder sb = new StringBuilder(time);
                        for (int i = 0; i < n; i++) {
                            sb.insert(0, '0');
                        }
                        String row = cid + "_" + sb.toString();
                        hb.addRowBatch(tableName, row, "cids", "cid", cid);
                        hb.addRowBatch(tableName, row, "gis", "lat", lat);
                        hb.addRowBatch(tableName, row, "gis", "lon", lon);
                        hb.addRowBatch(tableName, row, "times", "time", time);
                        System.out.println("row: " + row + ", cid: " + cid + ", lat: " + lat
                                + ", lon: " + lon + ", time: " + time);
                    }
                    // Flush the buffered puts for this partition in one batch
                    hb.flushCommits(tableName);
                }
            });
            System.out.println("Insertion complete!");
            // Step 3: fetch all rows
            // hb.getAllRows(tableName);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            sc.stop(); // shut down the SparkContext
        }
    }
}
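The driver above depends on a helper class, HBaseDBDao, whose source the original post provides separately and is not shown here. For completeness, below is a minimal sketch of what such a class might look like against the classic HBase 0.98/1.0 client API (HBaseAdmin, HTable, and client-side write buffering via setAutoFlush/flushCommits). The method names and signatures are inferred from the calls in the driver; the body is an illustrative assumption, not the author's original implementation. Making the class Serializable with transient HBase handles, recreated lazily, lets it survive being captured in the Spark closure; getAllRows is omitted.

package db.insert;

import java.io.IOException;
import java.io.Serializable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical reconstruction of the helper class used by SparkInsertData
public class HBaseDBDao implements Serializable {
    private static final long serialVersionUID = 1L;

    // HBase handles are not serializable; keep them transient and
    // recreate them lazily after the object is shipped to an executor
    private transient Configuration conf;
    private transient HTable table;
    private String tableName;

    private Configuration getConf() {
        if (conf == null) {
            conf = HBaseConfiguration.create(); // reads hbase-site.xml from the classpath
        }
        return conf;
    }

    private HTable getTable() throws IOException {
        if (table == null) {
            table = new HTable(getConf(), tableName);
            // Buffer puts client-side so flushCommits() sends them in batches
            table.setAutoFlush(false, false);
        }
        return table;
    }

    public boolean isExist(String tableName) throws IOException {
        HBaseAdmin admin = new HBaseAdmin(getConf());
        try {
            return admin.tableExists(tableName);
        } finally {
            admin.close();
        }
    }

    public void createTable(String tableName, String[] columnFamilys) throws IOException {
        HBaseAdmin admin = new HBaseAdmin(getConf());
        try {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
            for (String family : columnFamilys) {
                desc.addFamily(new HColumnDescriptor(family));
            }
            admin.createTable(desc);
        } finally {
            admin.close();
        }
    }

    public void deleteTable(String tableName) throws IOException {
        HBaseAdmin admin = new HBaseAdmin(getConf());
        try {
            if (admin.tableExists(tableName)) {
                admin.disableTable(tableName);
                admin.deleteTable(tableName);
            }
        } finally {
            admin.close();
        }
    }

    public void initHTable(String tableName) throws IOException {
        this.tableName = tableName;
        getTable();
    }

    public void addRowBatch(String tableName, String row, String family,
            String qualifier, String value) throws IOException {
        Put put = new Put(Bytes.toBytes(row));
        put.add(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
        getTable().put(put); // buffered; not sent until flushCommits()
    }

    public void flushCommits(String tableName) throws IOException {
        getTable().flushCommits();
    }
}

The buffering design is what makes foreachPartition pay off: each partition accumulates puts in the client-side write buffer and ships them to HBase in one flushCommits() call, rather than one RPC per cell.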
Original article: http://www.cnblogs.com/gaopeng527/p/5002359.html