Tweaked a few my.ini parameters:
table_cache = 512
bulk_insert_buffer_size = 100M
innodb_additional_mem_pool_size = 30M
innodb_flush_log_at_trx_commit = 0
innodb_buffer_pool_size = 207M
innodb_log_file_size = 128M
The default value of innodb_flush_log_at_trx_commit, 1, means the log is flushed to disk on every transaction commit (and after every statement outside a transaction), which is very expensive unless the disk has a battery-backed write cache. A value of 2 is acceptable for many applications, especially ones converted from MyISAM tables: the log is written to the operating system's cache rather than flushed to disk, but it is still flushed to disk every second, so you generally lose no more than 1-2 seconds of updates. A value of 0 is slightly faster but less safe: transactions can be lost even if only mysqld crashes, whereas with 2 data can be lost only if the entire operating system goes down. For workloads with strict transactional requirements, setting it to 0 is a safety risk.
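innodb_flush_log_at_trx_commit is also a dynamic variable, so for a one-off bulk load you can relax it at runtime instead of editing my.ini and restarting. A minimal JDBC sketch, assuming the same local root account used below and a user with the SUPER privilege:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class RelaxFlush {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/javademo?user=root&password=root");
             Statement st = conn.createStatement()) {
            // Relax log flushing for the bulk load; set it back to 1
            // afterwards if you need full ACID durability.
            st.execute("SET GLOBAL innodb_flush_log_at_trx_commit = 2");
        }
    }
}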
Create the MySQL table:
CREATE TABLE `news` (
  `id` int(19) NOT NULL AUTO_INCREMENT,
  `title` varchar(30) DEFAULT NULL,
  `content` varchar(400) DEFAULT NULL,
  `type` varchar(30) DEFAULT NULL,
  PRIMARY KEY (`id`),
  UNIQUE KEY `PK_NEWS_ID` (`id`),
  KEY `INDEX_NEWS_ID_TYPE` (`id`,`type`),
  KEY `INDEX_NEWS_TYPE` (`type`)
) ENGINE=InnoDB AUTO_INCREMENT=1072779 DEFAULT CHARSET=utf8
When inserting, be sure to use addBatch: more than 3 million rows take only about 10 minutes on my laptop.
public static void main(String[] args) {
    Connection conn = null;
    // MySQL JDBC URL format: jdbc:mysql://host:port/database?param=value
    // useUnicode and characterEncoding are set to avoid garbled Chinese text.
    // Create the javademo database in MySQL before running this.
    String url = "jdbc:mysql://localhost:3306/javademo?"
            + "user=root&password=root&useUnicode=true&characterEncoding=UTF8";
    // One batch of 100,000 rows is inserted for each of these 30 type values
    // (3 million rows in total).
    String[] types = { "java", "java", "php", "erlang", "go", "javascript",
            "html", "html5", "css", "python", "ror", "nodejs", "spring",
            "hibernate", "ibatis", "mybatis", "mysql", "msserver", "oracle",
            "redis", "mangodb", "c", "c++", "springside", "springroo",
            "sprintboot", "sprintmvc", "websocket", "restful", "comet" };
    try {
        Class.forName("com.mysql.jdbc.Driver"); // load the MySQL driver
        conn = DriverManager.getConnection(url);
        String sql = "insert into news(title,content,type) values(?,?,?)";
        PreparedStatement prep = conn.prepareStatement(sql);
        // Disable auto-commit: committing row by row would spend most of the
        // time on round-trips and log flushes.
        conn.setAutoCommit(false);
        long start = System.currentTimeMillis();
        String content = "When Sun developed the Java language, all it provided on the database side was a standard interface, or rather a specification, called JDBC. On its own it does nothing and cannot connect to any database; only after you download the matching database driver can JDBC talk to that database through the driver. SQL Server has its own dedicated driver, downloadable from Microsoft's site. Sun also wrote a driver itself, the JDBC-ODBC bridge, which reaches Windows databases through ODBC, but it is inefficient and limited, so it is generally no longer recommended.";
        for (int i = 0; i < types.length; i++) {
            long start2 = System.currentTimeMillis();
            String type = types[i];
            // Queue up 100,000 rows, then send them as a single batch.
            for (int j = 0; j < 100000; j++) {
                prep.setString(1, "article" + j);
                prep.setString(2, content);
                prep.setString(3, type);
                prep.addBatch();
            }
            prep.executeBatch();
            prep.clearBatch();
            conn.commit();
            long end2 = System.currentTimeMillis();
            // Print how long this batch of 100,000 inserts took.
            System.out.println(end2 - start2);
        }
        long end = System.currentTimeMillis();
        System.out.print("total: ");
        System.out.println(end - start);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
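One Connector/J option the test above does not use is rewriteBatchedStatements=true, which lets the driver rewrite the addBatch()ed single-row INSERTs into multi-row INSERT statements and often speeds batches up further; treat the exact gain as something to measure yourself. The only change is the URL:

String url = "jdbc:mysql://localhost:3306/javademo?"
        + "user=root&password=root&useUnicode=true&characterEncoding=UTF8"
        + "&rewriteBatchedStatements=true"; // collapse each batch into multi-row INSERTs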
Test result: inserting 3 million rows took only 597 seconds (per-batch times below are in milliseconds):
15031 15248 18522 18419 19223 19613 20822 18701 19468 21290 19766 20956 19490 19089 20290 22568 20079 20556 20922 22659 21155 19782 20378 21053 19459 22388 20323 20011 18158 21199 total: 596625
With 3 million rows in the table, let's test some queries:
[SQL] select COUNT(id) from news
Affected rows: 0
Time: 0.729s
Now test pagination:
[SQL] select id,title from news limit 1000000,20
Affected rows: 0
Time: 7.839s
Fetching 20 records starting from the 1,000,000th row took nearly eight seconds. That is slow.
[SQL] select id from news limit 1000000,20
Affected rows: 0
Time: 0.242s
id is indexed, and selecting only id can be answered from the index alone, so this is much faster.
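To confirm what the optimizer is doing, EXPLAIN both variants: for the id-only query the Extra column should show "Using index" (the result comes entirely from the index, no row data is read), while the id,title variant has to fetch rows. A small sketch, assuming the same connection settings as the insert example:

import java.sql.*;

public class ExplainPaging {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/javademo?user=root&password=root");
             Statement st = conn.createStatement()) {
            for (String sql : new String[] {
                    "EXPLAIN SELECT id FROM news LIMIT 1000000,20",
                    "EXPLAIN SELECT id,title FROM news LIMIT 1000000,20" }) {
                try (ResultSet rs = st.executeQuery(sql)) {
                    while (rs.next()) {
                        // "key" is the chosen index; "Extra" says "Using index"
                        // when the query is covered by the index.
                        System.out.println(sql + " -> key=" + rs.getString("key")
                                + ", Extra=" + rs.getString("Extra"));
                    }
                }
            }
        }
    }
}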
[SQL] select id from news order by id limit 1000000,20
Affected rows: 0
Time: 7.841s
Adding ORDER BY makes it dramatically slower.
[SQL] select id,title from news where id>=(select id from news order by id limit 1000000,1) limit 20
Affected rows: 0
Time: 8.115s
Even with the subquery trick, performance stays low as long as ORDER BY is involved.
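One workaround the post does not benchmark is keyset (seek) pagination: instead of a large OFFSET, remember the last id of the previous page and seek past it, so InnoDB can dive straight into the primary key rather than counting a million rows. A sketch, assuming pages are read in ascending id order (printPage is a hypothetical helper):

import java.sql.*;

public class KeysetPaging {
    // Returns the last id of the page, to be passed back in for the next
    // page; lastId = 0 fetches the first page.
    static long printPage(Connection conn, long lastId) throws SQLException {
        // WHERE id > ? lets InnoDB seek directly into the primary key,
        // so the cost no longer grows with the page number as OFFSET does.
        String sql = "SELECT id, title FROM news WHERE id > ? ORDER BY id LIMIT 20";
        try (PreparedStatement ps = conn.prepareStatement(sql)) {
            ps.setLong(1, lastId);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    lastId = rs.getLong("id");
                    System.out.println(lastId + " " + rs.getString("title"));
                }
            }
        }
        return lastId;
    }
}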
[SQL] select id from news where type = 'ibatis' limit 1,500000;
Affected rows: 0
Time: 0.076s
[SQL] select id,title from news where type = 'ibatis' limit 1000000,20
Affected rows: 0
Time: 0.163s
[SQL] select id,title,type from news where type = 'ibatis' limit 1,500000;
Affected rows: 0
Time: 0.209s
[SQL] select id,title,content,type from news where type = 'ibatis' limit 1,500000;
Affected rows: 0
Time: 0.471s
Adding the large content column costs a little performance.
This shows the performance advantage the composite index brings.
The speed of IN is astonishing: against 3 million records an IN query takes only 0.004s, mainly because the column is indexed.
Two of the queries here are remarkably fast, and together they make up exactly the queries a paging feature needs:
select id from news where type = 'ibatis' limit 1,500000;
select * from news where id in(xxx,xxx,xxx,xxx...............)
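Put together, that is the classic two-step paging pattern: fetch one page of ids from the narrow, index-covered query, then fetch the full rows with IN on the primary key. A hypothetical helper pair (the placeholder list is generated, so the IN clause can hold any number of ids, like the 1,188-parameter test further below):

import java.sql.*;
import java.util.*;

public class TwoStepPaging {
    // Step 1: one page of ids, served from the index on type.
    static List<Long> pageIds(Connection conn, String type, int offset, int size)
            throws SQLException {
        List<Long> ids = new ArrayList<>();
        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT id FROM news WHERE type = ? LIMIT ?,?")) {
            ps.setString(1, type);
            ps.setInt(2, offset);
            ps.setInt(3, size);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) ids.add(rs.getLong(1));
            }
        }
        return ids;
    }

    // Step 2: full rows by primary key, using one ? placeholder per id.
    static void printRows(Connection conn, List<Long> ids) throws SQLException {
        if (ids.isEmpty()) return;
        String placeholders = String.join(",", Collections.nCopies(ids.size(), "?"));
        try (PreparedStatement ps = conn.prepareStatement(
                "SELECT id, title FROM news WHERE id IN (" + placeholders + ")")) {
            for (int i = 0; i < ids.size(); i++) ps.setLong(i + 1, ids.get(i));
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next())
                    System.out.println(rs.getLong("id") + " " + rs.getString("title"));
            }
        }
    }
}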
Finally, here is the IN query in full; try it yourself and see the power of IN:
select * from news where id in( 2999999,2999998,2999997,2999996,2999995,2999994,2999993,2999992,2999991,2999990,2999989, 2999988,2999987,2999986,2999985,2999984,2999983,2999982,2999981,2999980,2999979,2999978, 2999977,2999976,2999975,2999974,2999973,2999972,2999971,2999970,2999969,2999968,2999967, 2999966,2999965,2999964,2999963,2999962,2999961,2999960,2999959,2999958,2999957,2999956, 2999955,2999954,2999953,2999952,2999951,2999950,2999949,2999948,2999947,2999946,2999945, 2999944,2999943,2999942,2999941,2999940,2999939,2999938,2999937,2999936,2999935,2999934, 2999933,2999932,2999931,2999930,2999929,2999928,2999927,2999926,2999925,2999924,2999923, 2999922,2999921,2999920,2999919,2999918,2999917,2999916,2999915,2999914,2999913,2999912, 2999911,2999910,2999909,2999908,2999907,2999906,2999905,2999904,2999903,2999902,2999901, 2999900,2999899,2999898,2999897,2999896,2999895,2999894,2999893,2999892,2999891,2999890, 2999889,2999888,2999887,2999886,2999885,2999884,2999883,2999882,2999881,2999880,2999879, 2999878,2999877,2999876,2999875,2999874,2999873,2999872,2999871,2999870,2999869,2999868, 2999867,2999866,2999865,2999864,2999863,2999862,2999861,2999860,2999859,2999858,2999857, 2999856,2999855,2999854,2999853,2999852,2999851,2999850,2999849,2999848,2999847,2999846, 2999845,2999844,2999843,2999842,2999841,2999840,2999839,2999838,2999837,2999836,2999835, 2999834,2999833,2999832,2999831,2999830,2999829,2999828,2999827,2999826,2999825,2999824, 2999823,2999822,2999821,2999820,2999819,2999818,2999817,2999816,2999815,2999814,2999813,2999812 )
The fastest run took 0.003s.
Next, try putting 1,188 parameters inside the IN clause.
The speed is still astonishing.
By now the table has grown to 22 million rows; let's see how IN performs:
With 22 million rows, the IN query still takes only 0.002s. Truly astonishing.
Original post: http://maclab.iteye.com/blog/2265492