环境准备:
<!-- Druid connection pool: supplies DruidDataSource -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>druid</artifactId>
    <version>1.1.21</version>
</dependency>
<!-- Gson: parses the JSON payload returned by the news endpoint -->
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.5</version>
</dependency>
<!-- Spring JDBC: supplies JdbcTemplate -->
<dependency>
    <groupId>org.springframework</groupId>
    <artifactId>spring-jdbc</artifactId>
    <version>5.2.2.RELEASE</version>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.47</version>
</dependency>
<!-- Apache HttpClient (with its httpcore dependency): supplies CloseableHttpClient,
     HttpClients, HttpGet and EntityUtils, all of which the crawler code uses -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.10</version>
</dependency>
定义用于接收数据的 POJO(Tencent 类,以下为其字段,getter/setter 省略):
// Serialization version id; presumably the enclosing class implements Serializable (class header not shown).
private static final long serialVersionUID = 1L;
// Record identifier; never set by the crawler code, presumably generated by the database — TODO confirm.
private int id;
// News headline, taken from the "title" field of a feed item.
private String title;
// Short summary, taken from the "intro" field of a feed item.
private String intro;
// Link to the article, taken from the "url" field of a feed item.
private String url;
// Publisher name, taken from the "source" field of a feed item.
private String source;
// Publication time, parsed from the "publish_time" field using the pattern yyyy-MM-dd HH:mm:ss.
private Date publishTime;
代码爬取数据:
// Shared JdbcTemplate used by addNews to run the insert; stays null until main assigns it.
static JdbcTemplate jdbcTemplate = null;
/**
 * Crawls the Tencent news feed page by page and stores every item through {@code addNews}.
 *
 * <p>Database settings are read from {@code src/main/resources/db.properties}
 * (keys {@code jdbc.driverClass}, {@code jdbc.url}, {@code jdbc.username} and
 * {@code jdbc.password}) and used to build the shared {@code jdbcTemplate}.
 * Pages are requested starting at page 1 until the feed reports {@code datanum} of 0
 * (or omits it), or until several consecutive requests return a non-200 status.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the properties file cannot be read or an HTTP request fails
 * @throws ParseException if an item's {@code publish_time} does not match
 *         {@code yyyy-MM-dd HH:mm:ss}
 */
public static void main(String[] args) throws IOException, ParseException {
    // Load the external properties file; try-with-resources closes the stream.
    Properties properties = new Properties();
    try (InputStream inputStream = new FileInputStream(new File("src/main/resources/db.properties"))) {
        properties.load(inputStream);
    }
    // Read the database settings.
    String driver = properties.getProperty("jdbc.driverClass");
    String url = properties.getProperty("jdbc.url");
    String username = properties.getProperty("jdbc.username");
    String password = properties.getProperty("jdbc.password");

    DruidDataSource dataSource = new DruidDataSource();
    dataSource.setDriverClassName(driver);
    dataSource.setUrl(url);
    dataSource.setUsername(username);
    dataSource.setPassword(password);
    jdbcTemplate = new JdbcTemplate(dataSource);

    // Loop-invariant helpers: created once instead of once per page/item.
    Gson gson = new Gson();
    SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    // A failing page is skipped, but a run of failures ends the crawl so that a
    // permanently unavailable endpoint cannot cause an endless loop.
    final int maxConsecutiveFailures = 3;
    int consecutiveFailures = 0;

    // Starting page.
    int page = 1;
    // One HTTP client is reused for all pages and closed when crawling ends.
    try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
        while (true) {
            String urlTencent = "https://pacaio.match.qq.com/irs/rcd?cid=135&token=6e92c215fb08afa901ac31eca115a34f&ext=world&page=" + page + "&expIds=&callback=__jp4";
            // Alternative feed URL:
            // "https://pacaio.match.qq.com/irs/rcd?cid=89&token=4d4e2946f92c5708f32141479596d72e&id=&ext=bj&page="+page+"&expIds=&callback=__jp0"
            HttpGet httpGet = new HttpGet(urlTencent);
            httpGet.setHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36");
            // Closing the response releases the underlying connection.
            try (CloseableHttpResponse httpResponse = httpClient.execute(httpGet)) {
                int statusCode = httpResponse.getStatusLine().getStatusCode();
                if (statusCode != 200) {
                    consecutiveFailures++;
                    if (consecutiveFailures >= maxConsecutiveFailures) {
                        System.err.println("Stopping at page " + page + ": HTTP status " + statusCode);
                        break;
                    }
                    page++;
                    continue;
                }
                consecutiveFailures = 0;

                HttpEntity httpEntity = httpResponse.getEntity();
                // UTF-8 is only the fallback used when the response declares no charset.
                String html = EntityUtils.toString(httpEntity, "UTF-8");
                // Strip the JSONP callback wrapper to get the JSON text.
                String json = parseJson(html);
                // Convert to a map.
                Map map = gson.fromJson(json, Map.class);
                // "datanum" tells how many items the page holds; 0 (or absent) ends the crawl.
                Object num = (map == null) ? null : map.get("datanum");
                int number = (num == null) ? 0 : (int) Double.parseDouble(num.toString());
                if (number == 0) {
                    break;
                }
                // Items of the current page.
                @SuppressWarnings("unchecked")
                List<Map> list = (List<Map>) map.get("data");
                if (list != null) {
                    for (Map map2 : list) {
                        Tencent tencent = new Tencent();
                        String title = map2.get("title").toString();
                        String intro = map2.get("intro").toString();
                        String turl = map2.get("url").toString();
                        String source = map2.get("source").toString();
                        Date publishTime = simpleDateFormat.parse(map2.get("publish_time").toString());
                        tencent.setTitle(title);
                        tencent.setUrl(turl);
                        tencent.setIntro(intro);
                        tencent.setSource(source);
                        tencent.setPublishTime(publishTime);
                        addNews(tencent);
                    }
                }
            }
            page++;
        }
    } finally {
        // Release the pooled database connections.
        dataSource.close();
    }
}
/**
 * Inserts one crawled news item as a new row of table {@code t_tencent}.
 *
 * @param tencent the item whose title, intro, url, source and publish time are stored
 */
public static void addNews(Tencent tencent) {
    final String insertSql = "insert into t_tencent (title,intro,url,source,publish_time) values (?,?,?,?,?)";
    // Values are bound to the placeholders in column order.
    Object[] params = {
        tencent.getTitle(),
        tencent.getIntro(),
        tencent.getUrl(),
        tencent.getSource(),
        tencent.getPublishTime()
    };
    jdbcTemplate.update(insertSql, params);
}
/**
 * Extracts the JSON text from a JSONP response such as {@code __jp4({...})}.
 *
 * <p>Everything between the first {@code '('} and the last {@code ')'} is returned.
 * When the text has no such wrapper (for example plain JSON or an empty body) it is
 * returned unchanged instead of failing with a {@code StringIndexOutOfBoundsException}.
 *
 * @param data the raw response text; must not be {@code null}
 * @return the text inside the callback parentheses, or {@code data} itself when
 *         no wrapper is present
 */
public static String parseJson(String data) {
    int start = data.indexOf('(');
    int end = data.lastIndexOf(')');
    // No opening parenthesis, or no closing one after it: nothing to unwrap.
    if (start < 0 || end <= start) {
        return data;
    }
    return data.substring(start + 1, end);
}
结果如下:
原文:https://www.cnblogs.com/suspring/p/12510826.html