首页 > 数据库技术 > 详细

python+mysql抓取百度新闻的标题存到数据库

时间:2016-02-22 17:39:44      阅读:676      评论:0      收藏:0      [点我收藏+]
#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib2

import re

import MySQLdb


class BaiDuNews:
    """Scrape headline titles from the Baidu News front page into MySQL.

    The three public methods are meant to be chained: ``getPage`` downloads
    the HTML, ``getContents`` extracts the headline texts from it, and
    ``saveDB`` inserts those texts into the ``baidunews`` table.
    """

    # Captures the text of the first anchor that follows each
    # ``<li class="hd...`` element; re.S lets ``.`` span line breaks.
    _TITLE_PATTERN = re.compile(r'<li class="hd.*?<a.*?>(.*?)</a>', re.S)

    def __init__(self):
        """Set the URL of the page that ``getPage`` downloads."""
        self.baseurl = 'http://news.baidu.com/'

    def getPage(self):
        """Download ``self.baseurl`` and return its body decoded from GBK.

        Returns:
            The page HTML as a text (unicode) string.
        """
        request = urllib2.Request(self.baseurl)
        response = urllib2.urlopen(request)
        try:
            return response.read().decode('gbk')
        finally:
            # Release the HTTP connection even if reading or decoding fails.
            response.close()

    def getContents(self, page):
        """Extract the headline titles from the page HTML.

        Args:
            page: Decoded HTML text of the news page.

        Returns:
            A list with one UTF-8 encoded byte string per matched headline,
            in document order; empty when nothing matches.
        """
        contents = []
        for item in self._TITLE_PATTERN.findall(page):
            # Echo each title so the scrape can be followed on the console.
            print(item)
            contents.append(item.encode('utf-8'))
        return contents

    def saveDB(self, contents):
        """Insert every title into the ``baidunews`` table and commit.

        The table must already exist with an auto-generated first column
        and a text second column, for example::

            CREATE TABLE baidunews (
                `id` INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
                `text` VARCHAR(255)
            )

        Nothing is committed if any insert fails; the exception propagates
        to the caller after the cursor and connection have been closed.

        Args:
            contents: Iterable of title strings, stored one per row.
        """
        db = MySQLdb.connect(host='127.0.0.1', user='root', passwd='',
                             db='test', charset='utf8')
        try:
            cur = db.cursor()
            try:
                # Bind the value through the driver rather than formatting
                # it into the statement: titles come from a remote page and
                # may contain quotes or backslashes that would break (or
                # hijack) a hand-quoted SQL string.
                sql = 'INSERT INTO baidunews VALUES (NULL, %s)'
                for content in contents:
                    cur.execute(sql, (content,))
            finally:
                cur.close()
            db.commit()
        finally:
            # Always release the connection, including on failure.
            db.close()


# Run the scrape end to end: download the front page, pull out the
# headline titles, then store them in the database.
news = BaiDuNews()
page = news.getPage()
titles = news.getContents(page)
news.saveDB(titles)

python+mysql抓取百度新闻的标题存到数据库

原文:http://www.cnblogs.com/luolizhi/p/5207557.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!