其实这些东西好像不用这样获取,中国天气网上直接提供了天气的 JSON 数据接口,
不过最近想弄一个一键新闻或者其它例如糗百、微博热点、QQ热点聚合的东西。
先拿天气来练练手
其实这个不用模拟登录,所以非常简单,上代码:
# -*- coding: utf-8 -*-
"""Fetch and print the weather forecast of one province from weather.com.cn.

The script runs on both Python 2.6+ and Python 3.
"""
import re
import sys
import threading

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib2 import urlopen


class Wathereather_Spider_Model(object):
    """Download a province page in a background thread and parse it.

    The (misspelled) class name is kept as-is so existing callers still work.

    Attributes:
        ok: True once a page has been fetched and parsed successfully.
        weatherList: list of 4-tuples, one per regex match on the page.
        error: the exception raised by the last background fetch, or None.
        done: threading.Event set when the background fetch has finished,
            whether it succeeded or failed.
    """

    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot block the worker (and the waiting caller) forever.
    TIMEOUT = 10

    # Compiled once instead of on every getWeather() call.  Each match yields
    # four captured groups from one <dl>...</dl> entry of the page.
    _WEATHER_RE = re.compile(
        r'<dl><dt><a title=.*?>(.*?)</a></dt><dd><a href=.*?>.*?</a>'
        r'<a href=.*?><span>(.*?)</span></a>(.*?)'
        r'<a href=.*?><b>(.*?)</b></a></dd></dl>'
    )

    def __init__(self):
        self.ok = False
        # Initialised here so readers never hit AttributeError before a fetch.
        self.weatherList = []
        self.error = None
        self.done = threading.Event()

    def getHtml(self, url):
        """Return the raw body of *url* as read from the connection."""
        page = urlopen(url, timeout=self.TIMEOUT)
        try:
            return page.read()
        finally:
            # Close the connection even when read() raises.
            page.close()

    def getWeather(self, url):
        """Fetch *url*, store the parsed entries in weatherList and set ok.

        Any exception from the download propagates to the caller.
        """
        html = self.getHtml(url)
        if isinstance(html, bytes):
            # Decode once at the I/O boundary; the page is assumed to be
            # UTF-8, as the original script assumed when printing.
            html = html.decode("utf-8", "replace")
        self.weatherList = self._WEATHER_RE.findall(html)
        self.ok = True

    def _run(self, url):
        """Thread body: run getWeather and always signal completion."""
        try:
            self.getWeather(url)
        except Exception as exc:  # thread boundary: record, do not lose it
            self.error = exc
        finally:
            self.done.set()

    def start(self, pydaihao):
        """Start fetching the page of province *pydaihao* in the background.

        *pydaihao* is the province identifier used in the site's URLs,
        e.g. "beijing" gives
        http://www.weather.com.cn/html/province/beijing.shtml
        """
        url = "".join(
            ["http://www.weather.com.cn/html/province/", pydaihao, ".shtml"]
        )
        self.ok = False
        self.error = None
        self.done.clear()
        worker = threading.Thread(target=self._run, args=(url,))
        # Like the original low-level thread, do not keep the process alive.
        worker.daemon = True
        worker.start()

    def wait(self, timeout=None):
        """Block until the fetch begun by start() finishes; return ok.

        With timeout=None this blocks indefinitely, so only call it after
        start().
        """
        self.done.wait(timeout)
        return self.ok


def _console_text(text):
    """Return *text* in a form that is safe to print on this console.

    Characters the console encoding cannot represent are replaced instead of
    raising, and no particular code page is assumed.
    """
    encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
    data = text.encode(encoding, "replace")
    if str is bytes:  # Python 2: print the encoded byte string directly
        return data
    return data.decode(encoding)


def main(pydaihao="guangdong"):
    """Fetch and print the weather of *pydaihao*; return a process exit code."""
    weatherModel = Wathereather_Spider_Model()
    weatherModel.start(pydaihao)
    print(_console_text("now getting weather of  " + pydaihao))
    # Sleep on the event instead of spinning on the flag; this also returns
    # when the download fails, so the script can no longer hang forever.
    if not weatherModel.wait():
        print(_console_text(
            "failed to get weather of " + pydaihao + ": "
            + str(weatherModel.error)
        ))
        return 1
    for weather in weatherModel.weatherList:
        print(_console_text(" ".join(weather)))
    return 0


if __name__ == "__main__":
    sys.exit(main())
结果如下
python 获取省份的天气预报,布布扣,bubuko.com
原文:http://blog.csdn.net/xyz5354/article/details/38314815