Python爬虫爬取各个城市历史天气及数据可视化

时间：2019-08-21 23:15:46 阅读：664 评论：0 收藏：0 [点我收藏+]

数据抓取

import asyncio
import aiohttp
from lxml import etree
import re
from collections import namedtuple

# Request parameters for one monthly page: city slug, year and month number.
Args = namedtuple('Args', ['city', 'year', 'month'])
    
async def work(args):
    """Download one monthly history page and return its table cell texts.

    Args:
        args: Object exposing ``city``, ``year`` and ``month`` attributes
            (for example an ``Args`` tuple).  ``city`` is formatted with
            ``%s`` and ``year``/``month`` with ``%d``/``%02d`` into the
            page URL.

    Returns:
        A list of strings, one per ``<td>`` matched by
        ``//table[@class='b']/tr/td`` in document order, with every
        "CR LF followed by spaces" run removed.  Cells that have no direct
        text, or whose text consists solely of such a run, are left out.
    """
    # A CRLF line break plus the indentation spaces that follow it.
    padding = re.compile(r"\r\n *")
    url = "http://www.tianqihoubao.com/lishi/%s/month/%d%02d.html" % (args.city, args.year, args.month)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers, timeout=1000, verify_ssl=False) as response:
            html = await response.text()
    # The body has been read completely, so parsing does not need the
    # connection to stay open.
    cells = etree.HTML(html).xpath("//table[@class='b']/tr/td")
    return [
        padding.sub("", cell.text)
        for cell in cells
        # ``text`` is None when a cell has no text before its first child;
        # passing None to the regex functions would raise TypeError.
        if cell.text is not None and not padding.fullmatch(cell.text)
    ]

# Create the event loop explicitly instead of relying on
# asyncio.get_event_loop() to create one implicitly.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    # One download task per month, January through May 2018, for 'wuhan'.
    tasks = [loop.create_task(work(Args('wuhan', 2018, month))) for month in range(1, 6)]
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Release the loop's resources once every task has finished.
    loop.close()

# Tasks are kept in month order, so results print from January to May.
for task in tasks:
    print(task.result())

原文：https://www.cnblogs.com/plyonfly/p/11391848.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)