import requests from lxml import etree #爬取知乎热度的前十名 headers = {‘User-Agent‘:‘Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362‘} url = "https://tophub.today/" response = requests.get(url,headers = headers) #发送get请求 html = etree.HTML(response.text) #构建一个xpath解析对象 a = html.xpath(".//span[@class=‘t‘]/text()")[50:60] #利用正则表达,爬取知乎排名前十名的标题 b = html.xpath(".//span[@class=‘e‘]/text()")[50:60] #利用正则表达,爬取知乎排名前十名的热度 for i in range(0 , len(a)): print("排名:{}--标题:{}------热度:{}".format(i+1,a[i],b[i])) #数据可视化
原文:https://www.cnblogs.com/FFDXMM/p/12541667.html