谏言:穷则独善其身,达则兼济天下
图片爬取
# Image crawler: fetch the index page of pic.netbian.com, collect the detail
# page links of every listed picture, and download each picture into
# ./img/<n>.jpg.
#
# Install the third-party libraries first: pip install requests lxml
import os

import requests  # third-party HTTP client used to send the requests
from lxml import etree  # third-party parser used to extract data via XPath

# Address of the index page to crawl
url = 'http://pic.netbian.com/'
# Running number used to name the saved files (1.jpg, 2.jpg, ...)
count = 1
# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10
# Directory the downloaded pictures are written to
IMG_DIR = './img'

# Request headers
headers = {
    # NOTE: this Cookie has expired and can no longer be used
    'Cookie': '__cfduid=d752853af80c2e799aa8b8c814bb33d8e1588249955; zkhanecookieclassrecord=%2C53%2C; PHPSESSID=0tdkvgklarhle4hkqm8hfia011; '
              'Hm_lvt_526caf4e20c21f06a4e9209712d6a20e=1588252204,1588255851,1588255859,1588257133; Hm_lpvt_526caf4e20c21f06a4e9209712d6a20e=1588257147',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
}

# Send the request; the body is decoded as GBK.
response = requests.get(
    url, headers=headers, timeout=REQUEST_TIMEOUT
).content.decode('gbk')

# Data extraction -> relative URLs of the picture detail pages
html = etree.HTML(response)
# XPath notes: "//div" selects every <div> on the page.
# Collect the href of every picture link inside <ul class="clearfix">.
clearfix = html.xpath('//ul[@class="clearfix"]/li/a/@href')
print(clearfix)

# Make sure the output directory exists before the first write.
os.makedirs(IMG_DIR, exist_ok=True)

for url_i in clearfix:
    # Drop the first 8 and the last 5 characters of the href to obtain the
    # picture id (presumably hrefs look like "/tupian/25761.html" -- confirm
    # against the printed list above).
    ID = url_i[8:-5]
    # Download endpoint, e.g.
    #   http://pic.netbian.com/downpic.php?id=25761&classid=66
    #   http://pic.netbian.com/downpic.php?id=25790&classid=60
    urls = 'http://pic.netbian.com/downpic.php?id=' + ID + '&classid=66'
    img_response = requests.get(urls, headers=headers, timeout=REQUEST_TIMEOUT)
    # 'wb' (not 'ab') so that re-running the script replaces an existing file
    # instead of appending a second image to it; the context manager closes
    # the file even if the write fails.
    with open('{}/{}.jpg'.format(IMG_DIR, count), 'wb') as f:
        f.write(img_response.content)
    count += 1
原文:https://www.cnblogs.com/python-study-notebook/p/12811663.html