import requests
from bs4 import BeautifulSoup
import re
NEWS_URL = "https://www.autohome.com.cn/news/"

# Seconds to wait on each HTTP request; without an explicit timeout a stalled
# connection would block the script.
REQUEST_TIMEOUT = 10

# Matches runs of characters in U+4E00-U+9FA5, ASCII letters, and digits.
# Everything else (punctuation, whitespace, path separators) is dropped when a
# headline is turned into a file name. Compiled once, outside the loop.
TITLE_CHARS = re.compile(r"[\u4e00-\u9fa5a-zA-Z0-9]+")


def clean_title(raw_title):
    """Return *raw_title* reduced to the characters matched by TITLE_CHARS.

    The result is used as a file-name stem, so every character outside the
    allowed set is removed. Returns an empty string when nothing survives.
    """
    return "".join(TITLE_CHARS.findall(raw_title))


def image_url(src):
    """Turn an ``<img src>`` value into a fetchable URL, or return ``None``.

    * ``None`` or an empty string -> ``None`` (the tag has no usable source).
    * Protocol-relative ``//host/path`` -> prefixed with ``http:``.
    * Already absolute ``http://`` / ``https://`` -> returned unchanged.
    * Anything else (relative paths, ``data:`` URIs, ...) -> ``None``, because
      prefixing a scheme onto it would not give a valid address.
    """
    if not src:
        return None
    if src.startswith("//"):
        return "http:" + src
    if src.startswith(("http://", "https://")):
        return src
    return None


def main():
    """Print each news item's link and title and save its images.

    Fetches NEWS_URL, looks up the element with id
    ``auto-channel-lazyload-article``, and for every ``<li>`` in it that
    contains an ``<a>``: prints the link's ``href``, prints the cleaned
    ``<h3>`` title, and writes each ``<img>`` found in the link to
    ``<title>.jpg`` in the current working directory.

    Raises:
        requests.RequestException: if the news page itself cannot be fetched.
        SystemExit: if the article container is missing from the page.
    """
    response = requests.get(url=NEWS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode using the encoding detected from the body rather than the one
    # derived from the response headers.
    response.encoding = response.apparent_encoding

    soup = BeautifulSoup(response.text, features="lxml")
    target = soup.find(id="auto-channel-lazyload-article")
    if target is None:
        raise SystemExit(
            "element with id 'auto-channel-lazyload-article' not found in page"
        )

    for item in target.find_all("li"):
        anchor = item.find("a")
        if not anchor:
            continue
        print(anchor.attrs.get("href"))

        heading = anchor.find("h3")
        if heading is None:
            # No headline means no title to print and no file name to use.
            continue
        title = clean_title(heading.text)
        print(title)
        if not title:
            # An empty title would produce the file name ".jpg".
            continue

        # NOTE: several images under one link share a file name, so the last
        # one written wins.
        file_name = title + ".jpg"
        for pic_tag in anchor.find_all("img"):
            img_url = image_url(pic_tag.get("src"))
            if img_url is None:
                continue
            print(img_url)
            try:
                img_response = requests.get(url=img_url, timeout=REQUEST_TIMEOUT)
                img_response.raise_for_status()
            except requests.RequestException as exc:
                # One broken image should not abort the whole crawl.
                print("failed to download %s: %s" % (img_url, exc))
                continue
            with open(file_name, "wb") as f:
                f.write(img_response.content)


if __name__ == "__main__":
    main()
# Original article: https://www.cnblogs.com/linglinglingling/p/12233581.html