"""Scrape the Autohome news index page.

For every article link in the lazy-loaded article list, print the link
target and the cleaned headline, then download each image found inside the
link into the current working directory.
"""

import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

NEWS_URL = "https://www.autohome.com.cn/news/"
ARTICLE_LIST_ID = "auto-channel-lazyload-article"
REQUEST_TIMEOUT = 10  # seconds; without it a stalled server blocks forever
IMAGE_SCHEME = "http:"  # scheme prepended to protocol-relative image links
FALLBACK_TITLE = "untitled"  # used when the cleaned headline is empty

# Only CJK ideographs and ASCII alphanumerics are kept, because the cleaned
# headline doubles as the image file name.
TITLE_CHARS = re.compile(r"[\u4e00-\u9fa5a-zA-Z0-9]+")


def _clean_title(raw_title):
    """Return *raw_title* reduced to CJK ideographs and ASCII alphanumerics."""
    return "".join(TITLE_CHARS.findall(raw_title))


def _image_url(src):
    """Return an absolute URL for an ``<img src>`` value, or None if unusable.

    Protocol-relative links (``//host/path``) get ``IMAGE_SCHEME`` prepended;
    anything else is resolved against ``NEWS_URL``, which leaves absolute
    URLs untouched and turns relative paths into absolute ones.
    """
    if not src:
        return None
    if src.startswith("//"):
        return IMAGE_SCHEME + src
    return urljoin(NEWS_URL, src)


def _file_name(title, index):
    """Return the file name for the *index*-th (1-based) image of an article.

    The first image keeps the plain ``<title>.jpg`` name; later images get a
    numeric suffix so they do not overwrite the first one.
    """
    base = title or FALLBACK_TITLE
    if index == 1:
        return base + ".jpg"
    return f"{base}_{index}.jpg"


def _download(img_url, file_name):
    """Fetch *img_url* and write the response body to *file_name*.

    A failed request is reported and skipped so that one broken image does
    not abort the remaining downloads.
    """
    try:
        img_response = requests.get(url=img_url, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        print(f"skipped {img_url}: {exc}")
        return
    with open(file_name, "wb") as f:
        f.write(img_response.content)


def main():
    """Fetch the news page, print link/title/image URL and save the images."""
    response = requests.get(url=NEWS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Let requests guess the charset from the body before decoding the text.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, features="lxml")

    target = soup.find(id=ARTICLE_LIST_ID)
    if target is None:
        raise SystemExit(f"element #{ARTICLE_LIST_ID} not found at {NEWS_URL}")

    for item in target.find_all("li"):
        anchor = item.find("a")
        if anchor is None:
            continue
        print(anchor.attrs.get("href"))

        heading = anchor.find("h3")
        if heading is None:
            # No headline means there is nothing to name the image after.
            continue
        txt = _clean_title(heading.text)
        print(txt)

        for index, pic_tag in enumerate(anchor.find_all("img"), start=1):
            img_url = _image_url(pic_tag.get("src"))
            if img_url is None:
                continue
            print(img_url)
            _download(img_url, _file_name(txt, index))


if __name__ == "__main__":
    main()
# Original article: https://www.cnblogs.com/linglinglingling/p/12233581.html