#https://www.4kbizhi.com/
import os
import re
import time

import requests
from bs4 import BeautifulSoup
"""
Scrape wallpaper thumbnails from a 4kbizhi.com-style listing site, rebuild
the full-size image URLs, and save the images to a local directory.

NOTE(review): `url` was empty in the original source — it must be set to the
site root (presumably "https://www.4kbizhi.com/") before this script works.
"""
header = {
    # Plain desktop Chrome UA so the site serves the normal HTML pages.
    'user-agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
}
url = ''  # site root; page 1 is index.html, page N is index_N.html

# Compile the patterns once, outside the page loop.
pattern = re.compile(r's[a-z]+.+jpg')                    # <a> tag attribute holding the "small" thumbnail jpg
pattern_follow = re.compile(r'[0-9]{6}[a-zA-Z0-9]{5}')   # image id: 6 digits + 5 alphanumerics

save_dir = r"D:\壁纸\imgs"
os.makedirs(save_dir, exist_ok=True)  # make sure the target folder exists

for i in range(1, 20):
    print('开始第%d页' % i)
    # Fetch the listing page: page 1 is index.html, later pages index_<i>.html.
    if i == 1:
        source_page = requests.get(url + 'index.html', headers=header, timeout=30)
    else:
        source_page = requests.get(url + 'index_' + str(i) + '.html', headers=header, timeout=30)
    # The server mislabels the charset, so requests decodes as latin-1;
    # round-trip through the bytes to decode the body as GBK.
    source_text = source_page.text.encode('iso-8859-1').decode('gbk')

    # Parse the page and walk every anchor looking for thumbnail links.
    soup = BeautifulSoup(source_text, 'html.parser')
    a = 0  # running anchor counter, used to make file names unique
    print(time.time())
    for item in soup.find_all('a'):
        link = pattern.findall(str(item))
        title = item.get_text()
        link_follow = pattern_follow.findall(str(item))
        a += 1
        # Require both matches; the original indexed link_follow[0]
        # unconditionally and could raise IndexError.
        if link and link_follow:
            # Rebuild the full-size image URL from the thumbnail URL:
            # take the attribute value (split on the quote), drop the
            # "small" path segment, then append the image id and extension.
            link_str = link[0]
            adder = link_str.split('"')
            adder = adder[1].split('small')
            pic = url + adder[0] + link_follow[0] + '.jpg'
            img = requests.get(pic, timeout=30)
            # File name: <title><anchor-index>_<page>.jpg under save_dir.
            with open(os.path.join(save_dir, title + str(a) + '_' + str(i) + '.jpg'), 'wb') as f:
                f.write(img.content)
            print(title + str(a) + '...200')
    print('第%d页结束' % i)
    print(time.time())
# Source (原文): https://www.cnblogs.com/jion/p/14807634.html