# coding=utf-8
"""Download wallpapers matching a search keyword from wall.alphacoders.com.

Requires Python 3 and the ``requests`` package.

Usage:
    python getwallpaper.py <keyword> [download_dir]
    e.g. python getwallpaper.py kaori ../resource/wallpaper

Functions:
    download_pic       -- download one image and save it locally
    get_download_link  -- ask the site for an image's download URL
    getwallpaper       -- walk the search result pages for a keyword

Known limitations: single-threaded, rough handling of details, and not
general-purpose (tied to the markup of one site).
"""
import os
import re
import sys

import requests

# Proxy address used for every HTTP request.
proxies = {
    "http": "http://127.0.0.1:25378",
    "https": "http://127.0.0.1:25378",
}
# Directory the images are saved into.
download_dir = '../resource/wallpaper/'
# Number of images downloaded so far.
downloaded_num = 0
# Total number of results reported by the search page.
total = 0

# Link to the next result page; the href is '#' on the last page.
_NEXT_PAGE_RE = re.compile(r"<a id='next_page' href=[\'\"](.+?)[\'\"]>")
# Form fields needed to request a download link, one match per wallpaper.
_ITEM_RE = re.compile(
    r'data-id="(\d+?)" data-type="(\w+?)" data-server="(\w+?)" data-user-id="(\d+?)"'
)
# Page heading whose leading number is the total result count.
_TOTAL_RE = re.compile(r"<h1 class='center title'>\s+?(\d+)(.+?)\s+?</h1>")


def download_pic(url, name, pic_type):
    """Download the image at *url* and save it as ``<name>.<pic_type>``.

    The file is written into the module-level ``download_dir``, which is
    created if it does not exist. On success ``downloaded_num`` is
    incremented and a progress line is printed. When the server answers
    with an error status, nothing is written and the counter is unchanged.
    """
    global downloaded_num

    os.makedirs(download_dir, exist_ok=True)

    r = requests.get(url, proxies=proxies)
    if not r.ok:
        # Do not save an HTTP error page under an image file name.
        print('{}.{} Failed! (HTTP {})'.format(name, pic_type, r.status_code))
        return

    downloaded_num += 1
    target = os.path.join(download_dir, '%s.%s' % (name, pic_type))
    with open(target, 'wb') as f:
        f.write(r.content)
    # Progress output, e.g. "[  101/817] 629538.jpg Done!"
    print('[{:5d}/{}] {}.{} Done!'.format(downloaded_num, total, name, pic_type))


def get_download_link(wallpaper_id, wallpaper_type, server, user_id):
    """Request the download URL for one wallpaper and download it.

    The four arguments are posted as form data to the site's
    ``get_download_link.php`` endpoint; the response body is used as the
    image URL and handed to ``download_pic``.
    """
    post_data = {
        'wallpaper_id': wallpaper_id,
        'type': wallpaper_type,
        'server': server,
        'user_id': user_id,
    }
    r = requests.post('https://wall.alphacoders.com/get_download_link.php',
                      data=post_data, proxies=proxies)
    link = r.text.strip()
    if not r.ok or not link:
        # Without a usable link there is nothing to download for this item.
        print('{}.{} Failed! (no download link)'.format(wallpaper_id, wallpaper_type))
        return
    download_pic(link, wallpaper_id, wallpaper_type)


def getwallpaper(keyword):
    """Download every wallpaper the site's search returns for *keyword*.

    Result pages are fetched one after another, starting at page 1. The
    result count shown on the first page is stored in the module-level
    ``total``. The loop ends when a page has no "next page" link (reported
    as an empty search) or when that link's href is ``#`` (last page).
    """
    global total

    page_num = 1
    while True:
        # Passing the query via ``params`` lets requests URL-encode the keyword.
        r_page = requests.get(
            'https://wall.alphacoders.com/search.php',
            params={'search': keyword.lower(), 'lang': 'Chinese', 'page': page_num},
            proxies=proxies,
        )
        nextpage_link = _NEXT_PAGE_RE.search(r_page.text)
        # No pager on the page: treat it as "no results" and stop.
        if nextpage_link is None:
            print("Sorry, we have no results for your search!")
            break

        if page_num == 1:
            total_match = _TOTAL_RE.search(r_page.text)
            # Leave ``total`` unchanged if the heading is missing instead of crashing.
            if total_match is not None:
                total = int(total_match.group(1))
            print("the %s wallpaper's total is %d" % (keyword, total))

        for wallpaper_id, wallpaper_type, server, user_id in _ITEM_RE.findall(r_page.text):
            get_download_link(wallpaper_id, wallpaper_type, server, user_id)

        # An href of '#' means there is no further page.
        if nextpage_link.group(1) == '#':
            print("All wallpaper done!")
            break
        page_num += 1


if __name__ == '__main__':
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        usage_text = (
            "Usage:\n\tpython getwallpaper.py miku [miki_pic]\n"
            "First param: the name of script\n"
            "Second param: the wallpaper's keyword which you want to search\n"
            "Third param: the dir's name where you want to download in, "
            "optional, default in ../resource/wallpaper/"
        )
        print(usage_text)
    else:
        if len(sys.argv) == 3:
            # Optional second argument overrides the default download directory.
            download_dir = sys.argv[2]
        getwallpaper(sys.argv[1])
# Original article: https://www.cnblogs.com/wpgraceii/p/10619816.html