首页 > 其他 > 详细

爬取美少女壁纸

时间:2020-05-28 21:25:58      阅读:73      评论:0      收藏:0      [点我收藏+]

1. 有时候想找些好看的壁纸,一个一个下载太慢了。作为一个菜鸡程序员,还是会点爬虫的;说到爬虫,当然还是 Python 香,说干就干。

 

import requests
import  re,os
from lxml import etree
import threading


def getUrl(i):
    """Collect album links from one page of the wallpaper search results.

    Args:
        i: Page number substituted into the search URL.

    Returns:
        A list of ``[href, name]`` two-element lists, one for each result
        item where both XPath lookups returned at least one match.
    """
    url_list = []
    page_url = "https://www.bizhizu.cn/search/动漫/" + str(i) + ".html"
    page_res = requests.get(page_url).text
    html = etree.HTML(page_res)

    # Probe list items 1..18 by position. The loop variable is named ``idx``
    # so that it does not overwrite the page-number parameter ``i``.
    for idx in range(1, 19):
        # The XPath expressions must be string literals; the item position is
        # spliced in between the square brackets.
        item = '//*[@class="imgcont"]/ul/li[' + str(idx) + ']'
        html_data = html.xpath(item + '/a[1]/@href')
        name = html.xpath(item + '/a[2]/text()')

        # Keep only items for which both the link and the title were found.
        if html_data and name:
            url_list.append([html_data[0], name[0]])

    print(url_list)
    return url_list


def saveImg(url, path_name, base_dir="/Volumes/HD2/downloadpic/"):
    """Download the images listed on one album page into a per-album folder.

    Args:
        url: Address of the album page (e.g.
            ``https://www.bizhizu.cn/pic/62690.html``).
        path_name: Album title. It is matched against the keyword filter and,
            with ``/`` replaced by ``-``, used as the folder name.
        base_dir: Directory under which the album folder is created. Defaults
            to the location the script originally hard-coded.

    Returns:
        None. Images are written to disk and a line is printed per saved file.
    """
    # NOTE(review): the first three keywords are empty strings in the source
    # (their text appears to have been lost). An empty string is a substring
    # of every title, so the filter currently accepts every album. Fill in
    # the intended keywords to make the filter effective.
    keywords = ("", "", "", "少女")
    if not any(word in path_name for word in keywords):
        return

    res = requests.get(url).text
    html = etree.HTML(res)

    r_name = path_name.replace("/", "-")
    album_dir = os.path.join(base_dir, r_name)
    # exist_ok avoids the check-then-create race when two worker threads
    # handle albums with the same title at the same time.
    os.makedirs(album_dir, exist_ok=True)

    # Probe thumbnail items 1..9 by position.
    for i in range(1, 10):
        html_data = html.xpath('//*[@id="thumb"]/li[' + str(i) + ']/a/img/@src')
        if not html_data:
            continue

        thumb_src = html_data[0]
        # Strip the ".220.146.jpg" thumbnail suffix to get the base image URL.
        img_url = re.findall(r"(https://.*?\.jpg)\.220\.146.jpg", thumb_src)
        # Extract the bare file name (without extension) from the thumbnail URL.
        img_url_name = re.findall(
            r"https://uploadfile.bizhizu.cn/up/.*/.*/.*/(.*?)\.jpg\.220\.146.jpg",
            thumb_src,
        )
        # Skip thumbnails whose URL does not match both patterns instead of
        # failing with IndexError.
        if not img_url or not img_url_name:
            continue

        # presumably the ".source.jpg" variant is the full-size image — TODO confirm
        img = requests.get(img_url[0] + ".source.jpg")

        # Binary write mode: "wb" replaces any earlier download, whereas append
        # mode would concatenate a second copy onto the file on a re-run.
        target = os.path.join(album_dir, img_url_name[0] + ".jpg")
        with open(target, "wb") as f:
            f.write(img.content)
        print("保存成功:", path_name + "/" + img_url_name[0] + ".jpg")
def demo1():
    """Pass every album that getUrl returns for pages 1 through 17 to saveImg."""
    for page in range(1, 18):
        for entry in getUrl(page):
            # Each entry is indexed as (album link, album title).
            link, title = entry[0], entry[1]
            saveImg(link, title)

def demo2():
    """Pass every album that getUrl returns for pages 18 through 36 to saveImg."""
    for page in range(18, 37):
        for entry in getUrl(page):
            # Each entry is indexed as (album link, album title).
            link, title = entry[0], entry[1]
            saveImg(link, title)

if __name__ == "__main__":
    # Start two threads, one per page range.
    t1 = threading.Thread(target=demo1)
    t2 = threading.Thread(target=demo2)
    t1.start()
    t2.start()

 

爬取美少女壁纸

原文:https://www.cnblogs.com/s42-/p/12983525.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!