多线程下载图片

时间：2015-02-06 18:07:59 阅读：336 评论：0 收藏：0 [点我收藏+]

# -*- coding:utf8 -*-
from bs4 import BeautifulSoup
import os, sys, urllib2, urllib
import thread, threading
class downloader(threading.Thread):
    """Thread that downloads a single URL to a local file.

    Args:
        url: Address of the resource to fetch.
        name: Destination file path. Assigning ``self.name`` goes through
            ``threading.Thread``'s ``name`` property, so the path also
            becomes the thread's name.
    """

    def __init__(self, url, name):
        threading.Thread.__init__(self)
        self.url = url
        self.name = name

    def run(self):
        """Fetch ``self.url`` and store it at the path held in ``self.name``."""
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement this script was written for.
        print('downling from %s' % self.url)
        urllib.urlretrieve(self.url, self.name)
# Every downloader thread started by page_loop is appended to this list.
threads = []
def page_loop(page=1):
    """Download the JPEG images of the gallery, page after page.

    Starting at ``page``, fetch the gallery page, collect its ``<img>`` tags
    and start one ``downloader`` thread for every image whose ``src``
    contains ``jpg``. Files are saved as
    ``dbmeizi/<counter><last 4 characters of the image URL>``, where the
    counter is the module-level ``x``. Pages are visited in sequence until a
    page holds fewer than 5 ``<img>`` tags; the process then exits through
    ``sys.exit(0)``.

    Args:
        page: Page number to start from (anything ``int()`` accepts).

    Raises:
        SystemExit: When the end-of-gallery check triggers.
    """
    global x
    out_dir = 'dbmeizi'
    # urlretrieve does not create missing directories, so make sure the
    # target directory exists before any download thread is started.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Iterate instead of recursing once per page, so a long gallery cannot
    # run into the interpreter's recursion limit.
    while True:
        url = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s' % page
        content = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(content)
        finally:
            # Release the HTTP connection once the page has been parsed.
            content.close()
        my_girl = soup.find_all('img')

        # End-of-gallery heuristic kept from the original: a page with fewer
        # than 5 <img> tags is treated as being past the last page.
        if len(my_girl) < 5:
            print('已经全部抓取完毕')
            sys.exit(0)

        print('开始抓取')
        for girl in my_girl:
            link = girl.get('src')
            # Skip tags without a src and non-JPEG images; otherwise a stale
            # (or not yet assigned) URL would be downloaded for them.
            if not link or 'jpg' not in link:
                continue
            flink = 'http://www.beautylegmm.com' + link
            print(flink)
            path = out_dir + '/' + str(x) + flink[-4:]
            x = x + 1
            t = downloader(flink, path)
            threads.append(t)
            t.start()

        page = int(page) + 1
        print('开始抓取下一页')
        print('the %s page' % page)
   
# Running counter that page_loop uses (via `global x`) to number saved files.
# It must be bound before page_loop() is called.
x = 1
# Start scraping from the default first page (page=1).
page_loop()

来自为知笔记(Wiz)

多线程下载图片

原文：http://www.cnblogs.com/highroom/p/cbb0d977a78d35dac83bd56f5d08f61c.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)