# -*- coding: utf8 -*-
"""Download every JPEG image from a paginated beautylegmm.com gallery.

Each gallery page is fetched in turn, its ``<img>`` tags are scanned for JPEG
sources, and each image is saved by its own thread into ``OUTPUT_DIR`` under a
running sequence number (``1.jpg``, ``2.jpg``, ...).
"""
import os
import sys
import threading
import urllib  # retained from the original import list
from contextlib import closing

try:  # Python 2 module layout (what this script was written for)
    import thread  # retained from the original import list
    import urllib2  # retained from the original import list
    from urllib import urlretrieve
    from urllib2 import urlopen
    from urlparse import urljoin
except ImportError:  # Python 3 module layout
    import _thread as thread
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup

# Gallery page address; ``%s`` receives the page number.
PAGE_URL_TEMPLATE = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s'

# Directory the images are written to (created on demand).
OUTPUT_DIR = 'dbmeizi'

# A page with fewer <img> tags than this is treated as being past the last
# gallery page.  This is a crude end-of-gallery heuristic inherited from the
# original script.
MIN_IMAGES_PER_PAGE = 5


class downloader(threading.Thread):
    """Thread that saves a single remote file to a local path.

    Args:
        url: Address of the file to fetch.
        name: Local path the file is written to.  It is stored in ``name``,
            which is also the attribute ``threading.Thread`` uses as the
            thread's name.
    """

    def __init__(self, url, name):
        threading.Thread.__init__(self)
        self.url = url
        self.name = name

    def run(self):
        """Fetch ``self.url`` and write it to ``self.name``."""
        print('downling from %s' % self.url)
        urlretrieve(self.url, self.name)


# Every downloader started so far; joined before the script exits.
threads = []

# Sequence number used as the file name of the next saved image.
x = 1


def page_loop(page=1):
    """Scrape gallery pages starting at ``page`` until the gallery ends.

    For every page, one ``downloader`` thread is started per ``<img>`` whose
    ``src`` contains ``'jpg'``.  Files are numbered with the module-level
    counter ``x``, which is advanced for each image.

    Args:
        page: Number of the first page to scrape.

    Raises:
        SystemExit: With status 0 once a page holding fewer than
            ``MIN_IMAGES_PER_PAGE`` images is reached, after all started
            downloads have finished.
    """
    global x

    # urlretrieve() does not create missing directories.
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # Iterate rather than recurse so the stack depth does not grow with the
    # number of gallery pages.
    while True:
        url = PAGE_URL_TEMPLATE % page
        # closing() releases the HTTP response on both Python 2 and 3.
        with closing(urlopen(url)) as content:
            # Name the parser explicitly so the result does not depend on
            # which optional parsers happen to be installed.
            soup = BeautifulSoup(content, 'html.parser')
        my_girl = soup.find_all('img')

        if len(my_girl) < MIN_IMAGES_PER_PAGE:
            print('已经全部抓取完毕')
            for worker in threads:
                worker.join()
            sys.exit(0)

        print('开始抓取')
        for girl in my_girl:
            link = girl.get('src')
            # <img> tags without a src attribute yield None.
            if not link or 'jpg' not in link:
                continue
            # Resolve against the page URL so root-relative, relative and
            # absolute src values all produce a valid address.
            flink = urljoin(url, link)
            print(flink)
            path = os.path.join(OUTPUT_DIR, str(x) + flink[-4:])
            x = x + 1
            t = downloader(flink, path)
            threads.append(t)
            t.start()

        page = int(page) + 1
        print('开始抓取下一页')
        print('the %s page' % page)


if __name__ == '__main__':
    page_loop()
# Original article: http://www.cnblogs.com/highroom/p/cbb0d977a78d35dac83bd56f5d08f61c.html