#!/usr/bin/python
import sys
# Regular expression library
import re
import urllib
def getHtml(url):
    """Fetch the resource at ``url`` and return its raw body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` on the opened response yields (a byte string).

    The response handle is closed as soon as the body has been read; the
    previous version opened it and never released it.
    """
    # Imported locally so the function works on both Python 2 and Python 3
    # without depending on which ``urllib`` layout the file-level import got.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() is used because the Python 2 response object is not a
    # context manager on its own.
    with closing(urlopen(url)) as page:
        return page.read()
def getImg(html):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute.

    Each matched URL is saved in the current working directory as
    ``0.jpg``, ``1.jpg``, ... in the order the matches appear. URLs are
    passed to the downloader exactly as found; relative ones are not
    resolved against the page address.

    Args:
        html: Page markup, either text or the byte string from ``getHtml``.

    Returns:
        The list of matched image URLs (empty when nothing matches).
    """
    # Imported locally so the function works on both Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # On Python 3 a downloaded page is bytes, which a text pattern cannot
    # search; on Python 2 (where bytes is str) the input is left untouched.
    if bytes is not str and isinstance(html, bytes):
        html = html.decode('utf-8', 'replace')

    # The pattern previously read ".=?" (one character plus an optional "="),
    # which only matched a one- or two-character stem; ".+?" lazily captures
    # the whole URL up to the first '.jpg"'.
    imgre = re.compile(r'src="(.+?\.jpg)"')
    imglist = imgre.findall(html)
    for x, imgurl in enumerate(imglist):
        urlretrieve(imgurl, '%s.jpg' % x)
    return imglist
# Script entry point: download the page named by the first command-line
# argument, save every .jpg it references, then print the list of image URLs.
if __name__ == "__main__":
    # A page URL is required; say so instead of exiting silently.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <page-url>\n" % sys.argv[0])
        sys.exit(-1)
    # The name assigned here must match the one passed to getHtml below;
    # the earlier spelling mismatch raised NameError.
    htmlurl = sys.argv[1]
    html = getHtml(htmlurl)
    imglist = getImg(html)
    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3.
    print(imglist)


# Python爬虫--抓取单一页面上的图片文件学习
# (Python crawler: learning to download the image files on a single page)
# 原文 (original article): http://blog.51cto.com/11822287/2053105