import re
import urllib
def getHtml(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the page to fetch.

    Returns:
        The bytes read from the response (a plain ``str`` on Python 2).
    """
    # ``urllib.urlopen`` exists only on Python 2; Python 3 moved it to
    # ``urllib.request``. A local import keeps the function usable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(url)
    try:
        return response.read()
    finally:
        # Always release the handle, even if read() raises.
        response.close()
def getImage(source):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute.

    Images are written to the current working directory as ``0.jpg``,
    ``1.jpg``, ... in the order their URLs appear in *source*.

    Args:
        source: HTML to scan. Text is used as is; undecoded bytes (what a
            raw response body is on Python 3) are decoded first.

    Returns:
        The list of file names written, in download order. The list is
        empty when *source* contains no matching ``src`` attribute.
    """
    # ``urllib.urlretrieve`` exists only on Python 2; Python 3 moved it to
    # ``urllib.request``. A local import keeps the function usable on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # A text pattern cannot search bytes on Python 3. On Python 2 ``bytes``
    # is ``str``, so this branch is skipped and the input is left untouched.
    if bytes is not str and isinstance(source, bytes):
        source = source.decode('utf-8', 'replace')

    # ``[^"\n]*`` keeps a match inside one attribute value. The previous
    # ``.*?`` could run past a closing quote, turning
    # ``src="a.png"><img src="b.jpg"`` into a single bogus URL.
    image_urls = re.findall(r'src="([^"\n]*\.jpg)"', source)

    saved = []
    for index, image_url in enumerate(image_urls):
        filename = '%s.jpg' % index
        urlretrieve(image_url, filename)
        saved.append(filename)
    return saved
# Script entry: fetch the forum thread page and hand its HTML to getImage.
source = getHtml('http://tieba.baidu.com/p/3237470549')
# Parenthesised so the line parses on both Python 2 and Python 3; with a
# single argument the printed output is the same on either version.
print(getImage(source))
# Simple and effective crawler code written in Python (via bubuko.com)
# Original article: http://blog.csdn.net/rainlesvio/article/details/38660987