标签:
选取的桌面壁纸网站,汽车主题:
调试期间可以打开下面的两个print:
#print tag #print attrs
#!/usr/bin/env python
import re
import urllib2
import HTMLParser
# Site root; hrefs taken from parsed pages are appended to it to build
# absolute URLs.
base = "http://desk.zol.com.cn"
# Prefix for saved images. It is concatenated directly with the file name,
# so it has to end with a path separator.
path = '/home/mk/cars/'
# URL of the most recent gallery link found while parsing the index page.
# "next" links equal to this value are not followed.
star = ''
def get_url(html):
    """Fetch a page and run it through a detail-mode parser.

    Despite its name, ``html`` is the URL of the page to fetch, not markup.
    The response body is fed to ``parse(False)``, whose tag handlers react
    to the big-image tag and the "next" link on that page.

    Returns None; all work happens as side effects of the parser callbacks.
    """
    page_body = urllib2.urlopen(urllib2.Request(html)).read()
    detail_parser = parse(False)
    detail_parser.feed(page_body)
def download(url):
    """Download the image at ``url`` and save it under the ``path`` prefix.

    The file name is the first ``<digits>.jpg`` run found in ``url``; it is
    appended to the module-level ``path`` string to form the destination.

    URLs that contain no such run are skipped with a message (and without
    touching the network) instead of failing with AttributeError after the
    download has already been performed.

    Returns None.
    """
    # Raw string so that ``\.`` is unambiguously a regex escape.
    match = re.search(r'[0-9]*\.jpg', url)
    if match is None:
        print('skipping, no .jpg file name in url: %s' % url)
        return
    name = match.group()

    content = urllib2.urlopen(url).read()
    print('downloading: %s' % name)

    # Binary mode: the payload is image bytes, and text mode would corrupt
    # it on platforms that translate newlines. ``with`` guarantees the file
    # is closed even if the write fails.
    with open(path + name, 'wb') as image_file:
        image_file.write(content)
class parse(HTMLParser.HTMLParser):
    """HTML parser whose start-tag callback drives the wallpaper crawl.

    The ``Index`` flag selects one of two modes:

    * ``Index`` true (index page): every ``<a>`` with exactly four
      attributes whose first attribute is ``class="pic"`` is treated as a
      gallery link. Its URL is stored in the module-level ``star`` and the
      page is fetched with ``get_url``.
    * ``Index`` false (detail page): an ``<img>`` whose first attribute is
      ``id="bigImg"`` is downloaded using the value of its second
      attribute, and an ``<a>`` with exactly four attributes whose second
      attribute is ``class="next"`` is followed unless it points back at
      ``star``.

    Matching is positional (it depends on attribute order in the markup),
    exactly as in the original implementation. Tags with too few
    attributes, or with a valueless URL attribute, are now ignored instead
    of raising IndexError / TypeError in the middle of a crawl.

    NOTE: ``get_url`` is called from inside the tag callback, so each
    followed link is fetched and parsed before the current ``feed`` call
    returns; nesting depth grows with the number of chained pages.
    """

    def __init__(self, Index):
        self.Index = Index
        # Explicit base-class call: HTMLParser is an old-style class on
        # Python 2, so super() is not an option there.
        HTMLParser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Dispatch a start tag to the handler for the current mode.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        HTMLParser; ``value`` is None for attributes written without one.
        """
        if self.Index:
            self._handle_index_tag(tag, attrs)
        else:
            self._handle_detail_tag(tag, attrs)

    def _handle_index_tag(self, tag, attrs):
        """Follow a gallery link found on the index page."""
        global star
        if tag != 'a' or len(attrs) != 4 or attrs[0] != ('class', 'pic'):
            return
        href = attrs[1][1]
        if href is None:
            # Valueless attribute: nothing to append to the site root.
            return
        new = base + href
        print('found a link: %s' % new)
        # Remember this link so detail pages can recognise a "next" link
        # that points back at it.
        star = new
        get_url(new)

    def _handle_detail_tag(self, tag, attrs):
        """Download the big image and follow the "next" link of a detail page."""
        if tag == 'img':
            # Need at least two attributes: the id marker and the one that
            # carries the image URL.
            if len(attrs) >= 2 and attrs[0] == ('id', 'bigImg'):
                image_url = attrs[1][1]
                if image_url:
                    print('found a picture: %s' % image_url)
                    download(image_url)
        elif tag == 'a':
            if len(attrs) == 4 and attrs[1] == ('class', 'next'):
                href = attrs[2][1]
                if href is None:
                    return
                next_url = base + href
                print('found a link: %s' % next_url)
                # Do not follow a link that leads back to the page stored
                # in ``star``.
                if star != next_url:
                    get_url(next_url)
# Entry point, executed at module load: fetch the car-wallpaper index page
# and feed it to a parser in index mode (Index=True). The parser's tag
# callback then follows each gallery link it finds.
Index_url = 'http://desk.zol.com.cn/qiche/'
con = urllib2.urlopen(Index_url).read()
Parser_index = parse(True)
Parser_index.feed(con)
唯一的缺点是,在网站上漂亮的壁纸桌面壁纸 。
。。
版权声明:本文为博主原创文章,未经博主同意,不得转载。
标签:
原文地址:http://www.cnblogs.com/lcchuguo/p/4741489.html