标签:blog http io ar os 使用 sp for on
因为平时爱好摄影,喜欢看色影无忌论坛上的获奖摄影作品,所以写了个小脚本用来抓取上面的获奖图片,亲自测试可以使用。
# -*- coding: UTF-8 -*-
#作者Rocky Chen
import re, urllib, sys, os, time, urllib2, cookielib, string
class Download:
    """Save the photos reachable from one gallery index page.

    The index page at ``url`` is searched for ``<div class="mshow">`` links.
    Each linked page is fetched in turn and every ``<img class="mimg">``
    image found on it is written to the current working directory.
    """

    # Request headers sent with every page fetch.
    _HEADERS = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    # Link from the index page to one entry's own page.
    _LINK_RE = re.compile(
        r'<div class="mshow"><a href="(.+?)" target="_blank">')
    # Image URL inside an entry page.
    _PHOTO_RE = re.compile(
        r'<img class="mimg" .+? src="(.+?)" .+? border=0/></div>')

    def __init__(self, url):
        """Remember the index page ``url`` that getPhotos() will scrape."""
        self.url = url

    def _fetch(self, url):
        """Return the body of ``url`` and always close the connection."""
        req = urllib2.Request(url, headers=self._HEADERS)
        resp = urllib2.urlopen(req)
        try:
            return resp.read()
        finally:
            resp.close()

    def getPhotos(self, delay=4):
        """Download every photo linked from the index page.

        The images come from the pages the index links to, for example
        http://vision.xitek.com/monthly/yuesaipingxuan/201404/14-149893.html

        Args:
            delay: Seconds to sleep after each image download (default 4,
                the value that used to be hard-coded).

        Raises:
            urllib2.URLError: A page could not be fetched. The error is not
                caught here, so the remaining links are not processed.
        """
        # Local import: urlparse (Python 2 stdlib) is not among the
        # file-level imports.
        import urlparse

        web_content = self._fetch(self.url)
        print(web_content)  # raw HTML dump, kept from the original output
        all_link = self._LINK_RE.findall(web_content)
        print(all_link)
        print("All link done")
        for link in all_link:
            print("One link")
            print(link)
            # urljoin returns absolute links unchanged and resolves relative
            # ones against the index page instead of crashing in Request().
            page_url = urlparse.urljoin(self.url, link)
            web_content_each = self._fetch(page_url)
            print(web_content_each)
            my_photos = self._PHOTO_RE.findall(web_content_each)
            print(my_photos)
            for my_photo in my_photos:
                photo_url = urlparse.urljoin(page_url, my_photo)
                # Use the last path segment as the local file name. The old
                # pattern r'http://.+?/(\w+.jpg)' followed by [0] raised
                # IndexError for https URLs, names containing '-' and any
                # extension other than jpg.
                file_name = urlparse.urlsplit(photo_url).path.rsplit('/', 1)[-1]
                if not file_name:
                    print("Skipping photo without a file name: " + photo_url)
                    continue
                print(file_name)
                urllib.urlretrieve(photo_url, file_name)
                # Pause between downloads.
                time.sleep(delay)
def Usage():
    """Print the one-line command-line usage hint to standard output."""
    usage_text = "Usage: xitek_month_match.py http://--Help"
    print(usage_text)
if __name__ == "__main__":
    # sys.argv[0] is always the script name, so "no URL given" means fewer
    # than two entries. The former `< 1` test could never be true, which left
    # the usage message unreachable.
    if len(sys.argv) < 2:
        Usage()
        # sys.exit() replaces exit(), which the site module provides for
        # interactive use; a non-zero status reports the usage error.
        sys.exit(1)
    # Python 2 idiom: reload(sys) makes setdefaultencoding available again so
    # the process-wide default string encoding can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    # Every command-line argument is treated as one index page to scrape.
    for arg in sys.argv[1:]:
        print(arg)
        xitek = Download(arg)
        xitek.getPhotos()
    print("Done")
自动抓取一月到十二月的获奖图片
标签:blog http io ar os 使用 sp for on
原文地址:http://blog.csdn.net/yagamil/article/details/41942877