码迷,mamicode.com
首页 > 其他好文 > 详细

记一次 爬取LOL全皮肤原画保存到本地的实例

时间:2020-01-04 18:42:19      阅读:100      评论:0      收藏:0      [点我收藏+]

标签:ext   int   parse   color   lis   parent   format   enc   hand   

# Scrape the artwork of every LoL hero skin
import os
import re
import traceback  # exception tracing

import requests
from bs4 import BeautifulSoup
# Fetch the HTML of a page
def get_url(url, hander):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to request.
        hander: Mapping of HTTP request headers, forwarded to
            ``requests.get`` as ``headers``.

    Returns:
        The decoded response text, or ``""`` when the request fails or the
        server answers with an error status.
    """
    try:
        r = requests.get(url, headers=hander, timeout=30)
        r.raise_for_status()
        # Decode with the encoding detected from the body rather than the
        # one announced in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best effort: report the failure and let the caller carry on.
        traceback.print_exc()
        return ""


# Parse the HTML
def prasing_page(lst, html):
    """Append the links found in the ``boxShadow`` list items of ``html``.

    Every ``<a href=...>`` inside an ``<li>`` whose class matches
    ``boxShadow`` contributes its ``href`` value to ``lst``.

    Args:
        lst: List that receives the links; it is modified in place.
        html: Page markup. An empty value (what ``get_url`` returns on
            failure) leaves ``lst`` untouched.

    Returns:
        ``lst`` itself, also when parsing fails part-way.
    """
    if not html:
        return lst
    try:
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all("li", class_=re.compile("boxShadow")):
            # href=True skips anchors without a target instead of raising
            # KeyError and abandoning the rest of the page.
            lst.extend(a["href"] for a in item.find_all("a", href=True))
    except Exception:
        # Best effort: keep whatever was collected before the failure.
        traceback.print_exc()
    return lst

# Characters that Windows does not allow in file names; a skin name
# containing one of them would otherwise make open() fail.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _extract_skins(html, skin_names, hero_names, img_urls):
    """Append the <p> texts, <span> texts and <img> sources found in ``html``.

    Only tags inside ``<div>`` elements whose class matches
    ``othersPifuBox`` are considered. The three lists are filled in
    document order so that entries at the same index belong together.
    """
    soup = BeautifulSoup(html, "html.parser")
    for box in soup.find_all("div", class_=re.compile("othersPifuBox")):
        skin_names.extend(p.string for p in box.find_all("p"))
        # .get() keeps a None placeholder for an <img> without src so the
        # three lists stay aligned; such entries are skipped when saving.
        img_urls.extend(img.get("src") for img in box.find_all("img"))
        hero_names.extend(span.string for span in box.find_all("span"))


# Parse each fetched page, then filter and download its images
def getUrl_prasingpag(lst, hander, save_dir="O:/lol_hero_jpg/"):
    """Fetch every page in ``lst`` and save the skin images it references.

    Args:
        lst: Iterable of page URLs to fetch and parse.
        hander: Mapping of HTTP request headers, used for the page requests
            and for the image requests.
        save_dir: Directory the ``<skin>--<hero>.jpg`` files are written
            to. It is created when missing. Defaults to the location the
            script has always used.

    Returns:
        None. Failures are printed with a traceback and skipped.
    """
    skin_names, hero_names, img_urls = [], [], []
    for page_url in lst:
        try:
            r = requests.get(page_url, headers=hander, timeout=30)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            # Second-level parse of the fetched page
            _extract_skins(r.text, skin_names, hero_names, img_urls)
        except Exception:
            traceback.print_exc()  # report the failure, move to next page
            continue

    # zip() stops at the shortest list, so a page with unbalanced tags can
    # no longer trigger an IndexError for every surplus entry.
    skins = list(zip(skin_names, hero_names, img_urls))
    if not skins:
        return

    try:
        os.makedirs(save_dir, exist_ok=True)
    except OSError:
        traceback.print_exc()
        return

    # Save to local disk
    total = len(skins)
    for done, (skin, hero, img_url) in enumerate(skins, start=1):
        if not (skin and hero and img_url):
            continue  # incomplete entry: nothing sensible to save
        filename = _INVALID_FILENAME_CHARS.sub("_", skin + "--" + hero + ".jpg")
        path = os.path.join(save_dir, filename)
        try:
            # Download first so a failed request leaves no empty file behind.
            r = requests.get(img_url, headers=hander, timeout=30)
            r.raise_for_status()
            with open(path, "wb") as f:
                f.write(r.content)
        except (requests.RequestException, OSError):
            traceback.print_exc()  # report the failure, move to next image
            continue
        # Progress is measured against the number of images, not pages.
        print("\r当前进度>>>>>>>>>>>>>>>>>>{:.0f}%>>>>>>>>>>>>>>>>>>".format(done * 100 / total), end="")


def main():
    """Crawl the numbered index pages and download the images of each one."""
    hander = {"User-Agent": "Mozilla/5.0"}
    deep = 43  # number of index pages to crawl
    for i in range(deep):
        try:
            url = "http://********/hero_" + str(1 + i) + ".shtml"
            html = get_url(url, hander)
            # Use a fresh list per index page; reusing one growing list made
            # every iteration re-fetch all previously processed pages.
            page_links = []
            prasing_page(page_links, html)
            getUrl_prasingpag(page_links, hander)
        except Exception:
            traceback.print_exc()
            continue


if __name__ == "__main__":
    main()

记一次 爬取LOL全皮肤原画保存到本地的实例

标签:ext   int   parse   color   lis   parent   format   enc   hand   

原文地址:https://www.cnblogs.com/llww/p/12149699.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!