
# Method 1: scrape a single search page with requests + lxml, save with urllib
import requests
from lxml import etree
from urllib import request

images_url = 'https://www.fabiaoqing.com/search/search/keyword/%E8%9C%A1%E7%AC%94%E5%B0%8F%E6%96%B0/type/bq/page/1.html'
head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
req_url_images = requests.get(url=images_url, headers=head)
html_images = etree.HTML(req_url_images.text)
# XPath: take the data-original attribute of every img inside the result divs
images_get = "//div[@class='searchbqppdiv tagbqppdiv']//img/@data-original"
images_last = html_images.xpath(images_get)
# Save the files with urllib's built-in urlretrieve;
# a running index is used as the image file name
Indexes = 1
for images_save in images_last:
    # All images on this site end in .jpg, which keeps naming simple
    request.urlretrieve(images_save, "image/" + '%s.jpg' % Indexes)
    Indexes += 1
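
If you would rather stay entirely on requests instead of mixing in urllib for the download step, the same images can be saved by writing response.content to a file. A minimal sketch, assuming the images_last list and head dict from the block above:

# Sketch: save each image with requests instead of urllib's urlretrieve
# (assumes images_last already holds the image URLs and the image/ folder exists)
Indexes = 1
for images_save in images_last:
    resp = requests.get(images_save, headers=head)
    with open("image/%s.jpg" % Indexes, "wb") as f:
        f.write(resp.content)  # raw bytes of the jpg
    Indexes += 1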
# Method 2: the same single page, but fetched with urllib.request instead of requests
from urllib import request
from lxml import etree

images_url = 'https://www.fabiaoqing.com/search/search/keyword/%E8%9C%A1%E7%AC%94%E5%B0%8F%E6%96%B0/type/bq/page/1.html'
head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
new_way = request.Request(url=images_url, headers=head)
req_url_images = request.urlopen(new_way)
html_images = etree.HTML(req_url_images.read().decode('utf-8'))
images_get = "//div[@class='searchbqppdiv tagbqppdiv']//img/@data-original"
images_last = html_images.xpath(images_get)
# Save the files with urllib's built-in urlretrieve;
# a running index is used as the image file name
Indexes = 1
for images_save in images_last:
    # All images on this site end in .jpg, which keeps naming simple
    request.urlretrieve(images_save, "image/" + '%s.jpg' % Indexes)
    Indexes += 1
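
Both versions above assume an image/ folder already exists next to the script; urlretrieve raises FileNotFoundError otherwise. A small sketch that creates it up front:

import os

# Create the output folder once before downloading (no error if it already exists)
os.makedirs("image", exist_ok=True)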
# Method 3: loop over the first 10 result pages with requests
import requests
from lxml import etree
from urllib import request

pages = 1
Indexes = 1
while pages < 11:
    # The page number is the only part of the URL that changes
    images_url = 'https://www.fabiaoqing.com/search/search/keyword/%E8%9C%A1%E7%AC%94%E5%B0%8F%E6%96%B0/type/bq/page/' + '%s.html' % pages
    head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
    req_url_images = requests.get(url=images_url, headers=head)
    html_images = etree.HTML(req_url_images.text)
    images_get = "//div[@class='searchbqppdiv tagbqppdiv']//img/@data-original"
    images_last = html_images.xpath(images_get)
    # Save the files with urllib's built-in urlretrieve;
    # the running index is used as the image file name across all pages
    for images_save in images_last:
        # All images on this site end in .jpg, which keeps naming simple
        request.urlretrieve(images_save, "image/" + '%s.jpg' % Indexes)
        print('Downloaded %s images so far' % Indexes)
        Indexes += 1
    pages += 1
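
One caveat: urlretrieve aborts the whole run on the first URL that fails, so a single broken link kills the crawl. A hedged variant of the inner loop (same variable names as above) could skip bad URLs and keep going:

from urllib import request, error

for images_save in images_last:
    try:
        request.urlretrieve(images_save, "image/%s.jpg" % Indexes)
    except (error.URLError, ValueError) as e:
        # Skip this image and continue instead of crashing the crawl
        print('Skipping %s: %s' % (images_save, e))
        continue
    print('Downloaded %s images so far' % Indexes)
    Indexes += 1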
# Method 4: the same 10-page loop, fetching with urllib.request
from urllib import request
from lxml import etree

pages = 1
Indexes = 1
while pages < 11:
    images_url = 'https://www.fabiaoqing.com/search/search/keyword/%E8%9C%A1%E7%AC%94%E5%B0%8F%E6%96%B0/type/bq/page/' + '%s.html' % pages
    head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
    new_way = request.Request(url=images_url, headers=head)
    req_url_images = request.urlopen(new_way)
    html_images = etree.HTML(req_url_images.read().decode('utf-8'))
    images_get = "//div[@class='searchbqppdiv tagbqppdiv']//img/@data-original"
    images_last = html_images.xpath(images_get)
    # Save the files with urllib's built-in urlretrieve;
    # the running index is used as the image file name across all pages
    for images_save in images_last:
        # All images on this site end in .jpg, which keeps naming simple
        request.urlretrieve(images_save, "image/" + '%s.jpg' % Indexes)
        print('Downloaded %s images so far' % Indexes)
        Indexes += 1
    pages += 1
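
Putting the pieces together, here is a more self-contained sketch of the same 10-page crawl: function-wrapped, output folder created automatically, and downloads done with requests. The helper name download_pages and its parameters are only illustrative, not part of the original script.

import os
import requests
from lxml import etree

def download_pages(page_count=10, out_dir="image"):
    """Illustrative helper: crawl the first page_count result pages and save every image."""
    os.makedirs(out_dir, exist_ok=True)
    head = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'}
    index = 1
    for page in range(1, page_count + 1):
        url = ('https://www.fabiaoqing.com/search/search/keyword/'
               '%E8%9C%A1%E7%AC%94%E5%B0%8F%E6%96%B0/type/bq/page/' + '%s.html' % page)
        resp = requests.get(url, headers=head)
        html = etree.HTML(resp.text)
        for img_url in html.xpath("//div[@class='searchbqppdiv tagbqppdiv']//img/@data-original"):
            img = requests.get(img_url, headers=head)
            with open(os.path.join(out_dir, '%s.jpg' % index), 'wb') as f:
                f.write(img.content)
            print('Downloaded %s images so far' % index)
            index += 1

download_pages()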
Original post: https://www.cnblogs.com/lcyzblog/p/11285962.html