码迷,mamicode.com
首页 > 其他好文 > 详细

大众点评爬虫

时间:2021-04-12 12:41:35      阅读:0      评论:0      收藏:0      [点我收藏+]

标签:lua   bsp   text   pen   pre   EDA   wow   tree   tor   

import requests
from lxml import etree
import csv

# Request headers sent with every page fetch: a desktop Chrome user agent.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}

# Cookies sent with every page fetch, one entry per cookie name.
# requests expects a name -> value mapping here; storing the whole raw
# "Cookie" header under a single key would send a cookie literally named
# "Cookie" and mangle the first real one.
# NOTE(review): these look like a logged-in browser session (e.g. "dper")
# pasted into source; presumably they expire and must be replaced with your
# own -- confirm, and avoid committing live credentials.
cookies = {
    'fspop': 'test',
    'cy': '70',
    'cye': 'changchun',
    '__guid': '169583271.1176092058052156700.1618064807707.5415',
    '_lxsdk_cuid': '178bc2d991bc8-06f82d2a1ad0c8-3e604809-1fa400-178bc2d991ec8',
    '_lxsdk': '178bc2d991bc8-06f82d2a1ad0c8-3e604809-1fa400-178bc2d991ec8',
    '_hc.v': '8e6ff184-ecf9-beda-8556-f21cac38d549.1618064809',
    's_ViewType': '10',
    'ctu': 'a39fa7b43d5011077a7a6a13b07f7eab2586a77330045fd09fb1ec9fcd4ecbef',
    '_lx_utm': 'utm_source%3DBaidu%26utm_medium%3Dorganic',
    'Hm_lvt_602b80cf8079ae6591966cc70a3940e7': '1618064809,1618108223',
    'dplet': '18e634c44bc3d5ff4dc4d2377c0348ce',
    'dper': 'e18ff3f28e86ce6d07b46b29a43464b7a9697e4b309dc739fb65478c72a0a4e1ac4eeb1e4858e57828c84156f0e7221b89ce58e7174f2e6bf336e124ae5c277bbd1b72b6716c024fccd8bbd09c27536eb08f23c8a6e50a5b20884368c4b64588',
    'll': '7fd06e815b796be3df069dec7836c3df',
    'ua': 'dpuser_7353802477',
    'monitor_count': '40',
    '_lxsdk_s': '178bec40f52-ba4-7b6-bfe%7C%7C173',
    'Hm_lpvt_602b80cf8079ae6591966cc70a3940e7': '1618111329',
}

# Glyph -> digit table for the obfuscated numbers on the listing page.
# Each key is a single private-use code point and each value is the digit it
# stands for. Values are strings because shiftnumber() concatenates them
# into a text result.
# The table is specific to the font the site is serving; look up the current
# mapping yourself before running.
# NOTE: the name shadows the builtin ``dict``; it is kept because
# shiftnumber() reads this module-level name.
dict = {
    '\ue36f': '9',
    '\uea1a': '8',
    '\ue13b': '7',
    '\uf680': '6',
    '\uea7a': '5',
    '\uf6db': '4',
    '\ue1a7': '3',
    '\ueef3': '2',
    '\ue9cd': '0',
}






def gethtml(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``headers`` and ``cookies``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    r = requests.get(url=url, headers=headers, cookies=cookies, timeout=timeout)
    # Decode as UTF-8 explicitly instead of relying on the charset requests
    # infers from the response headers.
    r.encoding = 'utf-8'
    return r.text


def shiftnumber(num_list, mapping=None):
    """Join text fragments into one string, translating mapped glyphs.

    Each item of *num_list* that is a key of the mapping is replaced by the
    mapped value; any other item is kept unchanged. The pieces are then
    concatenated in order.

    Args:
        num_list: Iterable of text fragments (the callers pass XPath
            ``text()`` results).
        mapping: Optional glyph -> digit table. Defaults to the module-level
            ``dict`` table.

    Returns:
        The concatenated string; ``''`` when *num_list* is empty.
    """
    if mapping is None:
        mapping = dict  # module-level glyph table
    # str() keeps the join safe even if a table stores digits as ints.
    return ''.join(str(mapping.get(num, num)) for num in num_list)
# name_list=[]
# total_score_list=[]
# evaluation_num_list =[]
# per_capita_list=[]
# taste_score_list=[]
# environment_score_list=[]
# service_score_list=[]

def _first(nodes, default=''):
    """Return the first item of *nodes*, or *default* when it is empty."""
    return nodes[0] if nodes else default


# Scrape listing pages 1-4 and append one CSV row per shop.
# Column order of every row: shop name, total score, review count,
# per-capita spend, taste, environment, service.
# The file is opened once for the whole run instead of once per row;
# newline='' stops the csv module from emitting blank lines between rows.
with open('长春火锅店.csv', 'a', encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for i in range(1, 5):
        print('正在爬取第{}页'.format(i))
        url = 'http://www.dianping.com/changchun/ch10/g110p{}'.format(i)
        html = gethtml(url)
        tree = etree.HTML(html)
        if tree is None:
            # Nothing parseable came back for this page; move on.
            continue
        li_list = tree.xpath('//*[@id="shop-all-list"]/ul/li')
        for li in li_list:
            # _first() avoids an IndexError when a shop lacks a field.
            name = _first(li.xpath('.//div[@class="tit"]/a/h4/text()'))

            total_score = _first(li.xpath('./div[2]/div[2]/div/div[2]/text()'))

            # The remaining fields are passed through shiftnumber() so that
            # glyph-encoded digits are translated back to plain digits.
            evaluation_num = shiftnumber(li.xpath('./div[2]/div[2]/a[1]/b//text()'))

            per_capita = shiftnumber(li.xpath('./div[2]/div[2]/a[2]/b//text()'))

            taste_score = shiftnumber(li.xpath('./div[2]/span/span[1]/b//text()'))

            environment_score = shiftnumber(li.xpath('./div[2]/span/span[2]/b//text()'))

            service_score = shiftnumber(li.xpath('./div[2]/span/span[3]/b//text()'))

            print('开始保存。。。。')
            writer.writerow([name, total_score, evaluation_num, per_capita,
                             taste_score, environment_score, service_score])

 

大众点评爬虫

标签:lua   bsp   text   pen   pre   EDA   wow   tree   tor   

原文地址:https://www.cnblogs.com/fengqing111/p/14643858.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!