码迷,mamicode.com
首页 > 其他好文 > 详细

抖音爬虫

时间:2019-01-28 12:13:17      阅读:468      评论:0      收藏:0      [点我收藏+]

标签:solution   panda   dig   imp   orm   lang   stat   color   write   

import requests
import time
import re
import json
import pandas as pd
# Request headers sent with every HTTP call in this script; the endpoint is
# queried with a "mobile" user-agent value.
headers = {'user-agent': 'mobile'}
def douyin_Spyder(id, url2):
    """Scrape a Douyin user's profile stats and video list, then save them.

    The function performs three things in order:

    1. Requests the user-profile endpoint for ``id`` and prints the
       follower count and total likes read from ``['user']``.
    2. Requests ``url2``, reads its ``['aweme_list']`` entries, downloads
       each video to the desktop folder and collects per-video statistics.
    3. Writes the collected statistics to an Excel file named after the
       current local date.

    Args:
        id: User id substituted into the profile URL. (The name shadows the
            builtin ``id``; it is kept because it is part of the signature.)
        url2: URL whose JSON response contains an ``aweme_list`` array.
            NOTE(review): presumably the user's video-feed API URL -- confirm
            with the caller.

    Raises:
        KeyError / IndexError: if the JSON responses do not have the keys
            read below, or a video has fewer than two play URLs.
        OSError: if the hard-coded output directories do not exist.
    """
    profile_url = (
        'http://aweme.snssdk.com/aweme/v1/user/?user_id={}&retry_type=retry_http'
        '&iid=59238161664&device_id=62578609382&ac=wifi&channel=aweGW&aid=1128'
        '&app_name=aweme&version_code=230&version_name=2.3.0&device_platform=android'
        '&ssmix=a&device_type=CHM-TL00H&device_brand=Honor&language=zh&os_api=19'
        '&os_version=4.4.4&uuid=745270478576539&openudid=589e358ee90e53'
        '&manifest_version_code=230&resolution=720*1280&dpi=320'
        '&update_version_code=2302&_rticket=1548395034447&ts={}'
        '&as=a1659a843a314c425a4355&cp=a518ca55a1a04624e1gkoo'
        '&mas=0141e7dcb9b69675674bffb55a194f1c3facaccc2c86ac4c2cc62c'
    ).format(id, time.time())
    data = requests.get(profile_url, headers=headers)
    # The original issued this request twice in a row; once is enough.
    data2 = requests.get(url2, headers=headers)

    content = data.content.decode('utf-8')
    dict_json = json.loads(content)
    print('******主页数据******')
    print('粉丝数:', dict_json['user']['follower_count'])
    print('获赞数:', dict_json['user']['total_favorited'])

    content2 = data2.content.decode('utf-8')
    dict_json2 = json.loads(content2)
    aweme_list = dict_json2['aweme_list']
    print('******视频区数据******')

    comment_count_list = []
    digg_count_list = []
    share_count_list = []
    play_list = []
    aweme_ids = []
    desc = []
    for i, key in enumerate(aweme_list):
        # Other fields that can be printed while debugging: key['desc'],
        # key['aweme_id'], key['share_url'] and the counters under
        # key['statistics'].
        # The second entry of url_list is the one downloaded.
        video = key['video']['play_addr']['url_list'][1]
        print('视频地址:', video)
        res = requests.get(video, headers=headers)
        # Files are numbered by their position in aweme_list.
        with open(r'C:/Users/Administrator/Desktop/B站视频/迪丽热巴' + str(i) + '.mp4', 'wb') as f:
            f.write(res.content)

        stats = key['statistics']
        comment_count_list.append(stats['comment_count'])
        digg_count_list.append(stats['digg_count'])
        share_count_list.append(stats['share_count'])
        play_list.append(key['share_url'])
        aweme_ids.append(key['aweme_id'])
        desc.append(key['desc'])

    df = pd.DataFrame({
        'ID': aweme_ids,
        '标题': desc,
        '链接地址': play_list,
        '评论数': comment_count_list,
        '点赞数': digg_count_list,
        '转发量': share_count_list,
    })
    df = df.set_index('ID')
    tim = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    df.to_excel('C:/Users/Administrator/Desktop/' + str(tim) + '-7.xlsx')


# Script entry point: prompt for the video-list URL, then scrape the
# hard-coded user id.
if __name__ == '__main__':
    url2 = input('url:')
    douyin_Spyder(79302973596, url2)

 

抖音爬虫

标签:solution   panda   dig   imp   orm   lang   stat   color   write   

原文地址:https://www.cnblogs.com/snackpython/p/10329204.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!