进程池爬取梨视频视频资源

时间：2019-03-03 22:11:44 阅读：267 评论：0 收藏：0 [点我收藏+]

标签：skin EOS 模块 att end test www. html append

# 导入requests网络请求模块
import requests
# 导入lxml标签匹配模块
from lxml import etree
# 导入re 正则匹配模块
import re
#导入系统路径模块
import os
# 导入进程模块
import multiprocessing

# 存在视频网址
mylist = []

# HTTP request helper
def Data(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely, which would
            stall the calling pool worker.

    Returns:
        The response body as ``bytes`` (``response.content``).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx so an error page is never parsed as a listing
    # or written to disk as if it were a video.
    response.raise_for_status()
    return response.content

# Collect detail-page URLs from the category listing
def Pipa():
    """Scrape the category page and append each detail-page URL to ``mylist``.

    Downloads ``https://www.pearvideo.com/category_10`` through ``Data``,
    selects every ``<li>`` under the element with id ``categoryList`` and,
    for each one, prefixes the first ``./div/a/@href`` value with the site
    root. Results are appended to the module-level ``mylist``; nothing is
    returned.

    List items that carry no matching link are skipped instead of raising
    ``IndexError``.
    """
    res = Data('https://www.pearvideo.com/category_10')
    # Build an element tree from the raw HTML bytes.
    html = etree.HTML(res)
    for item in html.xpath('//*[@id="categoryList"]/li'):
        hrefs = item.xpath('./div/a/@href')
        if not hrefs:
            # No link inside this list item, so there is nothing to queue.
            continue
        # Join the site root and the href to form the detail-page URL.
        mylist.append('https://www.pearvideo.com/' + str(hrefs[0]))

# Download one video given its detail-page URL
def xiangqing(url, save_dir="C:/Users/nxy/Videos/PLAYERUNKNOWN'S BATTLEGROUNDS/"):
    """Download the video behind a detail page and save it as an ``.mp4`` file.

    Args:
        url: Detail-page URL. Its last ``/``-separated segment becomes the
            file name (with ``.mp4`` appended).
        save_dir: Directory the file is written to; created if missing.

    Raises:
        IndexError: If no ``srcUrl`` assignment is found in the detail page.
    """
    name = str(url).split('/')[-1] + '.mp4'
    print(name)
    # Decode the page to text so the regex runs on the real markup rather
    # than on the repr of a bytes object.
    page = Data(url).decode('utf-8', errors='ignore')
    # The video address is assigned in inline script (srcUrl="..."), not in
    # an element attribute, so it is extracted with a regex instead of xpath.
    matches = re.findall(
        r'srcUrl="(.*?)",vdoUrl=srcUrl,skinRes="//www.pearvideo.com/domain/skin",videoCDN="//video.pearvideo.com";',
        page)
    if not matches:
        raise IndexError('no srcUrl found in detail page {}'.format(url))
    # Fetch the video bytes from the extracted source address.
    video = Data(matches[0])
    # exist_ok avoids the check-then-create race when several pool workers
    # reach this point at the same time.
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, name), "wb") as f:
        f.write(video)


if __name__ == "__main__":
    # Fill mylist with the detail-page URLs before any worker starts.
    Pipa()
    print(mylist)
    # The context manager shuts the pool down once map() has returned, so
    # the four worker processes are not left behind.
    with multiprocessing.Pool(4) as pool:
        # map() blocks until every URL has been handled, calling xiangqing
        # once per URL.
        pool.map(xiangqing, mylist)

进程池爬取梨视频视频资源

标签：skin EOS 模块 att end test www. html append

原文地址：https://www.cnblogs.com/Niuxingyu/p/10467828.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行

进程池 爬取梨视频 视频资源

进程池爬取梨视频视频资源