Python 爬取 m3u8 视频文件

import requests
import os
import aiohttp
import asyncio


# Address of the first playlist; it only points at the second playlist.
first_m3u8_url = "https://cdn.zoubuting.com/20221129/waHIjBSS/index.m3u8"
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.56'
}

# Fetch the first playlist and derive the second playlist URL from it.
# A timeout keeps a stalled server from hanging the script forever, and
# raise_for_status() stops an HTTP error page from being parsed as a playlist.
first_response = requests.get(url=first_m3u8_url, headers=headers, timeout=30)
first_response.raise_for_status()
first_m3u8_url_text = first_response.text

second_m3u8_url = None
for line in first_m3u8_url_text.split('\n'):
    # strip() drops the "\r" that CRLF line endings would leave in the URL.
    line = line.strip()
    if line.startswith("/"):
        # Host-relative path; when several lines match, the last one wins.
        second_m3u8_url = "https://cdn.zoubuting.com" + line
if second_m3u8_url is None:
    raise RuntimeError("no playlist path found in " + first_m3u8_url)

# Read the content of the second playlist.
second_response = requests.get(url=second_m3u8_url, headers=headers, timeout=30)
second_response.raise_for_status()
second_url_text = second_response.text

# Collect every absolute https:// link (the .ts segments) into a list.
ts_url_list = [
    ts_url
    for ts_url in (entry.strip() for entry in second_url_text.split('\n'))
    if ts_url.startswith("https://")
]

# Create the download directory; exist_ok avoids the check-then-create race.
dirName = "movieDown"
os.makedirs(dirName, exist_ok=True)

# 下载所有的ts文件
# for url in ts_url_list:
#     ts_name = url.split('/')[-1]
#     download_path = dirName + '/' + ts_name
#     data = requests.get(url=url,headers=headers).content
#     with open(download_path,'wb') as fp:
#         fp.write(data)
#         print(ts_name,"下载完成!")

# 协程改写
# 发起请求
async def get_url_data(url):
    """Fetch *url* with aiohttp and return its raw body.

    A new ``aiohttp.ClientSession`` is opened for this single request and
    closed when it finishes. The request is sent with the module-level
    ``headers``.

    Args:
        url: Address to download.

    Returns:
        A ``(body, url)`` tuple: the response body as bytes together with
        the URL that was requested.
    """
    async with aiohttp.ClientSession() as session:
        # session.get() is itself an async context manager, so it is
        # entered directly instead of being awaited first.
        async with session.get(url=url, headers=headers) as response:
            return await response.read(), url

# 创建回调函数
def download(t):
    """Done-callback that saves one fetched segment into the download directory.

    Args:
        t: Finished task/future whose ``result()`` is a ``(data, url)``
            tuple: the downloaded bytes and the URL they came from.

    Raises:
        Whatever ``t.result()`` raises when the task failed or was cancelled.
    """
    data, url = t.result()
    # The file is named after the last path component of the URL.
    ts_name = url.split('/')[-1]
    download_path = os.path.join(dirName, ts_name)
    # Write the bytes the task already fetched. Requesting the URL a second
    # time here with a blocking call would stall the event loop and double
    # the network traffic.
    with open(download_path, 'wb') as fp:
        fp.write(data)
        print(ts_name, "下载完成!")
# 执行
# An explicitly created loop is used for scheduling, because looking up an
# implicit "current" loop outside of a coroutine is deprecated in recent
# Python versions.
loop = asyncio.new_event_loop()
tasks = []
for url in ts_url_list:
    # Create one task per segment URL.
    task = loop.create_task(get_url_data(url))
    # The callback writes the segment to disk once the fetch has finished.
    task.add_done_callback(download)
    tasks.append(task)
# asyncio.wait() raises ValueError on an empty collection, so skip the run
# when no segment URLs were found.
if tasks:
    loop.run_until_complete(asyncio.wait(tasks))
————————
import requests
import os
import aiohttp
import asyncio


# Address of the first playlist; it only points at the second playlist.
first_m3u8_url = "https://cdn.zoubuting.com/20221129/waHIjBSS/index.m3u8"
headers = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.56'
}

# Fetch the first playlist and derive the second playlist URL from it.
# A timeout keeps a stalled server from hanging the script forever, and
# raise_for_status() stops an HTTP error page from being parsed as a playlist.
first_response = requests.get(url=first_m3u8_url, headers=headers, timeout=30)
first_response.raise_for_status()
first_m3u8_url_text = first_response.text

second_m3u8_url = None
for line in first_m3u8_url_text.split('\n'):
    # strip() drops the "\r" that CRLF line endings would leave in the URL.
    line = line.strip()
    if line.startswith("/"):
        # Host-relative path; when several lines match, the last one wins.
        second_m3u8_url = "https://cdn.zoubuting.com" + line
if second_m3u8_url is None:
    raise RuntimeError("no playlist path found in " + first_m3u8_url)

# Read the content of the second playlist.
second_response = requests.get(url=second_m3u8_url, headers=headers, timeout=30)
second_response.raise_for_status()
second_url_text = second_response.text

# Collect every absolute https:// link (the .ts segments) into a list.
ts_url_list = [
    ts_url
    for ts_url in (entry.strip() for entry in second_url_text.split('\n'))
    if ts_url.startswith("https://")
]

# Create the download directory; exist_ok avoids the check-then-create race.
dirName = "movieDown"
os.makedirs(dirName, exist_ok=True)

# 下载所有的ts文件
# for url in ts_url_list:
#     ts_name = url.split('/')[-1]
#     download_path = dirName + '/' + ts_name
#     data = requests.get(url=url,headers=headers).content
#     with open(download_path,'wb') as fp:
#         fp.write(data)
#         print(ts_name,"下载完成!")

# 协程改写
# 发起请求
async def get_url_data(url):
    """Fetch *url* with aiohttp and return its raw body.

    A new ``aiohttp.ClientSession`` is opened for this single request and
    closed when it finishes. The request is sent with the module-level
    ``headers``.

    Args:
        url: Address to download.

    Returns:
        A ``(body, url)`` tuple: the response body as bytes together with
        the URL that was requested.
    """
    async with aiohttp.ClientSession() as session:
        # session.get() is itself an async context manager, so it is
        # entered directly instead of being awaited first.
        async with session.get(url=url, headers=headers) as response:
            return await response.read(), url

# 创建回调函数
def download(t):
    """Done-callback that saves one fetched segment into the download directory.

    Args:
        t: Finished task/future whose ``result()`` is a ``(data, url)``
            tuple: the downloaded bytes and the URL they came from.

    Raises:
        Whatever ``t.result()`` raises when the task failed or was cancelled.
    """
    data, url = t.result()
    # The file is named after the last path component of the URL.
    ts_name = url.split('/')[-1]
    download_path = os.path.join(dirName, ts_name)
    # Write the bytes the task already fetched. Requesting the URL a second
    # time here with a blocking call would stall the event loop and double
    # the network traffic.
    with open(download_path, 'wb') as fp:
        fp.write(data)
        print(ts_name, "下载完成!")
# 执行
# An explicitly created loop is used for scheduling, because looking up an
# implicit "current" loop outside of a coroutine is deprecated in recent
# Python versions.
loop = asyncio.new_event_loop()
tasks = []
for url in ts_url_list:
    # Create one task per segment URL.
    task = loop.create_task(get_url_data(url))
    # The callback writes the segment to disk once the fetch has finished.
    task.add_done_callback(download)
    tasks.append(task)
# asyncio.wait() raises ValueError on an empty collection, so skip the run
# when no segment URLs were found.
if tasks:
    loop.run_until_complete(asyncio.wait(tasks))