python-bilibili-video

bilibili 视频爬取,低画质爬取

参考博客:https://blog.csdn.net/knighthood2001/article/details/139014757

主要的分析过程和思路参考了他人的博客,并由自己加以验证

import json
import os
import re
import subprocess
import sys
import tempfile # 临时文件
from io import BytesIO

import requests
from lxml import etree
from moviepy.editor import VideoFileClip, AudioFileClip

'''
    需求: 输入相应的bv号,直接下载视频
    参考博客: https://blog.csdn.net/knighthood2001/article/details/139014757

'''

# Bilibili home page; main() requests it before fetching the video page.
url = "https://www.bilibili.com/"

# Default request headers: a desktop Chrome User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/127.0.0.0 Safari/537.36"
    ),
}

# One HTTP session shared by every request made in this module.
session = requests.Session()



def download(bv, video_url, audio_url, new_headers):
    """Download the video and audio streams and merge them with ffmpeg.

    Args:
        bv: BV id of the video; forwarded to ``use_ffmpeg``, which names
            the output file after it.
        video_url: URL of the video stream.
        audio_url: URL of the audio stream.
        new_headers: Headers sent with both stream requests.

    Raises:
        requests.HTTPError: If either stream request returns a 4xx/5xx
            status.
    """
    video_response = session.get(video_url, headers=new_headers)
    # Fail here rather than handing an HTML/JSON error body to ffmpeg as if
    # it were media data.
    video_response.raise_for_status()

    audio_response = session.get(audio_url, headers=new_headers)
    audio_response.raise_for_status()

    use_ffmpeg(video_response.content, audio_response.content, bv)


# 使用 moviepy ----> 速度较慢
def use_moviepy(b_audio, b_video, bv):
    """Merge raw audio and video bytes into ``./{bv}.mp4`` using moviepy.

    This path re-encodes the video with libx264 and is therefore much slower
    than ``use_ffmpeg``. Note that the parameter order here is audio first,
    then video.

    Args:
        b_audio: Audio stream content as bytes.
        b_video: Video stream content as bytes.
        bv: BV id, used as the output file name.
    """
    video_path = None
    audio_path = None
    video = None
    audio = None
    try:
        # moviepy loads clips from file paths, so spill both byte strings to
        # temporary files. delete=False keeps them on disk after the handle
        # is closed; they are removed in the finally block below.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video_file:
            video_path = temp_video_file.name
            temp_video_file.write(b_video)

        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
            audio_path = temp_audio_file.name
            temp_audio_file.write(b_audio)

        video = VideoFileClip(video_path)
        audio = AudioFileClip(audio_path)

        # Attach the audio track to the video and write the result.
        final_video = video.set_audio(audio)
        final_video.write_videofile(f"./{bv}.mp4", codec="libx264", audio_codec="aac")
    finally:
        # Close the clips first so the temporary files are no longer held
        # open when they are deleted.
        for clip in (audio, video):
            if clip is not None:
                clip.close()
        for path in (video_path, audio_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the real outcome of the merge.
                pass

# 使用 ffmpeg ----> 非常非常快!
def use_ffmpeg(b_video, b_audio, bv):
    """Merge raw video and audio bytes into ``{bv}.mp4`` with the ffmpeg CLI.

    The video stream is copied as-is and the audio is encoded as AAC, which
    makes this much faster than ``use_moviepy``.

    Args:
        b_video: Video stream content as bytes.
        b_audio: Audio stream content as bytes.
        bv: BV id, used as the output file name (``{bv}.mp4``). An existing
            file with that name is overwritten.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    video_path = None
    audio_path = None
    try:
        # ffmpeg reads its inputs from paths, so spill both byte strings to
        # temporary files. delete=False keeps them on disk after the handle
        # is closed; they are removed in the finally block below.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video_file:
            video_path = temp_video_file.name
            temp_video_file.write(b_video)

        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
            audio_path = temp_audio_file.name
            temp_audio_file.write(b_audio)

        output_path = f"{bv}.mp4"
        subprocess.run(
            [
                # -y: overwrite an existing output instead of prompting or
                # aborting, matching what the moviepy path does.
                "ffmpeg", "-y", "-i", video_path, "-i", audio_path,
                "-c:v", "copy", "-c:a", "aac", "-strict", "experimental", output_path,
            ],
            # Surface a failed merge instead of silently continuing.
            check=True,
        )
    finally:
        for path in (video_path, audio_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the real outcome of the merge.
                pass


# 难点解析出对应的链接
def get_video_and_audio_url(html):
    """Extract the first video and audio stream URLs from a video page.

    The page is expected to contain a script that assigns a JSON object to
    ``window.__playinfo__``. The assignment is located by name rather than by
    the script's position in ``<head>``, so the lookup keeps working when
    other scripts are added or reordered.

    Args:
        html: Source of the video page, as ``str`` or UTF-8 encoded ``bytes``.

    Returns:
        A ``(video_url, audio_url)`` tuple holding the ``baseUrl`` of the
        first entry of ``data.dash.video`` and of ``data.dash.audio``.

    Raises:
        ValueError: If the page has no ``window.__playinfo__`` assignment or
            the value after it is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass).
        KeyError, IndexError: If the decoded object lacks the expected
            ``data.dash`` video/audio entries.
    """
    if isinstance(html, bytes):
        html = html.decode("utf-8", errors="replace")

    marker = re.search(r"window\.__playinfo__\s*=\s*", html)
    if marker is None:
        raise ValueError("window.__playinfo__ not found in the page source")

    # raw_decode parses exactly one JSON value starting at the given index
    # and ignores whatever follows it (e.g. the closing </script> tag).
    data, _ = json.JSONDecoder().raw_decode(html, marker.end())

    dash = data['data']['dash']
    video_url = dash['video'][0]['baseUrl']
    audio_url = dash['audio'][0]['baseUrl']

    return video_url, audio_url



def main(bv):
    """Download the Bilibili video identified by ``bv``.

    Requests the home page, fetches the video page, extracts the stream
    URLs from it and passes them to ``download``.

    Args:
        bv: BV id of the video, e.g. ``'BV1AE4m1d7XT'``.

    Raises:
        requests.HTTPError: If the video page returns a 4xx/5xx status.
    """
    # Hit the home page first on the shared session; the response itself is
    # not needed (presumably this is done to pick up cookies - confirm).
    session.get(url, headers=headers)

    new_url = f'https://www.bilibili.com/video/{bv}'
    # Stream requests reuse the default headers plus a Referer pointing at
    # the video page.
    new_header = {**headers, "Referer": new_url}

    new_res = session.get(new_url, headers=headers)
    # Stop on an error page instead of failing later while parsing it.
    new_res.raise_for_status()

    video_url, audio_url = get_video_and_audio_url(new_res.text)
    download(bv, video_url, audio_url, new_header)

if __name__ == '__main__':
    # Take the BV id from the first command-line argument when one is given;
    # otherwise fall back to the sample id.
    bv = sys.argv[1] if len(sys.argv) > 1 else 'BV1AE4m1d7XT'
    main(bv)
github