# bilibili 视频爬取(低画质)
# 参考博客:https://blog.csdn.net/knighthood2001/article/details/139014757
# 主要的分析过程和思路均参考他人的博客,并由自己加以验证
import re
import json
import tempfile # 临时文件
import subprocess
import requests
from lxml import etree
from moviepy.editor import VideoFileClip, AudioFileClip
from io import BytesIO
'''
需求: 输入相应的bv号,直接下载视频
参考博客: https://blog.csdn.net/knighthood2001/article/details/139014757
'''
# Site landing page; main() requests it before fetching the video page.
url = "https://www.bilibili.com/"

# Browser-like request headers sent with the page requests.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/127.0.0.0 Safari/537.36"
    ),
}

# One HTTP session shared by every request made in this module.
session = requests.Session()
def download(bv, video_url, audio_url, new_headers):
    """Fetch the video and audio streams and hand them to ``use_ffmpeg``.

    Args:
        bv: BV id of the video; passed through to ``use_ffmpeg`` to name
            the output file.
        video_url: URL of the video-only stream.
        audio_url: URL of the audio-only stream.
        new_headers: Request headers sent with both stream requests.

    Raises:
        requests.HTTPError: If either stream request returns an error status.
        requests.Timeout: If the server stops responding.
    """
    # Without a timeout a stalled connection would block forever.
    video_res = session.get(video_url, headers=new_headers, timeout=30)
    # Fail here rather than feeding an error page's bytes to ffmpeg.
    video_res.raise_for_status()

    audio_res = session.get(audio_url, headers=new_headers, timeout=30)
    audio_res.raise_for_status()

    use_ffmpeg(video_res.content, audio_res.content, bv)
# Merge with moviepy (original author's note: noticeably slow).
def use_moviepy(b_audio, b_video, bv):
    """Merge raw audio and video bytes into ``./{bv}.mp4`` using moviepy.

    Note the parameter order: audio first, then video (the opposite of
    ``use_ffmpeg``).

    Args:
        b_audio: Bytes of the audio stream.
        b_video: Bytes of the video stream.
        bv: BV id, used as the output file name.
    """
    # moviepy loads from file paths, so the bytes are spilled to disk first.
    # The files live inside a temporary directory so they are removed once
    # the merge is done instead of piling up in the system temp folder.
    with tempfile.TemporaryDirectory() as work_dir:
        with tempfile.NamedTemporaryFile(
            suffix=".mp4", dir=work_dir, delete=False
        ) as temp_video_file:
            temp_video_file.write(b_video)
            video_path = temp_video_file.name

        with tempfile.NamedTemporaryFile(
            suffix=".mp3", dir=work_dir, delete=False
        ) as temp_audio_file:
            temp_audio_file.write(b_audio)
            audio_path = temp_audio_file.name

        video = VideoFileClip(video_path)
        try:
            audio = AudioFileClip(audio_path)
            try:
                # Attach the audio track and re-encode to the output file.
                final_video = video.set_audio(audio)
                final_video.write_videofile(
                    f"./{bv}.mp4", codec="libx264", audio_codec="aac"
                )
            finally:
                audio.close()
        finally:
            # Close the readers before the temporary directory is deleted;
            # open handles would block the cleanup on some platforms.
            video.close()
# Merge with the ffmpeg command-line tool (original author's note: very fast).
def use_ffmpeg(b_video, b_audio, bv):
    """Merge raw video and audio bytes into ``{bv}.mp4`` using ffmpeg.

    The video stream is copied without re-encoding; the audio is encoded
    as AAC. An existing output file is overwritten.

    Args:
        b_video: Bytes of the video stream.
        b_audio: Bytes of the audio stream.
        bv: BV id, used as the output file name.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    output_path = f"{bv}.mp4"

    # ffmpeg reads its inputs from file paths, so the bytes are spilled to
    # disk first. Keeping them in a temporary directory guarantees they are
    # deleted afterwards, even when ffmpeg fails.
    with tempfile.TemporaryDirectory() as work_dir:
        with tempfile.NamedTemporaryFile(
            suffix=".mp4", dir=work_dir, delete=False
        ) as temp_video_file:
            temp_video_file.write(b_video)
            video_path = temp_video_file.name

        with tempfile.NamedTemporaryFile(
            suffix=".mp3", dir=work_dir, delete=False
        ) as temp_audio_file:
            temp_audio_file.write(b_audio)
            audio_path = temp_audio_file.name

        subprocess.run(
            [
                # -y: overwrite without prompting, so a re-run never blocks
                # on ffmpeg's interactive "overwrite?" question.
                "ffmpeg", "-y", "-i", video_path, "-i", audio_path,
                "-c:v", "copy", "-c:a", "aac", "-strict", "experimental",
                output_path,
            ],
            # Surface a failed merge instead of silently producing no output.
            check=True,
        )
# The tricky part: extract the stream URLs embedded in the page.
def get_video_and_audio_url(html):
    """Return ``(video_url, audio_url)`` extracted from a video page.

    The page embeds a JSON object in a ``<script>`` element as the right-hand
    side of an assignment. Every script is inspected and the first one whose
    JSON contains ``data.dash.video[0].baseUrl`` and
    ``data.dash.audio[0].baseUrl`` is used, so the result does not depend on
    the script's position inside ``<head>``.

    Args:
        html: HTML text of the video page.

    Returns:
        A ``(video_url, audio_url)`` tuple taken from the first entry of each
        stream list.

    Raises:
        ValueError: If the page cannot be parsed or no script carries the
            expected stream data.
    """
    tree = etree.HTML(html)
    if tree is None:
        # lxml yields None for empty or unparseable input.
        raise ValueError("page HTML is empty or could not be parsed")

    decoder = json.JSONDecoder()
    for script_text in tree.xpath('//script/text()'):
        # The payload is everything after the first '=' in the script.
        _, found, payload = script_text.partition('=')
        if not found:
            continue
        try:
            # raw_decode stops at the end of the JSON value, so trailing
            # JavaScript (e.g. a ';') after the object is tolerated.
            data, _ = decoder.raw_decode(payload.lstrip())
        except json.JSONDecodeError:
            continue
        try:
            dash = data['data']['dash']
            video_url = dash['video'][0]['baseUrl']
            audio_url = dash['audio'][0]['baseUrl']
        except (KeyError, IndexError, TypeError):
            # Valid JSON, but not the stream description we are after.
            continue
        return video_url, audio_url

    raise ValueError("no video/audio stream information found in the page")
def main(bv):
    """Download the video identified by *bv* into the working directory.

    Args:
        bv: BV id of the video, e.g. ``'BV1AE4m1d7XT'``.

    Raises:
        requests.HTTPError: If the video page returns an error status.
    """
    # Visit the landing page first with the shared session (presumably so it
    # picks up the site's cookies); the response itself is not needed, and a
    # bad status here is deliberately not treated as fatal.
    session.get(url, headers=headers, timeout=30)

    new_url = f'https://www.bilibili.com/video/{bv}'
    # Stream requests carry the same headers plus the video page as Referer.
    new_header = {**headers, "Referer": new_url}

    new_res = session.get(new_url, headers=headers, timeout=30)
    # Stop early on 4xx/5xx instead of trying to parse an error page.
    new_res.raise_for_status()

    video_url, audio_url = get_video_and_audio_url(new_res.text)
    download(bv, video_url, audio_url, new_header)
if __name__ == '__main__':
    import sys

    # Use the BV id given as the first command-line argument; without one,
    # fall back to the sample video so a bare run behaves as before.
    bv = sys.argv[1] if len(sys.argv) > 1 else 'BV1AE4m1d7XT'
    main(bv)