29、哔哩哔哩
<pre><code>moviepy==0.2.3.2
# !/usr/bin/python
# -*- coding:utf-8 -*-
# time: 2019/04/17--08:12
__author__ = 'Henry'
# pip3 install imageio==2.4.1
'''
项目: B站视频下载
版本1: 加密API版,不需要加入cookie,直接即可下载1080p视频
20190422 - 增加多P视频单独下载其中一集的功能
'''
import imageio
imageio.plugins.ffmpeg.download()
import requests, time, hashlib, urllib.request, re, json
from moviepy.editor import *
import os, sys
# 访问API地址
def get_play_list(start_url, cid, quality):
    """Return the download URLs of every segment of one video part.

    Builds a signed query for the ``interface.bilibili.com/v2/playurl``
    endpoint and collects the ``url`` field of each entry in the ``durl``
    list of the JSON response.

    Args:
        start_url: URL sent as the ``Referer`` header of the API request.
        cid: Identifier of the video part; interpolated into the query string.
        quality: Quality code, sent as both ``qn`` and ``quality``.

    Returns:
        list: One URL per ``durl`` entry, in the order the API lists them.
        Previously only the first entry was returned, which truncated any
        video delivered in several segments.

    Raises:
        KeyError: If the JSON response has no ``durl`` field.
    """
    # The "appkey:secret" pair is stored obfuscated; it is decoded by
    # reversing the string and adding 2 to every code point.
    entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
    appkey, sec = ''.join([chr(ord(i) + 2) for i in entropy[::-1]]).split(':')
    params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, quality, quality)
    # The signature is the MD5 of the query string followed by the secret.
    chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest()
    url_api = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)
    headers = {
        'Referer': start_url,  # the request is sent with the page as referer
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    html = requests.get(url_api, headers=headers).json()
    # Keep every segment so that down_video/combine_video can fetch and
    # merge them all.
    return [segment['url'] for segment in html['durl']]
# 下载视频
'''
urllib.urlretrieve 的回调函数:
def callbackfunc(blocknum, blocksize, totalsize):
@blocknum: 已经下载的数据块
@blocksize: 数据块的大小
@totalsize: 远程文件的大小
'''
def Schedule_cmd(blocknum, blocksize, totalsize):
    """Report hook for ``urllib.request.urlretrieve``: redraw a one-line bar.

    Writes ``<percent> [####----] Speed: <rate>`` to stdout followed by a
    carriage return, so the next call overwrites the same console line.
    Reads the module-level ``start_time`` to compute the average speed.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        totalsize: Total size of the remote file; may be -1 (or 0) when the
            server does not report a size.
    """
    recv_size = blocknum * blocksize
    elapsed = time.time() - start_time
    # The first callback can fire within the clock resolution; avoid
    # dividing by zero.
    speed = recv_size / elapsed if elapsed > 0 else 0
    speed_str = " Speed: %s" % format_size(speed)

    # Unknown size -> show 0%. The final block usually overshoots the real
    # size, so clamp to 100% to keep the bar at exactly 50 cells.
    if totalsize > 0:
        fraction = min(recv_size / totalsize, 1.0)
    else:
        fraction = 0.0
    percent_str = "%.2f%%" % (fraction * 100)
    filled = round(fraction * 50)
    bar = ('#' * filled).ljust(50, '-')

    out = sys.stdout
    out.write(percent_str.ljust(8, ' ') + '[' + bar + ']' + speed_str)
    out.flush()
    out.write('\r')
def Schedule(blocknum, blocksize, totalsize):
    """Report hook for ``urllib.request.urlretrieve``: print one line per call.

    Prints ``<percent>-  Speed: <rate>`` on its own line, then sleeps for two
    seconds. Reads the module-level ``start_time`` to compute the average
    speed.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        totalsize: Total size of the remote file; may be -1 (or 0) when the
            server does not report a size.
    """
    recv_size = blocknum * blocksize
    elapsed = time.time() - start_time
    # Guard against a zero interval on the very first callback.
    speed = recv_size / elapsed if elapsed > 0 else 0
    speed_str = " Speed: %s" % format_size(speed)

    # Unknown size -> 0%; clamp the overshoot of the final block to 100%.
    if totalsize > 0:
        fraction = min(recv_size / totalsize, 1.0)
    else:
        fraction = 0.0
    percent_str = "%.2f%%" % (fraction * 100)

    print(percent_str.ljust(6, ' ') + '-' + speed_str)
    sys.stdout.flush()
    # NOTE(review): this sleep runs inside the download callback, so it
    # pauses the transfer for 2 s per reported block — confirm it is intended
    # before wiring this hook into urlretrieve.
    time.sleep(2)
# 字节bytes转化K\M\G
def format_size(bytes):
    """Format a byte count as a string in K, M or G with three decimals.

    Args:
        bytes: Anything ``float()`` accepts (number or numeric string). The
            parameter name shadows the builtin but is kept for callers that
            pass it by keyword.

    Returns:
        str: ``"<value>K"``, ``"<value>M"`` or ``"<value>G"`` using 1024-based
        units, or ``"Error"`` (after printing a message) when the value
        cannot be converted to a float.
    """
    try:
        kb = float(bytes) / 1024
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are reported as "Error"; interrupts and
        # other unrelated exceptions are no longer swallowed.
        print("传入的字节格式不对")
        return "Error"

    if kb < 1024:
        return "%.3fK" % kb
    mb = kb / 1024
    if mb < 1024:
        return "%.3fM" % mb
    return "%.3fG" % (mb / 1024)
# 下载视频
def down_video(video_list, title, start_url, page):
    """Download every URL in *video_list* into ``bilibili_video/<title>``.

    The target directory is created under ``sys.path[0]``. A single URL is
    saved as ``<title>.flv``; several URLs are saved as ``<title>-1.flv``,
    ``<title>-2.flv``, ... in list order. Progress is reported through
    ``Schedule_cmd``.

    Args:
        video_list: Segment URLs to download.
        title: Name used for both the directory and the file names.
        start_url: URL sent as the ``Referer`` header of each download.
        page: Part number, used only in the progress message.
    """
    print('[正在下载P{}段视频,请稍等...]:'.format(page) + title)
    # Downloads go to a per-title folder next to the script.
    currentVideoPath = os.path.join(sys.path[0], 'bilibili_video', title)
    # exist_ok avoids the check-then-create race and is done once, not once
    # per segment.
    os.makedirs(currentVideoPath, exist_ok=True)
    print(currentVideoPath)

    # The headers are identical for every segment, so the opener is built and
    # installed a single time.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0'),
        ('Accept', '*/*'),
        ('Accept-Language', 'en-US,en;q=0.5'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Range', 'bytes=0-'),  # request the whole file from byte 0
        ('Referer', start_url),
        ('Origin', 'https://www.bilibili.com'),
        ('Connection', 'keep-alive'),
    ]
    urllib.request.install_opener(opener)

    several = len(video_list) > 1
    for num, segment_url in enumerate(video_list, start=1):
        if several:
            file_name = r'{}-{}.flv'.format(title, num)
        else:
            file_name = r'{}.flv'.format(title)
        urllib.request.urlretrieve(
            url=segment_url,
            filename=os.path.join(currentVideoPath, file_name),
            reporthook=Schedule_cmd,
        )
# 合并视频
def combine_video(video_list, title):
    """Merge the downloaded ``<name>-N.flv`` segments of *title* into one mp4.

    When *video_list* holds fewer than two entries nothing is merged and only
    a completion message is printed. Otherwise every ``.flv`` file in
    ``bilibili_video/<title>`` whose name ends in ``-<number>`` is loaded in
    numeric order, concatenated, and written as ``<title>.mp4`` in the same
    directory.

    Args:
        video_list: Segment URLs that were downloaded; only its length is used.
        title: Directory name and base name of the merged file.
    """
    currentVideoPath = os.path.join(sys.path[0], 'bilibili_video', title)
    print(currentVideoPath)
    if len(video_list) < 2:
        # A single segment needs no merging.
        print('[视频合并完成]:' + title)
        return

    print('[下载完成,正在合并视频...]:' + title)
    root_dir = currentVideoPath
    # Filter BEFORE sorting: previously the numeric sort key was applied to
    # every directory entry, so any file that was not "<name>-N.<ext>" (for
    # example an mp4 left by an earlier merge) raised ValueError.
    numbered = []
    for file in os.listdir(root_dir):
        index = _segment_index(file)
        if index is not None:
            numbered.append((index, file))

    clips = [VideoFileClip(os.path.join(root_dir, name)) for _, name in sorted(numbered)]
    final_clip = concatenate_videoclips(clips)
    final_clip.to_videofile(os.path.join(root_dir, r'{}.mp4'.format(title)), fps=24, remove_temp=False)
    print('[视频合并完成]' + title)


def _segment_index(file_name):
    """Return N for a file named ``<anything>-N.flv``; None for anything else."""
    stem, ext = os.path.splitext(file_name)
    if ext != '.flv':
        return None
    try:
        return int(stem.rsplit('-', 1)[1])
    except (IndexError, ValueError):
        return None
if __name__ == '__main__':
    # Interactive entry point: ask for an av number (or a video URL) and a
    # quality code, then download every part, or only the part selected with
    # "?p=N" in the URL.
    print('*' * 30 + 'B站视频下载小助手' + '*' * 30)
    start = input('请输入您要下载的B站av号或者视频链接地址:')
    if start.isdigit():
        # Plain av number.
        aid = start
    else:
        # e.g. https://www.bilibili.com/video/av46958874/?spm_id_from=...
        av_match = re.search(r'/av(\d+)/*', start)
        if av_match is None:
            # Previously this crashed with AttributeError on None.
            sys.exit('无法从输入中解析出av号: ' + start)
        aid = av_match.group(1)
    # API that returns the title and the list of parts (cid) for an aid.
    start_url = 'https://api.bilibili.com/x/web-interface/view?aid=' + aid

    # Quality codes offered by the prompt: 80, 64, 32, 16.
    quality = input('请输入您要下载视频的清晰度(1080p:80;720p:64;480p:32;360p:16)(填写80或64或32或16):')

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    html = requests.get(start_url, headers=headers).json()
    data = html['data']
    video_title = data["title"].replace(" ", "_")

    part_match = re.search(r'\?p=(\d+)', start)
    if part_match:
        # Download only the requested part of a multi-part video.
        cid_list = [data['pages'][int(part_match.group(1)) - 1]]
    else:
        # No part given: download all parts.
        cid_list = data['pages']

    for item in cid_list:
        cid = str(item['cid'])
        # Fall back to the video title when the part has no name.
        title = item['part'] or video_title
        # Strip characters that are not allowed in file names.
        title = re.sub(r'[\/\\:*?"<>|]', '', title)
        print('[下载视频的cid]:' + cid)
        print('[下载视频的标题]:' + title)
        page = str(item['page'])
        # Build the per-part URL from the unchanged base URL. Previously
        # start_url itself was extended on every iteration, so later parts
        # were requested with ".../?p=1/?p=2/..." as referer.
        page_url = start_url + "/?p=" + page
        video_list = get_play_list(page_url, cid, quality)
        # Module-level timestamp read by the progress hooks.
        start_time = time.time()
        down_video(video_list, title, page_url, page)
        combine_video(video_list, title)

    # On Windows, open the download folder when everything is finished.
    currentVideoPath = os.path.join(sys.path[0], 'bilibili_video')
    if sys.platform.startswith('win'):
        os.startfile(currentVideoPath)
# 分P视频下载测试: https://www.bilibili.com/video/av19516333/
</code></pre>
<pre><code>
# coding: utf-8
# Writer: Mike_Shine
# Date: 2018-7-11
# 请尊重原创,谢谢。
# 这段代码是输入B站UP主的编号(User_Mid------进入up主的主页 https://space.bilibili.com/91236407/#/video。 其中91236407即为 User_Mid),然后爬取主页内的视频
# 有一点没有做的就是说这段代码只爬100个。如果想要爬所有的视频,你可以加一点点代码做翻页的动作,获取新的包
import json
import re
import os
import requests
import time
import datetime
import pickle
start_time = datetime.datetime.now()
# 从主页拿视频列表的函数
def get_Mainpage_Video(User_Mid):
    """Fetch the first page (at most 100 entries) of an uploader's videos.

    Args:
        User_Mid: Uploader id, as found in ``https://space.bilibili.com/<id>/``.

    Returns:
        list[dict]: One ``{"aid", "title", "author"}`` dict per video, in the
        order returned by the server (the request asks for ``order=pubdate``).
    """
    headers = {
        'Host': 'space.bilibili.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Referer': 'https://space.bilibili.com/' + str(User_Mid) + '/',  # uploader id
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    # The request asks for a page size of 100.
    url = 'https://space.bilibili.com/ajax/member/getSubmitVideos?mid=' + str(
        User_Mid) + '&pagesize=100&tid=0&page=1&keyword=&order=pubdate'
    # NOTE(review): verify=False turns off TLS certificate verification;
    # kept as in the original, but confirm it is still required.
    content = requests.get(url, headers=headers, verify=False).json()
    data = content['data']
    # 'count' is the uploader's total; only one page of up to 100 is fetched.
    limit = min(data['count'], 100)
    # Iterate over what the page actually contains instead of indexing by
    # count, which raised IndexError when the page held fewer entries.
    videos = data.get('vlist') or []
    return [
        {"aid": video['aid'], "title": video['title'], "author": video['author']}
        for video in videos[:limit]
    ]
# 为了替换掉命名时的非法字符,不然下载创建路径时会报错
def sub(s):
    """Remove characters that are illegal in file names from *s*.

    Strips ``? / \\ | : < > *`` and ``"`` so the result can be used to build
    a download path. The double quote was missing from the original set even
    though it is equally invalid in Windows file names.

    Args:
        s: Raw title string.

    Returns:
        str: *s* with every listed character removed.
    """
    # One character class replaces nine separate substitutions (one of which
    # duplicated ':').
    return re.sub(r'[?/\\|:<>*"]', "", s)
# 下面是创建路径
def Get_Path(Video_List):
    """Create (if needed) and return the download directory for an uploader.

    Args:
        Video_List: Non-empty list of video dicts; the ``author`` of the first
            entry names the directory.

    Returns:
        str: ``"D:/video/<author>/"`` (with trailing slash).
    """
    path = r"D:/video/" + Video_List[0]['author'] + "/"
    # exist_ok replaces the racy "check, then create" sequence.
    os.makedirs(path, exist_ok=True)
    return path
# 下载的函数
# 这里拿到URL就下载,因为URL是动态更新的,要注意这个点。
import urllib.request
import requests
import gzip
from io import BytesIO
def openUrl(url):
    """Fetch *url* and return the body decoded as UTF-8 text.

    The body is gunzipped only when it starts with the gzip magic number.
    Previously it was always fed to the gzip decoder, which failed on any
    uncompressed response, and the response object was never closed.

    Args:
        url: Address to request.

    Returns:
        str: The (decompressed) response body.
    """
    it_header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15'}
    req = urllib.request.Request(url, headers=it_header)
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        html = response.read()
    print(html)
    print('*************')
    if html[:2] == b'\x1f\x8b':  # gzip magic number
        html = gzip.decompress(html)
    return html.decode('utf-8')
def download(i, Video_List, path):
    """Download video number *i* of *Video_List* into *path* as an mp4 file.

    The video page is fetched with ``openUrl``, the first ``"base_url"``
    value in it is taken as the media address, and the media is streamed to
    ``<path><sanitized title>.mp4``. Nothing is fetched when that file already
    exists.

    Args:
        i: Index into *Video_List*; also shown (1-based) in the log lines.
        Video_List: List of dicts with at least ``aid`` and ``title``.
        path: Target directory; it is concatenated directly with the file
            name, so it must end with a path separator.

    Raises:
        IndexError: If the page contains no ``"base_url"`` entry.
    """
    from urllib.parse import urlparse  # local import: only used here

    video = Video_List[i]
    aid = str(video['aid'])
    title = sub(str(video['title']))  # strip characters illegal in file names
    target = path + title + '.mp4'
    # Check first, so an already-downloaded video costs no network request.
    if os.path.exists(target):
        print("视频 " + title + " 存在于本地,跳过下载")
        return

    url = 'https://www.bilibili.com/video/av' + aid
    print(url)
    html = openUrl(url)
    print('-----------------------')
    print(html)
    found = re.findall(r'"base_url":"(.*?)","backup_url"', html)
    if not found:
        raise IndexError('no "base_url" entry found in page ' + url)
    video_url = found[0]
    print(video_url)
    # Take the host from the parsed URL; the old 'http://...' pattern failed
    # on https addresses.
    host = urlparse(video_url).netloc

    headers = {
        'Host': host,
        'Connection': 'keep-alive',
        'Origin': 'https://www.bilibili.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        'Accept': '*/*',
        'Referer': 'https://www.bilibili.com/video/av' + aid + "/",
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }

    print("-------------------------STart LINE-------------------------")
    localtime = time.strftime("%Y-%m-%d %H:%M:%S")
    print(localtime + "第" + str(i + 1) + "个视频:" + video['title'] + " 开始下载")
    # Stream into a ".part" file and rename only on success. Previously the
    # final file was created before the request, so a failed download left an
    # empty .mp4 that every later run skipped as "already downloaded".
    partial = target + '.part'
    # NOTE(review): verify=False turns off TLS certificate verification;
    # kept as in the original.
    response = requests.get(video_url, headers=headers, verify=False, stream=True)
    try:
        response.raise_for_status()
        with open(partial, 'wb') as f:
            # Chunked writes keep memory use flat for large videos.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    finally:
        response.close()
    os.replace(partial, target)
    localtime = time.strftime("%Y-%m-%d %H:%M:%S")
    print(localtime + "下载完成")
    print("-------------------------STop LINE-------------------------")
# 主函数
def main():
    """Fetch the uploader's video list and cache it in ``Video_List.txt``.

    Returns:
        list: The video list returned by ``get_Mainpage_Video``.
    """
    User_Mid = 216720985  # change this to the uploader id you want (was 91236407)
    Video_List = get_Mainpage_Video(User_Mid)
    # Use a context manager so the cache file is flushed and closed.
    with open('Video_List.txt', 'wb') as cache_file:
        pickle.dump(Video_List, cache_file)
    return Video_List
if __name__ == '__main__':
    # Load the cached video list, building it first when the cache is
    # missing; previously a missing file crashed because the main() call was
    # commented out.
    cache_path = 'Video_List.txt'
    if os.path.exists(cache_path):
        # NOTE(review): pickle.load can execute code from a crafted file;
        # only load a cache written by this script.
        with open(cache_path, 'rb') as cache_file:
            Video_List = pickle.load(cache_file)
    else:
        Video_List = main()

    # Only the third video of the list is downloaded; slicing avoids an
    # IndexError when the list holds fewer than three entries.
    down_list = Video_List[2:3]
    if down_list:
        target_dir = Get_Path(down_list)
        for i in range(len(down_list)):
            download(i, down_list, target_dir)
    else:
        print("视频列表不足3个,没有可下载的视频")

    # Report the total run time, measured from the module-level start_time.
    end_time = datetime.datetime.now()
    minus = end_time - start_time
    consume = minus.total_seconds()
    print("总共用时:" + str(round(consume, 1)) + "s")
</code></pre>