python


29、哔哩哔哩

# -*- coding:utf-8 -*-
"""Two independent Bilibili download scripts that were published on one page.

Script 1 (author 'Henry', 2019/04/17): Bilibili video download, version 1,
signed-API edition. No cookie is needed; 1080p video can be downloaded
directly. 2019-04-22: added downloading a single episode of a multi-part
video.

Script 2 (author Mike_Shine, 2018-7-11; please respect the original work):
takes an uploader id (``User_Mid`` -- open the uploader's home page, e.g.
https://space.bilibili.com/91236407/#/video, where 91236407 is the
``User_Mid``) and downloads videos listed on that home page. Only the first
100 videos are listed; add paging to fetch more.

Requirements noted by the original authors: ``moviepy==0.2.3.2`` and
``pip3 install imageio==2.4.1``. Both scripts also use ``requests``.

NOTE(review): both ``__main__`` guards are kept, so executing this file runs
script 1 and then script 2, exactly as the concatenated original would. The
two scripts should ideally live in separate files.
"""
import datetime
import gzip
import hashlib
import json
import os
import pickle
import re
import sys
import time
import urllib.parse
import urllib.request
from io import BytesIO

# Third-party packages (requests, imageio, moviepy) are imported inside the
# functions that use them. The two scripts need different subsets, importing
# moviepy is slow and needs an ffmpeg binary, and the pure helpers below stay
# usable when those packages are not installed.

__author__ = 'Henry'

# ============================================================================
# Script 1 of 2: download one video (or one part of it) by av number / URL
# ============================================================================

_CHROME55_UA = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')

# File names written by down_video() for multi-segment videos end in "-<n>.flv".
_SEGMENT_NAME = re.compile(r'-(\d+)\.flv$')

# time.time() at which the segment currently being fetched was started; set by
# down_video() and read by the progress hooks.
_download_started_at = None

# time.time() of the last line printed by Schedule().
_last_line_report = 0.0


def get_play_list(start_url, cid, quality):
    """Query the signed playurl API and return the list of video URLs.

    :param start_url: URL sent as the ``Referer`` header.
    :param cid: cid of the video part, as a string.
    :param quality: quality code as entered by the user (80/64/32/16).
    :return: a one-element list holding the URL of the first ``durl`` entry.
    :raises KeyError: if the API response carries no ``durl`` field.
    """
    import requests

    # The "appkey:secret" pair is stored reversed with every character shifted
    # down by two code points.
    entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
    appkey, sec = ''.join(chr(ord(ch) + 2) for ch in reversed(entropy)).split(':')
    params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, quality, quality)
    # The signature is the MD5 of the exact query string followed by the secret.
    chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest()
    url_api = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)
    headers = {
        'Referer': start_url,  # the Referer header has to be sent
        'User-Agent': _CHROME55_UA,
    }
    html = requests.get(url_api, headers=headers).json()
    # NOTE(review): only the first segment is returned, as in the original;
    # down_video()/combine_video() would also handle several segments.
    video_list = [html['durl'][0]['url']]
    return video_list


def _progress_snapshot(blocknum, blocksize, totalsize):
    """Return ``(fraction_done, speed_text)`` for a urlretrieve callback."""
    received = blocknum * blocksize
    if _download_started_at is None:
        elapsed = 0.0
    else:
        elapsed = time.time() - _download_started_at
    speed = received / elapsed if elapsed > 0 else 0.0
    # urlretrieve reports totalsize <= 0 when the size is unknown, and the
    # last block usually overshoots the real size, so guard and clamp.
    fraction = min(received / totalsize, 1.0) if totalsize > 0 else 0.0
    return fraction, " Speed: %s" % format_size(speed)


def Schedule_cmd(blocknum, blocksize, totalsize):
    """``urllib.request.urlretrieve`` report hook drawing a one-line progress bar.

    :param blocknum: number of blocks transferred so far.
    :param blocksize: size of one block in bytes.
    :param totalsize: size of the remote file, or <= 0 when unknown.
    """
    fraction, speed_str = _progress_snapshot(blocknum, blocksize, totalsize)
    percent_str = "%.2f%%" % (fraction * 100)
    bar = ('#' * round(fraction * 50)).ljust(50, '-')
    out = sys.stdout
    out.write(percent_str.ljust(8, ' ') + '[' + bar + ']' + speed_str)
    out.flush()
    # Return to the start of the line so the next report overwrites this one.
    out.write('\r')


def Schedule(blocknum, blocksize, totalsize):
    """Report hook that prints one progress line at most every two seconds.

    Intended for outputs that cannot redraw a line. Reports arriving within
    two seconds of the previous line are skipped instead of sleeping, so the
    hook never stalls the transfer. The final report is always printed.

    :param blocknum: number of blocks transferred so far.
    :param blocksize: size of one block in bytes.
    :param totalsize: size of the remote file, or <= 0 when unknown.
    """
    global _last_line_report
    now = time.time()
    finished = totalsize > 0 and blocknum * blocksize >= totalsize
    if not finished and now - _last_line_report < 2:
        return
    _last_line_report = now
    fraction, speed_str = _progress_snapshot(blocknum, blocksize, totalsize)
    percent_str = "%.2f%%" % (fraction * 100)
    print(percent_str.ljust(6, ' ') + '-' + speed_str)
    sys.stdout.flush()


def format_size(bytes):
    """Format a byte count as a string in K, M or G (1024-based, 3 decimals).

    :param bytes: a number, or anything ``float()`` accepts.
    :return: e.g. ``"1.500K"``; ``"Error"`` if the value is not numeric.
    """
    # The parameter name shadows the builtin but is kept for compatibility.
    try:
        kb = float(bytes) / 1024
    except (TypeError, ValueError):
        print("传入的字节格式不对")
        return "Error"
    if kb < 1024:
        return "%.3fK" % (kb)
    mb = kb / 1024
    if mb < 1024:
        return "%.3fM" % (mb)
    return "%.3fG" % (mb / 1024)


def down_video(video_list, title, start_url, page):
    """Download every URL in *video_list* into ``bilibili_video/<title>/``.

    A single URL is saved as ``<title>.flv``; several URLs are saved as
    ``<title>-<n>.flv`` (n starting at 1) for combine_video() to merge.

    :param video_list: list of segment URLs.
    :param title: sanitised title, used as directory and file name.
    :param start_url: URL sent as the ``Referer`` header (required by the host).
    :param page: part number, used only in the progress message.
    """
    global _download_started_at
    print('[正在下载P{}段视频,请稍等...]:'.format(page) + title)
    # The script's own directory is the download root.
    currentVideoPath = os.path.join(sys.path[0], 'bilibili_video', title)
    os.makedirs(currentVideoPath, exist_ok=True)
    print(currentVideoPath)

    # urlretrieve has no headers argument, so a global opener carries them.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0'),
        ('Accept', '*/*'),
        ('Accept-Language', 'en-US,en;q=0.5'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Range', 'bytes=0-'),  # must be bytes=0- to receive the complete video
        ('Referer', start_url),  # mandatory
        ('Origin', 'https://www.bilibili.com'),
        ('Connection', 'keep-alive'),
    ]
    urllib.request.install_opener(opener)

    several = len(video_list) > 1
    for num, url in enumerate(video_list, start=1):
        if several:
            file_name = r'{}-{}.flv'.format(title, num)
        else:
            file_name = r'{}.flv'.format(title)
        _download_started_at = time.time()
        urllib.request.urlretrieve(url=url,
                                   filename=os.path.join(currentVideoPath, file_name),
                                   reporthook=Schedule_cmd)
        # Finish the progress line so later output does not overwrite it.
        sys.stdout.write('\n')


def _load_moviepy():
    """Import moviepy on demand and return what combine_video() needs.

    ``imageio.plugins.ffmpeg.download()`` is called first, as the original
    script did before importing moviepy (API of the pinned imageio 2.4.1).
    """
    import imageio
    imageio.plugins.ffmpeg.download()
    from moviepy.editor import VideoFileClip, concatenate_videoclips
    return VideoFileClip, concatenate_videoclips


def combine_video(video_list, title):
    """Merge the downloaded ``<title>-<n>.flv`` segments into ``<title>.mp4``.

    Nothing is merged when *video_list* has fewer than two entries.

    :param video_list: the URL list that was passed to down_video().
    :param title: sanitised title, i.e. the directory and file base name.
    """
    currentVideoPath = os.path.join(sys.path[0], 'bilibili_video', title)
    print(currentVideoPath)
    if len(video_list) < 2:
        # A single segment needs no merging.
        print('[视频合并完成]:' + title)
        return

    print('[下载完成,正在合并视频...]:' + title)
    # Keep only real segment files; other files in the directory (for
    # example an earlier merged .mp4) must not break the numeric sort.
    segments = []
    for file_name in os.listdir(currentVideoPath):
        match = _SEGMENT_NAME.search(file_name)
        if match:
            segments.append((int(match.group(1)), file_name))
    if not segments:
        print('[未找到可合并的视频分段]:' + title)
        return
    segments.sort()

    VideoFileClip, concatenate_videoclips = _load_moviepy()
    clips = []
    try:
        for _, file_name in segments:
            clips.append(VideoFileClip(os.path.join(currentVideoPath, file_name)))
        final_clip = concatenate_videoclips(clips)
        final_clip.write_videofile(os.path.join(currentVideoPath, r'{}.mp4'.format(title)),
                                   fps=24, remove_temp=False)
    finally:
        for clip in clips:
            # close() is not available on every moviepy release.
            close = getattr(clip, 'close', None)
            if callable(close):
                close()
    print('[视频合并完成]' + title)


def _extract_aid(user_input):
    """Return the av number found in *user_input*, or None if there is none."""
    if user_input.isdigit():
        return user_input
    # e.g. https://www.bilibili.com/video/av46958874/?spm_id_from=333.334...
    match = re.search(r'/av(\d+)/*', user_input)
    return match.group(1) if match else None


def _run_av_downloader():
    """Interactive entry point of script 1."""
    import requests

    print('*' * 30 + 'B站视频下载小助手' + '*' * 30)
    start = input('请输入您要下载的B站av号或者视频链接地址:').strip()
    aid = _extract_aid(start)
    if aid is None:
        sys.exit('无法从输入中识别av号: ' + start)
    # API that returns the cid list for an aid.
    api_url = 'https://api.bilibili.com/x/web-interface/view?aid=' + aid

    # Quality codes offered by the API: 80 = 1080P, 64 = 720P, 32 = 480P,
    # 16 = 360P (accept_format: flv, flv720, flv480, flv360).
    quality = input('请输入您要下载视频的清晰度(1080p:80;720p:64;480p:32;360p:16)(填写80或64或32或16):')

    # Fetch the cid and title of every part.
    headers = {'User-Agent': _CHROME55_UA}
    html = requests.get(api_url, headers=headers).json()
    data = html['data']
    video_title = data["title"].replace(" ", "_")

    part_match = re.search(r'\?p=(\d+)', start)
    if part_match:
        # Download only the requested part of a multi-part video.
        cid_list = [data['pages'][int(part_match.group(1)) - 1]]
    else:
        # Without ?p= every part is downloaded.
        cid_list = data['pages']

    for item in cid_list:
        cid = str(item['cid'])
        title = item['part'] or video_title
        # Drop characters that are not allowed in file names.
        title = re.sub(r'[\/\\:*?"<>|]', '', title)
        print('[下载视频的cid]:' + cid)
        print('[下载视频的标题]:' + title)
        page = str(item['page'])
        # Built from the base URL each time so the suffix does not accumulate
        # from one part to the next.
        page_url = api_url + "/?p=" + page
        video_list = get_play_list(page_url, cid, quality)
        down_video(video_list, title, page_url, page)
        combine_video(video_list, title)

    # On Windows, open the download directory when everything is done.
    currentVideoPath = os.path.join(sys.path[0], 'bilibili_video')
    if sys.platform.startswith('win'):
        os.startfile(currentVideoPath)


if __name__ == '__main__':
    # Multi-part test video: https://www.bilibili.com/video/av19516333/
    _run_av_downloader()


# ============================================================================
# Script 2 of 2: download the videos listed on an uploader's home page
# ============================================================================

start_time = datetime.datetime.now()

_CHROME67_UA = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                '(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36')

# Characters removed from titles before they are used as file names.
_ILLEGAL_FILENAME_CHARS = str.maketrans('', '', '?/\\|:<>*"')


def get_Mainpage_Video(User_Mid):
    """Return up to 100 videos listed on the uploader's home page.

    :param User_Mid: uploader id (the number in the space.bilibili.com URL).
    :return: list of ``{"aid": ..., "title": ..., "author": ...}`` dicts.
    """
    import requests

    headers = {
        'Host': 'space.bilibili.com',
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': _CHROME67_UA,
        'Referer': 'https://space.bilibili.com/' + str(User_Mid) + '/',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    # The request URL needs the host prefix; 100 is the largest page size.
    url = ('https://space.bilibili.com/ajax/member/getSubmitVideos?mid=' + str(User_Mid)
           + '&pagesize=100&tid=0&page=1&keyword=&order=pubdate')
    # NOTE(review): verify=False disables TLS certificate verification.
    content = requests.get(url, headers=headers, verify=False).json()
    data = content['data']
    limit = min(data['count'], 100)  # 'count' is the total number of videos
    entries = data.get('vlist') or []
    return [
        {"aid": entry['aid'], "title": entry['title'], "author": entry['author']}
        for entry in entries[:limit]
    ]


def sub(s):
    """Remove characters that are illegal in file names: ``? / \\ | : < > * "``.

    Without this, creating the download path fails for some titles.
    """
    return s.translate(_ILLEGAL_FILENAME_CHARS)


def Get_Path(Video_List, root=r"D:/video/"):
    """Create and return the download directory of the list's first author.

    :param Video_List: non-empty list of video dicts with an ``author`` key.
    :param root: directory under which the per-author directory is created.
    :return: the directory path, always ending in ``/``.
    """
    path = os.path.join(root, sub(str(Video_List[0]['author']))) + "/"
    os.makedirs(path, exist_ok=True)
    return path


def openUrl(url):
    """Fetch *url* and return the body decoded as UTF-8.

    The body is gunzipped only when it starts with the gzip magic bytes.
    """
    it_header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15'}
    req = urllib.request.Request(url, headers=it_header)
    with urllib.request.urlopen(req) as response:
        raw = response.read()
    if raw[:2] == b'\x1f\x8b':
        raw = gzip.GzipFile(fileobj=BytesIO(raw)).read()
    return raw.decode('utf-8')


def download(i, Video_List, path):
    """Download video number *i* of *Video_List* into *path* as ``<title>.mp4``.

    The media URL is scraped from the video page and fetched immediately,
    because these URLs are regenerated dynamically. An existing target file
    is skipped.

    :param i: index into *Video_List*.
    :param Video_List: list of video dicts with ``aid`` and ``title`` keys.
    :param path: target directory, ending with a path separator.
    :raises IndexError: if no media URL is found in the page.
    """
    import requests

    aid = str(Video_List[i]['aid'])
    url = 'https://www.bilibili.com/video/av' + aid
    print(url)
    html = openUrl(url)
    found = re.findall(r'"base_url":"(.*?)","backup_url"', html)
    if not found:
        raise IndexError('no "base_url" entry found in ' + url)
    video_url = found[0]
    print(video_url)

    headers = {
        # Works for http and https media URLs alike.
        'Host': urllib.parse.urlsplit(video_url).netloc,
        'Connection': 'keep-alive',
        'Origin': 'https://www.bilibili.com',
        'User-Agent': _CHROME67_UA,
        'Accept': '*/*',
        'Referer': 'https://www.bilibili.com/video/av' + aid + "/",
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    title = sub(str(Video_List[i]['title']))  # avoid illegal file-name characters
    target = path + title + '.mp4'
    if os.path.exists(target):
        print("视频 " + title + " 存在于本地,跳过下载")
        return

    print("-------------------------STart LINE-------------------------")
    localtime = time.strftime("%Y-%m-%d %H:%M:%S")
    print(localtime + "第" + str(i + 1) + "个视频:" + Video_List[i]['title'] + " 开始下载")
    # Stream into a temporary file and rename on success, so a failed run
    # never leaves a truncated .mp4 that later runs would skip.
    partial = target + '.part'
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        with requests.get(video_url, headers=headers, verify=False, stream=True) as response:
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial, target)
    finally:
        if os.path.exists(partial):
            os.remove(partial)
    localtime = time.strftime("%Y-%m-%d %H:%M:%S")
    print(localtime + "下载完成")
    print("-------------------------STop LINE-------------------------")


def main(User_Mid=216720985):
    """Fetch the uploader's video list, cache it in ``Video_List.txt``, return it.

    :param User_Mid: uploader id; change it to the uploader you want
        (91236407 is the other id mentioned by the author).
    """
    Video_List = get_Mainpage_Video(User_Mid)
    with open('Video_List.txt', 'wb') as cache:
        pickle.dump(Video_List, cache)
    # To download everything:
    #     for i in range(len(Video_List)):
    #         download(i, Video_List, Get_Path(Video_List))
    return Video_List


if __name__ == '__main__':
    # Run main() once beforehand to create Video_List.txt.
    # NOTE(review): pickle.load must only be used on this locally written cache.
    with open('Video_List.txt', 'rb') as cache:
        Video_List = pickle.load(cache)
    down_list = [Video_List[2]]  # only the third cached video is downloaded
    target_dir = Get_Path(down_list)
    for i in range(len(down_list)):
        download(i, down_list, target_dir)
    end_time = datetime.datetime.now()
    consume = (end_time - start_time).total_seconds()
    print("总共用时:" + str(round(consume, 1)) + "s")

页面列表

ITEM_HTML