初学 Python 练手之作,让我们快乐地下载 B 站视频
功能介绍
- 通过接口获取视频地址,并使用多线程进行下载
- 下载的文件大小决定线程数,小文件一个线程就够了,大文件最多会开启10个线程,也就是10倍速下载!
- 代码分两个文件:一个是主文件,另一个是多线程下载模块(主文件通过 `from multithreading import thread` 导入它,所以需要保存为 multithreading.py 并与主文件放在同一目录)
主文件
import json
import re

import requests
from bs4 import BeautifulSoup

from multithreading import thread

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
}
PLAYURL_API = ('https://api.bilibili.com/x/player/playurl'
               '?cid={}&avid={}&qn={}&otype=json&requestFrom=bilibili-helper')
REQUEST_TIMEOUT = 30  # seconds; without it a stalled request blocks forever


def _first_group(pattern, text, what):
    """Return group 1 of the first match of ``pattern`` in ``text``.

    Exits with a readable message instead of failing with an
    ``AttributeError`` on ``None`` when nothing matches.
    """
    found = re.search(pattern, text)
    if found is None:
        raise SystemExit('解析失败,未找到' + what)
    return found.group(1)


def _fetch_playurl(cid, aid, qn, cookies=None):
    """Query the playurl API for the given ids and quality; return parsed JSON."""
    api_url = PLAYURL_API.format(cid, aid, qn)
    raw = requests.get(api_url, headers=headers, cookies=cookies,
                       timeout=REQUEST_TIMEOUT).content
    return json.loads(raw)


url = input("请输入视频地址:")

# SESSDATA.txt holds the login cookie value. A missing file, an empty file or
# the literal "0" all mean "not logged in". Stripping matters: editors usually
# append a newline, which would otherwise defeat the check and end up in the
# cookie value.
try:
    with open('SESSDATA.txt', 'r', encoding='utf-8') as f:
        SESSDATA = f.read().strip()
except FileNotFoundError:
    SESSDATA = ''
logged_in = SESSDATA not in ('', '0')
if not logged_in:
    print('检测到您未设置SESSDATA,最高只能下载480p画质哦!')

# One page fetch (with the browser User-Agent) serves both the title and the
# AV number lookup below.
page = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
page.encoding = 'utf-8'
soup = BeautifulSoup(page.text, 'html.parser')
name = soup.title.text.split('_')[0]
print(name)

if '?' in url:
    url = url.split('?')[0]
print(url)
# Match only the id itself so a trailing "/" or extra path is not captured.
bvid = _first_group(r'(BV[0-9A-Za-z]+)', url, 'BV号')
print('BV号:' + bvid)

cid_json = requests.get(
    'https://api.bilibili.com/x/player/pagelist?bvid={}&jsonp=jsonp'.format(bvid),
    timeout=REQUEST_TIMEOUT).text
print(cid_json)
# The first "cid" in the page list is used, as in the original script.
cid = _first_group(r'{"cid":(\d+)', cid_json, 'CID')
print('CID:' + cid)

aid = _first_group(r'"aid":(.*?),"', page.text, 'AV号')
print('AV号:' + aid)

# The first playurl call (lowest quality, qn=16) is only used to list the
# qualities this video offers.
headers['Referer'] = url
rsp = _fetch_playurl(cid, aid, 16)
qn_accept_description = rsp.get('data').get('accept_description')
qn_accept_quality = rsp.get('data').get('accept_quality')
print('下载视频清晰度选择')
qn_dict = {}
for number, (description, quality) in enumerate(
        zip(qn_accept_description, qn_accept_quality), start=1):
    print(str(number) + ':' + description)
    qn_dict[str(number)] = quality
# Re-prompt on anything that is not one of the listed numbers instead of
# crashing with a KeyError.
xuhao = input('请选择(输入清晰度前的标号):').strip()
while xuhao not in qn_dict:
    xuhao = input('请选择(输入清晰度前的标号):').strip()
qn = qn_dict[xuhao]
print('清晰度参数qn:' + str(qn))

# Second call with the chosen quality; the login cookie is only sent when set.
cookies = {'SESSDATA': SESSDATA} if logged_in else {}
rsp = _fetch_playurl(cid, aid, qn, cookies=cookies)
real_url = rsp.get('data').get('durl')[0].get('url')
print('成功获取视频直链!')
print('正在开启多线程极速下载……')
# Titles may contain characters that are illegal in file names (e.g. "/", ":").
safe_name = re.sub(r'[\\/:*?"<>|]', '_', name).strip() or bvid
thread(real_url, url, safe_name + '.flv')
|
multithreading.py
import requests import threading import datetime import time
def thread(url, Referer, file_name):
    """Download ``url`` to ``file_name`` with up to 10 parallel range requests.

    The output file is pre-allocated to its final size and split into one
    contiguous byte range per worker thread (one thread per 5 MiB of data,
    capped at 10). Each worker runs ``Handler`` and writes its range at the
    matching file offset.

    Args:
        url: Direct URL of the file to download.
        Referer: Value sent as the ``Referer`` header on every request.
        file_name: Path of the output file; it is created or overwritten.

    Raises:
        requests.HTTPError: If the probe request returns an error status.
        RuntimeError: If the server reports no positive Content-Length, so
            the download cannot be split into ranges.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
        'Referer': Referer
    }
    # Probe request: only the response headers are needed, so release the
    # streamed connection right away instead of leaking it.
    probe = requests.get(url, headers=headers, stream=True, timeout=30)
    try:
        probe.raise_for_status()
        file_size = int(probe.headers.get('content-length', 0))
    finally:
        probe.close()
    if file_size <= 0:
        raise RuntimeError('无法获取视频大小(服务器未返回 Content-Length)')

    # Pre-allocate the file so every worker can seek to its own offset.
    with open(file_name, 'wb') as fp:
        fp.truncate(file_size)
    print('视频大小:' + str(int(file_size / 1024 / 1024)) + "MB")

    size = 5242880  # 5 MiB of data per thread
    all_thread = min(max(file_size // size, 1), 10)
    part = file_size // all_thread

    threads = []
    starttime = datetime.datetime.now().replace(microsecond=0)
    for i in range(all_thread):
        # HTTP byte ranges are inclusive on both ends; the last worker also
        # takes the remainder left over by the integer division.
        start = part * i
        end = file_size - 1 if i == all_thread - 1 else start + part - 1
        range_headers = dict(headers, Range="bytes=%s-%s" % (start, end))
        t = threading.Thread(target=Handler, name='线程-' + str(i),
                             kwargs={'start': start, 'end': end, 'url': url,
                                     'filename': file_name,
                                     'headers': range_headers},
                             daemon=True)
        threads.append(t)
    for t in threads:
        # Stagger the connection attempts slightly.
        time.sleep(0.2)
        t.start()
    print('正在下载……')
    for t in threads:
        t.join()
    endtime = datetime.datetime.now().replace(microsecond=0)
    print('下载完成!用时:%s' % (endtime - starttime))


def Handler(start, end, url, filename, headers=None):
    """Download one byte range of ``url`` into an existing file.

    Args:
        start: Offset of the first byte of the range; also the file offset
            at which writing begins.
        end: Offset of the last byte of the range (inclusive); used only
            for the progress display.
        url: Direct URL of the file to download.
        filename: Path of the already created output file.
        headers: Request headers, expected to carry the ``Range`` header
            for this part. Defaults to no extra headers.
    """
    tt_name = threading.current_thread().name
    print(tt_name + ' 已启动')
    total_size = end - start + 1
    downsize = 0
    startTime = time.time()
    # The timeout keeps a stalled connection from blocking join() forever.
    with requests.get(url, headers=headers, stream=True, timeout=30) as r, \
            open(filename, 'r+b') as fp:
        r.raise_for_status()
        fp.seek(start)
        for chunk in r.iter_content(204800):
            if chunk:
                fp.write(chunk)
                downsize += len(chunk)
                # Coarse clocks can report zero elapsed time for the first
                # chunk; avoid dividing by zero.
                elapsed = max(time.time() - startTime, 1e-6)
                line = tt_name + '-downloading %d KB/s - %.2f MB, 共 %.2f MB'
                line = line % (
                    downsize / 1024 / elapsed,
                    downsize / 1024 / 1024,
                    total_size / 1024 / 1024)
                print(line, end='\r')


if __name__ == '__main__':
    url = input('输入视频链接(请输入视频原链):')
    # thread() needs a Referer and an output name as well; use generic ones
    # when the module is run on its own.
    thread(url, 'https://www.bilibili.com', 'video.flv')
|
SESSDATA.txt
SESSDATA.txt 需要保存在主文件同目录。未登录时文件内容填 0(此时最高只能下载 480p);填入登录后的 SESSDATA 即可解析 4K、1080+ 等高画质(清晰度列表里两者只会出现最高的一个,比如原视频同时有 4K 和 1080+,那么出来的是 4K,不会出来 1080+)
P.S.如果你要分享给其他人,记得清空 SESSDATA.txt 内容
SESSDATA的获取方法有很多种,我就说其中一个:
![](/img/num77.gif)
![](/img/num77.gif)
![](/img/num77.gif)