# -*- coding: utf-8 -*-
"""
Created on Sat Jun 26 10:18:39 2021
@author: Liu_YK d20091100124@cityu.mo
"""
import requests
from bs4 import BeautifulSoup
import time
import json
import numpy as np
import pandas as pd
# ---------------------------------------------
# BV <-> AV id conversion (BV2AV / AV2BV); algorithm taken from:
# https://blog.csdn.net/jkddf9h8xd9j646x798t/article/details/105124465
# Base-58 digit table for BV ids: a character's index in this string is its
# digit value.
alphabet = 'fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF'


def dec(x):
    """Convert a BV id string into its numeric AV id.

    Six characters of ``x`` (at indices 11, 10, 3, 8, 4, 6, least-significant
    digit first) are read as base-58 digits using ``alphabet``; the result
    then has the additive offset and XOR mask applied by ``enc`` undone.

    Args:
        x: BV id such as ``'BV17x411w7KC'``; must be at least 12 characters.

    Returns:
        The AV id as an int.

    Raises:
        ValueError: if one of the digit characters is not in ``alphabet``
            (previously this silently produced a wrong id because
            ``str.find`` returned -1).
        IndexError: if ``x`` is shorter than 12 characters.
    """
    r = 0
    for i, v in enumerate([11, 10, 3, 8, 4, 6]):
        digit = alphabet.find(x[v])
        if digit < 0:
            raise ValueError(
                'invalid character %r at position %d of BV id %r' % (x[v], v, x)
            )
        r += digit * 58**i
    return (r - 0x2_0840_07c0) ^ 0x0a93_b324


def enc(x):
    """Convert a numeric AV id into its BV id string.

    Inverse of ``dec``: the id is XOR-masked, offset, and written as six
    base-58 digits into the fixed template ``'BV1**4*1*7**'`` at indices
    11, 10, 3, 8, 4, 6 (least-significant digit first).

    Args:
        x: AV id as an int.

    Returns:
        The 12-character BV id.

    Raises:
        ValueError: if ``x`` does not fit into six base-58 digits after
            masking/offsetting (previously the excess was silently dropped,
            giving a BV id that does not decode back to ``x``).
    """
    value = (x ^ 0x0a93_b324) + 0x2_0840_07c0
    r = list('BV1**4*1*7**')
    for v in [11, 10, 3, 8, 4, 6]:
        value, d = divmod(value, 58)
        r[v] = alphabet[d]
    # Anything left over means the id needed more than six digits (or was
    # negative after the offset), so the result would not round-trip.
    if value != 0:
        raise ValueError('AV id %r is out of the encodable range' % (x,))
    return ''.join(r)
# -----------------------------------------------
# NOTE: getMid is not called anywhere in this script; kept for reference only.
def getMid(bv, proxies=None, timeout=10):
    """Fetch the ``mid`` of the ``upper`` entry from the reply API for a video.

    Queries ``https://api.bilibili.com/x/v2/reply/main`` with the AV id derived
    from ``bv`` and returns ``data['data']['upper']['mid']`` (presumably the
    uploader's member id -- confirm against the API).

    The original code built a proxy URL with backslashes and stored it under
    the key ``'proxy'``; requests selects proxies by URL scheme, so that
    setting was never applied. It is replaced by an optional, correctly shaped
    ``proxies`` argument whose default keeps the effective behaviour (no
    explicit proxy).

    Args:
        bv: BV id string of the video.
        proxies: optional requests-style proxy mapping, e.g.
            ``{'http': 'http://host:port', 'https': 'http://host:port'}``.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The ``mid`` value from the response.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error.
        ValueError: if the response body is not valid JSON/JSONP
            (``json.JSONDecodeError``).
        KeyError / TypeError: if the payload lacks ``data.upper.mid``.
    """
    headers = {
        'authority': 'api.bilibili.com',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.54',
        'referer': 'https://www.bilibili.com',
    }
    now_time = int(time.time() * 1000)
    callback = 'jQuery33105450292163562576_' + str(now_time)
    params = (
        ('callback', callback),
        ('jsonp', 'jsonp'),
        ('next', '0'),
        ('type', '1'),
        ('oid', dec(bv)),  # AV id; sample values seen: 588676861, 630756505
        ('mode', '3'),
        ('plat', '1'),
        ('_', str(int(time.time() * 1000))),
    )
    response = requests.get(
        'https://api.bilibili.com/x/v2/reply/main',
        headers=headers,
        params=params,
        proxies=proxies,
        timeout=timeout,
    )
    response.raise_for_status()

    # The server may answer with JSONP ("<callback>(...)") or plain JSON;
    # unwrap only when the callback prefix is actually present.
    rsp_str = response.text.strip()
    prefix = callback + '('
    if rsp_str.startswith(prefix):
        rsp_str = rsp_str[len(prefix):rsp_str.rfind(')')]
    data = json.loads(rsp_str)
    mid = data['data']['upper']['mid']
    print("mid:", mid)
    return mid
# NOTE: the API may change at any time, so this method can stop working without notice.
def getCid(bv, timeout=10):
    """Return the ``cid`` of the first page of a video.

    Calls ``https://api.bilibili.com/x/player/pagelist`` with ``bvid=bv`` and
    returns ``data['data'][0]['cid']``. Only the first entry of the page list
    is used.

    Args:
        bv: BV id string of the video.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The ``cid`` value of the first page.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error.
        ValueError: if the response is not valid JSON, or if it carries no
            page list (previously this surfaced as an opaque ``TypeError`` /
            ``KeyError`` / ``IndexError``).
    """
    headers = {
        'authority': 'api.bilibili.com',
        'sec-ch-ua': '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
        'accept': '*/*',
        'dnt': '1',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59',
        'origin': 'https://www.bilibili.com',
        'sec-fetch-site': 'same-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.bilibili.com',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,zh-TW;q=0.5',
    }
    params = (
        ('bvid', bv),
        ('jsonp', 'jsonp'),
    )
    response = requests.get(
        'https://api.bilibili.com/x/player/pagelist',
        headers=headers,
        params=params,
        timeout=timeout,
    )
    response.raise_for_status()
    data = json.loads(response.text)

    # An error payload has no usable "data" list; fail with the payload in
    # the message instead of a bare subscript error.
    pages = data.get("data") if isinstance(data, dict) else None
    if not pages:
        raise ValueError(f"no page list returned for {bv!r}: {data!r}")
    return pages[0]["cid"]
def getDm(bv, proxies=None, timeout=10):
    """Download the danmaku (bullet comments) of a video and save them to disk.

    Resolves the video's cid via ``getCid``, fetches
    ``https://comment.bilibili.com/<cid>.xml``, collects the text of every
    ``<d>`` element, prints each one, and writes them line by line to
    ``<bv>_dm.txt`` (UTF-8) in the current directory.

    The original code built a proxy URL with backslashes and stored it under
    the key ``'proxy'``; requests selects proxies by URL scheme, so that
    setting was never applied. It is replaced by an optional, correctly shaped
    ``proxies`` argument whose default keeps the effective behaviour (no
    explicit proxy).

    Args:
        bv: BV id string of the video; also used as the output file prefix.
        proxies: optional requests-style proxy mapping, e.g.
            ``{'http': 'http://host:port', 'https': 'http://host:port'}``.
        timeout: seconds to wait for the comment server before giving up.

    Returns:
        List of danmaku strings in document order.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    headers = {
        'authority': 'comment.bilibili.com',
        'sec-ch-ua': '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
        'sec-ch-ua-mobile': '?0',
        'dnt': '1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'none',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,zh-TW;q=0.5',
    }
    # URL shape: https://comment.bilibili.com/{5771902}.xml
    # The number is the cid, not the oid (av id) or the mid.
    response = requests.get(
        'https://comment.bilibili.com/' + str(getCid(bv)) + '.xml',
        headers=headers,
        proxies=proxies,
        timeout=timeout,
    )
    response.raise_for_status()

    # Important: decode the raw bytes as UTF-8 explicitly instead of relying
    # on the encoding requests would guess for response.text.
    pagetext = response.content.decode("utf-8")
    soup = BeautifulSoup(pagetext, 'lxml')
    dm = [node.text for node in soup.find_all('d')]

    with open(bv + '_dm.txt', 'w', encoding='utf-8') as fp:
        for text in dm:
            print(text)
            fp.write(text + '\n')
    return dm
# Script entry: ask for a BV id, download its danmaku, then export them to
# "<bv>_dm.csv". The CSV is written as UTF-8 with a BOM ('utf-8-sig').
bv = input('请输入bv号:')
dm = getDm(bv)
dm_np = np.array(dm)
dm_pd = pd.DataFrame(data=dm_np)
dm_pd.to_csv(f"{bv}_dm.csv", encoding='utf-8-sig')