In [1]:
import requests
from bs4 import BeautifulSoup
import csv

headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/84.0.4147.89 Safari/537.36",
    'Referer': "https://search.bilibili.com"
}
path = "./data/bilibili_tag.csv"

with open(path, 'w', encoding='utf8', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    csv_head = ["A", "B"]
    writer.writerow(csv_head)

In [2]:
def check_bv(bv):
    if bv.startswith('BV'):
        return bv
    elif bv.startswith('bv'):
        return 'BV' + bv[2:]
    else:
        return 'BV' + bv


def get_tag(bv, tag_num):
    """
    :param tag_num: 你需要的tag数目，有些视频可能比较少（不用担心bug），如果你想要全部的tag就给参数-1
    :param bv: bv 字符串
    :return: tag列表
    """
    bv = check_bv(bv)

    tag_list = []
    url = f'https://www.bilibili.com/video/{bv}'
    content = requests.get(url).content
    soup = BeautifulSoup(content, 'html.parser')
    tags = soup.find_all('a', class_='tag-link')
    if tag_num == -1:
        for tag in tags:
            tag_list.append(tag.text.strip())
    else:
        tag_num = min(tag_num, len(tags))
        for i in range(tag_num):
            tag_list.append(tags[i].text.strip())
    return tag_list


def search_video_by_tag(tag, video_num):
    """
    :param tag: 要搜索的tag
    :param video_num: 你需要多少个视频，这里上限是50！
    :return: 搜索到的前num个视频的bv号列表
    """
    url = "https://search.bilibili.com/all?keyword=" + tag + "&from_source=video_tag"
    req = requests.get(url, headers=headers)
    soup = BeautifulSoup(req.text, 'html.parser')
    if soup.title.text.startswith("验证码"):
        return []
    cards = soup.find_all(class_="bili-video-card")

    bv_list = []
    for i in range(video_num):
        bv = cards[i].contents[1].a.get('href').split('BV')[1][:-1]
        bv_list.append(bv)
    return bv_list


def get_csv_by_bv(bv, video_num=1, tag_num=3, depth=3):
    """
    :param bv: 初始化输入给的bv号
    :param video_num: 每次希望找几个视频，上限50个！！！
    :param tag_num: 你需要的tag数目，有些视频可能比较少（不用担心bug），如果你想要全部的tag就给参数-1
    :param depth: 需要深度搜索多少次
    """
    bv = check_bv(bv)

    tag_list = get_tag(bv, tag_num)

    for tag in tag_list:
        bv_list = search_video_by_tag(tag, video_num)
        for bv in bv_list:
            next_tag_list = get_tag(bv, tag_num)
            for next_tag in next_tag_list:
                data_row = [tag, next_tag]
                with open(path, 'a', encoding='utf8', newline='') as f:
                    csv_write = csv.writer(f, delimiter='\t')
                    csv_write.writerow(data_row)
            if depth:
                get_csv_by_tag(next_tag_list, video_num, tag_num, depth - 1)
            else:
                return


def get_csv_by_tag(tag, video_num=1, tag_num=3, depth=5):
    """
    :param tag: 初始化输入给的tag
    :param video_num: 每次希望找几个视频，上限50个！！！
    :param tag_num: 你需要的tag数目，有些视频可能比较少（不用担心bug），如果你想要全部的tag就给参数-1
    :param depth: 需要深度搜索多少次
    """
    if isinstance(tag, list):
        tag_list = tag
    else:
        tag_list = [tag]

    for tag in tag_list:
        bv_list = search_video_by_tag(tag, video_num)
        for bv in bv_list:
            next_tag_list = get_tag(bv, tag_num)
            for next_tag in next_tag_list:
                if tag != next_tag:
                    data_row = [tag, next_tag]
                    with open(path, 'a', encoding='utf8', newline='') as f:
                        csv_write = csv.writer(f, delimiter='\t')
                        csv_write.writerow(data_row)
            if depth:
                get_csv_by_tag(next_tag_list, video_num, tag_num, depth - 1)
            else:
                return

In [3]:
get_csv_by_bv('1X8411e7EJ', video_num=5, tag_num=-1, depth=3)
# get_csv_by_tag('rap')
print('done')

IndexError: list index out of range