In [35]:
import os
import shlex
import xml.etree.ElementTree as ET
from typing import List

import ffmpeg
import plotly.express as px


In [36]:
# Running tally of matched danmaku text -> number of occurrences. It is
# filled in by find_danmaku_in_file() whenever SHOW_STATS is true.
danmaku_stats = {}
# When true, find_danmaku_in_file() displays a Plotly scatter plot of the
# match frequency for every file it processes.
SHOW_PLOTS = False
# When true, matched danmaku are counted into danmaku_stats and the most
# frequent entries are printed after the scan.
SHOW_STATS = True

In [37]:
class Keyword:
    """Matcher for danmaku text based on exact phrases and substrings.

    Attributes:
        exact_match_list: Texts that match only when the whole danmaku equals
            one of them.
        fuzzy_match_list: Fragments that match when they occur anywhere inside
            the danmaku. An empty-string fragment therefore matches any text.
        weight_factor: Stored as given; not consulted by ``match``.
    """

    def __init__(
        self,
        exact_match_list: List[str],
        fuzzy_match_list: List[str],
        weight_factor: int = 1,
    ) -> None:
        self.weight_factor = weight_factor
        self.fuzzy_match_list = fuzzy_match_list
        self.exact_match_list = exact_match_list

    def match(self, text: str) -> bool:
        """Return True when ``text`` is an exact hit or contains a fuzzy fragment."""
        is_exact_hit = text in self.exact_match_list
        return is_exact_hit or any(
            fragment in text for fragment in self.fuzzy_match_list
        )


def sec2time(sec: float) -> str:
    """Format a duration in seconds as a human-readable ``H:MM:SS`` timecode.

    The value is rounded to the nearest whole second first. Minutes and
    seconds are zero-padded to two digits so the result reads as a normal
    timecode (``1:05:03`` rather than ``1:5:3``); hours are left unpadded.

    Args:
        sec: Duration in seconds.

    Returns:
        The formatted timecode string.
    """
    total_seconds = round(sec)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}:{minutes:02d}:{seconds:02d}"


def sec2hhmmss(sec: float) -> str:
    """Render seconds as a compact, separator-free ``HHMMSS`` string.

    The input is rounded to a whole second; every field is zero-padded to at
    least two digits (hours may grow beyond two digits).
    """
    whole = round(sec)
    hh, leftover = divmod(whole, 3600)
    mm, ss = divmod(leftover, 60)
    return "{:02d}{:02d}{:02d}".format(hh, mm, ss)


def find_danmaku_in_file(
    filepath: str,
    target_danmaku: Keyword,
    threshold: float = 2.0,
    time_backwards: int = 60,
    time_afterwards: int = 15,
    average_duration: float = 15,
    output_dir: str = "/home/yiguo/GitHub/hotspot-clips/clips/",
) -> List[List[str]]:
    """Locate bursts of matching danmaku in one XML file and build ffmpeg cuts.

    Every ``<d>`` element directly under the XML root is inspected: the first
    comma-separated field of its ``p`` attribute is read as the timestamp in
    seconds and the element text as the danmaku. For each matching danmaku the
    local frequency (matches per second inside a window of
    ``average_duration`` seconds centred on it) is computed. Every match whose
    frequency exceeds ``threshold`` contributes a clip
    ``[time - time_backwards, time + time_afterwards]`` (clamped to the
    recording); overlapping clips are merged.

    The video is expected next to the XML file, with the same name and an
    ``.mp4`` or ``.flv`` extension (``.mp4`` is preferred).

    Side effects: shows a Plotly figure when ``SHOW_PLOTS`` is true and adds
    the matched texts to the global ``danmaku_stats`` when ``SHOW_STATS`` is
    true.

    Args:
        filepath: Path of the danmaku XML file.
        target_danmaku: Matcher deciding which danmaku count.
        threshold: Minimum frequency (matches per second) that triggers a clip.
        time_backwards: Seconds kept before a triggering danmaku.
        time_afterwards: Seconds kept after a triggering danmaku.
        average_duration: Width in seconds of the frequency window.
        output_dir: Directory the generated ffmpeg commands write clips to.

    Returns:
        One ffmpeg argument list per clip (the result of ``.compile()``), ready
        to be joined into a shell command. Empty when nothing matched.

    Raises:
        ValueError: If ``average_duration`` is not positive.
        FileNotFoundError: If neither the ``.mp4`` nor the ``.flv`` video
            exists (only checked when at least one danmaku matched).
    """
    if average_duration <= 0:
        raise ValueError("average_duration must be positive")

    root = ET.parse(filepath).getroot()

    matches = []
    max_time = 0
    for child in root:
        if child.tag != "d":
            continue
        timestamp = float(child.attrib["p"].split(",")[0])
        # The latest timestamp of *any* danmaku approximates the recording end.
        max_time = max(max_time, timestamp)
        text = child.text
        if text and target_danmaku.match(text):
            matches.append((timestamp, text))

    num_danmaku = len(matches)
    if num_danmaku == 0:
        return []

    # The sliding window and the clip merging below both require ascending
    # timestamps; the sort is stable, so already-ordered files are unaffected.
    matches.sort(key=lambda pair: pair[0])
    times = [timestamp for timestamp, _ in matches]

    danmaku_map = {
        "time": times,
        "text": [text for _, text in matches],
        "freq": [],
        "timecode": [sec2time(timestamp) for timestamp in times],
    }

    # Two-pointer sweep: [lo, hi) always spans the matches inside the window
    # [timestamp - half, timestamp + half) around the current match.
    half_window = average_duration / 2
    lo = hi = 0
    for timestamp in times:
        left = timestamp - half_window
        right = timestamp + half_window
        while lo < num_danmaku and times[lo] < left:
            lo += 1
        while hi < num_danmaku and times[hi] < right:
            hi += 1
        danmaku_map["freq"].append((hi - lo) / average_duration)

    if SHOW_PLOTS:
        fig = px.scatter(
            danmaku_map, x="time", y="freq", hover_data=["timecode", "text"], title=filepath
        )
        fig.update_layout(hovermode="x unified")
        fig.update_xaxes(range=[0, max_time])
        fig.update_yaxes(range=[0, max(danmaku_map["freq"]) * 1.1])
        fig.show()

    if SHOW_STATS:
        for text in danmaku_map["text"]:
            danmaku_stats[text] = danmaku_stats.get(text, 0) + 1

    # Build clip intervals, merging each one into its predecessor on overlap.
    clip_list = []
    for timestamp, freq in zip(times, danmaku_map["freq"]):
        if freq <= threshold:
            continue
        start = max(timestamp - time_backwards, 0)
        end = min(timestamp + time_afterwards, max_time)
        if clip_list and clip_list[-1][1] > start:
            clip_list[-1][1] = max(clip_list[-1][1], end)
        else:
            clip_list.append([start, end])

    # The recording sits next to the XML file and shares its base name.
    stem = os.path.splitext(filepath)[0]
    for extension in ("mp4", "flv"):
        candidate = f"{stem}.{extension}"
        if os.path.exists(candidate):
            streampath = candidate
            break
    else:
        raise FileNotFoundError(f"{stem}.mp4/flv not found!")

    # Clip names reuse the XML file name up to its last "_"-separated field.
    name_prefix = "_".join(os.path.basename(filepath).split("_")[:-1])

    output_streams = []
    for start, end in clip_list:
        output_name = f"{name_prefix}_{sec2hhmmss(start)}.mp4"
        output_streams.append(
            ffmpeg.input(streampath)
            .output(
                os.path.join(output_dir, output_name),
                ss=str(start),
                to=str(end),
                acodec="copy",
                vcodec="copy",
            )
            .compile(overwrite_output=True)
        )

    return output_streams


In [38]:
# Directories scanned for danmaku XML files. Exactly one entry is active;
# the commented-out ones are earlier locations kept for quick switching.
# dir_list = ["/home/yiguo/GitHub/hotspot-clips/recordings"]
dir_list = [
    # "/mnt/c/Users/yiguo/Desktop/BiliRec_tmp/saved/",
    # "/mnt/c/Users/yiguo/Desktop/BiliRec_tmp/tmp/",
    # "/mnt/c/Users/yiguo/Desktop/BiliRec_tmp/trash/",
    # "/mnt/e/BiliRec_apixC/saved/",
    # "/mnt/e/BiliRec_apixC/trash/",
    # "/mnt/e/BiliRec_apixC/素材录播/",
    # "/mnt/d/Videos/ApixC/宝贝APEX/素材/",
    # "/mnt/d/Videos/ApixC/回家的诱惑/素材/",
    # "/mnt/d/Videos/ApixC/apixC_MC暮色/素材/",
    # "/mnt/c/Users/yiguo/Desktop/B站录播姬/92613-少年Pi/"
    "/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/",
]

# Alternative matchers used in earlier runs (kept for reference).
# target_danmaku = Keyword([], ["乐"])
# target_danmaku = Keyword([], ["呱", "挂", "准"])
# target_danmaku = Keyword(
#     [
#         "呱！",
#         "呱",
#         "准",
#         "有挂",
#         "挂",
#         "好准",
#         "开了就是挂？",
#         "准啊",
#         "挂！",
#         "有呱",
#         "有挂！",
#         "呱！！！",
#         "精准",
#         "呱！！",
#         "有呱！",
#         "铁挂",
#         "准神",
#         "呱!",
#         "太准了",
#         "准！",
#         "呱呱呱",
#         "准准准",
#         "好准啊",
#         "xdm有挂",
#         "还挺准",
#         "开挂是吧",
#         "呱？",
#         "准神！",
#         "开挂",
#         "挂！！！",
#         "兄弟们有挂",
#         "小透不算挂",
#         "是挂",
#         "挂!",
#         "开了就是呱？",
#         "有点准",
#         "呱！！！！",
#         "这么准",
#         "呱！！！！！",
#         "挂哥",
#         "酱紫准？",
#         "这么准？",
#         "好准的3030",
#         "准神啊",
#         "小透不是挂",
#     ],
#     [],
# )
# target_danmaku = Keyword([], ["易大山", "一打三"])
# target_danmaku = Keyword(["乐", "？", "?"], [])

# Active matcher: the empty fuzzy fragment is a substring of every text, so
# every non-empty danmaku counts as a match.
target_danmaku = Keyword(exact_match_list=[], fuzzy_match_list=[""])

# Reset the tally (in place, so find_danmaku_in_file keeps seeing the same
# dict) so that re-running this cell does not double-count earlier runs.
danmaku_stats.clear()

output_streams = []
for danmaku_dir in dir_list:
    # os.listdir returns entries in arbitrary order; sort for reproducible output.
    for filename in sorted(os.listdir(danmaku_dir)):
        # To restrict the scan to one room, additionally test e.g.
        # filename.startswith("92613") or filename.startswith("录制-92613").
        if not filename.endswith(".xml"):
            continue
        xml_path = os.path.join(danmaku_dir, filename)
        # Print exactly the path that is processed.
        print(xml_path)
        output_streams += find_danmaku_in_file(
            xml_path,
            target_danmaku,
            threshold=2 / 15,
            time_backwards=45,
            time_afterwards=10,
        )

if SHOW_STATS:
    # [text, count] pairs, most frequent first; ties keep first-seen order
    # because the sort is stable.
    danmaku_stats_ordered = sorted(
        ([text, count] for text, count in danmaku_stats.items()),
        key=lambda entry: entry[1],
        reverse=True,
    )
    # Only the 100 most frequent texts are shown.
    for text, count in danmaku_stats_ordered[:100]:
        print(count, text)


/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220719_050730_MC末世生存day1.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220720_042435_MC末世生存day2.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220720_050930_MC末世生存day2.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220720_054649_MC末世生存day2.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220719_050021_末世生存第二季DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_043415_末世2DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_053036_末世2DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_055115_末世2DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_060522_末世2DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_061903_末世2DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_063213_末世2DLC.xml
/mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220720_064613_末世2DLC.xml
/mnt/c/Users/yiguo/D

In [39]:
# Number of independent shell command lines to emit; the clips are split as
# evenly as possible between them.
num_segment = 1
print(len(output_streams))

shell_command = ""
num_streams = len(output_streams)
for segment_index in range(num_segment):
    # Integer arithmetic yields num_segment contiguous slices whose sizes
    # differ by at most one and that together cover every clip exactly once.
    begin = segment_index * num_streams // num_segment
    stop = (segment_index + 1) * num_streams // num_segment
    segment = output_streams[begin:stop]
    if not segment:
        # Fewer clips than segments: do not print an empty command line.
        continue
    # Quote every argument so paths containing spaces or shell metacharacters
    # survive being pasted into a shell.
    shell_command = "; ".join(
        " ".join(shlex.quote(argument) for argument in ffmpeg_args)
        for ffmpeg_args in segment
    )
    print(shell_command)


15
ffmpeg -i /mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220719_050730_MC末世生存day1.mp4 -acodec copy -ss 0 -to 20558.272 -vcodec copy /home/yiguo/GitHub/hotspot-clips/clips/47867_20220719_050730_000000.mp4 -y; ffmpeg -i /mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220720_042435_MC末世生存day2.mp4 -acodec copy -ss 0 -to 2693.915 -vcodec copy /home/yiguo/GitHub/hotspot-clips/clips/47867_20220720_042435_000000.mp4 -y; ffmpeg -i /mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220720_050930_MC末世生存day2.mp4 -acodec copy -ss 0 -to 2235.597 -vcodec copy /home/yiguo/GitHub/hotspot-clips/clips/47867_20220720_050930_000000.mp4 -y; ffmpeg -i /mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/47867_20220720_054649_MC末世生存day2.mp4 -acodec copy -ss 0 -to 15004.069 -vcodec copy /home/yiguo/GitHub/hotspot-clips/clips/47867_20220720_054649_000000.mp4 -y; ffmpeg -i /mnt/c/Users/yiguo/Desktop/apixC_末日合集/footages/92613_20220719_050021_末世生存第二季DLC.mp4 -acodec copy -ss 0 -to 21189.494 -vcodec 

In [40]:
# Pasted "count text" statistics from an earlier run, one entry per line.
a = """278 呱！
249 呱
163 准
147 有挂
77 挂
60 好准
60 开了就是挂？
57 准啊
53 挂！
46 有呱
25 有挂！
20 呱！！！
18 精准
15 呱！！
14 有呱！
14 铁挂
12 准神
12 呱!
12 太准了
11 准！
11 呱呱呱
11 准准准
9 好准啊
8 xdm有挂
7 还挺准
7 开挂是吧
7 呱？
5 准神！
5 开挂
5 挂！！！
5 兄弟们有挂
5 小透不算挂
5 是挂
5 挂!
5 开了就是呱？
4 有点准
4 呱！！！！
4 这么准
4 呱！！！！！
4 挂哥
4 酱紫准？
4 这么准？
3 好准的3030
3 准神啊
3 小透不是挂
"""


def parse_stats_dump(dump: str) -> List[str]:
    """Extract the danmaku texts from a "count text" dump.

    Each non-empty line is split once on its first run of whitespace, so a
    text that itself contains spaces is kept whole. Lines without a text part
    (blank lines or a bare count) are skipped.

    Args:
        dump: Multi-line string with one "count text" entry per line.

    Returns:
        The texts in the order they appear in ``dump``.
    """
    texts = []
    for row in dump.split("\n"):
        fields = row.strip().split(maxsplit=1)
        if len(fields) == 2:
            texts.append(fields[1])
    return texts


res = parse_stats_dump(a)
print(res)


['呱！', '呱', '准', '有挂', '挂', '好准', '开了就是挂？', '准啊', '挂！', '有呱', '有挂！', '呱！！！', '精准', '呱！！', '有呱！', '铁挂', '准神', '呱!', '太准了', '准！', '呱呱呱', '准准准', '好准啊', 'xdm有挂', '还挺准', '开挂是吧', '呱？', '准神！', '开挂', '挂！！！', '兄弟们有挂', '小透不算挂', '是挂', '挂!', '开了就是呱？', '有点准', '呱！！！！', '这么准', '呱！！！！！', '挂哥', '酱紫准？', '这么准？', '好准的3030', '准神啊', '小透不是挂']


In [41]:
# Use .get so the cell reports 0 instead of raising KeyError when this text
# never appeared in the scanned files.
print(danmaku_stats.get("嗷呜", 0))

961
