打算做个群里接龙的年度总结

1. 出勤奖
2. 老鸽子奖
3. 最受鸽子眷顾奖
4. 霸王球奖 （全靠感觉）

出勤奖，数出现在最终列表上多少次就完事了

但是需要考虑到接龙人有别称的情况

老鸽子奖，需要知道每个接龙人最小的接龙序号，如果接龙序号小于鸽子阈值，而没出现在最终列表上，则视为鸽子

鸽子阈值可能与最终参与人数不同（因为可能会鸽到砍场）；未单独指定时，默认取满员人数

关于最受鸽子眷顾奖，需要知道接龙人的最大序号；如果最大序号大于满员人数，但仍出现在最终列表上，则视为被鸽子眷顾

霸王球奖不需要数据支持，全凭我感觉

In [1]:
import re

# Matches one sign-up ("loong") entry line: a 1-2 digit ordinal, a dot, then a
# non-digit character (e.g. "3. alice"). Requiring a non-digit after the dot
# keeps date lines such as "11.7 ..." from being treated as entries.
LOONG_ENTRY_PATTERN = re.compile(r"^\d{1,2}\.\D.*")


def get_names(file_path):
    """Return the unique cleaned names found in a raw chat export, sorted.

    Only lines matching ``LOONG_ENTRY_PATTERN`` are considered; each of them
    is normalised with ``clean_entry`` before de-duplication.

    Args:
        file_path: Path of the raw text file to scan.

    Returns:
        A list of distinct cleaned names in ascending order.
    """
    # The export contains Chinese text, so decode it explicitly instead of
    # relying on the platform-dependent default encoding.
    with open(file_path, encoding="utf-8") as file:
        names = {
            clean_entry(line) for line in file if LOONG_ENTRY_PATTERN.match(line)
        }

    return sorted(names)


def clean_entry(line):
    """Reduce a raw sign-up line to a normalised, lower-case name.

    Steps, in order: drop everything up to and including the first ".",
    delete bracketed notes (ASCII or full-width parentheses), squeeze every
    run of whitespace / non-breaking spaces into one space, lower-case the
    text, remove payment and wait-list markers, and trim both ends.

    Args:
        line: One raw entry line, e.g. "3. Alice (paid)".

    Returns:
        The cleaned name.

    Raises:
        ValueError: If ``line`` contains no ".".
    """
    text = line[line.index(".") + 1 :]
    text = re.sub(r"[\(\（][^\)\）]*[\)\）]", "", text)
    text = re.sub(r"[\s\xa0]+", " ", text).lower()

    # Order matters: the longer wait-list spellings must be removed before
    # the bare "wait", otherwise fragments such as "list" would be left over.
    for marker in ("paid", "wait list", "wl", "waitlist", "wait"):
        text = text.replace(marker, "")

    return text.strip()

In [2]:
from dataclasses import dataclass
from pathlib import Path

import json


@dataclass(frozen=True)
class Person:
    """One participant: a canonical id plus every cleaned name mapped to it."""

    # Canonical identifier under which the person is reported.
    primary_id: str
    # All cleaned names (aliases) that resolve to this person.
    aliases: frozenset


class PersonFactory:
    """Builds the alias -> ``Person`` lookup, persisting alias groups as JSON."""

    def build(self, json_path, names):
        """Return a dict mapping every known alias to its ``Person``.

        Args:
            json_path: Path of the JSON alias file; it is (re)written on
                every call and created when missing.
            names: Cleaned names that should be resolvable. Names not yet in
                the alias file are asked for interactively.

        Returns:
            ``{alias: Person}`` covering every alias in the alias file.
        """
        alias_dict = self._update_alias_dict(json_path, names)
        return self._build_persons(alias_dict)

    def _update_alias_dict(self, json_path, names):
        """Load stored aliases, prompt only for unseen names, then save.

        For each unseen name the user types the primary id it belongs to:
        an empty answer makes the name its own primary id, and the answer
        "break" stops prompting (answers given so far are still saved).

        Args:
            json_path: Path of the JSON file shaped ``{primary_id: [alias, ...]}``.
            names: Cleaned names to check against the stored aliases.

        Returns:
            ``{primary_id: set_of_aliases}`` after applying the answers.
        """
        path = Path(json_path)

        if path.exists():
            # Aliases contain Chinese text and are stored unescaped
            # (ensure_ascii=False), so read and write explicitly as UTF-8
            # rather than with the platform default encoding.
            with path.open(encoding="utf-8") as file:
                alias_dict = {
                    primary_id: set(aliases)
                    for primary_id, aliases in json.load(file).items()
                }
        else:
            alias_dict = {}

        known_names = set().union(*alias_dict.values())
        new_names = [name for name in names if name not in known_names]

        for name in new_names:
            primary_id = (
                input(f"Enter the primaryId for alias '{name}': ").strip().lower()
            )

            if primary_id == "break":
                break

            if primary_id == "":
                primary_id = name

            alias_dict.setdefault(primary_id, set()).add(name)

        # Sorted keys and sorted alias lists keep the stored file stable
        # between runs.
        for_storage = {
            primary_id: sorted(aliases)
            for primary_id, aliases in sorted(alias_dict.items())
        }

        with path.open("w", encoding="utf-8") as file:
            # ensure_ascii=False keeps Chinese characters readable in the file.
            json.dump(for_storage, file, indent=4, ensure_ascii=False)

        return alias_dict

    def _build_persons(self, alias_dict):
        """Turn ``{primary_id: aliases}`` into ``{alias: Person}``.

        Every alias of a person maps to the same ``Person`` instance.
        """
        persons = [
            Person(primary_id, frozenset(aliases))
            for primary_id, aliases in alias_dict.items()
        ]

        return {alias: person for person in persons for alias in person.aliases}


# Collect every distinct cleaned name from the raw export, then resolve them
# to Person objects (prompts on stdin for names missing from the alias file).
names = get_names("2024-raw.txt")
alias_to_person = PersonFactory().build("2024-aliases.json", names)

In [3]:
from pprint import pprint

In [4]:
from datetime import date


@dataclass(frozen=True)
class Loong:
    """One session's sign-up chain ("loong") together with its final roster."""

    # Calendar date of the session.
    date: date
    # Head count at which the session is full; ordinals above it are waiting.
    max_ordinal: int
    # A person whose lowest ordinal is below this value but who is missing
    # from the final roster is counted as a no-show ("bird").
    bird_threshold: int
    final_participants: frozenset  # of Person


class LoongFactory:
    """Parses per-session stats lines and final rosters from the raw export."""

    def build(self, raw_messages_path, alias_to_person):
        """Return one ``Loong`` per stats line found in the raw export.

        A stats line is any line containing "max_ordinal"; it is parsed as
        JSON and must provide "date" (ISO format) and "max_ordinal", and may
        provide "bird_threshold" (defaults to "max_ordinal"). The final
        roster is read from the lines following the stats line.

        Args:
            raw_messages_path: Path of the raw text export.
            alias_to_person: Mapping from cleaned name to ``Person``.

        Returns:
            A list of ``Loong`` objects in file order.

        Raises:
            KeyError: If a roster entry's cleaned name is not in
                ``alias_to_person``.
        """
        # The export contains Chinese text; decode explicitly as UTF-8
        # instead of relying on the platform default encoding.
        with open(raw_messages_path, encoding="utf-8") as file:
            lines = file.readlines()

        loongs = []

        for index, line in enumerate(lines):
            if "max_ordinal" not in line:
                continue

            loong_stats = json.loads(line)
            max_ordinal = loong_stats["max_ordinal"]
            final_participants = self._collect_final_participants(
                lines, index + 1, alias_to_person, max_ordinal
            )

            loongs.append(
                Loong(
                    date=date.fromisoformat(loong_stats["date"]),
                    max_ordinal=max_ordinal,
                    bird_threshold=loong_stats.get("bird_threshold", max_ordinal),
                    final_participants=final_participants,
                )
            )

        return loongs

    def _collect_final_participants(self, lines, start, alias_to_person, max_ordinal):
        """Collect the persons listed in the message that begins at ``start``.

        Scanning stops at the second date line (the next message) or as soon
        as ``max_ordinal`` distinct persons have been gathered.
        """
        participants = set()
        message_started = False

        for j in range(start, len(lines)):
            current = lines[j]

            # date line eg 11.7 周四 8-10pm; the first one opens this
            # session's message, the second one belongs to the next message.
            if "8-10" in current:
                if message_started:
                    break
                message_started = True

            if not LOONG_ENTRY_PATTERN.match(current):
                continue

            participants.add(alias_to_person[clean_entry(current)])

            if len(participants) == max_ordinal:
                break

        return frozenset(participants)


# Parse every session (stats line + final roster) from the raw export.
loongs = LoongFactory().build("2024-raw.txt", alias_to_person)
# pprint(loongs)

In [5]:
@dataclass(frozen=True)
class LoongEntry:
    """A single sign-up line: who held which ordinal in which loong."""

    # Session the entry belongs to.
    loong: Loong
    # Person resolved from the entry's cleaned name.
    person: Person
    # Number written before the first "." on the entry line.
    ordinal: int


class LoongEntryFactory:
    """Turns every sign-up line of the raw export into a ``LoongEntry``."""

    # "<month>.<day>" as written on a date line, e.g. "11.7 周四 8-10pm".
    # Compiled once instead of on every call.
    _MONTH_DAY_PATTERN = re.compile(r"(\d{1,2})\.(\d{1,2})")

    def build(self, raw_messages_path, alias_to_person, loongs, year=2024):
        """Return the set of all sign-up entries found in the raw export.

        Each date line (a line containing "8-10") switches the current
        session; every following entry line is attributed to the ``Loong``
        with that date.

        Args:
            raw_messages_path: Path of the raw text export.
            alias_to_person: Mapping from cleaned name to ``Person``.
            loongs: Iterable of ``Loong`` objects, looked up by date.
            year: Year combined with the month/day of each date line
                (date lines carry no year). Defaults to 2024.

        Returns:
            A set of ``LoongEntry`` objects; identical lines repeated across
            messages of the same session collapse into one entry.

        Raises:
            KeyError: If an entry's date has no matching ``Loong`` (including
                an entry that appears before any date line), or its cleaned
                name is not in ``alias_to_person``.
            ValueError: If a date line holds no "<month>.<day>" pattern.
        """
        # The export contains Chinese text; decode explicitly as UTF-8
        # instead of relying on the platform default encoding.
        with open(raw_messages_path, encoding="utf-8") as file:
            lines = file.readlines()

        loong_entries = set()

        date_to_loong = {loong.date: loong for loong in loongs}
        current_entry_date = None

        for line in lines:
            # date line eg 11.7 周四 8-10pm
            if "8-10" in line:
                month, day = self._extract_month_day(line)
                current_entry_date = date(year, month, day)
                continue

            if not LOONG_ENTRY_PATTERN.match(line):
                continue

            loong = date_to_loong[current_entry_date]
            person = alias_to_person[clean_entry(line)]
            ordinal = int(line.split(".")[0])

            loong_entries.add(LoongEntry(loong, person, ordinal))

        return loong_entries

    def _extract_month_day(self, date_str):
        """Return ``(month, day)`` from the first "<m>.<d>" found in ``date_str``.

        Raises:
            ValueError: If no such pattern is present.
        """
        match = self._MONTH_DAY_PATTERN.search(date_str)

        if match is None:
            raise ValueError(f"No valid date found in {date_str}")

        return int(match.group(1)), int(match.group(2))


# Parse every individual sign-up line (person + ordinal) for each session.
loong_entries = LoongEntryFactory().build("2024-raw.txt", alias_to_person, loongs)
# pprint(loong_entries)

In [6]:
from collections import defaultdict


@dataclass
class Loonger:
    """One person's footprint in one loong: lowest and highest ordinal held."""

    loong: Loong
    person: Person
    min_ordinal: int
    max_ordinal: int

    def is_final_participant(self):
        """Return True when the person is on the loong's final roster."""
        final_roster = self.loong.final_participants
        return self.person in final_roster

    def is_bird(self):
        """Return True for a no-show: held an early spot but is not on the roster.

        NOTE(review): the comparison is strict, so with the default threshold
        (equal to the loong's max_ordinal) a person sitting exactly on the
        last spot is never counted -- confirm this is intended.
        """
        if self.is_final_participant():
            return False
        return self.min_ordinal < self.loong.bird_threshold

    def is_favored_by_bird(self):
        """Return True when a wait-listed ordinal still ended up on the roster."""
        if self.max_ordinal <= self.loong.max_ordinal:
            return False
        return self.is_final_participant()


class LoongerFactory:
    """Collapses raw entries into one ``Loonger`` per (person, loong) pair."""

    def build(self, loong_entries):
        """Group entries by person and loong and summarise their ordinals.

        Args:
            loong_entries: Iterable of ``LoongEntry`` objects.

        Returns:
            A ``defaultdict(list)`` mapping each person to their ``Loonger``
            records, one per loong the person signed up for.
        """
        # Gather the ordinals each person held within each loong.
        ordinals_by_pair = defaultdict(list)
        for entry in loong_entries:
            ordinals_by_pair[(entry.person, entry.loong)].append(entry.ordinal)

        person_to_loonger = defaultdict(list)
        for (person, loong), ordinals in ordinals_by_pair.items():
            person_to_loonger[person].append(
                Loonger(loong, person, min(ordinals), max(ordinals))
            )

        return person_to_loonger


@dataclass
class LoongerStat:
    """Per-person yearly summary; each list holds sorted ISO date strings."""

    attendance: list  # of dates
    birds: list  # of dates
    favored_by_birds: list  # of dates


class LoongerStatFactory:
    """Derives the per-person award statistics from ``Loonger`` records."""

    def build(self, loongers):
        """Return ``{person: LoongerStat}`` for every person in ``loongers``.

        Args:
            loongers: Mapping from person to a list of ``Loonger`` records.

        Returns:
            A dict whose values hold the sorted ISO dates on which the person
            attended, was a no-show, or got in from the wait list.
        """
        person_to_stat = {}

        for person, records in loongers.items():
            attendance, birds, favored_by_birds = [], [], []

            for record in records:
                # Each predicate feeds its own bucket; a record may land in
                # more than one of them.
                verdicts = (
                    (record.is_final_participant, attendance),
                    (record.is_bird, birds),
                    (record.is_favored_by_bird, favored_by_birds),
                )
                for applies, bucket in verdicts:
                    if applies():
                        bucket.append(record.loong.date.isoformat())

            person_to_stat[person] = LoongerStat(
                sorted(attendance), sorted(birds), sorted(favored_by_birds)
            )

        return person_to_stat


# Aggregate entries per person and compute the three award statistics.
loongers = LoongerFactory().build(loong_entries)
loongers_stats = LoongerStatFactory().build(loongers)
# Drop two entries from the rankings: the "unknown" placeholder and the host.
# Both lookups and both pops raise KeyError if the alias/person is missing.
unknown_person = alias_to_person["unknown"]
host = alias_to_person["邓布利多邓"]
loongers_stats.pop(unknown_person)
loongers_stats.pop(host)

# Each ranking sorts by count, highest first; because reverse=True applies to
# the whole key, ties are ordered by primary_id in descending order too.
sorted_by_attendance = sorted(
    loongers_stats.items(),
    key=lambda x: (len(x[1].attendance), x[0].primary_id),
    reverse=True,
)
sorted_by_birds = sorted(
    loongers_stats.items(),
    key=lambda x: (len(x[1].birds), x[0].primary_id),
    reverse=True,
)
sorted_by_favored_by_birds = sorted(
    loongers_stats.items(),
    key=lambda x: (len(x[1].favored_by_birds), x[0].primary_id),
    reverse=True,
)

In [7]:
# Attendance award: print the top 18 people by number of sessions attended.
pprint("《出勤奖》")

for index, (person, stat) in enumerate(sorted_by_attendance[:18]):
        pprint(f"{index + 1}. {person.primary_id}: {len(stat.attendance)}")

'《出勤奖》'
'1. xiaojiling: 45'
'2. dian: 35'
'3. 悠: 34'
'4. terrance: 33'
'5. freda: 33'
'6. tommy: 30'
'7. 草: 26'
'8. 呆: 26'
'9. 矇ken＿阿烦: 24'
'10. tiamo: 17'
'11. mike: 17'
'12. ben: 17'
'13. l1o0hb0!: 16'
'14. kai: 16'
'15. pocky: 14'
'16. 蓝众众: 13'
'17. 挖花菌: 12'
'18. 我: 11'


In [8]:
# No-show ("bird") award: print the top 8 people by number of no-shows.
pprint("《老鸽子奖》")

for index, (person, stat) in enumerate(sorted_by_birds[:8]):
    pprint(f"{index + 1}. {person.primary_id}: {len(stat.birds)}")

'《老鸽子奖》'
'1. 草: 7'
'2. 悠: 7'
'3. pocky: 7'
'4. dian: 7'
'5. tiamo: 6'
'6. pearl: 6'
'7. leon w: 6'
'8. 矇ken＿阿烦: 5'


In [9]:
# "Favored by birds" award: print the top 8 people by number of sessions
# where a wait-list ordinal still made it onto the final roster.
pprint("《受鸽子眷顾奖》")

for index, (person, stat) in enumerate(sorted_by_favored_by_birds[:8]):
    pprint(f"{index + 1}. {person.primary_id}: {len(stat.favored_by_birds)}")

'《受鸽子眷顾奖》'
'1. l1o0hb0!: 6'
'2. ben: 6'
'3. tommy: 5'
'4. tiamo: 5'
'5. 矇ken＿阿烦: 3'
'6. 挖花菌: 3'
'7. dian: 3'
'8. 阿祝儿: 2'


In [10]:
# Number of sessions parsed from the raw export.
pprint(f"场次: {len(loongs)}")

'场次: 49'


In [11]:
# Total court fee across all sessions.
# NOTE(review): presumably 6 players share one court and a court costs 60 per
# session -- confirm the meaning of both constants.
pprint(f"场地费: {sum([loong.max_ordinal / 6 for loong in loongs]) * 60}")

'场地费: 7080.0'
