In [1]:
import os, csv, re

# Root folder of the assignment; every other path below is derived from it.
base_dir = r"d:\study\研一上\CHC5904\A2"

# In order: folder holding the chapter text files, the city/term summary CSV,
# the per-chapter people CSV, and the CSV this script writes its results to.
chap_dir, city_file, people_file, output_file = (
    os.path.join(base_dir, leaf)
    for leaf in (
        "30-50",
        "五城稱呼匯總.csv",
        "人物统计_合并.csv",
        "稱呼分析結果.csv",
    )
)

# Load the term -> city mapping from the summary CSV (the header row is skipped).
#
# Each data row is "<city>,<terms...>". The terms may be spread over several
# CSV columns and/or packed into one cell separated by "、", "，" or ",".
# The file is parsed with csv.reader (not a manual split) so that quoted
# cells -- which spreadsheet tools emit whenever a cell contains a comma --
# do not leave stray quote characters inside the terms.
#
# NOTE: if the same term is listed under more than one city, the last row wins.
city_terms = {}  # term -> city
term_separators = re.compile("[、，,]")
with open(city_file, "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerate an empty file
    for row in reader:
        # Blank lines come back as [] and city-only rows have one column.
        if len(row) < 2:
            continue
        city = row[0].strip()
        for cell in row[1:]:
            for piece in term_separators.split(cell):
                term = piece.strip()
                if term:
                    city_terms[term] = city

# Load the people recorded for each chapter (the header row is skipped).
#
# Column 0 is the chapter key and column 1 is a comma-separated list of
# person names; any further columns are ignored. If a chapter key occurs on
# several rows, the last row replaces the earlier ones.
# NOTE(review): the keys are later looked up as "Chapter30" ... "Chapter50";
# confirm the file uses exactly that spelling.
chapter_people = {}  # chapter key -> set of person names
with open(people_file, "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.reader(f)
    # Default of None avoids StopIteration when the file is completely empty.
    header = next(reader, None)
    for row in reader:
        if len(row) < 2:
            continue
        chapter = row[0].strip()
        persons = [p.strip() for p in row[1].split(",") if p.strip()]
        chapter_people[chapter] = set(persons)

# Build the per-term statistics.
#
# term_stats[term]["counts"][chapter] is the number of (non-overlapping)
# occurrences of the term in that chapter's text file, and
# term_stats[term]["people"] is the set of that chapter's known people whose
# name appears on the same line as the term, accumulated over all chapters.
terms = list(city_terms)
term_stats = {t: {"counts": {}, "people": set()} for t in terms}

chapter_numbers = range(30, 51)  # chapters 30..50 inclusive
chapters = [f"Chapter{n}" for n in chapter_numbers]

for chap_no, chap in zip(chapter_numbers, chapters):
    # Start every term at 0 so that a missing chapter file, or a term that
    # never occurs, still has an entry for this chapter.
    for t in terms:
        term_stats[t]["counts"][chap] = 0

    filename = os.path.join(chap_dir, f"chapter{chap_no}.txt")
    if not os.path.isfile(filename):
        continue

    # Looked up once per chapter rather than once per matching line.
    ppl = chapter_people.get(chap, set())

    with open(filename, "r", encoding="utf-8-sig") as f:
        for line in f:
            # People present on this line; computed lazily and at most once,
            # then shared by every term that occurs on the line.
            line_people = None
            for t in terms:
                c = line.count(t)
                if c == 0:
                    continue
                if line_people is None:
                    # Guard against "" which is a substring of every line.
                    line_people = {p for p in ppl if p and p in line}
                term_stats[t]["counts"][chap] += c
                term_stats[t]["people"] |= line_people

# Write the result CSV: one row per term with its city, the chapters in which
# it occurs (chapters with a count of 0 are left out of the third column),
# and the sorted names of the people seen on the same lines.
with open(output_file, "w", newline="", encoding="utf-8-sig") as out:
    writer = csv.writer(out)
    writer.writerow(["稱呼", "城市", "章節出現次數", "同行出現人物匯總"])
    for t in terms:
        stats = term_stats[t]
        nonzero = []
        for chap in chapters:
            hits = stats["counts"][chap]
            if hits > 0:
                nonzero.append(f"{chap}:{hits}")
        writer.writerow(
            [
                t,
                city_terms[t],
                ",".join(nonzero),
                ",".join(sorted(stats["people"])),
            ]
        )

print("完成，输出文件：", output_file)

完成，输出文件： d:\study\研一上\CHC5904\A2\稱呼分析結果.csv
