获得经济学人中有notes的刊号和文章名称，用于获取相关音频

In [14]:
import os
import json
from datetime import datetime

def get_closest_json_by_filename(folder):
    today = datetime.today().date()
    closest_file = None
    min_diff = None

    for file in os.listdir(folder):
        if file.endswith(".json"):
            name = os.path.splitext(file)[0]
            try:
                file_date = datetime.strptime(name, "%Y-%m-%d").date()
                diff = abs((file_date - today).days)
                if min_diff is None or diff < min_diff:
                    min_diff = diff
                    closest_file = file
            except ValueError:
                # 文件名不是日期格式，跳过
                continue

    if closest_file:
        closest_path = os.path.join(folder, closest_file)
        print(f"✅ 最接近今天的文件: {closest_path}")
        with open(closest_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data
    else:
        print("❌ 未找到日期格式的 json 文件。")
        return None


folder_path = r"G:\Code\Python\Project\Reader\data\backup\notes"
json_data = get_closest_json_by_filename(folder_path)

pairs = []
seen_articles = set()  # 用 set 去重章节

for entry in json_data:
    bookName = entry.get("bookName", "")
    chapter = entry.get("chapter", "")
    chapterIndex = entry.get("chapterIndex", "")
    if "The Economist" in bookName and chapter not in seen_articles:
        seen_articles.add(chapter)
        pairs.append((bookName, chapter,chapterIndex))

# 按 bookName 排序
pairs.sort(key=lambda x: x[0])

print("\n".join(f"{book} - {chapterIndex} - {chapter}" for book, chapter, chapterIndex in pairs))

# chapterIndex + 6 = real index of audio


✅ 最接近今天的文件: G:\Code\Python\Project\Reader\data\backup\notes\2025-10-09.json
The Economist [Sep 13th 2025] - 22 - Japan’s new leadership struggle is far from business as usual
The Economist [Sep 13th 2025] - 31 - Charlie Kirk challenged liberals until the day he was murdered
The Economist [Sep 20th 2025] - 9 - What Elon Musk gets wrong about Europe’s hard right
The Economist [Sep 20th 2025] - 26 - Don’t fret over China’s new climate targets
The Economist [Sep 20th 2025] - 11 - How Israel is losing America
The Economist [Sep 20th 2025] - 27 - The surprising nature of protest in China
The Economist [Sep 20th 2025] - 61 - The UN’s grim future
The Economist [Sep 20th 2025] - 81 - The health benefits of sunlight may outweigh the risk of skin cancer
The Economist [Sep 27th 2025] - 29 - All eyes on the NBA as its players return to China
The Economist [Sep 27th 2025] - 30 - A restaurant scandal sticks in China’s throat
The Economist [Sep 27th 2025] - 27 - Why Xi Jinping now accepts Kim Jong Un 

In [35]:
import re
from datetime import datetime
from pathlib import Path
import shutil

def extract_index(filename: Path):
        match = re.match(r"(\d+)", filename.name)
        return int(match.group(1)) if match else 999999

audio_base_dir = Path(r"G:\Book\Economist")
readed_dir = audio_base_dir / "ReadedAudio"

for book, chapter, chapterIndex in pairs:
    print(book)
    # 1. 提取方括号中的日期
    match = re.search(r'\[(.*?)\]', book)
    date_str = match.group(1)  # 'Sep 13th 2025'

    # 2. 清洗并转为标准日期格式
    date_str = re.sub(r'(st|nd|rd|th)', '', date_str)  # 去掉th等后缀
    date_obj = datetime.strptime(date_str, '%b %d %Y')
    formatted_date = date_obj.strftime('%Y-%m-%d')

    # 3. 转换为文件夹格式
    folder_date = date_obj.strftime('%Y%m%d')
    folder_name = f"TEco-{folder_date}音频"
    folder_path = audio_base_dir / folder_name

    mp3_files = [f for f in folder_path.rglob("*.mp3")]
    mp3_files_sorted = sorted(mp3_files, key=extract_index)

    for mp3_file in mp3_files_sorted:
        if extract_index(mp3_file) == chapterIndex + 6:
            dest_file = readed_dir / mp3_file.name
            if dest_file.exists():
                print(f"⚠️ 文件已存在，跳过: {dest_file.name}")
            else:
                shutil.copy2(mp3_file, dest_file)  # 拷贝文件，保留元数据
                print(f"✅ 已拷贝: {mp3_file.name} -> {readed_dir}")
            print(chapter)
            break
    # break  # 示例只处理第一个




The Economist [Sep 13th 2025]
⚠️ 文件已存在，跳过: 028 Asia - A prime minister quits.mp3
Japan’s new leadership struggle is far from business as usual
The Economist [Sep 13th 2025]
⚠️ 文件已存在，跳过: 037 Middle East and Africa - A new front.mp3
Charlie Kirk challenged liberals until the day he was murdered
The Economist [Sep 20th 2025]
✅ 已拷贝: 013 Briefing - Israel_s reliance on America.mp3 -> G:\Book\Economist\ReadedAudio
The Economist [Sep 20th 2025]
✅ 已拷贝: 015 United States - Soldiers as cops.mp3 -> G:\Book\Economist\ReadedAudio
What Elon Musk gets wrong about Europe’s hard right
The Economist [Sep 20th 2025]
The Economist [Sep 20th 2025]
✅ 已拷贝: 017 United States - Violence.mp3 -> G:\Book\Economist\ReadedAudio
How Israel is losing America
The Economist [Sep 20th 2025]
✅ 已拷贝: 033 China - Environmental action.mp3 -> G:\Book\Economist\ReadedAudio
The surprising nature of protest in China
The Economist [Sep 20th 2025]
✅ 已拷贝: 067 Business - Schumpeter.mp3 -> G:\Book\Economist\ReadedAudio
The UN’s grim 