In [1]:
import pandas as pd

In [2]:
def extract_secondary_headings(markdown_file):
    """Return the trailing token of every level-two heading in a Markdown file.

    A line counts as a level-two heading when it starts with ``'## '``. Each
    such line is stripped of surrounding whitespace and only the text after
    its last space is kept.

    Args:
        markdown_file: Path of the UTF-8 encoded Markdown file to scan.

    Returns:
        list[str]: One entry per heading line, in file order.
    """
    with open(markdown_file, 'r', encoding='utf-8') as source:
        return [
            # Keep only what follows the final space of the stripped heading.
            raw.strip().rsplit(' ', 1)[-1]
            for raw in source
            if raw.startswith('## ')
        ]

def check_missing_date(mydate, start='1939-01-01', end='1945-12-31'):
    """Return the calendar days in ``[start, end]`` that are absent from ``mydate``.

    Args:
        mydate: Iterable of date strings formatted as ``YYYY-MM-DD``.
        start: First day of the expected range (anything ``pd.date_range``
            accepts). Defaults to 1939-01-01.
        end: Last day of the expected range, inclusive. Defaults to 1945-12-31.

    Returns:
        set[str]: ``YYYY-MM-DD`` strings for every day of the range that does
        not appear in ``mydate``; an empty set when nothing is missing.
    """
    # DatetimeIndex.date yields datetime.date objects, whose ISO form is the
    # same 'YYYY-MM-DD' text that str(date) produces.
    expected = {
        day.isoformat()
        for day in pd.date_range(start=start, end=end, freq='D').date
    }
    return expected - set(mydate)

In [5]:
# Collect the trailing token of every '## ' heading in ww2cn.md
# (presumably YYYY-MM-DD dates, since they are compared with date strings later — confirm).
mydate = extract_secondary_headings('ww2cn.md')

In [6]:
# Compute the missing dates once. A bare expression that is not the last one
# in a cell is evaluated and discarded, so the count is printed explicitly
# and the set itself is left as the cell's displayed value.
missing_dates = check_missing_date(mydate)
print(len(missing_dates))
missing_dates

set()

In [11]:
import re
from pathlib import Path

def split_markdown_file(markdown_file, folder):
    """Split a Markdown file into one file per level-two (``## ``) section.

    Level-one heading lines are removed, the remaining text is cut at every
    line that starts with ``##`` followed by whitespace, and each section is
    written to ``<folder>/<heading>.md``. In the output the section heading
    becomes a level-one heading and level-three headings become level-two.

    Text that precedes the first level-two heading is not a section and is
    not written anywhere.

    Args:
        markdown_file: Path of the UTF-8 encoded Markdown file to split.
        folder: Output directory; created, including parents, if missing.

    Returns:
        None. The result is the set of files written into ``folder``.
    """
    out_dir = Path(folder)
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(markdown_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Remove every level-one heading line.
    content = re.sub(r'^#\s.*$', '', content, flags=re.MULTILINE)

    # Cut the text at each level-two heading marker. Element 0 of the split is
    # always whatever came before the first marker (empty or preamble), never
    # a section, so it is dropped unconditionally.
    sections = re.split(r'^##\s', content, flags=re.MULTILINE)[1:]

    for section in sections:
        # The heading is the section's first line. partition() also handles a
        # final heading that is not followed by a newline.
        heading = section.partition('\n')[0].strip()
        # Spaces become underscores; path separators are neutralised so the
        # file always lands directly inside out_dir.
        file_name = heading.replace(' ', '_').replace('/', '_').replace('\\', '_') + '.md'

        # Promote the section heading to level one. No '## ' line can remain
        # inside a section after the split, so only level-three headings need
        # to be promoted (to level two).
        section = '# ' + section
        section = re.sub(r'^###\s', '## ', section, flags=re.MULTILINE)

        with open(out_dir / file_name, 'w', encoding='utf-8') as output_file:
            output_file.write(section)


In [12]:
# Write one Markdown file per '## ' section of ww2cn.md into the ww2cnbyday folder.
split_markdown_file('ww2cn.md', 'ww2cnbyday')


In [12]:
import json
from datetime import datetime, timedelta

def generate_date_json(file_path, events_file_path,
                       start_date=datetime(1939, 1, 1),
                       end_date=datetime(1945, 12, 31),
                       ref_date=datetime(1939, 9, 1)):
    """Write a year/month-grouped JSON calendar with optional per-day events.

    The output maps each year to its zero-padded months, and each month to a
    list of day entries ``{'date': 'YYYY-MM-DD', 'days_to': int}``. An entry
    also carries an ``'event'`` key when the events file lists that date.

    Args:
        file_path: Destination path of the generated JSON file.
        events_file_path: Path of a JSON file holding an object whose
            ``'events'`` key is a list of ``{'date': ..., 'event': ...}``
            objects. A missing ``'events'`` key means no events.
        start_date: First day to emit (inclusive). Defaults to 1939-01-01.
        end_date: Last day to emit (inclusive). Defaults to 1945-12-31.
        ref_date: Day from which ``days_to`` is counted; days before it get
            negative values. Defaults to 1939-09-01.

    Returns:
        None. The calendar is written to ``file_path``.

    Raises:
        KeyError: If an event lacks ``'date'``, or a matched event lacks
            ``'event'``.
    """
    # Load the events list.
    with open(events_file_path, 'r', encoding='utf-8') as events_file:
        events = json.load(events_file).get('events', [])

    # Index events by date once instead of scanning the list for every day.
    # setdefault keeps the first event listed for a date.
    events_by_date = {}
    for event in events:
        events_by_date.setdefault(event['date'], event)

    date_json = {}
    current_date = start_date
    while current_date <= end_date:
        date_str = current_date.strftime('%Y-%m-%d')
        entry = {
            'date': date_str,
            'days_to': (current_date - ref_date).days,
        }

        # Attach the event for this day, if any.
        event_info = events_by_date.get(date_str)
        if event_info:
            entry['event'] = event_info['event']

        # Year keys are ints (serialised as strings by json.dump); month keys
        # are zero-padded strings.
        months = date_json.setdefault(current_date.year, {})
        months.setdefault(current_date.strftime('%m'), []).append(entry)

        current_date += timedelta(days=1)

    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(date_json, file, ensure_ascii=False, indent=4)

In [13]:
# Build mydate.json: one entry per day, with events taken from ./static/events.json.
generate_date_json('mydate.json', './static/events.json')