# Copy All Speech Files
@author: zhangwubin

@date: 2024/12/05

In [2]:
import os
import shutil

def copy_speech_files(source_dir: str, target_dir: str) -> None:
    """
    Copy every file ending in ``_speeches.json`` that sits directly inside a
    folder ending in ``_fed_speeches`` (anywhere under ``source_dir``) into
    ``target_dir``, preserving each folder's path relative to ``source_dir``.

    ``target_dir`` may be located inside ``source_dir``. It is excluded from
    the scan, so previously copied folders are not picked up as sources and
    copied again into an ever deeper ``target_dir/<target name>/...`` tree.

    A line ``Copied <source> to <target>`` is printed for every copied file.

    :param source_dir: root directory that is searched for speech folders
    :param target_dir: directory that receives the copies; created if missing
    """
    # Make sure the destination root exists before anything is copied.
    os.makedirs(target_dir, exist_ok=True)

    def _canonical(path: str) -> str:
        # Absolute, case-normalised form so the same directory compares equal
        # regardless of how the caller spelled it.
        return os.path.normcase(os.path.abspath(path))

    target_key = _canonical(target_dir)

    for root, dirs, _files in os.walk(source_dir):
        # Prune the target directory in place: os.walk only descends into the
        # names left in ``dirs``, so the copies themselves are never scanned.
        dirs[:] = [
            name for name in dirs
            if _canonical(os.path.join(root, name)) != target_key
        ]

        for dir_name in dirs:
            if not dir_name.endswith('_fed_speeches'):
                continue

            # Full path of the matching folder and its mirror under target_dir.
            fed_speeches_dir = os.path.join(root, dir_name)
            relative_path = os.path.relpath(fed_speeches_dir, source_dir)
            target_fed_speeches_dir = os.path.join(target_dir, relative_path)
            os.makedirs(target_fed_speeches_dir, exist_ok=True)

            # Only direct children of the folder are considered.
            for file_name in os.listdir(fed_speeches_dir):
                if not file_name.endswith('_speeches.json'):
                    continue
                source_file_path = os.path.join(fed_speeches_dir, file_name)
                target_file_path = os.path.join(target_fed_speeches_dir, file_name)
                # copy2 also carries over file metadata such as timestamps.
                shutil.copy2(source_file_path, target_file_path)
                print(f"Copied {source_file_path} to {target_file_path}")

# Example call: the target directory is located inside the source directory.
source_directory = '../data/fed_speeches'
target_directory = "../data/fed_speeches/versions"
copy_speech_files(source_dir=source_directory, target_dir=target_directory)

Copied ../data/fed_speeches\atlanta_fed_speeches\atlanta_speeches.json to ../data/fed_speeches/versions\atlanta_fed_speeches\atlanta_speeches.json
Copied ../data/fed_speeches\bog_fed_speeches\bog_speeches.json to ../data/fed_speeches/versions\bog_fed_speeches\bog_speeches.json
Copied ../data/fed_speeches\boston_fed_speeches\boston_speeches.json to ../data/fed_speeches/versions\boston_fed_speeches\boston_speeches.json
Copied ../data/fed_speeches\chicago_fed_speeches\chicago_speeches.json to ../data/fed_speeches/versions\chicago_fed_speeches\chicago_speeches.json
Copied ../data/fed_speeches\cleveland_fed_speeches\cleveland_speeches.json to ../data/fed_speeches/versions\cleveland_fed_speeches\cleveland_speeches.json
Copied ../data/fed_speeches\dallas_fed_speeches\dallas_speeches.json to ../data/fed_speeches/versions\dallas_fed_speeches\dallas_speeches.json
Copied ../data/fed_speeches\kansascity_fed_speeches\kansascity_speeches.json to ../data/fed_speeches/versions\kansascity_fed_speeches\

In [3]:
import re

def find_date_strings(text):
    """
    Find every date written in the '%B %d, %Y' layout (for example
    'June 15, 2021') in the given text.

    Month names must be spelled out in full with an initial capital, the day
    is one or two digits followed by a comma, and the year is four digits.

    :param text: the string to search
    :return: list of the matched date strings, in order of appearance
    """
    month_day_year = r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}\b'

    # The pattern has no capturing groups, so group(0) is the whole date.
    return [hit.group(0) for hit in re.finditer(month_day_year, text)]

# Example call: extract the dates from a sample sentence and print them.
text = (
    "The event is scheduled for June 15, 2021 and July 4, 2022. "
    "Please mark your calendars."
)
dates = find_date_strings(text=text)
print(dates)

['June 15, 2021', 'July 4, 2022']
