In [1]:
import pandas as pd
import os
import re

# === Path settings ===
# Folder that is scanned for the .txt files to merge.
txt_folder = r"D:\Onedrive\Temp\Projects\STOCKAct\regression_results\20250501"  # change this to your actual directory
# Path of the Excel workbook that the merge step writes.
output_excel = r"D:\Onedrive\Temp\Projects\STOCKAct\regression_results\20250501\merged_regressions.xlsx"


In [2]:
# === Collect the names of all .txt files in the results folder ===
# Only the file names are kept (not full paths); the match on '.txt' is case-sensitive.
all_txt_files = list(filter(lambda entry: entry.endswith('.txt'), os.listdir(txt_folder)))

# === Custom sort-key function ===
def sort_key(filename):
    """Build the ordering key for one result file name.

    The extension is stripped and the start of the name is matched against a
    table tag: ``T<n>``, ``TB<n>`` or ``TOA<n>``.

    Returns:
        ``(0, n)`` for the T series, ``(1, n)`` for the TB series and
        ``(2, n)`` for the TOA series, so that T3, T4, ..., T11 come first,
        then TB tables, then TOA tables, each in numeric order.
        ``(99, stem)`` when the name carries no such tag, which places those
        files last, ordered by name.
    """
    stem, _ext = os.path.splitext(filename)

    # Pull the leading tag, e.g. T3, T11, TB2, TOA1
    found = re.match(r'(T\d+|TB\d+|TOA\d+)', stem)
    if found is None:
        return (99, stem)  # no tag: goes to the end

    tag = found.group(1)
    # Test the longest letter prefix first so that 'T' only claims tags
    # whose second character is already a digit.
    for letters, rank in (('TOA', 2), ('TB', 1), ('T', 0)):
        if tag.startswith(letters):
            return (rank, int(tag[len(letters):]))

# === Sort the file list ===
# sort_key only inspects the leading table tag, so files that share a tag
# (same series and number) get identical keys. os.listdir returns names in
# arbitrary order, so the file name is appended as a tie-breaker to keep the
# resulting sheet order reproducible from run to run.
all_txt_files_sorted = sorted(all_txt_files, key=lambda fname: (sort_key(fname), fname))

In [3]:
# === Write every txt file into one workbook, one sheet per file ===
def _unique_sheet_name(base, used, max_len=31):
    """Derive a worksheet name from ``base`` that does not clash with earlier ones.

    Args:
        base: Desired sheet name (here: the file name without its extension).
        used: Set of lower-cased names already handed out; updated in place.
        max_len: Maximum sheet-name length (31 characters).

    Returns:
        ``base`` with the characters ``[ ] : * ? / \\`` replaced by ``_`` and cut
        to ``max_len`` characters. If that name was already used (compared
        case-insensitively), a numeric suffix ``_2``, ``_3``, ... is appended,
        shortening the stem so the total still fits in ``max_len``.
    """
    cleaned = re.sub(r'[\[\]:*?/\\]', '_', base)
    candidate = cleaned[:max_len]
    attempt = 1
    while candidate.lower() in used:
        attempt += 1
        suffix = f"_{attempt}"
        candidate = cleaned[:max_len - len(suffix)] + suffix
    used.add(candidate.lower())
    return candidate


used_sheet_names = set()
with pd.ExcelWriter(output_excel, engine='xlsxwriter') as writer:
    for txt_file in all_txt_files_sorted:
        file_path = os.path.join(txt_folder, txt_file)

        # Read the tab-separated file without treating any row as a header,
        # then drop columns and rows that are entirely empty.
        df = (
            pd.read_csv(file_path, delimiter='\t', header=None, engine='python')
            .dropna(how='all', axis=1)
            .dropna(how='all', axis=0)
        )

        # One sheet per txt file, named after the file without its extension.
        # Plain truncation to 31 characters could make two long file names
        # collide and abort the whole export, so the name is de-duplicated.
        sheet_name = _unique_sheet_name(os.path.splitext(txt_file)[0], used_sheet_names)
        df.to_excel(writer, sheet_name=sheet_name, index=False, header=False)

print(f"成功合并 {len(all_txt_files)} 个文件到 {output_excel}！")

成功合并 72 个文件到 D:\Onedrive\Temp\Projects\STOCKAct\regression_results\20250501\merged_regressions.xlsx！
