In [None]:
import pandas as pd
import re

# ------------------------------------------
# 1. Load the wide table written by an earlier step (every cell kept as text)
# ------------------------------------------
df = pd.read_csv('temp_all_years_combined.csv', dtype=str)

# ------------------------------------------
# 2. Work out which years are present and which per-year prefixes exist
# ------------------------------------------
# A column whose name starts with 代码_ followed by four digits marks one year.
code_cols = list(filter(lambda name: re.match(r'代码_\d{4}', name), df.columns))
years = [int(name.split('_')[1]) for name in code_cols]
years.sort()

# The five per-year field prefixes used for horizontal filling and lookups.
FIELDS = ['代码', '一级行政区', '二级行政区', '名称', '级别']

# ------------------------------------------
# 3. Build df_filled, a copy forward-filled across the year columns, to be
#    used for lookups only. The original df is left untouched so that an
#    empty cell there keeps meaning "no change recorded".
# ------------------------------------------
df_filled = df.copy()
for prefix in FIELDS:
    yearly = [f'{prefix}_{yr}' for yr in years]
    # Empty strings count as missing, are filled left-to-right, and whatever
    # is still missing afterwards becomes an empty string again.
    widened = df_filled[yearly].replace({'': pd.NA})
    widened = widened.ffill(axis=1)
    df_filled[yearly] = widened.fillna('')

# ------------------------------------------
# 4. Prepare the empty result frame `combined` that the merge step fills.
#    Layout: five columns per year, in year order, followed by the two
#    bookkeeping columns 变更代码 (change code) and 变更时间 (change time).
# ------------------------------------------
combined_columns = [
    f'{prefix}_{yr}'
    for yr in years
    for prefix in ('代码', '一级行政区', '二级行政区', '名称', '级别')
]
combined_columns.extend(['变更代码', '变更时间'])

combined = pd.DataFrame(columns=combined_columns)

# Walk through every year in `years` and merge that year's rows into `combined`.
# NOTE(review): this loop reads the columns '启用时间', '代码', '状态', '新代码' and
# '变更（弃用）时间' from `df`. Those look like raw-table columns, not the
# per-year wide-table columns — confirm that `df` really contains them.
for y in years:
    y_str = str(y)
    # Rows whose 启用时间 (enable year) equals the current year.
    df_this = df[df['启用时间'] == y_str].copy()

    # Visit the rows in the order they appear in the source frame.
    for _, row in df_this.iterrows():
        code_cur = row['代码']
        status = row['状态']
        # Normalise 新代码 (new code) and the change time: anything that is not
        # a string (e.g. NaN) is treated as empty.
        raw_new = row.get('新代码', '')
        new_field = raw_new.strip() if isinstance(raw_new, str) else ''
        raw_ct = row.get('变更（弃用）时间', '')
        ct_field = raw_ct.strip() if isinstance(raw_ct, str) else ''

        # -- A) Look for lineage rows: 变更代码 == code_cur and 变更时间 == y_str.
        mask = (combined['变更代码'] == code_cur) & (combined['变更时间'] == y_str)
        candidates = combined.index[mask].tolist()

        # Keep only candidates whose previous-year code also matches, so that
        # the row is not attached to the wrong lineage.
        true_lineage = []
        for idx in candidates:
            # For the first year there is no previous-year column to compare,
            # so every candidate is accepted.
            if y == years[0]:
                true_lineage.append(idx)
            else:
                prev_col = f'代码_{y-1}'
                # Previous-year code stored on the candidate row.
                val_comb_prev = combined.at[idx, prev_col] or ''
                # Previous-year code of the current source row ('' when the
                # column is absent or the value is falsy).
                val_row_prev = row.get(prev_col, '') or ''
                if val_comb_prev == val_row_prev:
                    true_lineage.append(idx)

        if true_lineage:
            # When several candidates survive, the first one in index order is used.
            # NOTE(review): the original comment spoke of preferring the "latest
            # inserted" row, but the code takes element 0 — confirm the intent.
            idx0 = true_lineage[0]

            # -- 1) Write this year's five fields onto the lineage row.
            combined.at[idx0, f'代码_{y}']       = row['代码']
            combined.at[idx0, f'一级行政区_{y}'] = row['一级行政区']
            combined.at[idx0, f'二级行政区_{y}'] = row['二级行政区']
            combined.at[idx0, f'名称_{y}']       = row['名称']
            combined.at[idx0, f'级别_{y}']       = row['级别']

            # The pending change on that row has now been consumed.
            combined.at[idx0, '变更代码'] = ''
            combined.at[idx0, '变更时间'] = ''

            # -- 2) Status 启用 (enabled) without new codes: nothing to add.
            if status == '启用' and new_field == '':
                continue

            # -- 3) Status 启用 with new codes: split new_field (commas are
            #       treated as semicolons) and append one child row per entry.
            if status == '启用' and new_field != '':
                base = {c: combined.at[idx0, c] for c in combined_columns[:-2]}
                clean_new = new_field.replace(',', ';')
                parts = [p.strip() for p in clean_new.split(';') if p.strip()]
                for part in parts:
                    # "<6 digits>" optionally followed by "[<4-digit year>]".
                    m = re.match(r'(\d{6})(?:\[(\d{4})\])?$', part)
                    if m:
                        nc = m.group(1)
                        tm = m.group(2) or ''
                    else:
                        # Unparseable entry: keep the text as the code, no year.
                        nc, tm = part, ''
                    rec = base.copy()
                    rec['变更代码'] = nc
                    rec['变更时间'] = tm
                    # ignore_index=True renumbers the index labels to 0..n-1.
                    combined = pd.concat([combined, pd.DataFrame([rec], columns=combined_columns)], ignore_index=True)
                continue

            # -- 4) Status 变更 (changed) / 弃用 (retired): the lineage row itself
            #       is replaced by one child row per new code.
            if status in ['变更', '弃用']:
                if new_field == '':
                    # No new code: the updated lineage row is left as it is.
                    continue
                base = {c: combined.at[idx0, c] for c in combined_columns[:-2]}
                # Remove the lineage row; its data lives on in the child rows.
                combined.drop(index=idx0, inplace=True)
                clean_new = new_field.replace(',', ';')
                parts = [p.strip() for p in clean_new.split(';') if p.strip()]
                for part in parts:
                    m = re.match(r'(\d{6})(?:\[(\d{4})\])?$', part)
                    if m:
                        nc = m.group(1)
                        # Without a bracketed year the row's change time is used.
                        tm = m.group(2) or ct_field
                    else:
                        nc, tm = part, ct_field
                    rec = base.copy()
                    rec['变更代码'] = nc
                    rec['变更时间'] = tm
                    combined = pd.concat([combined, pd.DataFrame([rec], columns=combined_columns)], ignore_index=True)
                continue

        else:
            # -- B) No lineage: a brand-new record. All year columns start
            #       empty and only the current year is filled in.
            base = {c: '' for c in combined_columns[:-2]}
            base[f'代码_{y}']       = row['代码']
            base[f'一级行政区_{y}'] = row['一级行政区']
            base[f'二级行政区_{y}'] = row['二级行政区']
            base[f'名称_{y}']       = row['名称']
            base[f'级别_{y}']       = row['级别']

            # -- B.1) 启用 without new codes: a single row with no pending change.
            if status == '启用' and new_field == '':
                rec = base.copy()
                rec['变更代码'] = ''
                rec['变更时间'] = ''
                combined = pd.concat([combined, pd.DataFrame([rec], columns=combined_columns)], ignore_index=True)
                continue

            # -- B.2) 启用 with new codes: the original row (no pending change)
            #         followed by one child row per new code.
            if status == '启用' and new_field != '':
                rec0 = base.copy()
                rec0['变更代码'] = ''
                rec0['变更时间'] = ''
                combined = pd.concat([combined, pd.DataFrame([rec0], columns=combined_columns)], ignore_index=True)
                clean_new = new_field.replace(',', ';')
                parts = [p.strip() for p in clean_new.split(';') if p.strip()]
                for part in parts:
                    m = re.match(r'(\d{6})(?:\[(\d{4})\])?$', part)
                    if m:
                        nc = m.group(1)
                        tm = m.group(2) or ''
                    else:
                        nc, tm = part, ''
                    rec = base.copy()
                    rec['变更代码'] = nc
                    rec['变更时间'] = tm
                    combined = pd.concat([combined, pd.DataFrame([rec], columns=combined_columns)], ignore_index=True)
                continue

            # -- B.3) 变更 / 弃用: only child rows are written; with no new code
            #         the source row produces nothing at all.
            if status in ['变更', '弃用']:
                if new_field == '':
                    continue
                clean_new = new_field.replace(',', ';')
                parts = [p.strip() for p in clean_new.split(';') if p.strip()]
                for part in parts:
                    m = re.match(r'(\d{6})(?:\[(\d{4})\])?$', part)
                    if m:
                        nc = m.group(1)
                        tm = m.group(2) or ct_field
                    else:
                        nc, tm = part, ct_field
                    rec = base.copy()
                    rec['变更代码'] = nc
                    rec['变更时间'] = tm
                    combined = pd.concat([combined, pd.DataFrame([rec], columns=combined_columns)], ignore_index=True)
                continue

# `combined` now holds the wide table assembled by the merge step.
# Persist it as an intermediate file before filling in missing year fields.
combined.to_csv('temp_all_years_strictly_combined.csv', index=False)

# ------------------------------------------
# 5. Read the intermediate file back (all cells as text) and prepare the
#    lookup copy used by the "fill only the flagged rows" step.
# ------------------------------------------
df2 = pd.read_csv('temp_all_years_strictly_combined.csv', dtype=str)

# df2_filled: the same data, forward-filled across the year columns per field.
df2_filled = df2.copy()
for prefix in FIELDS:
    yearly = [f'{prefix}_{yr}' for yr in years]
    widened = df2_filled[yearly].replace({'': pd.NA})
    widened = widened.ffill(axis=1)
    df2_filled[yearly] = widened.fillna('')

def _as_text(value):
    """Return ``value`` stripped when it is a string, otherwise ``''``.

    Cells that pandas read as missing are NaN floats, not strings, so they
    are treated as empty.
    """
    return value.strip() if isinstance(value, str) else ''


# 6. Flag the rows that need a year filled in: 变更时间 and 变更代码 are both
#    non-empty while 代码_<变更时间> is still empty in df2.
#    int() raises ValueError when 变更时间 is not an integer string.
to_fix2 = []
for idx, row in df2.iterrows():
    ct = _as_text(row.get('变更时间', ''))
    cc = _as_text(row.get('变更代码', ''))
    if not (ct and cc):
        continue
    code_col = f'代码_{int(ct)}'
    # A change year without a matching column cannot be filled; skipping it
    # here avoids a KeyError in the lookup below.
    if code_col not in df2.columns:
        continue
    if not _as_text(row[code_col]):
        to_fix2.append(idx)

# 7. For the flagged rows only, copy that year's five fields from a matching
#    row of df2_filled and clear the pending change.
for idx in to_fix2:
    cc = _as_text(df2.at[idx, '变更代码'])
    yr = int(_as_text(df2.at[idx, '变更时间']))
    code_col = f'代码_{yr}'

    # Candidates: rows whose forward-filled code for that year equals 变更代码.
    cands = df2_filled.index[df2_filled[code_col] == cc].tolist()
    if not cands:
        continue

    # Prefer the candidate whose previous-year code equals this row's
    # previous-year code; otherwise fall back to the first candidate. The
    # first year has no previous-year column, so the fallback is used there
    # instead of raising a KeyError.
    chosen = cands[0]
    prev_col = f'代码_{yr-1}'
    if prev_col in df2.columns:
        prev_val = _as_text(df2.at[idx, prev_col])
        for cand in cands:
            if (df2_filled.at[cand, prev_col] or '') == prev_val:
                chosen = cand
                break

    # Copy the five fields of that year.
    for fld in FIELDS:
        colnm = f'{fld}_{yr}'
        df2.at[idx, colnm] = df2_filled.at[chosen, colnm]

    # The pending change has been applied.
    df2.at[idx, '变更时间'] = ''
    df2.at[idx, '变更代码'] = ''

# 8. Write the final result to CSV.
df2.to_csv('temp_all_years_combined_fixed.csv', index=False)


In [None]:
import pandas as pd
import re

# -------------- 1. Read the raw data (every cell kept as text) --------------
df = pd.read_csv('../result.csv', dtype=str)

# -------------- 2. Year list and column names --------------
years = list(range(1980, 2025))  # 1980 through 2024

# Five columns per year, in year order:
# 代码_<year>, 一级行政区_<year>, 二级行政区_<year>, 名称_<year>, 级别_<year>
year_cols = [
    f'{prefix}_{y}'
    for y in years
    for prefix in ('代码', '一级行政区', '二级行政区', '名称', '级别')
]

# Full layout of the result: all per-year columns plus 变更代码 and 变更时间.
all_columns = [*year_cols, '变更代码', '变更时间']

# Empty result frame, filled by the merge step.
processed_df = pd.DataFrame(columns=all_columns)

# -------------- 3. Merge year by year --------------
# One 新代码 entry: "<6-digit code>" optionally followed by "[<4-digit year>]".
_NEW_CODE_RE = re.compile(r'(\d{6})(?:\[(\d{4})\])?$')
_ROW_FIELDS = ('代码', '一级行政区', '二级行政区', '名称', '级别')


def _as_text(value):
    """Return ``value`` stripped when it is a string, otherwise ``''``.

    ``read_csv(dtype=str)`` still yields NaN (a float) for empty cells;
    ``NaN or ''`` evaluates to NaN, so a plain ``or ''`` fallback followed by
    ``.strip()`` raises AttributeError. Non-strings are treated as empty.
    """
    return value.strip() if isinstance(value, str) else ''


def _parse_new_codes(field, default_time):
    """Split a 新代码 cell into ``(new_code, change_time)`` pairs.

    Commas are treated like semicolons and blank entries are dropped. An
    entry of the form ``123456[1999]`` yields ``('123456', '1999')``; an
    entry without a bracketed year, or one that does not match the pattern
    at all, gets ``default_time``.
    """
    pairs = []
    for part in field.replace(',', ';').split(';'):
        part = part.strip()
        if not part:
            continue
        m = _NEW_CODE_RE.match(part)
        if m:
            pairs.append((m.group(1), m.group(2) or default_time))
        else:
            pairs.append((part, default_time))
    return pairs


def _append_records(frame, records):
    """Return ``frame`` with ``records`` (dicts keyed by column) appended."""
    if not records:
        return frame
    return pd.concat(
        [frame, pd.DataFrame(records, columns=all_columns)],
        ignore_index=True
    )


for year in years:
    year_str = str(year)
    # Rows whose 启用时间 (enable year) equals the current year.
    df_this_year = df[df['启用时间'] == year_str].copy()

    for _, row in df_this_year.iterrows():
        code_this = row['代码']
        status = row['状态']
        new_codes_field = _as_text(row.get('新代码', ''))
        change_datetime = _as_text(row.get('变更（弃用）时间', ''))

        is_enabled = status == '启用'
        is_replaced = status in ('变更', '弃用')
        # This year's five values, keyed by their wide-table column name.
        current = {f'{fld}_{year}': row[fld] for fld in _ROW_FIELDS}

        # Child rows are produced only for the three known statuses. For 启用
        # an entry without a bracketed year has no change time; for 变更/弃用
        # it inherits the row's own change time.
        if is_enabled:
            children = _parse_new_codes(new_codes_field, '')
        elif is_replaced:
            children = _parse_new_codes(new_codes_field, change_datetime)
        else:
            children = []

        # -- A) Lineage rows: 变更代码 == code_this and 变更时间 == year_str.
        mask_lineage = (
            (processed_df['变更代码'] == code_this) &
            (processed_df['变更时间'] == year_str)
        )
        lineage_indices = processed_df.index[mask_lineage].tolist()

        new_records = []
        if lineage_indices:
            # Update every lineage row and collect its child rows BEFORE the
            # frame is dropped from or concatenated to: drop followed by
            # concat(ignore_index=True) renumbers the index, which would make
            # the remaining labels in lineage_indices point at other rows.
            for idx in lineage_indices:
                for col, value in current.items():
                    processed_df.at[idx, col] = value
                # The pending change on this row has been consumed.
                processed_df.at[idx, '变更代码'] = ''
                processed_df.at[idx, '变更时间'] = ''

                if children:
                    base = {col: processed_df.at[idx, col] for col in year_cols}
                    new_records.extend(
                        {**base, '变更代码': new_code, '变更时间': change_time}
                        for new_code, change_time in children
                    )

            # 变更/弃用 with new codes: the lineage rows are replaced by their
            # child rows. With 启用 they are kept; without new codes nothing
            # else happens.
            if is_replaced and new_codes_field:
                processed_df = processed_df.drop(index=lineage_indices)
        else:
            # -- B) No lineage: a brand-new record with only this year filled.
            base = dict.fromkeys(year_cols, '')
            base.update(current)
            if is_enabled:
                # The original row itself, with no pending change.
                new_records.append({**base, '变更代码': '', '变更时间': ''})
            new_records.extend(
                {**base, '变更代码': new_code, '变更时间': change_time}
                for new_code, change_time in children
            )

        processed_df = _append_records(processed_df, new_records)

# -------------- 4. Reset the index and save the result --------------
df_final = processed_df.reset_index(drop=True)
df_final.to_csv('../temp_all_years_combined.csv', index=False)


In [12]:
import pandas as pd
import re

# 1. Load the wide table written earlier (every cell kept as text).
df = pd.read_csv('../temp_all_years_combined.csv', dtype=str)

# 2. Collect the years from the column names that start with 代码_YYYY.
code_cols = list(filter(lambda name: re.match(r'代码_\d{4}', name), df.columns))
years = [int(name.split('_')[1]) for name in code_cols]
years.sort()

# 3. Prefixes of the per-year fields that get copied.
fields = ['代码', '一级行政区', '二级行政区', '名称', '级别']

# 4. Lookup copy: for each field, forward-fill across the year columns so
#    that every year carries the most recent earlier value.
df_filled = df.copy()
for prefix in fields:
    yearly = [f'{prefix}_{yr}' for yr in years]
    widened = df_filled[yearly].replace({'': pd.NA})
    widened = widened.ffill(axis=1)
    df_filled[yearly] = widened.fillna('')

def _as_text(value):
    """Return ``value`` stripped when it is a string, otherwise ``''``.

    Cells that pandas read as missing are NaN floats, not strings, so they
    are treated as empty.
    """
    return value.strip() if isinstance(value, str) else ''


# 5. Flag the rows to repair: 变更时间 and 变更代码 are both non-empty while
#    代码_<变更时间> is still empty in the original table.
#    int() raises ValueError when 变更时间 is not an integer string.
to_fix = []
for idx, row in df.iterrows():
    ct = _as_text(row.get('变更时间', ''))
    cc = _as_text(row.get('变更代码', ''))
    if not (ct and cc):
        continue
    code_col = f'代码_{int(ct)}'
    # A change year without a matching column cannot be filled; flagging it
    # would only make the lookup below fail with a KeyError.
    if code_col not in df.columns:
        continue
    if not _as_text(row[code_col]):
        to_fix.append(idx)

# 6. Repair the flagged rows: find a row in df_filled whose code for that
#    year equals 变更代码 and copy that year's fields across.
for idx in to_fix:
    cc = _as_text(df.at[idx, '变更代码'])
    year = int(_as_text(df.at[idx, '变更时间']))
    code_col = f'代码_{year}'

    matches = df_filled.index[df_filled[code_col] == cc].tolist()
    if not matches:
        continue
    # The first matching row is used as the source.
    src = matches[0]

    # Copy only the five per-year fields of that year.
    for field in fields:
        col = f'{field}_{year}'
        df.at[idx, col] = df_filled.at[src, col]

    # The pending change has been applied.
    df.at[idx, '变更时间'] = ''
    df.at[idx, '变更代码'] = ''

# 7. Save the repaired table.
df.to_csv('../temp_all_years_combined_fixed.csv', index=False)


  df_filled[cols]
  df_filled[cols]
  df_filled[cols]
  df_filled[cols]
  df_filled[cols]
  df_filled[cols]


In [16]:
import pandas as pd
import re

# ------------------------------------------
# 1. Read the raw result.csv (every cell kept as text)
# ------------------------------------------
df_raw = pd.read_csv('../result.csv', dtype=str)

# ------------------------------------------
# 2. Fixed year range: 1980 through 2024
# ------------------------------------------
years = list(range(1980, 2025))

# The five per-year field prefixes.
FIELDS = ['代码', '一级行政区', '二级行政区', '名称', '级别']

# ------------------------------------------
# 3. Column layout of the wide table: the five fields for each year, in
#    year order, followed by 变更代码 and 变更时间.
# ------------------------------------------
combined_columns = [f'{prefix}_{yr}' for yr in years for prefix in FIELDS]
combined_columns.extend(['变更代码', '变更时间'])

# Empty frame that the merge step fills.
combined = pd.DataFrame(columns=combined_columns)

# ------------------------------------------
# 4. Walk through the years in order and merge each year's rows into
#    `combined`, attaching a row to an existing lineage row when one is
#    waiting for it.
# ------------------------------------------
# One 新代码 entry: "<6-digit code>" optionally followed by "[<4-digit year>]".
_CODE_WITH_YEAR = re.compile(r'(\d{6})(?:\[(\d{4})\])?$')


def _stripped(value):
    """Return ``value.strip()`` for strings and ``''`` for anything else."""
    if isinstance(value, str):
        return value.strip()
    return ''


def _child_records(base, new_field, fallback_time):
    """Build one record per entry of ``new_field`` on top of ``base``.

    Commas count as semicolons and blank entries are skipped. Each record is
    a copy of ``base`` with 变更代码 set to the entry's code and 变更时间 set to
    its bracketed year, or to ``fallback_time`` when there is none.
    """
    records = []
    for chunk in new_field.replace(',', ';').split(';'):
        chunk = chunk.strip()
        if not chunk:
            continue
        found = _CODE_WITH_YEAR.match(chunk)
        if found:
            nc, tm = found.group(1), found.group(2) or fallback_time
        else:
            # Unparseable entry: keep the text itself as the code.
            nc, tm = chunk, fallback_time
        rec = dict(base)
        rec['变更代码'] = nc
        rec['变更时间'] = tm
        records.append(rec)
    return records


# All per-year columns, i.e. everything except 变更代码 / 变更时间.
_year_columns = combined_columns[:-2]

for y in years:
    y_str = str(y)
    # Rows whose 启用时间 (enable year) equals the current year.
    df_this_year = df_raw[df_raw['启用时间'] == y_str].copy()

    for _, row in df_this_year.iterrows():
        status = row['状态']
        new_field = _stripped(row.get('新代码', ''))
        ct_field = _stripped(row.get('变更（弃用）时间', ''))
        enabled = status == '启用'
        superseded = status in ['变更', '弃用']
        # This year's five values keyed by their wide-table column name.
        this_year = {f'{fld}_{y}': row[fld] for fld in FIELDS}
        pending = []

        # -- A) Rows waiting for this code in this year.
        mask = (combined['变更代码'] == row['代码']) & (combined['变更时间'] == y_str)
        candidates = combined.index[mask].tolist()

        if not candidates:
            # -- B) Brand-new record: only the current year is filled in.
            base = dict.fromkeys(_year_columns, '')
            base.update(this_year)
            if enabled:
                # The row itself (no pending change), then one child row per
                # new code, if any.
                pending.append({**base, '变更代码': '', '变更时间': ''})
                pending += _child_records(base, new_field, '')
            elif superseded:
                # Only child rows; nothing at all when there is no new code.
                pending += _child_records(base, new_field, ct_field)
        else:
            # Only the first waiting row is used.
            idx0 = candidates[0]

            # 1) Write this year's fields and clear the consumed change.
            for col, value in this_year.items():
                combined.at[idx0, col] = value
            combined.at[idx0, '变更代码'] = ''
            combined.at[idx0, '变更时间'] = ''

            # 2) Without new codes, or with any other status, the write-back
            #    above is all that happens.
            if new_field != '' and (enabled or superseded):
                base = {c: combined.at[idx0, c] for c in _year_columns}
                if enabled:
                    # 3) 启用: the lineage row stays, child rows are added.
                    pending = _child_records(base, new_field, '')
                else:
                    # 4) 变更 / 弃用: the lineage row is replaced by its child
                    #    rows, which inherit the row's change time by default.
                    combined.drop(index=idx0, inplace=True)
                    pending = _child_records(base, new_field, ct_field)

        if pending:
            combined = pd.concat(
                [combined, pd.DataFrame(pending, columns=combined_columns)],
                ignore_index=True
            )

# ------------------------------------------
# 5. Fill in missing year data on the merged result, but only for rows that
#    are explicitly flagged below.
# ------------------------------------------
def _as_text(value):
    """Return ``value`` stripped when it is a string, otherwise ``''``.

    Non-string cells (for example NaN) are treated as empty.
    """
    return value.strip() if isinstance(value, str) else ''


df_comb = combined.copy()

# Lookup copy: for each field, forward-fill across the year columns.
df_filled = df_comb.copy()
for fld in FIELDS:
    cols = [f'{fld}_{yr}' for yr in years]
    df_filled[cols] = (
        df_filled[cols]
        .replace({'': pd.NA})
        .ffill(axis=1)
        .fillna('')
    )

# 6. Flag the rows to repair: 变更时间 and 变更代码 are both non-empty while
#    代码_<变更时间> is still empty.
#    int() raises ValueError when 变更时间 is not an integer string.
to_fix = []
for idx, row in df_comb.iterrows():
    ct = _as_text(row.get('变更时间', ''))
    cc = _as_text(row.get('变更代码', ''))
    if not (ct and cc):
        continue
    code_col = f'代码_{int(ct)}'
    # A change year outside the year columns cannot be filled; flagging it
    # would only make the lookup below fail with a KeyError.
    if code_col not in df_comb.columns:
        continue
    if not _as_text(row[code_col]):
        to_fix.append(idx)

# 7. For the flagged rows only, copy that year's five fields from the first
#    row of df_filled whose code for that year equals 变更代码, then clear the
#    pending change.
for idx in to_fix:
    cc = _as_text(df_comb.at[idx, '变更代码'])
    yr = int(_as_text(df_comb.at[idx, '变更时间']))
    code_col = f'代码_{yr}'

    cands = df_filled.index[df_filled[code_col] == cc].tolist()
    if not cands:
        continue

    chosen = cands[0]
    for fld in FIELDS:
        colnm = f'{fld}_{yr}'
        df_comb.at[idx, colnm] = df_filled.at[chosen, colnm]

    df_comb.at[idx, '变更时间'] = ''
    df_comb.at[idx, '变更代码'] = ''

# 8. Save the final result.
df_comb.to_csv('../temp_all_years_combined_fixed.csv', index=False)
