In [1]:
import pandas as pd

In [2]:
# Load the base regulation list and the update file whose values are merged
# into it further down.
base_df, update_df = (
    pd.read_csv(csv_path)
    for csv_path in ('ai_regulation_list.csv', 'updated_ai_regulation_list_US3.csv')
)

In [3]:
# Column used to match rows of base_df with rows of update_df.
key_col = 'regulation_name'

# Columns whose values are overwritten from update_df (announced_date included).
merge_columns = ['announced_date', 'enforcement_date', 'category', 'official_link', 'status']

# Restrict to the columns update_df actually has, so the selection below
# cannot raise a KeyError.
existing_merge_columns = [col for col in merge_columns if col in update_df.columns]

# Keep a single update row per key. A left merge emits one output row for
# every matching right-hand row, so a regulation name that appears more than
# once in update_df would silently duplicate the corresponding base_df row.
# NOTE(review): the last occurrence is kept on the assumption that later rows
# are the more recent ones -- confirm against how update_df is produced.
update_subset = (
    update_df[[key_col] + existing_merge_columns]
    .drop_duplicates(subset=key_col, keep='last')
)

# Left merge: every base_df row is kept; update values arrive in
# '<col>_updated' columns whenever the column name already exists in base_df.
merged_df = base_df.merge(
    update_subset,
    on=key_col,
    how='left',
    suffixes=('', '_updated')
)

# The merge must be a pure column overwrite and never add or remove rows.
assert len(merged_df) == len(base_df), "merge changed the number of rows"

# Overwrite: prefer the updated value, fall back to the base value where the
# update is missing (NaN), then discard the temporary '_updated' columns.
updated_cols = []
for col in existing_merge_columns:
    updated_col = f'{col}_updated'
    if updated_col in merged_df.columns:
        merged_df[col] = merged_df[updated_col].combine_first(merged_df[col])
        updated_cols.append(updated_col)
merged_df = merged_df.drop(columns=updated_cols)

# Show the first rows of the result.
merged_df.head(4)

Unnamed: 0,regulation_name,country,announced_date,enforcement_date,category,api_category,reference_point,purpose,keywords,official_link,status
0,Emerging Innovative Border Technologies Act,United States,,,Law,acts_bills_reform,<News summary only available for Tier 2 subscr...,,,https://www.congress.gov/bill/118th-congress/h...,Placed on Senate Legislative Calendar under Ge...
1,Creating Resources for Every American To Exper...,United States,,,Law,acts_bills_reform,<News summary only available for Tier 2 subscr...,,,https://www.congress.gov/bill/118th-congress/h...,Ordered to be Reported (Amended) by Voice Vote.
2,Small Business Artificial Intelligence Trainin...,United States,,,Law,acts_bills_reform,<News summary only available for Tier 2 subscr...,,,https://www.congress.gov/bill/118th-congress/s...,Placed on Senate Legislative Calendar under Ge...
3,AI Leadership To Enable Accountable Deployment...,United States,,,Law,acts_bills_reform,<News summary only available for Tier 2 subscr...,,,https://www.congress.gov/bill/118th-congress/h...,Referred to the Committee on Oversight and Acc...


In [4]:
# Count the rows of merged_df whose regulation_name does not appear in
# update_df, i.e. rows the merge could not update.
# Testing "all target columns are NaN" after the overwrite does not detect
# these rows: an unmatched row keeps whatever values base_df already had, so
# it is only all-NaN when base_df itself was empty in every target column.
not_matched_mask = ~merged_df[key_col].isin(update_df[key_col])
missing_count = not_matched_mask.sum()
print(f"マージできなかった（update_df に該当なし）行数: {missing_count}")

マージできなかった（全列がNaN）行数: 0


In [5]:
# Write the merged table to CSV, leaving out the DataFrame index.
merged_df.to_csv(path_or_buf='ai_regulation_list2.csv', index=False)


In [10]:
import pandas as pd

# Read the main regulation list and the EU regulation list.
# NOTE(review): the input here is "ai_regulation_list3.csv", while the earlier
# cell writes 'ai_regulation_list2.csv' and the output below is named
# "ai_regulation_list2_merged.csv" -- confirm this is the intended input file.
df_main = pd.read_csv("ai_regulation_list3.csv")
df_eu = pd.read_csv("ai_regulation_listEU.csv")

# Append all EU rows below the main rows and renumber the index from 0.
df_merged = pd.concat(objs=[df_main, df_eu], axis=0, ignore_index=True)

# Save the combined table as a new CSV file, leaving out the index.
df_merged.to_csv(path_or_buf="ai_regulation_list2_merged.csv", index=False)

print("マージ完了：ai_regulation_list2_merged.csv を出力しました。")


マージ完了：ai_regulation_list2_merged.csv を出力しました。
