In [12]:
import os
import re
import time

import pandas as pd
import requests

In [13]:
# API key and base URL for the Congress.gov bill endpoint.
# The key is read from the CONGRESS_API_KEY environment variable so that a real
# credential never has to be saved inside the notebook. The original placeholder
# is kept as the fallback, so pasting a key here by hand still works as before.
API_KEY = os.environ.get('CONGRESS_API_KEY', 'APIキー')
BASE_URL = 'https://api.congress.gov/v3/bill'


In [17]:

# Enrich the regulation list with bill metadata fetched from the Congress.gov API.
#
# For every row, the congress number, bill type and bill number are parsed out of
# `official_link`, the bill is requested from the API, and the title, latest
# action date/text and (when the latest action says it became public law) the
# announcement date are written back into `df`. Rows that cannot be parsed or
# fetched get status 'ERROR' plus an `error_reason`.

# Load the CSV (only the very first run starts from ai_regulation_list_US.csv).
df = pd.read_csv('updated_ai_regulation_list_US2.csv')

# Initialise the result columns.
df['bill_title'] = ''
df['latest_action_date'] = ''
df['status'] = ''
df['announced_date'] = ''
df['error_reason'] = ''

# Regex that extracts (congress, chamber/type slug, bill number) from official_link.
# - Accepts every ordinal suffix (101st, 102nd, 103rd, 118th), not only "th".
# - Accepts resolution slugs as well as "house-bill" / "senate-bill"; the previous
#   pattern required a trailing "-bill", so resolutions were always rejected.
pattern = re.compile(
    r'bill/(\d+)(?:st|nd|rd|th)?-congress/'
    r'((?:house|senate)(?:-(?:joint|concurrent))?-(?:bill|resolution))/(\d+)',
    re.IGNORECASE,
)

# congress.gov URL slug -> bill type code used in the API path.
# Keyed on the full slug so that e.g. "house-joint-resolution" is not mistaken
# for a plain House bill.
bill_type_map = {
    'house-bill': 'hr',
    'senate-bill': 's',
    'house-resolution': 'hres',
    'senate-resolution': 'sres',
    'house-joint-resolution': 'hjres',
    'senate-joint-resolution': 'sjres',
    'house-concurrent-resolution': 'hconres',
    'senate-concurrent-resolution': 'sconres',
}

# Without a timeout a single stalled connection would hang the whole run.
REQUEST_TIMEOUT_SECONDS = 30
# Pause between API calls (same 2 s pacing as before).
REQUEST_INTERVAL_SECONDS = 2

for idx, row in df.iterrows():
    link = row.get('official_link', '')
    # Empty CSV cells are read as NaN (a float); searching a non-string would
    # raise TypeError and abort the loop, so treat them as unparseable instead.
    match = pattern.search(link) if isinstance(link, str) else None
    if not match:
        df.at[idx, 'status'] = 'ERROR'
        df.at[idx, 'error_reason'] = 'Invalid URL format'
        continue

    congress, bill_type_raw, bill_number = match.groups()
    # Every slug the pattern can match has an entry in bill_type_map.
    bill_type = bill_type_map[bill_type_raw.lower()]

    # Build the API URL; the key goes in `params` rather than into the string.
    url = f"{BASE_URL}/{congress}/{bill_type}/{bill_number}"

    try:
        response = requests.get(
            url,
            params={'api_key': API_KEY, 'format': 'json'},
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code == 200:
            # `or {}` / `or ''` also cover keys that are present but null.
            data = response.json().get('bill') or {}
            latest_action = data.get('latestAction') or {}
            action_date = latest_action.get('actionDate') or ''
            action_text = latest_action.get('text') or ''

            df.at[idx, 'bill_title'] = data.get('title') or ''
            df.at[idx, 'latest_action_date'] = action_date
            df.at[idx, 'status'] = action_text

            # Check whether the bill was enacted.
            if 'became public law' in action_text.lower():
                df.at[idx, 'announced_date'] = action_date

        else:
            df.at[idx, 'status'] = 'ERROR'
            df.at[idx, 'error_reason'] = f"HTTP {response.status_code}"

    except Exception as e:
        # Best effort: record the failure and move on to the next row.
        # requests error messages can contain the full request URL, so strip the
        # API key before it ends up in the DataFrame (and in the saved CSV).
        reason = str(e)
        if API_KEY:
            reason = reason.replace(API_KEY, '***')
        df.at[idx, 'status'] = 'ERROR'
        df.at[idx, 'error_reason'] = reason

    time.sleep(REQUEST_INTERVAL_SECONDS)

# Show only the first 10 rows as a sanity check.
df[['regulation_name', 'official_link', 'bill_title', 'latest_action_date', 'status', 'announced_date', 'error_reason']].head(10)


Unnamed: 0,regulation_name,official_link,bill_title,latest_action_date,status,announced_date,error_reason
0,Emerging Innovative Border Technologies Act,https://www.congress.gov/bill/118th-congress/h...,Emerging Innovative Border Technologies Act,2024-12-19,Placed on Senate Legislative Calendar under Ge...,,
1,Creating Resources for Every American To Exper...,https://www.congress.gov/bill/118th-congress/h...,CREATE AI Act of 2023,2024-09-11,Ordered to be Reported (Amended) by Voice Vote.,,
2,Small Business Artificial Intelligence Trainin...,https://www.congress.gov/bill/118th-congress/s...,Small Business Artificial Intelligence Trainin...,2024-08-01,Placed on Senate Legislative Calendar under Ge...,,
3,AI Leadership To Enable Accountable Deployment...,https://www.congress.gov/bill/118th-congress/h...,AI LEAD Act,2024-06-14,Referred to the Committee on Oversight and Acc...,,
4,Small Business Artificial Intelligence Advance...,https://www.congress.gov/bill/118th-congress/h...,Small Business Artificial Intelligence Advance...,2024-09-11,Ordered to be Reported (Amended) by Voice Vote.,,
5,Consumer Literacy and Empowerment to Advance R...,https://www.congress.gov/bill/118th-congress/s...,Consumers LEARN AI Act,2024-07-30,Read twice and referred to the Committee on Co...,,
6,"Nurture Originals, Foster Art, and Keep Entert...",https://www.congress.gov/bill/118th-congress/s...,NO FAKES Act of 2024,2024-07-31,Read twice and referred to the Committee on th...,,
7,Healthy Technology Act of 2023,https://www.congress.gov/bill/118th-congress/h...,Healthy Technology Act of 2023,2023-01-20,Referred to the Subcommittee on Health.,,
8,AI Disclosure Act of 2023,https://www.congress.gov/bill/118th-congress/h...,AI Disclosure Act of 2023,2023-06-09,"Referred to the Subcommittee on Innovation, Da...",,
9,National AI Commission Act,https://www.congress.gov/bill/118th-congress/h...,National AI Commission Act,2023-06-20,"Referred to the House Committee on Science, Sp...",,


In [18]:
# Write the enriched table to the next CSV snapshot, leaving out the index column.
df.to_csv(path_or_buf='updated_ai_regulation_list_US3.csv', index=False)

In [7]:
# Count the rows whose official_link could not be parsed as a bill URL.
invalid_url_count = len(df.loc[df['error_reason'].eq('Invalid URL format')])
print(f"URLパースに失敗した行数: {invalid_url_count}")

URLパースに失敗した行数: 32


In [9]:
# Tabulate the name and link of the rows that failed URL parsing (at most 40 rows).
df.loc[df['error_reason'].eq('Invalid URL format'), ['regulation_name', 'official_link']].head(40)


Unnamed: 0,regulation_name,official_link
20,PREPARED for AI Act,PREPARED for AI Act
144,Next Generation Military Education Act,Next Generation Military Education Act
149,Decoupling America’s Artificial Intelligence C...,https://www.hawley.senate.gov/hawley-introduce...
150,Leveraging Artificial Intelligence to Streamli...,https://www.husted.senate.gov/wp-content/uploa...
151,TAME Extreme Weather and Wildfires Act,https://www.schatz.senate.gov/download/tame
152,Health Tech Investment Act,https://www.rounds.senate.gov/imo/media/doc/he...
153,Clean Cloud Act,https://www.epw.senate.gov/public/_cache/files...
154,Chip Security Act,https://www.cotton.senate.gov/imo/media/doc/ch...
156,Responsible Innovation and Safe Expertise (RIS...,https://www.lummis.senate.gov/wp-content/uploa...
157,Preventing Deep Fake Scams Act,https://www.husted.senate.gov/wp-content/uploa...
