In [None]:
# Local installs
%pip install python
%pip install requests
%pip install pandas

import logging
import os # for reading the API key from the environment
from datetime import datetime # for formatting dates

import pandas as pd # for data manipulation / handling
import requests # for scraping

In [None]:

FILENAME = '21-22'
# TODO: Adjust path in above or below line to support new organizational directories
# Unique docket ids from the 'Docket ID' column of the input CSV. Missing values
# are dropped first so an empty cell is never sent to the API as a docket id.
docket_ids = (
    pd.read_csv(f'{FILENAME}.csv')['Docket ID']
    .dropna()
    .drop_duplicates()
    .tolist()
)
# logging.basicConfig(filename='docket_scraping.log', level=logging.INFO)

# The Regulations.gov API key is read from the environment instead of being
# stored in the notebook. The key lives under "Tristan Cooper" in the API key
# vault; export it as REGULATIONS_GOV_API_KEY before starting the kernel.
# NOTE(review): the key that used to be hardcoded here should be treated as
# exposed and rotated.
api_key = os.environ.get('REGULATIONS_GOV_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the REGULATIONS_GOV_API_KEY environment variable to your "
        "Regulations.gov API key before running this notebook."
    )

def fetch_docket_title(docket_id, api_key, timeout=30):
    """Return the title of a Regulations.gov docket, or None if unavailable.

    Args:
        docket_id: Docket identifier interpolated into the v4 dockets URL.
        api_key: API key sent in the ``X-Api-Key`` request header.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the whole run.

    Returns:
        The docket's ``title`` attribute. ``None`` is returned (and a message
        printed) when the response status is not 200 or when the payload
        carries no title.
    """
    docket_url = f"https://api.regulations.gov/v4/dockets/{docket_id}"
    headers = {"X-Api-Key": api_key}
    docket_response = requests.get(docket_url, headers=headers, timeout=timeout)

    if docket_response.status_code != 200:
        print(f"Error fetching title for docket ID {docket_id}: {docket_response.status_code}, Details: {docket_response.text}")
        return None

    # Walk the payload defensively: a 200 response without the expected
    # data/attributes/title nesting is reported as "no title" instead of
    # surfacing as a KeyError/TypeError.
    docket_data = docket_response.json() or {}
    attributes = (docket_data.get('data') or {}).get('attributes') or {}
    title = attributes.get('title')
    if title is None:
        print(f"No title found in response for docket ID {docket_id}")
    return title

def fetch_commenters_info(docket_id, api_key, timeout=30):
    """Collect the named (non-anonymous) commenters on a Regulations.gov docket.

    Comments are read page by page from the v4 ``comments`` endpoint. A comment
    contributes a row only when its title starts with "Comment Submitted by "
    and does not contain "Anonymous"; the commenter name is the remainder of
    the title after that prefix.

    Args:
        docket_id: Docket identifier used in the ``filter[docketId]`` query.
        api_key: API key sent in the ``X-Api-Key`` request header.
        timeout: Seconds to wait on each comments request before ``requests``
            raises a timeout error.

    Returns:
        A list of dicts with the keys 'Docket ID', 'Docket Name',
        'Commenter Name' and 'Date' (the part of ``postedDate`` before the
        'T', or None when the comment has no posted date). The list is empty
        when the docket title cannot be fetched or the first page request
        fails; if a later page fails, the rows gathered so far are returned.
    """
    # Resolve the title first: no rows can be emitted without it, so there is
    # no reason to spend comment requests on a docket whose title is missing.
    docket_title = fetch_docket_title(docket_id, api_key)
    if not docket_title:
        return []

    headers = {"X-Api-Key": api_key}
    submitted_by_prefix = "Comment Submitted by "
    # Paging limits per the Regulations.gov v4 API documentation: at most 250
    # records per page and at most 20 pages per query.
    page_size = 250
    max_pages = 20
    commenters_info = []

    for page_number in range(1, max_pages + 1):
        url = (
            f"https://api.regulations.gov/v4/comments?filter[docketId]={docket_id}"
            f"&page[size]={page_size}&page[number]={page_number}"
        )
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            error_details = response.text
            print(f"Couldn't fetch data for: {docket_id}: {response.status_code}, Details: {error_details}")
            break

        data = response.json() or {}
        for comment in data.get('data') or []:
            attributes = comment.get('attributes') or {}
            # A null title or date must not abort the whole docket.
            title = attributes.get('title') or ''
            date_iso = attributes.get('postedDate')
            if not title.startswith(submitted_by_prefix) or "Anonymous" in title:
                continue
            commenters_info.append({
                'Docket ID': docket_id,
                'Docket Name': docket_title,
                'Commenter Name': title[len(submitted_by_prefix):],
                'Date': date_iso.split('T')[0] if date_iso else None
            })

        # Stop when the API reports no further page; a response without paging
        # metadata is treated as the last page.
        if not (data.get('meta') or {}).get('hasNextPage'):
            break
    else:
        # Every allowed page was read and the API still reported more.
        print(f"Reached the {max_pages}-page limit for docket ID {docket_id}; results may be incomplete")

    return commenters_info

# Accumulator for the commenter records gathered across all dockets.
all_commenters_info = list()


In [None]:
# Start from an empty accumulator every time this cell runs, so re-running it
# never appends a second copy of the same rows.
all_commenters_info = []

for docket_id in docket_ids:
    # logging.info(f"Processing docket ID: {docket_id}")
    try:
        commenters_info = fetch_commenters_info(docket_id, api_key)
    except Exception as e:
        # Best effort: one failing docket is logged and skipped so the
        # remaining dockets are still processed.
        logging.error(f"Error processing docket ID {docket_id}: {e}")
        continue

    if commenters_info:
        all_commenters_info.extend(commenters_info)
        logging.info(f"Successfully processed docket ID: {docket_id}")
    else:
        logging.warning(f"No data returned for docket ID: {docket_id}")

# Name the columns explicitly so the frame has the expected schema even when no
# rows were collected; otherwise the later groupby on 'Commenter Name' would
# fail with a KeyError on an empty, column-less frame.
all_commenters_df = pd.DataFrame(
    all_commenters_info,
    columns=['Docket ID', 'Docket Name', 'Commenter Name', 'Date'],
)

In [None]:
# Add a 'Total Comments' column: for every row, the number of rows that share
# that row's 'Commenter Name'.
rows_by_commenter = all_commenters_df.groupby('Commenter Name')
all_commenters_df['Total Comments'] = rows_by_commenter['Commenter Name'].transform('count')

In [None]:
# Write the commenter table next to the input file, without the index column
output_csv = f'{FILENAME}_commenters.csv'
all_commenters_df.to_csv(output_csv, index=False)