# In [2]:
import pandas as pd
import os

"""
This script processes a CSV file containing AP College Poll Top 25 voting by grouping the data by 'Season' and 'Week'. 
It extracts the names of the voters for each group and saves the results as separate CSV files.

Input: A CSV file with AP College Poll Top 25 voting data, including columns for voter names.

Output: 
1.  Separate CSV files for each combination of 'Season' and 'Week', containing sorted lists of unique voters.
"""

def write_voter_lists(
    source_csv: str = "../../entire_ballot_data/college_polls_original.csv",
    ballots_root: str = "../../ballot_data_by_season_and_week",
    output_root: str = ".",
) -> list:
    """Write one CSV of sorted, unique voter names per (season, week).

    The (season, week) combinations are taken from the 'Season' and 'Week'
    columns of ``source_csv``. For each combination the matching ballot file
    ``<ballots_root>/season_<season>/<season>_week<week>_top25.csv`` is read,
    its 'Pollster' column is de-duplicated and sorted, and the result is
    written under a single 'Voters' column to
    ``<output_root>/season_<season>/<season>_week<week>_voters.csv``.

    Args:
        source_csv: CSV holding the full poll data; must contain the
            'Season' and 'Week' columns.
        ballots_root: Directory containing the per-season folders of
            per-week ballot CSVs (each with a 'Pollster' column).
        output_root: Directory under which the ``season_<season>`` output
            folders are created.

    Returns:
        The paths of the voter CSV files that were written, in the order
        they were produced.

    Raises:
        FileNotFoundError: If ``source_csv`` or any expected per-week ballot
            file does not exist.
        KeyError: If a required column ('Season', 'Week' or 'Pollster') is
            missing from the file it is read from.
    """
    polls = pd.read_csv(source_csv)
    written = []

    # Only the group keys are needed here; the voter names themselves come
    # from the per-week ballot files, so the grouped rows are ignored.
    for (season, week), _ in polls.groupby(["Season", "Week"]):
        # Ensures that a stray header row ("Season") in the data does not
        # produce an output file of its own.
        if season == "Season":
            continue

        ballot_path = os.path.join(
            ballots_root, f"season_{season}", f"{season}_week{week}_top25.csv"
        )
        ballots = pd.read_csv(ballot_path)

        # A pollster can occur on several rows of a ballot file; keep each
        # name once and sort, so the output is a sorted list of unique voters.
        voters = ballots["Pollster"].drop_duplicates().sort_values()

        # Creates the season folder if it does not exist yet.
        season_dir = os.path.join(output_root, f"season_{season}")
        os.makedirs(season_dir, exist_ok=True)

        out_path = os.path.join(season_dir, f"{season}_week{week}_voters.csv")
        pd.DataFrame({"Voters": voters.tolist()}).to_csv(out_path, index=False)
        written.append(out_path)

    return written


if __name__ == "__main__":
    write_voter_lists()

