# In [4]:
import pandas as pd
import os

"""
This script processes a CSV file containing AP College Poll Top 25 voting, groups the data by 'Season' and 'Week', 
and saves separate CSV files for each season and week.

Input: A CSV file containing data from all weeks from 2014 to 2024, with columns including 'Season' and 'Week'

Output: 
1. Separate CSV files for each combination of 'Season' and 'Week', written to './season_<Season>/<Season>_week<Week>_top25.csv' relative to the current working directory.
"""


# Load the combined ballot data; every row is expected to carry 'Season' and 'Week' columns.
df = pd.read_csv('../entire_ballot_data/college_polls_original.csv')

# Walk through the data one (Season, Week) combination at a time.
for (season, week), week_ballots in df.groupby(['Season', 'Week']):
    # A group keyed by the literal text "Season" is not real ballot data
    # (presumably a header row repeated inside the CSV -- confirm), so skip it.
    if season == "Season":
        continue

    # Make sure the per-season folder exists; an already-existing folder is fine.
    season_dir = f"./season_{season}"
    os.makedirs(season_dir, exist_ok=True)

    # Write this week's rows to their own CSV, leaving out the DataFrame index.
    week_ballots.to_csv(f"./season_{season}/{season}_week{week}_top25.csv", index=False)

