In [None]:
import pandas as pd
import os

"""
This script processes a CSV file containing AP College Poll Top 25 voting by grouping the data by 'Season' and 'Week'. 
It extracts the voted teams (from the top 25 ranks) and saves the results as separate CSV files.

Input: The full AP College Poll ballot CSV (used only to enumerate the 'Season'/'Week' combinations) and the
per-season, per-week top-25 ballot CSV files, each with columns for team rankings ('1st' to '25th').

Output: 
1.  Separate CSV files for each combination of 'Season' and 'Week', containing sorted lists of unique voted teams.
"""

# Column headers of the 25 ranking slots read from every per-week ballot file.
columns_1_to_25 = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th', '11th', '12th', '13th', '14th', '15th', '16th', '17th', '18th', '19th', '20th', '21st', '22nd', '23rd', '24th', '25th']


def extract_voted_teams(ballots, rank_columns):
    """Return the sorted, de-duplicated values found in the rank columns.

    Args:
        ballots: DataFrame of ballots; must contain every column named in
            ``rank_columns``.
        rank_columns: Names of the ranking columns to scan.

    Returns:
        A sorted list of the distinct values found in those columns,
        excluding missing cells and any value equal to a rank-column name.

    Raises:
        KeyError: If one of ``rank_columns`` is absent from ``ballots``.
    """
    voted_teams = set()
    for col in rank_columns:
        # Blank cells load as NaN. Dropping them keeps NaN out of the output
        # and prevents sorted() from failing on a float-vs-str comparison.
        voted_teams.update(ballots[col].dropna().tolist())

    # A header row read as data would appear as the column names themselves.
    voted_teams.difference_update(rank_columns)
    return sorted(voted_teams)


# Guarded so that importing this module (e.g. to reuse or test
# extract_voted_teams) does not touch the filesystem. When run as a script or
# as a notebook cell, __name__ is "__main__" and the names below are still
# defined at module level.
if __name__ == "__main__":
    df = pd.read_csv('../../entire_ballot_data/college_polls_original.csv')

    # Only the (Season, Week) keys are used from this grouping; the ballots
    # themselves are re-read from the per-week files below.
    grouped = df.groupby(['Season', 'Week'])

    for (season, week), _group in grouped:
        # Skip a group whose Season key is the literal header text, so that a
        # header row read as data does not produce an output file.
        if season == "Season":
            continue

        # Load the ballots for this season and week and extract the voted teams.
        df_for_season_and_week = pd.read_csv(f"../../ballot_data_by_season_and_week/season_{season}/{season}_week{week}_top25.csv")
        unique_teams = extract_voted_teams(df_for_season_and_week, columns_1_to_25)

        # Create the season folder for the output files if it does not exist.
        os.makedirs(f"./season_{season}", exist_ok=True)
        file_name = f"./season_{season}/{season}_week{week}_voted_teams.csv"
        new_df = pd.DataFrame(unique_teams, columns=["Voted Teams"])

        # Write the voted teams for this season and week to their CSV file.
        new_df.to_csv(file_name, index=False)
