In [1]:
import csv
import pandas as pd
import os

"""
Calculate Borda points for voted teams in a specific week and season
from a given CSV file containing AP College Poll ballots for the corresponding week and season.

Parameters:
    weights (list): List of weights assigned to team rankings.
    year (str): The year for which to calculate Borda points.
    week (str): The week within the corresponding year for which to calculate Borda points.
    output_filename(str)
"""


def borda_count_ap_polls(weights, year, week, output_filename):
    #Reads in csv file the provided week and year from ballot_data_by_season_and_week directory if it exists
    try:
        df = pd.read_csv(f'../../../../data/college-polls/processed_data/ballot_data_by_season_and_week/season_{year}/{year}_week{week}_top25.csv')
    except:
        print(f'Data for this combination of week and year (year: {year}, week: {week}) was not available in the local ballot_data_by_season_and_week directory')
        return
    columns_1_to_25 = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th', '11th', '12th', '13th', '14th', '15th', '16th', '17th', '18th', '19th', '20th', '21st', '22nd', '23rd', '24th', '25th']
    borda_scores = dict()

    #Iterates through ranking orders in columns_1_to_25
    for i in range(len(columns_1_to_25)):
        ith_ranked_team = df[columns_1_to_25[i]].tolist()

        #Iterates through all teams voted in 'columns_1_to_25[i]' position by voters and adjusts the teams' borda count by the point 
        # total corresponding with 'columns_1_to_25[i]' position ranking
        for team in ith_ranked_team:
            if team not in borda_scores: borda_scores[team] = 0
            if i < len(weights):
                borda_scores[team] += weights[i]

    #Sorts teams by their Borda Count in descending order     
    sorted_teams = sorted(borda_scores.items(), key=lambda x: x[1], reverse=True)

    #Creates borda could folder for specific borda weight distribution and year if it does not exist
    os.makedirs(f"./borda_{output_filename}/season_{year}", exist_ok=True)
    output_file = f'./borda_{output_filename}/season_{year}/{year}_week{week}_{output_filename}.csv'

    #Writes the borda count data to the corresponding csv file
    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Teams', 'Borda Points'])
        writer.writerows(sorted_teams)

#Builds the top-25 Borda count csv for every season from 2014 through 2024 and every
#week from 1 through 17, awarding 25 points for a 1st-place vote down to 1 point for a 25th-place vote
for year in range(2014, 2025):
    for week in range(1, 18):
        borda_count_ap_polls(list(range(25, 0, -1)), f"{year}", f"{week}", "top25")

Data for this combination of week and year (year: 2015, week: 2) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2015, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2016, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2017, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2018, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2021, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2022, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week a