In [8]:
import csv
import pandas as pd
import os

"""
Calculate Borda points for voted teams in a specific week and season
from a given CSV file containing AP College Poll ballots for the corresponding week and season,
where a specified team is removed from all ballots it appears on.

Parameters:
    weights (list): List of weights assigned to team rankings.
    year (str): The year for which to calculate Borda points.
    week (str): The week within the corresponding year for which to calculate Borda points.
    output_filename (str): Label of the Borda weighting scheme being used; it names the output folder and file.
    removed_team (str): The team that will be removed/dropped from the pollsters' ballots.
"""
# Path of the CSV most recently written by removed_borda_count().
# paradox_find_compare() reads that file back through this module-level
# variable; it stays None until the first file has been written.
output_file = None

def removed_borda_count(weights, year, week, output_filename, removed_team):
    """Tally Borda points for one week's ballots after dropping one team.

    Every ballot row of the season/week CSV is read, ``removed_team`` is
    deleted from the ballot (so the teams ranked below it move up one
    position), and each remaining team receives ``weights[position]`` points.
    The totals are written, sorted by points in descending order, to
    ``./borda_{output_filename}/season_{year}/``.

    Parameters:
        weights (list): Points awarded per ballot position (index 0 = first
            place). Positions beyond ``len(weights)`` earn 0 points.
        year (str): Season whose ballots are read.
        week (str): Week within that season.
        output_filename (str): Label of the weighting scheme; used in the
            output folder and file names.
        removed_team (str): Team to delete from every ballot it appears on.

    Returns:
        str | None: Path of the CSV that was written, or ``None`` when the
        ballot file could not be read. The path is also stored in the
        module-level ``output_file`` for callers that rely on it.
    """
    global output_file

    ballots_path = (
        '../../../../data/college-polls/processed_data/'
        'ballot_data_by_season_and_week/'
        f'season_{year}/{year}_week{week}_top25.csv'
    )
    # OSError covers a missing/unreadable file; pandas' empty-data and parser
    # errors are ValueError subclasses. Anything else is a genuine bug and
    # should surface instead of being reported as "data not available".
    try:
        df = pd.read_csv(ballots_path)
    except (OSError, ValueError):
        print(f'Data for this combination of week and year (year: {year}, week: {week}) was not available in the local ballot_data_by_season_and_week directory')
        return None

    borda_scores = {}

    # The first three columns are skipped; the remaining columns are treated
    # as the ballot, in ranking order (presumably the leading columns hold
    # pollster metadata -- confirm against the data files).
    for ballot in df.iloc[:, 3:].itertuples(index=False, name=None):
        team_rankings = list(ballot)
        if removed_team in team_rankings:
            team_rankings.remove(removed_team)
        for position, team in enumerate(team_rankings):
            # A blank ballot slot is read as NaN; it is not a team and must
            # not show up in the standings.
            if pd.isna(team):
                continue
            points = weights[position] if position < len(weights) else 0
            borda_scores[team] = borda_scores.get(team, 0) + points

    # Sorts teams by their Borda count in descending order (ties keep the
    # order in which the teams were first encountered).
    sorted_teams = sorted(borda_scores.items(), key=lambda item: item[1], reverse=True)

    # Creates the folder for this weight distribution and year if needed.
    os.makedirs(f"./borda_{output_filename}/season_{year}", exist_ok=True)
    output_file = f'./borda_{output_filename}/season_{year}/{year}_week{week}_{output_filename}_removed_{removed_team}.csv'

    # UTF-8 is what pandas assumes when the file is read back, so write it
    # explicitly rather than depending on the platform's locale encoding.
    with open(output_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Teams', 'Borda Points'])
        writer.writerows(sorted_teams)

    return output_file

def paradox_find_compare(week, year):
    """Look for ranking changes caused by removing single teams from the ballots.

    The standard Borda standings for the given week/season are loaded, and
    each team in them is removed in turn: the Borda count is recomputed
    without that team via ``removed_borda_count`` and the new order is
    compared, position by position, with the original order minus the
    removed team. For every removed team, the highest-ranked team whose
    position differs is recorded (at most one record per removed team).

    Parameters:
        week (str): Week within the season.
        year (str): Season to analyse.

    Returns:
        tuple[pandas.DataFrame, bool] | None: The recorded differences and a
        flag that is True when at least one was recorded for *any* removed
        team. ``None`` when the standard standings file cannot be read.
    """
    global output_file

    # Reads the standard Borda-count standings for the week/year if they exist.
    try:
        top25_df = pd.read_csv(f"../results/borda_top25/season_{year}/{year}_week{week}_top25.csv")
    except (OSError, ValueError):
        print(f'Data for this combination of week and year (year: {year}, week: {week}) was not available in the local results/borda_top25 directory')
        return

    weights = list(range(25, 0, -1))  # 25 points for 1st place down to 1 for 25th
    global_ranked_teams = top25_df["Teams"].tolist()
    paradox_find_results = []

    for i, removed_team in enumerate(global_ranked_teams):
        # Clear the shared path first so a failed recount cannot leave us
        # reading the file written for a previous team (or a previous week).
        output_file = None
        removed_borda_count(weights, year, week, "top25", removed_team)
        if output_file is None:
            # The ballots for this week/year could not be read; that will be
            # just as true for every other team, so stop here.
            break

        new_ranked_teams = pd.read_csv(output_file)["Teams"].tolist()
        ranked_teams = global_ranked_teams[:i] + global_ranked_teams[i + 1:]

        # zip() stops at the shorter list, so only positions present in both
        # rankings are compared.
        for j, (old_team, new_team) in enumerate(zip(ranked_teams, new_ranked_teams)):
            if new_team == old_team:
                continue
            # Records the highest ranked team (in the standard Borda count)
            # that sits in a different position in the recomputed ranking.
            if old_team in new_ranked_teams:
                new_rank = new_ranked_teams.index(old_team) + 1
            else:
                new_rank = "NR"
            paradox_find_results.append({
                'Season': f'{year}',
                'Week': f'{week}',
                'Removed Team': f'{removed_team}',
                'Removed Team Rank': f'{i + 1}',
                'Highest Ranked Team w/Different Rank': f'{old_team}',
                'Original Ranking': f'{j + 1}',
                'New Ranking': f'{new_rank}'
            })
            break

    # The flag covers every removed team, not just the last one examined, and
    # is well-defined even when the standings file lists no teams.
    return pd.DataFrame(paradox_find_results), bool(paradox_find_results)

def paradox_file_maker(years=range(2014, 2025), weeks=range(1, 18), output_path="found_paradoxes.csv"):
    """Scan every season/week combination and save all recorded paradoxes.

    ``paradox_find_compare`` is called for each (week, year) pair and the
    rows it reports are gathered into a single CSV file.

    Parameters:
        years (iterable): Seasons to scan. Defaults to 2014 through 2024.
        weeks (iterable): Weeks to scan in each season. Defaults to 1 through 17.
        output_path (str): Destination CSV. Defaults to "found_paradoxes.csv"
            in the current working directory.
    """
    collected = []

    for year in years:
        for week in weeks:
            outcome = paradox_find_compare(str(week), str(year))
            if outcome is None:
                # No standings available for this week/year.
                continue
            week_df = outcome[0]
            # The frame itself is the source of truth: a non-empty frame
            # means at least one paradox was recorded for this week.
            if not week_df.empty:
                collected.append(week_df)

    # Concatenate once at the end instead of growing a frame inside the loop;
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if collected:
        all_results_df = pd.concat(collected, ignore_index=True)
    else:
        all_results_df = pd.DataFrame()

    all_results_df.to_csv(output_path, index=False)

# Run the full scan as soon as this cell/script executes; the results are
# written to found_paradoxes.csv in the current working directory.
paradox_file_maker()


Data for this combination of week and year (year: 2015, week: 2) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2015, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2016, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2017, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2018, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2021, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week and year (year: 2022, week: 17) was not available in the local ballot_data_by_season_and_week directory
Data for this combination of week a