# In [20]:  (Jupyter notebook cell marker)
import csv
import pandas as pd
import os

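"""
Documentation for removed_borda_count (defined below).

Calculate Borda points for the teams voted for in a specific week and season,
excluding one chosen team, from the CSV file containing the AP College Poll
ballots for that week and season.

Parameters:
    weights (list): Points awarded per ballot position; weights[0] is awarded for a 1st-place vote, weights[1] for 2nd, and so on.
    year (str): The season (year) for which to calculate Borda points.
    week (str): The week within that season for which to calculate Borda points.
    output_filename (str): Label used to name the output folder (borda_<output_filename>) and the output CSV file.
    removed_team (str): Name of the team to exclude when tallying Borda points.
"""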
# Path of the CSV most recently written by removed_borda_count (None until a
# call succeeds). Kept as a module-level global for callers that read it
# after the call instead of using the return value.
output_file = None


def removed_borda_count(weights, year, week, output_filename, removed_team):
    """Tally Borda points for one week's ballots with one team excluded.

    Reads the ballot CSV for the given season and week, drops every vote cast
    for ``removed_team``, awards ``weights[i]`` points to each remaining team
    for every ballot that lists it in the i-th rank column, and writes the
    teams sorted by descending point total to
    ``./borda_<output_filename>/season_<year>/``.

    NOTE(review): votes for the removed team are simply discarded; teams
    ranked below it on a ballot are NOT shifted up a position, so the point
    totals of the remaining teams are unaffected by the removal. Confirm this
    is the intended model.

    Parameters:
        weights (list): Points per ballot position (index 0 = 1st place).
            Positions beyond ``len(weights)`` score 0 points.
        year (str): Season used to locate the ballot file and name the output.
        week (str): Week within the season.
        output_filename (str): Label used in the output folder and file name.
        removed_team (str): Team whose votes are excluded from the tally.

    Returns:
        str | None: Path of the CSV that was written, or None when the ballot
        file for this season/week could not be read. The same path is also
        stored in the module-level ``output_file``.
    """
    global output_file

    ballot_path = f'../../../../data/college-polls/processed_data/ballot_data_by_season_and_week/season_{year}/{year}_week{week}_top25.csv'
    # Only failures to read/parse the ballot file are treated as "no data";
    # anything else (including KeyboardInterrupt) propagates.
    try:
        df = pd.read_csv(ballot_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        print(f'Data for this combination of week and year (year: {year}, week: {week}) was not available in the local ballot_data_by_season_and_week directory')
        return None

    columns_1_to_25 = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th', '11th', '12th', '13th', '14th', '15th', '16th', '17th', '18th', '19th', '20th', '21st', '22nd', '23rd', '24th', '25th']
    borda_scores = dict()

    for position, column in enumerate(columns_1_to_25):
        # Positions without a configured weight still register the team,
        # but contribute no points.
        points = weights[position] if position < len(weights) else 0
        for team in df[column].tolist():
            # Skip EVERY vote for the removed team (list.remove would only
            # drop the first voter's vote in this column).
            if team == removed_team:
                continue
            borda_scores[team] = borda_scores.get(team, 0) + points

    # Highest Borda total first; ties keep first-seen order (stable sort).
    sorted_teams = sorted(borda_scores.items(), key=lambda x: x[1], reverse=True)

    # Create the folder for this weight distribution and season if needed.
    os.makedirs(f"./borda_{output_filename}/season_{year}", exist_ok=True)
    output_file = f'./borda_{output_filename}/season_{year}/{year}_week{week}_{output_filename}_removed_{removed_team}.csv'

    # Explicit UTF-8 so the file round-trips with pandas.read_csv's default.
    with open(output_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Teams', 'Borda Points'])
        writer.writerows(sorted_teams)

    return output_file
# Baseline ranking: the "Teams" column of the previously computed Borda
# results for season 2014, week 1 (one entry per team in that file).
top25_df = pd.read_csv("../results/borda_top25/season_2014/2014_week1_top25.csv")
ranked_teams = top25_df["Teams"].tolist()
print(len(ranked_teams))

# Settings shared by every recount below.
season, week = "2014", "1"
descending_weights = list(range(25, 0, -1))  # 25 points for 1st ... 1 point for 25th

# For each team in the baseline ranking: recount with that team removed, then
# record the highest-ranked baseline position whose occupant changed.
paradox_find_results = []
for dropped_team in ranked_teams:
    removed_borda_count(descending_weights, season, week, "top25", dropped_team)
    # removed_borda_count publishes the path of the file it wrote via the
    # module-level output_file.
    new_df = pd.read_csv(output_file)
    new_ranked_teams = new_df["Teams"].tolist()

    # zip stops at the shorter ranking, so positions past the end of the new
    # ranking are never compared.
    for original_rank, (old_team, new_team) in enumerate(zip(ranked_teams, new_ranked_teams), start=1):
        if new_team == old_team:
            continue

        # "NR" marks a team that no longer appears in the new ranking.
        if old_team in new_ranked_teams:
            new_rank = new_ranked_teams.index(old_team) + 1
        else:
            new_rank = "NR"

        paradox_find_results.append({
            'Season': season,
            'Week': week,
            'Removed Team': f'{dropped_team}',
            'Highest Ranked Team w/Different Rank': f'{old_team}',
            'Original Ranking': f'{original_rank}',
            'New Ranking': f'{new_rank}'
        })
        # Only the first (highest-ranked) difference is recorded per removal.
        break

paradox_find_results_df = pd.DataFrame(paradox_find_results)
paradox_find_results_df.to_csv("found_paradoxes.csv", index=False)


# Output of print(len(ranked_teams)): 50
