In [6]:
import pandas as pd
import itertools


def preprocess_pairwise_data(pairwise_df):
    """
    Build a lookup table of head-to-head results keyed by ordered team pair.

    Each row of ``pairwise_df`` is read through its 'TeamA', 'TeamB',
    'A>B' and 'B>A' columns and stored twice: under (TeamA, TeamB) as
    (A>B, B>A), and under (TeamB, TeamA) with the two values swapped.
    Callers can therefore query a pairing in either orientation.
    When the same pairing occurs in several rows, the last row wins.
    """
    lookup = {}

    for _, record in pairwise_df.iterrows():
        first, second = record['TeamA'], record['TeamB']
        first_over_second, second_over_first = record['A>B'], record['B>A']

        lookup[(first, second)] = (first_over_second, second_over_first)
        # Mirror entry so that the (B, A) orientation needs no special casing.
        lookup[(second, first)] = (second_over_first, first_over_second)

    return lookup

In [7]:
def get_team_rankings(ranking_file):
    """
    Load a ranking CSV and index it by team.

    The file is read with ``pd.read_csv`` and must provide 'Teams' and
    'Borda Points' columns. A team's rank is its row index plus one,
    i.e. the 1-based position of its row in the file.

    Returns a dictionary mapping each team to
    ``{'Rank': <row position>, 'Points': <'Borda Points' value>}``.
    """
    table = pd.read_csv(ranking_file)
    # Row position doubles as the rank: the first data row is rank 1.
    table['Rank'] = table.index + 1

    standings = {}
    for _, entry in table.iterrows():
        team = entry['Teams']
        standings[team] = {'Rank': entry['Rank'], 'Points': entry['Borda Points']}
    return standings

In [23]:
def cycle_finder_one_iteration(season, week):
    """
    Find all 3-team majority cycles for a single season and week.

    Three inputs are read for the given ``season``/``week``: the list of
    voted teams ('Voted Teams' column), the pairwise head-to-head table
    (passed through ``preprocess_pairwise_data``) and the Borda top-25 file
    (passed through ``get_team_rankings``). Every 3-team subset of the
    voted teams is sorted and tested for a strict cycle in either direction:
    A>B, B>C, C>A  or  B>A, C>B, A>C. Ties never count as a win. For three
    teams these two directions are the only possible cycles.

    Parameters
    ----------
    season, week
        Values interpolated into the three input file paths.

    Returns
    -------
    pandas.DataFrame or None
        One row per cyclic triple with the columns 'Season', 'Week',
        'Combo', 'Ranks' and the vote counts of the three pairings
        ('ab', 'ab-a', 'ab-b', 'bc', 'bc-b', 'bc-c', 'ca', 'ca-c', 'ca-a').
        Teams missing from the ranking file are reported with rank 'N/A'.
        The frame is empty when no cycle exists. ``None`` is returned
        (after printing a notice) when the voted-teams file cannot be read.
    """
    # Read the list of teams that received votes this week.
    data_path = f'C:\\Research\\VotingParadoxes\\data\\college-polls\\processed_data\\auxiliary_files\\voted_teams_by_season_and_week\\season_{season}\\{season}_week{week}_voted_teams.csv'
    try:
        df = pd.read_csv(data_path)
    except Exception:
        # Best-effort skip of weeks without a readable voted-teams file.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt/SystemExit propagate, so a long run can be stopped.
        print(f'No valid data for {season} and {week}')
        return None

    # Team names come from the 'Voted Teams' column.
    name_list = df['Voted Teams'].tolist()

    # Read the pairwise data CSV file (TeamA, TeamB, A>B, B>A) and turn it
    # into a dictionary for constant-time lookups in both orientations.
    pairwise_path = f'C:\\Research\\VotingParadoxes\\src\\college-polls\\Pairwise\\results\\season_{season}\\{season}_week{week}_condorcet.csv'
    pairwise_df = pd.read_csv(pairwise_path)
    pairwise_dict = preprocess_pairwise_data(pairwise_df)

    # Rank/points lookup used only to annotate the cycles that are found.
    rank_file = f"./src/college-polls/Borda/results/borda_top25/season_{season}/{season}_week{week}_top25.csv"
    team_rankings = get_team_rankings(rank_file)

    unranked = {'Rank': 'N/A', 'Points': 'N/A'}
    valid_combinations = []

    # Combinations are consumed lazily instead of being stored in a list.
    for combo in itertools.combinations(name_list, 3):
        a, b, c = sorted(combo)

        # Look up the results for each pair from the preprocessed dictionary
        try:
            a_b_result = pairwise_dict[(a, b)]
            a_c_result = pairwise_dict[(a, c)]
            b_c_result = pairwise_dict[(b, c)]
        except KeyError:
            continue  # Skip this combination if any pair data is missing

        forward = (a_b_result[0] > a_b_result[1] and   # A > B
                   b_c_result[0] > b_c_result[1] and   # B > C
                   a_c_result[1] > a_c_result[0])      # C > A
        backward = (a_b_result[1] > a_b_result[0] and  # B > A
                    b_c_result[1] > b_c_result[0] and  # C > B
                    a_c_result[0] > a_c_result[1])     # A > C
        if not (forward or backward):
            continue

        a_rank = team_rankings.get(a, unranked)
        b_rank = team_rankings.get(b, unranked)
        c_rank = team_rankings.get(c, unranked)

        valid_combinations.append({
            'Season': season,
            'Week': week,
            'Combo': f'{a}, {b}, {c}',
            'Ranks': f'{a_rank["Rank"]}, {b_rank["Rank"]}, {c_rank["Rank"]}',
            'ab': f'({a}, {b})',
            'ab-a': f'{a_b_result[0]}',
            'ab-b': f'{a_b_result[1]}',
            'bc': f'({b}, {c})',
            'bc-b': f'{b_c_result[0]}',
            'bc-c': f'{b_c_result[1]}',
            'ca': f'({c}, {a})',
            'ca-c': f'{a_c_result[1]}',
            'ca-a': f'{a_c_result[0]}'
        })

    # Convert the valid combinations to a DataFrame
    valid_combos_df = pd.DataFrame(valid_combinations)
    return valid_combos_df

In [24]:
def all_college_cycles(seasons=range(2014, 2025), weeks=range(1, 18),
                       output_path='C:\\Research\\VotingParadoxes\\src\\college-polls\\Pairwise\\all-cycles.csv'):
    """
    Run the 3-cycle search for every season/week and save the combined result.

    Parameters
    ----------
    seasons : iterable, optional
        Seasons to process. Defaults to 2014 through 2024.
    weeks : iterable, optional
        Weeks to process within each season. Defaults to 1 through 17.
    output_path : str, optional
        Destination CSV. Defaults to the original hard-coded location.

    ``cycle_finder_one_iteration`` is called once per (season, week). Weeks
    for which it returns ``None`` or an empty frame contribute no rows.
    The collected rows are written to ``output_path`` without the index.
    """
    frames = []

    for season in seasons:
        for week in weeks:
            print(f"Processing year {season}, week {week}...")
            result_df = cycle_finder_one_iteration(season, week)
            # Skip missing weeks (None) and weeks without cycles (empty frame);
            # neither adds rows to the output.
            if result_df is not None and not result_df.empty:
                frames.append(result_df)

    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    results_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Save the combined DataFrame to a CSV file with a clean format
    results_df.to_csv(output_path, index=False)
    print("All data has been processed and saved")

In [25]:
# Spot-check the ranking loader on the 2014 week 1 Borda top-25 file.
get_team_rankings(f"./src/college-polls/Borda/results/borda_top25/season_2014/2014_week1_top25.csv")

{'florida-state-seminoles': {'Rank': 1, 'Points': 1496},
 'alabama-crimson-tide': {'Rank': 2, 'Points': 1361},
 'oregon-ducks': {'Rank': 3, 'Points': 1334},
 'oklahoma-sooners': {'Rank': 4, 'Points': 1324},
 'ohio-state-buckeyes': {'Rank': 5, 'Points': 1207},
 'auburn-tigers': {'Rank': 6, 'Points': 1198},
 'ucla-bruins': {'Rank': 7, 'Points': 1106},
 'michigan-state-spartans': {'Rank': 8, 'Points': 1080},
 'south-carolina-gamecocks': {'Rank': 9, 'Points': 1015},
 'baylor-bears': {'Rank': 10, 'Points': 966},
 'stanford-cardinal': {'Rank': 11, 'Points': 885},
 'georgia-bulldogs': {'Rank': 12, 'Points': 843},
 'lsu-tigers': {'Rank': 13, 'Points': 776},
 'wisconsin-badgers': {'Rank': 14, 'Points': 637},
 'usc-trojans': {'Rank': 15, 'Points': 626},
 'clemson-tigers': {'Rank': 16, 'Points': 536},
 'notre-dame-fighting-irish': {'Rank': 17, 'Points': 445},
 'ole-miss-rebels': {'Rank': 18, 'Points': 424},
 'arizona-state-sun-devils': {'Rank': 19, 'Points': 357},
 'kansas-state-wildcats': {'Rank

In [26]:
# Run the 3-cycle search over every season and week; the combined result is written to all-cycles.csv.
all_college_cycles()

Processing year 2014, week 1...
Processing year 2014, week 2...
Processing year 2014, week 3...
Processing year 2014, week 4...
Processing year 2014, week 5...
Processing year 2014, week 6...
Processing year 2014, week 7...
Processing year 2014, week 8...
Processing year 2014, week 9...
Processing year 2014, week 10...
Processing year 2014, week 11...
Processing year 2014, week 12...
Processing year 2014, week 13...
Processing year 2014, week 14...
Processing year 2014, week 15...
Processing year 2014, week 16...
Processing year 2014, week 17...
Processing year 2015, week 1...
Processing year 2015, week 2...
No valid data for 2015 and 2
Processing year 2015, week 3...
Processing year 2015, week 4...
Processing year 2015, week 5...
Processing year 2015, week 6...
Processing year 2015, week 7...
Processing year 2015, week 8...
Processing year 2015, week 9...
Processing year 2015, week 10...
Processing year 2015, week 11...
Processing year 2015, week 12...
Processing year 2015, week 13...

In [26]:
def cycle_finder_4(season, week):
    """
    Find all 4-team majority cycles for a single season and week.

    For every 4-team subset of the voted teams, each of the three distinct
    cyclic orderings of the four teams is tested for a strict cycle
    a>b, b>c, c>d, d>a (or the same ordering traversed backwards).
    Testing only the alphabetical ordering would miss cycles such as
    a>c, c>b, b>d, d>a. Ties never count as a win.

    Parameters
    ----------
    season, week
        Values interpolated into the three input file paths.

    Returns
    -------
    pandas.DataFrame or None
        One row per cycle found. 'Combo', 'Ranks' and the pair columns
        ('ab', 'a>b', 'b>a', 'bc', ..., 'da', 'd>a', 'a>d') list the teams
        in the cyclic ordering that was matched; the alphabetical ordering
        is tried first. Teams missing from the ranking file get rank 'N/A'.
        The frame is empty when no cycle exists. ``None`` is returned
        (after printing a notice) when the voted-teams file cannot be read.
    """
    data_path = f'C:\\Research\\VotingParadoxes\\data\\college-polls\\processed_data\\auxiliary_files\\voted_teams_by_season_and_week\\season_{season}\\{season}_week{week}_voted_teams.csv'
    try:
        df = pd.read_csv(data_path)
    except Exception:
        # Best-effort skip of weeks without a readable voted-teams file.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt/SystemExit propagate, so a long run can be stopped.
        print(f'No valid data for {season} and {week}')
        return None

    team_list = df['Voted Teams'].tolist()

    pairwise_path = f'C:\\Research\\VotingParadoxes\\src\\college-polls\\Pairwise\\results\\season_{season}\\{season}_week{week}_condorcet.csv'
    pairwise_df = pd.read_csv(pairwise_path)

    pairwise_dict = preprocess_pairwise_data(pairwise_df)

    rank_file = f"./src/college-polls/Borda/results/borda_top25/season_{season}/{season}_week{week}_top25.csv"
    team_rankings = get_team_rankings(rank_file)

    # Index orderings of the four sorted teams, one per undirected cycle:
    # position 0 is pinned first and, of each ordering and its mirror image,
    # only the one whose second index is below its last index is kept.
    # Result: (0, 1, 2, 3), (0, 1, 3, 2), (0, 2, 1, 3).
    orderings = [(0,) + tail
                 for tail in itertools.permutations((1, 2, 3))
                 if tail[0] < tail[-1]]

    unranked = {'Rank': 'N/A', 'Points': 'N/A'}
    valid_combinations = []

    # Iterate through each 4-team combination
    for combo in itertools.combinations(team_list, 4):
        teams = sorted(combo)

        # Skip this combination if any of its six pairings has no data.
        if any(pair not in pairwise_dict for pair in itertools.combinations(teams, 2)):
            continue

        for order in orderings:
            a, b, c, d = (teams[i] for i in order)
            a_b_result = pairwise_dict[(a, b)]
            b_c_result = pairwise_dict[(b, c)]
            c_d_result = pairwise_dict[(c, d)]
            a_d_result = pairwise_dict[(a, d)]

            # A>B, B>C, C>D, D>A
            forward = (a_b_result[0] > a_b_result[1] and
                       b_c_result[0] > b_c_result[1] and
                       c_d_result[0] > c_d_result[1] and
                       a_d_result[1] > a_d_result[0])
            # A<B, B<C, C<D, D<A
            backward = (a_b_result[1] > a_b_result[0] and
                        b_c_result[1] > b_c_result[0] and
                        c_d_result[1] > c_d_result[0] and
                        a_d_result[0] > a_d_result[1])
            if not (forward or backward):
                continue

            a_rank = team_rankings.get(a, unranked)
            b_rank = team_rankings.get(b, unranked)
            c_rank = team_rankings.get(c, unranked)
            d_rank = team_rankings.get(d, unranked)

            valid_combinations.append({
                'Season': season,
                'Week': week,
                'Combo': f'{a}, {b}, {c}, {d}',
                'Ranks': f'{a_rank["Rank"]}, {b_rank["Rank"]}, {c_rank["Rank"]}, {d_rank["Rank"]}',
                'ab': f'({a} {b})',
                'a>b': f'{a_b_result[0]}',
                'b>a': f'{a_b_result[1]}',
                'bc': f'({b} {c})',
                'b>c': f'{b_c_result[0]}',
                'c>b': f'{b_c_result[1]}',
                'cd': f'({c} {d})',
                'c>d': f'{c_d_result[0]}',
                'd>c': f'{c_d_result[1]}',
                'da': f'({d} {a})',
                'd>a': f'{a_d_result[1]}',
                'a>d': f'{a_d_result[0]}'
            })

    valid_combos_df = pd.DataFrame(valid_combinations)
    return valid_combos_df

In [27]:
def cycle_through_4cycle(seasons=range(2014, 2025), weeks=range(1, 18),
                         output_path="./src/college-polls/Pairwise/4_cycle.csv"):
    """
    Run the 4-cycle search for every season/week and save the combined result.

    Parameters
    ----------
    seasons : iterable, optional
        Seasons to process. Defaults to 2014 through 2024.
    weeks : iterable, optional
        Weeks to process within each season. Defaults to 1 through 17.
    output_path : str, optional
        Destination CSV. The default is relative to the current working
        directory.

    ``cycle_finder_4`` is called once per (season, week). Weeks for which it
    returns ``None`` or an empty frame contribute no rows. The collected
    rows are written to ``output_path`` without the index.
    """
    frames = []

    for season in seasons:
        for week in weeks:
            print(f"Processing season {season}, week {week}...")
            result_df = cycle_finder_4(season, week)
            # Skip missing weeks (None) and weeks without cycles (empty frame);
            # neither adds rows to the output.
            if result_df is not None and not result_df.empty:
                frames.append(result_df)

    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    all_results_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Save the combined DataFrame to a CSV file with a clean format
    all_results_df.to_csv(output_path, index=False)
    print("All data has been processed and saved")

In [2]:
import os

# Show the working directory before and after switching to the project root;
# the relative "./src/..." paths used by the cycle finders resolve against it.
print(os.getcwd())
# Raw string: '\R' and '\V' are not valid escape sequences in a plain literal.
os.chdir(r'c:\Research\VotingParadoxes')
print(os.getcwd())

c:\Research\VotingParadoxes
c:\Research\VotingParadoxes


In [28]:
# Run the 4-cycle search over every season and week; the combined result is written to ./src/college-polls/Pairwise/4_cycle.csv.
cycle_through_4cycle()

Processing season 2014, week 1...
Processing season 2014, week 2...
Processing season 2014, week 3...
Processing season 2014, week 4...
Processing season 2014, week 5...
Processing season 2014, week 6...
Processing season 2014, week 7...
Processing season 2014, week 8...
Processing season 2014, week 9...
Processing season 2014, week 10...
Processing season 2014, week 11...
Processing season 2014, week 12...
Processing season 2014, week 13...
Processing season 2014, week 14...
Processing season 2014, week 15...
Processing season 2014, week 16...
Processing season 2014, week 17...
Processing season 2015, week 1...
Processing season 2015, week 2...
No valid data for 2015 and 2
Processing season 2015, week 3...
Processing season 2015, week 4...
Processing season 2015, week 5...
Processing season 2015, week 6...
Processing season 2015, week 7...
Processing season 2015, week 8...
Processing season 2015, week 9...
Processing season 2015, week 10...
Processing season 2015, week 11...
Processin

5-Cycle

In [11]:
def cycle_finder_5(season, week):
    """
    Find all 5-team majority cycles for a single season and week.

    For every 5-team subset of the voted teams, each of the twelve distinct
    cyclic orderings of the five teams is tested for a strict cycle
    a>b, b>c, c>d, d>e, e>a (or the same ordering traversed backwards).
    Testing only the alphabetical ordering would miss every cycle whose
    order differs from it. Ties never count as a win. One subset can
    contribute more than one row when it contains several distinct cycles.

    Parameters
    ----------
    season, week
        Values interpolated into the three input file paths.

    Returns
    -------
    pandas.DataFrame or None
        One row per cycle found. 'Combo', 'Rankings' and the pair columns
        ('ab', 'a>b', 'b>a', ..., 'ea', 'e>a', 'a>e') list the teams in the
        cyclic ordering that was matched; the alphabetical ordering is
        tried first. Teams missing from the ranking file get rank 'N/A'.
        The frame is empty when no cycle exists. ``None`` is returned
        (after printing a notice) when the voted-teams file cannot be read.
    """
    data_path = f'C:\\Research\\VotingParadoxes\\data\\college-polls\\processed_data\\auxiliary_files\\voted_teams_by_season_and_week\\season_{season}\\{season}_week{week}_voted_teams.csv'
    try:
        df = pd.read_csv(data_path)
    except Exception:
        # Best-effort skip of weeks without a readable voted-teams file.
        # Catching Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt/SystemExit propagate, so a long run can be stopped.
        print(f'No valid data for {season} and {week}')
        return None

    team_list = df['Voted Teams'].tolist()

    pairwise_path = f'C:\\Research\\VotingParadoxes\\src\\college-polls\\Pairwise\\results\\season_{season}\\{season}_week{week}_condorcet.csv'
    pairwise_df = pd.read_csv(pairwise_path)

    pairwise_dict = preprocess_pairwise_data(pairwise_df)

    rank_file = f"./src/college-polls/Borda/results/borda_top25/season_{season}/{season}_week{week}_top25.csv"
    team_rankings = get_team_rankings(rank_file)

    # Index orderings of the five sorted teams, one per undirected cycle:
    # position 0 is pinned first and, of each ordering and its mirror image,
    # only the one whose second index is below its last index is kept.
    # This yields 12 orderings, starting with (0, 1, 2, 3, 4).
    orderings = [(0,) + tail
                 for tail in itertools.permutations((1, 2, 3, 4))
                 if tail[0] < tail[-1]]

    unranked = {'Rank': 'N/A', 'Points': 'N/A'}
    valid_combinations = []

    # Iterate through each 5-team combination
    for combo in itertools.combinations(team_list, 5):
        teams = sorted(combo)

        # Skip this combination if any of its ten pairings has no data.
        if any(pair not in pairwise_dict for pair in itertools.combinations(teams, 2)):
            continue

        for order in orderings:
            a, b, c, d, e = (teams[i] for i in order)
            a_b_result = pairwise_dict[(a, b)]
            b_c_result = pairwise_dict[(b, c)]
            c_d_result = pairwise_dict[(c, d)]
            d_e_result = pairwise_dict[(d, e)]
            a_e_result = pairwise_dict[(a, e)]

            # A>B, B>C, C>D, D>E, E>A
            forward = (a_b_result[0] > a_b_result[1] and
                       b_c_result[0] > b_c_result[1] and
                       c_d_result[0] > c_d_result[1] and
                       d_e_result[0] > d_e_result[1] and
                       a_e_result[1] > a_e_result[0])
            # A<B, B<C, C<D, D<E, E<A
            backward = (a_b_result[1] > a_b_result[0] and
                        b_c_result[1] > b_c_result[0] and
                        c_d_result[1] > c_d_result[0] and
                        d_e_result[1] > d_e_result[0] and
                        a_e_result[0] > a_e_result[1])
            if not (forward or backward):
                continue

            a_rank = team_rankings.get(a, unranked)
            b_rank = team_rankings.get(b, unranked)
            c_rank = team_rankings.get(c, unranked)
            d_rank = team_rankings.get(d, unranked)
            e_rank = team_rankings.get(e, unranked)

            valid_combinations.append({
                'Season': season,
                'Week': week,
                'Combo': f'{a}, {b}, {c}, {d}, {e}',
                'Rankings': f'{a_rank["Rank"]}, {b_rank["Rank"]}, {c_rank["Rank"]}, {d_rank["Rank"]}, {e_rank["Rank"]}',
                'ab': f'({a} {b})',
                'a>b': f'{a_b_result[0]}',
                'b>a': f'{a_b_result[1]}',
                'bc': f'({b} {c})',
                'b>c': f'{b_c_result[0]}',
                'c>b': f'{b_c_result[1]}',
                'cd': f'({c} {d})',
                'c>d': f'{c_d_result[0]}',
                'd>c': f'{c_d_result[1]}',
                'de': f'({d} {e})',
                'd>e': f'{d_e_result[0]}',
                'e>d': f'{d_e_result[1]}',
                'ea': f'({e} {a})',
                'e>a': f'{a_e_result[1]}',
                'a>e': f'{a_e_result[0]}'
            })

    # Convert the valid combinations to a DataFrame
    valid_combos_df = pd.DataFrame(valid_combinations)
    return valid_combos_df

In [12]:
def cycle_through_5cycle(seasons=range(2014, 2025), weeks=range(1, 18),
                         output_path="./src/college-polls/Pairwise/5_cycle.csv"):
    """
    Run the 5-cycle search for every season/week and save the combined result.

    Parameters
    ----------
    seasons : iterable, optional
        Seasons to process. Defaults to 2014 through 2024.
    weeks : iterable, optional
        Weeks to process within each season. Defaults to 1 through 17.
    output_path : str, optional
        Destination CSV. The default is relative to the current working
        directory.

    ``cycle_finder_5`` is called once per (season, week). Weeks for which it
    returns ``None`` or an empty frame contribute no rows. The collected
    rows are written to ``output_path`` without the index.
    """
    frames = []

    for season in seasons:
        for week in weeks:
            print(f"Processing season {season}, week {week}...")
            result_df = cycle_finder_5(season, week)
            # Skip missing weeks (None) and weeks without cycles (empty frame);
            # neither adds rows to the output.
            if result_df is not None and not result_df.empty:
                frames.append(result_df)

    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    all_results_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Save the combined DataFrame to a CSV file with a clean format
    all_results_df.to_csv(output_path, index=False)
    print("All data has been processed and saved")

In [13]:
# Run the 5-cycle search over every season and week; the combined result is written to ./src/college-polls/Pairwise/5_cycle.csv.
cycle_through_5cycle()

Processing season 2014, week 1...
Processing season 2014, week 2...
Processing season 2014, week 3...
Processing season 2014, week 4...
Processing season 2014, week 5...
Processing season 2014, week 6...
Processing season 2014, week 7...
Processing season 2014, week 8...
Processing season 2014, week 9...
Processing season 2014, week 10...
Processing season 2014, week 11...
Processing season 2014, week 12...
Processing season 2014, week 13...
Processing season 2014, week 14...
Processing season 2014, week 15...
Processing season 2014, week 16...
Processing season 2014, week 17...
Processing season 2015, week 1...
Processing season 2015, week 2...
No valid data for 2015 and 2
Processing season 2015, week 3...
Processing season 2015, week 4...
Processing season 2015, week 5...
Processing season 2015, week 6...
Processing season 2015, week 7...
Processing season 2015, week 8...
Processing season 2015, week 9...
Processing season 2015, week 10...
Processing season 2015, week 11...
Processin