In [38]:
import pandas as pd
import itertools


def preprocess_pairwise_data(pairwise_df):
    """
    Build a lookup dictionary of head-to-head results from the pairwise DataFrame.

    Parameters
    ----------
    pairwise_df : pandas.DataFrame
        Must contain the columns 'TeamA', 'TeamB', 'A>B' and 'B>A'.

    Returns
    -------
    dict
        Maps every ordered pair ``(x, y)`` to the tuple
        ``(value in favour of x, value in favour of y)``. Each input row is
        stored under both ``(TeamA, TeamB)`` and ``(TeamB, TeamA)`` (with the
        tuple swapped), so a lookup works whichever team is listed first.
        If the same pair appears in several rows, the last row wins.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    pairwise_dict = {}

    # Walk the four columns in parallel rather than using DataFrame.iterrows():
    # iterrows builds a Series per row (slow) and coerces every value in the
    # row to one common dtype, whereas column-wise access keeps each column's
    # own dtype.
    rows = zip(
        pairwise_df['TeamA'].tolist(),
        pairwise_df['TeamB'].tolist(),
        pairwise_df['A>B'].tolist(),
        pairwise_df['B>A'].tolist(),
    )
    for team_a, team_b, a_over_b, b_over_a in rows:
        pairwise_dict[(team_a, team_b)] = (a_over_b, b_over_a)
        # Mirror entry so (B, A) can be looked up directly.
        pairwise_dict[(team_b, team_a)] = (b_over_a, a_over_b)

    return pairwise_dict

In [44]:
def cycle_finder_one_iteration(season, week):
    """
    Find every three-team majority cycle for one season/week.

    The function reads the week's list of voted teams and the week's pairwise
    results, then checks every 3-team combination (names sorted within the
    combination) for a cycle: A beats B, B beats C and C beats A, or the
    reverse orientation. A pair with equal values in both directions never
    counts as a win, so tied pairs cannot be part of a cycle.

    Parameters
    ----------
    season : value interpolated into the input file paths (e.g. a year).
    week : value interpolated into the input file paths (e.g. a week number).

    Returns
    -------
    pandas.DataFrame or None
        One row per cycle with the columns 'Combo', 'ab', 'ab-a', 'ab-b',
        'bc', 'bc-b', 'bc-c', 'ca', 'ca-c', 'ca-a' (all values are strings).
        The frame is empty (no columns) when no cycle is found.
        ``None`` is returned, after printing a message, when the voted-teams
        file cannot be read.

    Raises
    ------
    KeyError
        If the voted-teams file has no 'Voted Teams' column.
    Exception
        Whatever ``pd.read_csv`` raises for the pairwise results file; that
        read is intentionally not guarded, so a missing pairwise file for a
        week that does have a voted-teams file surfaces as an error.
    """
    # Read the list of teams that received votes this week
    data_path = f'C:\\Research\\VotingParadoxes\\data\\college-polls\\processed_data\\auxiliary_files\\voted_teams_by_season_and_week\\season_{season}\\{season}_week{week}_voted_teams.csv'
    try:
        df = pd.read_csv(data_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Only a missing/unreadable/unparseable file means "no data for this
        # week". A bare `except:` would also hide KeyboardInterrupt and
        # genuine programming errors.
        print(f'No valid data for {season} and {week}')
        return None

    # Extract the team names from the 'Voted Teams' column
    name_list = df['Voted Teams'].tolist()

    # Read the pairwise data CSV file (TeamA, TeamB, A>B, B>A)
    pairwise_path = f'C:\\Research\\VotingParadoxes\\src\\college-polls\\Pairwise\\results\\season_{season}\\{season}_week{week}_condorcet.csv'
    pairwise_df = pd.read_csv(pairwise_path)

    # Preprocess the pairwise data into a dictionary for faster lookups
    pairwise_dict = preprocess_pairwise_data(pairwise_df)

    # Iterate lazily over each 3-team combination; the full list of triples
    # never needs to be held in memory.
    valid_combinations = []
    for combo in itertools.combinations(name_list, 3):
        a, b, c = sorted(combo)

        # Look up the results for each pair from the preprocessed dictionary
        try:
            a_b_result = pairwise_dict[(a, b)]
            a_c_result = pairwise_dict[(a, c)]
            b_c_result = pairwise_dict[(b, c)]
        except KeyError:
            continue  # Skip this combination if any pair data is missing

        # Check the cycle conditions (strict comparisons: ties are not wins)
        if ((a_b_result[0] > a_b_result[1] and  # A > B
             b_c_result[0] > b_c_result[1] and  # B > C
             a_c_result[1] > a_c_result[0]) or  # C > A
            (a_b_result[1] > a_b_result[0] and  # B > A
             b_c_result[1] > b_c_result[0] and  # C > B
             a_c_result[0] > a_c_result[1])):   # A > C
            valid_combinations.append({
                'Combo': f'{a}, {b}, {c}',
                'ab': f'({a}, {b})',
                'ab-a': f'{a_b_result[0]}',
                'ab-b': f'{a_b_result[1]}',
                'bc': f'({b}, {c})',
                'bc-b': f'{b_c_result[0]}',
                'bc-c': f'{b_c_result[1]}',
                'ca': f'({c}, {a})',
                'ca-c': f'{a_c_result[1]}',
                'ca-a': f'{a_c_result[0]}'
            })

    # Convert the valid combinations to a DataFrame
    valid_combos_df = pd.DataFrame(valid_combinations)
    return valid_combos_df

In [47]:
def all_college_cycles(seasons=range(2014, 2025), weeks=range(1, 18),
                       output_path=None, include_season_week=False):
    """
    Run the cycle finder over every season/week and save the combined result.

    Parameters
    ----------
    seasons : iterable, optional
        Seasons to process. Defaults to 2014 through 2024.
    weeks : iterable, optional
        Weeks to process within each season. Defaults to 1 through 17.
    output_path : str or path-like, optional
        Destination CSV. Defaults to the project's ``all-cycles.csv``.
    include_season_week : bool, optional
        When True, 'Season' and 'Week' columns are appended to each row so
        that a cycle can be traced back to the poll it came from. Off by
        default so the output schema stays as it was.

    Returns
    -------
    None
        The combined table is written to ``output_path``; a progress line is
        printed for every season/week processed.
    """
    if output_path is None:
        output_path = 'C:\\Research\\VotingParadoxes\\src\\college-polls\\Pairwise\\all-cycles.csv'

    # Materialise once: `weeks` is re-iterated for every season, which would
    # silently yield nothing after the first season if it were a generator.
    weeks = list(weeks)

    # Collect the per-week frames and concatenate once at the end; calling
    # pd.concat inside the loop re-copies all accumulated rows every time.
    frames = []
    for season in seasons:
        for week in weeks:
            print(f"Processing year {season}, week {week}...")
            result_df = cycle_finder_one_iteration(season, week)

            # None means the week had no readable data; an empty frame means
            # no cycles were found. Neither contributes rows.
            if result_df is None or result_df.empty:
                continue

            if include_season_week:
                result_df = result_df.assign(Season=season, Week=week)
            frames.append(result_df)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    results_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Save the combined DataFrame to a CSV file with a clean format
    results_df.to_csv(output_path, index=False)
    print("All data has been processed and saved")

In [48]:
# Run the sweep over all seasons and weeks; this prints one progress line per
# season/week and writes the combined cycles table to all-cycles.csv.
all_college_cycles()

Processing year 2014, week 1...
Processing year 2014, week 2...
Processing year 2014, week 3...
Processing year 2014, week 4...
Processing year 2014, week 5...
Processing year 2014, week 6...
Processing year 2014, week 7...
Processing year 2014, week 8...
Processing year 2014, week 9...
Processing year 2014, week 10...
Processing year 2014, week 11...
Processing year 2014, week 12...
Processing year 2014, week 13...
Processing year 2014, week 14...
Processing year 2014, week 15...
Processing year 2014, week 16...
Processing year 2014, week 17...
Processing year 2015, week 1...
Processing year 2015, week 2...
No valid data for 2015 and 2
Processing year 2015, week 3...
Processing year 2015, week 4...
Processing year 2015, week 5...
Processing year 2015, week 6...
Processing year 2015, week 7...
Processing year 2015, week 8...
Processing year 2015, week 9...
Processing year 2015, week 10...
Processing year 2015, week 11...
Processing year 2015, week 12...
Processing year 2015, week 13...