In [1]:
import pandas as pd

In [2]:
cleaned_league = pd.read_csv("complete_cleaned.csv")
counter_df = pd.read_csv("counters.csv")

In [3]:
cleaned_league = cleaned_league.drop(columns=['Unnamed: 0.1', "Unnamed: 0"])

In [5]:
import pandas as pd
import ast  # For safely parsing tuple strings

def format_champion(champ):
    if pd.isna(champ):  
        return None  
    return champ.lower().replace(" ", "-").replace("'", "").replace("nunu-&-willump", "nunu-willump").replace("dr.-mundo", "dr-mundo")

def get_picks_and_bans(df): 
    blue_picks = df[df['side'] == 'Blue'][['pick1', 'pick2', 'pick3', 'pick4', 'pick5']].values.flatten().tolist()
    red_picks = df[df['side'] == 'Red'][['pick1', 'pick2', 'pick3', 'pick4', 'pick5']].values.flatten().tolist()

    blue_bans = df[df['side'] == 'Blue'][['ban1', 'ban2', 'ban3', 'ban4', 'ban5']].values.flatten().tolist()
    red_bans = df[df['side'] == 'Red'][['ban1', 'ban2', 'ban3', 'ban4', 'ban5']].values.flatten().tolist()

    blue_picks = [format_champion(champ) for champ in blue_picks if pd.notna(champ)]
    red_picks = [format_champion(champ) for champ in red_picks if pd.notna(champ)]
    
    blue_bans = [format_champion(champ) for champ in blue_bans if pd.notna(champ)]
    red_bans = [format_champion(champ) for champ in red_bans if pd.notna(champ)]

    return blue_picks, red_picks, blue_bans, red_bans

def preprocess_counters(counter_df):
    counter_dict = {}

    for _, row in counter_df.iterrows():
        champ = format_champion(row['Champion'])
        
        if pd.notna(row['counters champ']) and isinstance(row['counters champ'], str):
            try:
                counters = ast.literal_eval(row['counters champ'])  # Safely parse the tuple
                if isinstance(counters, tuple):  # Ensure it's a tuple before extracting
                    if champ not in counter_dict:
                        counter_dict[champ] = set()
                    counter_dict[champ].add(format_champion(counters[0]))  # Extract champion names
            except (SyntaxError, ValueError):
                continue  # Skip malformed rows
                
    return counter_dict

def count_counters(df, counter_dict): 
    blue_picks, red_picks, blue_bans, red_bans = get_picks_and_bans(df)

    num_blue_counters = 0
    num_red_counters = 0
    num_blue_counters_banned = 0
    num_red_counters_banned = 0

    # Count blue team's counter picks (against red team)
    for red_champ in red_picks:
        champ_counters = counter_dict.get(red_champ, set())  # Get the set of counters
        num_blue_counters += sum(1 for blue_champ in blue_picks if blue_champ in champ_counters)  # Count all matches

    # Count red team's counter picks (against blue team)
    for blue_champ in blue_picks:
        champ_counters = counter_dict.get(blue_champ, set())  # Get the set of counters
        num_red_counters += sum(1 for red_champ in red_picks if red_champ in champ_counters)  # Count all matches

    # Count blue team's counter bans (Blue banned a counter to Blue's own picks)
    for blue_champ in blue_picks:
        champ_counters = counter_dict.get(blue_champ, set())
        num_blue_counters_banned += sum(1 for ban in blue_bans if ban in champ_counters)

    # Count red team's counter bans (Red banned a counter to Red's own picks)
    for red_champ in red_picks:
        champ_counters = counter_dict.get(red_champ, set())
        num_red_counters_banned += sum(1 for ban in red_bans if ban in champ_counters)

    return pd.Series({
        "Red Counters": num_red_counters, 
        "Blue Counters": num_blue_counters,
        "Red Counters Banned": num_red_counters_banned,
        "Blue Counters Banned": num_blue_counters_banned
    })

def process_data(df, counter_df):
    counter_dict = preprocess_counters(counter_df)
    counters_df = df.groupby("gameid").apply(lambda x: count_counters(x, counter_dict)).reset_index()
    df = df.merge(counters_df, on="gameid", how="left")
    
    df["num_counters"] = df.apply(lambda row: row["Red Counters"] if row["side"] == "Red" else row["Blue Counters"], axis=1)
    df["num_counters_banned"] = df.apply(lambda row: row["Red Counters Banned"] if row["side"] == "Red" else row["Blue Counters Banned"], axis=1)
    
    # Fill NaN values to avoid calculation issues
    df["num_counters"] = df["num_counters"].fillna(0)
    df["num_counters_banned"] = df["num_counters_banned"].fillna(0)
    
    # Calculate PGA as the sum of num_counters and num_counters_banned
    df["PGA"] = df["num_counters"] + df["num_counters_banned"]
    
    df.drop(columns=["Red Counters", "Blue Counters", "Red Counters Banned", "Blue Counters Banned"], inplace=True)

    
    
    
    return df

# Load counter data
counter_df = pd.read_csv("counters.csv")

# Load match data
cleaned_league = pd.read_csv("complete_cleaned.csv")

# Process data (testing with a slice of 2 rows)
final = process_data(cleaned_league, counter_df)
final.head()
final.to_csv("transformed_league_data.csv")


  counters_df = df.groupby("gameid").apply(lambda x: count_counters(x, counter_dict)).reset_index()
