In [18]:
import pandas as pd
import plotly.express as px

In [19]:
df = pd.read_csv("transformed_data.csv")
df

Unnamed: 0.1,Unnamed: 0,year,gameid,league,teamname,side,ban1,ban2,ban3,ban4,...,pick2,pick3,pick4,pick5,result,num_counters_picked,num_counters_banned,PGA,mean_champ_wr,higher_PGA
0,0,2017,1506-1540,LPL,I May,Blue,Syndra,Malzahar,Ashe,Karma,...,Kha'Zix,Cassiopeia,Varus,Tahm Kench,1,0,2,2,0.508666,1
1,1,2017,1506-1540,LPL,Royal Never Give Up,Red,Camille,Rengar,Zyra,Elise,...,Lee Sin,Ryze,Caitlyn,Nautilus,0,0,0,0,0.493086,0
2,2,2017,1506-1541,LPL,I May,Blue,Syndra,Malzahar,Ashe,Rek'Sai,...,Lee Sin,Corki,Caitlyn,Thresh,1,0,0,0,0.510833,0
3,3,2017,1506-1541,LPL,Royal Never Give Up,Red,Rengar,Camille,Varus,Cassiopeia,...,Rumble,Ryze,Jhin,Zyra,0,1,0,1,0.492558,1
4,4,2017,1507-1544,LPL,Invictus Gaming,Blue,Jayce,Elise,Malzahar,Kha'Zix,...,Rengar,LeBlanc,Varus,Tahm Kench,1,0,1,1,0.492380,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88345,119955,2025,LOLTMNT06_96854,LFL,Vitality.Bee,Red,Skarner,Viktor,Maokai,Rell,...,Ivern,Sylas,Varus,Nautilus,1,1,1,2,0.497897,1
88346,119956,2025,LOLTMNT06_96867,LFL2,Zerance,Blue,Aurora,Karthus,Kayn,Galio,...,Xin Zhao,Sylas,Corki,Nautilus,0,1,2,3,0.501714,1
88347,119957,2025,LOLTMNT06_96867,LFL2,Project Conquerors,Red,Viktor,K'Sante,Ivern,Jayce,...,Amumu,Ambessa,Varus,Rell,1,2,0,2,0.509128,0
88348,119960,2025,LOLTMNT06_96906,LFL2,IZI Dream,Blue,Akali,Rell,Varus,Skarner,...,Maokai,Corki,Kai'Sa,Rakan,1,1,2,3,0.516646,1


In [20]:
bin_edges = list(range(11))
bin_counts = df["PGA"].value_counts().reindex(bin_edges, fill_value=0).reset_index()
bin_counts.columns = ["PGA", "Count"]

fig = px.bar(
    bin_counts,
    x="PGA",
    y="Count",
    title="Distribution of Pre Game Advantages",
    labels={"PGA": "PGA Value", "Count": "Count"},
    text_auto=True
)

fig.update_layout(
    xaxis=dict(type="category"),
    bargap=0.05  
)

fig.write_html('uni.html', include_plotlyjs='cdn')


In [21]:
import plotly.express as px

data = {
    "Category": ["Teans with PGA", "Teams with no PGA"],
    "Values": [
        df[df['PGA'] != 0].shape[0], 
        df[df['PGA'] == 0].shape[0], 
    ]
}

fig = px.pie(
    names=data["Category"],  
    values=data["Values"],   
    title="How common is PGA?"
)

# Show plot
fig.write_html('uni2.html', include_plotlyjs='cdn')


"""
Note that there are more teams with PGA. Meaning, most teams either counter picked the enemy or banned one of their counters. 
This leads us to believe that players are considering the affect of counter matchups
"""

'\nNote that there are more teams with PGA. Meaning, most teams either counter picked the enemy or banned one of their counters. \nThis leads us to believe that players are considering the affect of counter matchups\n'

In [22]:
import plotly.express as px

data = {
    "Category": ["Wins with PGA = 0", "Wins with PGA > 0"],
    "Values": [
        df[(df['result'] == 1) & (df['PGA'] == 0)].shape[0], 
        df[(df['result'] == 1) & (df['PGA'] != 0)].shape[0]
    ]
}

fig = px.pie(
    names=data["Category"],  
    values=data["Values"],   
    title="Distribution of Winners - When PGA present / missing"
)

# Show plot
fig.show()


"""
Note that most winning teams have at least one PGA. As shown earlier, about 56.5% of teams have a PGA > 0. Thus, since the 
wins with PGA > 0 are greater than wins with PGA = 0, we can be led to assume that PGA has some sort of affect on wins. 
However, these results are for when PGA = 1. Lets see the distributions of wins for a higher PGA, lets say 5. 
"""

'\nNote that most winning teams have at least one PGA. As shown earlier, about 56.5% of teams have a PGA > 0. Thus, since the \nwins with PGA > 0 are greater than wins with PGA = 0, we can be led to assume that PGA has some sort of affect on wins. \nHowever, these results are for when PGA = 1. Lets see the distributions of wins for a higher PGA, lets say 5. \n'

In [23]:
import pandas as pd

df = df.sort_values(by=["gameid", "side"])
games = df.groupby("gameid")

win_higher_pga = 0
lose_higher_pga = 0

for gameid, group in games:
    if len(group) != 2:
        continue  # Skip if game data is incomplete

    team1, team2 = group.iloc[0], group.iloc[1]

    # Determine which team won
    if team1["result"] == 1:
        winner, loser = team1, team2
    else:
        winner, loser = team2, team1

    if winner["PGA"] > loser["PGA"]:
        win_higher_pga += 1
    elif loser["PGA"] > winner["PGA"]:
        lose_higher_pga += 1

labels = ["Won with Higher PGA", "Lost with Higher PGA"]
values = [win_higher_pga, lose_higher_pga]

fig = px.pie(
    names=labels, 
    values=values, 
    title="Dist of game result for teams with Higher PGA than opponent", 
)

fig.write_html('bi2.html', include_plotlyjs='cdn')

"""
This visualization shows another 50/50 split. So, it looks like PGA doesnt have 
an affect on your wins/losses, if half of the teams are beating their opponent, while having less PGA
Then why did our distribution of winners have so many teams with PGA > 0? 

Well, when considering the number of champions per lane, usually about 20-30, and we're considering the top 5 counters, then
theres a 1/5 - 1/6 chance that a pick will counter an opponent. Additionally, it is very reasonable to say that a player 
will not want to play against their counter, leading to most players banning their counter. But lets see which variable affected PGA the most. 
"""

"\nThis visualization shows another 50/50 split. So, it looks like PGA doesnt have \nan affect on your wins/losses, if half of the teams are beating their opponent, while having less PGA\nThen why did our distribution of winners have so many teams with PGA > 0? \n\nWell, when considering the number of champions per lane, usually about 20-30, and we're considering the top 5 counters, then\ntheres a 1/5 - 1/6 chance that a pick will counter an opponent. Additionally, it is very reasonable to say that a player \nwill not want to play against their counter, leading to most players banning their counter. But lets see which variable affected PGA the most. \n"

In [24]:
import plotly.express as px

data = {
    "Category": ["PGA from counters", "PGA from bans"],
    "Values": [
        df['num_counters_picks'].sum(), 
        df['num_counters_banned'].sum(), 
    ]
}

fig = px.pie(
    names=data["Category"],  
    values=data["Values"],   
    title="Distribution of PGA factor"
)

# Show plot
fig.show()


KeyError: 'num_counters_picks'

In [None]:
import plotly.express as px

data = {
    "Category": ["Wins on Red", "Wins on Blue"],
    "Values": [
        df[(df['result'] == 1) & (df['side'] == "Red")].shape[0], 
        df[(df['result'] == 1) & (df['side'] == "Blue")].shape[0], 
    ]
}

fig = px.pie(
    names=data["Category"],  
    values=data["Values"],   
    title="Distribution of Results for Side"
)

# Show plot
fig.show()

In [None]:
#bar chart, each bar is a PGA (0-10)
#each pga has two bars, # wins # losses 
df

Unnamed: 0.1,Unnamed: 0,year,gameid,teamname,side,ban1,ban2,ban3,ban4,ban5,...,pick2,pick3,pick4,pick5,result,num_counters,num_counters_banned,PGA,mean_wr,higher_PGA
70464,97064,2023,10660-10660_game_1,LNG Esports,Blue,Akali,Nocturne,K'Sante,Lee Sin,Wukong,...,Maokai,Orianna,Kalista,Senna,0,0,1,1,0.511094,0
70465,97065,2023,10660-10660_game_1,Rare Atom,Red,Poppy,Ashe,Neeko,Vi,Jarvan IV,...,Rell,LeBlanc,Varus,Renata Glasc,1,3,1,4,0.514139,1
70466,97066,2023,10660-10660_game_2,LNG Esports,Blue,Nocturne,Udyr,Renata Glasc,Nautilus,Lee Sin,...,Bel'Veth,Neeko,Senna,Tahm Kench,0,1,3,4,0.489709,1
70467,97067,2023,10660-10660_game_2,Rare Atom,Red,Poppy,Ashe,Rumble,Tristana,Lucian,...,Jarvan IV,LeBlanc,Kalista,Rell,1,1,2,3,0.510223,0
70468,97068,2023,10660-10660_game_3,LNG Esports,Blue,Rell,Nocturne,Tristana,Jarvan IV,Rumble,...,Bel'Veth,Neeko,Caitlyn,Lux,1,1,1,2,0.497444,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7997,11397,2017,WMC2TMNT1/320030,Team Cloud Drake,Red,Ornn,Kalista,Tristana,Maokai,Shen,...,Sejuani,Ryze,Caitlyn,Braum,0,0,1,1,0.481034,0
7998,11398,2017,WMC2TMNT1/320038,Team Cloud Drake,Blue,Azir,Alistar,Tristana,Shen,Gnar,...,Jarvan IV,Syndra,Varus,Braum,0,2,0,2,0.497706,1
7999,11399,2017,WMC2TMNT1/320038,Team Mountain Drake,Red,Taric,Kalista,Ornn,Malzahar,Karma,...,Sejuani,Orianna,Xayah,Rakan,1,0,1,1,0.502609,0
8000,11402,2017,WMC2TMNT1/320050,Team Infernal Drake,Blue,Caitlyn,Jayce,Tristana,Twitch,Jarvan IV,...,Sejuani,Viktor,Xayah,Braum,0,0,0,0,0.487662,0


In [None]:
import plotly.graph_objects as go

pga_range = list(range(11))

win_PGAs = df[df['result'] == 1]['PGA'].value_counts()
loss_PGAs = df[df['result'] == 0]['PGA'].value_counts()

fig = go.Figure()

fig.add_trace(go.Bar(
    x=pga_range,
    y=win_PGAs,
    name='Wins',
    marker_color='blue'
))

fig.add_trace(go.Bar(
    x=pga_range,
    y=loss_PGAs,
    name='Losses',
    marker_color='red'
))

fig.update_layout(
    title='Wins and Losses by PGA',
    xaxis=dict(
        title='PGA',
        tickmode='array',
        tickvals=pga_range  
    ),
    yaxis_title='Count',
    barmode='group'  
)

fig.write_html('bi.html', include_plotlyjs='cdn')
