In [None]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go
import plotly.express as px

from plotly.subplots import make_subplots
from sklearn.decomposition import PCA

In [None]:
# Load the raw workbook; every missing cell (in any column) is replaced by 0.
df = pd.read_excel('Courses_mk8.xlsx')
df = df.fillna(0)

# Column labels 1..12 - presumably each holds how many times that finishing
# position was obtained on the course (TODO confirm against the workbook).
place_columns = list(range(1, 13))

# Turn the per-position counts into an easier-to-read list of finishing
# positions, e.g. counts {1: 2, 3: 1} become ['1', '1', '3'].
# Positions stay strings, in ascending order, because the histogram cells use
# them as categorical axis labels.
df["Places"] = [
    [
        str(place)
        for place, n_times in zip(place_columns, counts)
        for _ in range(int(n_times))
    ]
    for counts in df[place_columns].itertuples(index=False, name=None)
]

# .copy() makes the trimmed frame independent of the raw one, so adding columns
# to it afterwards does not trigger pandas' SettingWithCopyWarning.
df = df[['Course', 'Tiers', 'Extension', 'Coupe', 'Places']].copy()

In [None]:
# Per-course summary statistics of the finishing positions.
# 'Places' stores the positions as strings, so convert them to int only once.
places_as_int = df['Places'].apply(lambda places: [int(place) for place in places])

# assign() builds a new frame instead of writing into `df` through .loc, which
# avoids SettingWithCopyWarning when `df` comes from a column selection.
# Courses without any recorded position get None for mean/std rather than the
# nan-plus-RuntimeWarning that np.mean/np.std produce on an empty list.
df = df.assign(**{
    'mean':  places_as_int.apply(lambda values: np.mean(values) if values else None),
    'std':   places_as_int.apply(lambda values: np.std(values) if values else None),  # np.std default: ddof=0
    'count': places_as_int.apply(len),
})

In [None]:
# Display the courses whose 'Tiers' value is 'A'.
df.loc[df['Tiers'].eq('A')]

In [None]:
# Histogram of the finishing positions on one course (left) next to the
# histogram over every course (right).
name = 'Gorge Champignon [Wii]'
df_name = df[df['Course'] == name]
if df_name.empty:
    # Fail with an explicit message instead of an IndexError when indexing row 0.
    raise ValueError(f"Unknown course: {name!r}")

# Flatten the per-course lists into a single list of positions.  The
# comprehension is linear and yields [] for an empty frame, whereas
# `.values.sum()` concatenates the lists one by one (quadratic) and returns
# the int 0 when there are no rows.
all_places = [place for places in df['Places'] for place in places]

fig = make_subplots(rows=1, cols=2, subplot_titles=(name, "All"))

fig.add_trace(go.Histogram(x=df_name['Places'].iloc[0]), row=1, col=1)
fig.add_trace(go.Histogram(x=all_places), row=1, col=2)

# Positions are strings: impose the 1..12 order on the categorical x axes.
fig.update_xaxes(categoryorder='array', categoryarray=[str(i) for i in range(1, 13)])
fig.update_layout(showlegend=False, bargap=.1)

fig.show()

In [None]:
# Compare the finishing positions on "Switch" courses with those on every
# other extension.
df_switch = df[df['Extension'] == "Switch"]
df_others = df[df['Extension'] != "Switch"]

# Flatten each group's lists of positions.  The comprehension is linear and
# gives [] for an empty group, whereas `.values.sum()` concatenates the lists
# one by one (quadratic) and returns the int 0 when the group has no rows.
places_switch = [place for places in df_switch['Places'] for place in places]
places_others = [place for places in df_others['Places'] for place in places]

fig = make_subplots(rows=1, cols=2, subplot_titles=("Switch", "Others"))

fig.add_trace(go.Histogram(x=places_switch), row=1, col=1)
fig.add_trace(go.Histogram(x=places_others), row=1, col=2)

# Positions are strings: impose the 1..12 order on the categorical x axes.
fig.update_xaxes(categoryorder='array', categoryarray=[str(i) for i in range(1, 13)])
fig.update_layout(showlegend=False, bargap=.1)

fig.show()

In [None]:
# One histogram of finishing positions per tier, stacked vertically; the mean
# position of each tier is shown in its legend entry.
vec_tiers_name = ['A', 'B', 'C', 'D', 'E', 'F']
# Derive the number of subplot rows from the list so the two cannot drift apart.
fig = make_subplots(rows=len(vec_tiers_name), cols=1)

for i, tiers_name in enumerate(vec_tiers_name):
    df_tiers = df[df['Tiers'] == tiers_name]
    # Flatten the lists of positions; gives [] (not the int 0 returned by
    # `.values.sum()`) when no course belongs to this tier.
    x = [place for places in df_tiers['Places'] for place in places]
    # Positions are stored as strings; an empty tier is reported as nan
    # instead of raising.
    mean_x = round(np.mean([int(place) for place in x]), 2) if x else float('nan')
    fig.add_trace(go.Histogram(x=x, name=tiers_name+f" ({mean_x})"), row=i+1, col=1)

# Positions are strings: impose the 1..12 order on the categorical x axes.
fig.update_xaxes(categoryorder='array', categoryarray=[str(i) for i in range(1, 13)])
fig.update_layout(legend=dict(orientation="h", xanchor="center", yanchor="bottom", y=1.02, x=0.5), bargap=.1, height=1500)
fig.show()

In [None]:
# One histogram of finishing positions per extension, stacked vertically; the
# mean position of each extension is shown in its legend entry.
extension_names = ['Switch', 'Tour', '3DS', 'Wii', 'DS', 'GBA', 'GCN', 'SNES']
# Derive the number of subplot rows from the list so the two cannot drift apart.
fig = make_subplots(rows=len(extension_names), cols=1)

for i, extension_name in enumerate(extension_names):
    df_extension = df[df['Extension'] == extension_name]
    # Flatten the lists of positions; gives [] (not the int 0 returned by
    # `.values.sum()`) when no course belongs to this extension.
    x = [place for places in df_extension['Places'] for place in places]
    # Positions are stored as strings; an empty extension is reported as nan
    # instead of raising.
    mean_x = round(np.mean([int(place) for place in x]), 2) if x else float('nan')
    fig.add_trace(go.Histogram(x=x, name=extension_name+f" ({mean_x})"), row=i+1, col=1)

# Positions are strings: impose the 1..12 order on the categorical x axes.
fig.update_xaxes(categoryorder='array', categoryarray=[str(i) for i in range(1, 13)])
fig.update_layout(legend=dict(orientation="h", xanchor="center", yanchor="bottom", y=1.02, x=0.5), bargap=.1, height=1500)
fig.show()

In [None]:
# Display the courses ordered by their number of recorded positions, fewest first.
df.sort_values('count', ascending=True)

In [None]:
# Restrict the PCA to courses with more than 10 recorded positions and no
# missing value in any column.
has_enough_places = df['count'] > 10
df_for_PCA = df.loc[has_enough_places].dropna()
X = df_for_PCA[['mean', 'std', 'count']]
# NOTE(review): the three features are not standardized before the PCA, so the
# one with the largest variance may dominate the components - confirm intended.

# Project the (mean, std, count) features onto their first two principal components.
pca = PCA(n_components=2)
components = pca.fit_transform(X)

# Scatter plot of the two components, each point labelled with its course name.
fig = px.scatter(
    components,
    x=0,
    y=1,
    text=df_for_PCA['Course'],
)
fig.show()

In [None]:
# Show the PCA estimator object as the cell output.
pca