In [1]:
from pymongo import MongoClient
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

plt.rcParams['figure.figsize'] = (5,4)


client = MongoClient()

db = client.league_of_legends

In [2]:
def get_df(df, championId):
    cols = ['kills', 'deaths', 'assists', 'largestKillingSpree', 'largestMultiKill', 'killingSprees', 'longestTimeSpentLiving', 'doubleKills', 'tripleKills', 'quadraKills', 'pentaKills', 'unrealKills', 'totalDamageDealt', 'magicDamageDealt', 'physicalDamageDealt', 'trueDamageDealt', 'largestCriticalStrike', 'totalDamageDealtToChampions', 'magicDamageDealtToChampions', 'physicalDamageDealtToChampions', 'trueDamageDealtToChampions', 'totalHeal', 'totalUnitsHealed', 'damageSelfMitigated', 'damageDealtToObjectives', 'damageDealtToTurrets', 'visionScore', 'timeCCingOthers', 'totalDamageTaken', 'magicalDamageTaken', 'physicalDamageTaken', 'trueDamageTaken', 'goldEarned', 'goldSpent', 'turretKills', 'inhibitorKills', 'totalMinionsKilled', 'neutralMinionsKilled', 'totalTimeCrowdControlDealt', 'champLevel', 'visionWardsBoughtInGame', 'sightWardsBoughtInGame', 'combatPlayerScore', 'objectivePlayerScore', 'totalPlayerScore', 'totalScoreRank']
    new_df = pd.DataFrame(columns=cols)
    for i in range(len(df)):
        match = df.iloc[i,:]
        for player in match['participants']:
            if player['championId'] == championId:
                summonerLvl = match['yulia_player_lvls'][player['participantId']-1]
                row = {
                    'summonerLvl':summonerLvl
                }
                for c in cols:
                    row[c] = player['stats'][c]
                new_df = new_df.append(row, ignore_index=True)
    return new_df

In [3]:
from sklearn.preprocessing import MinMaxScaler
s = MinMaxScaler()
# PCA
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.metrics import precision_recall_fscore_support

def plot_svm_tsne(df):
    X = df.iloc[:,:-1].values
    X = s.fit_transform(X)
    y = df.iloc[:,-1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    classifier = svm.SVC(C=50)
    classifier.fit(X_train, y_train)
    pred = classifier.predict(X_test)
    X_tsne = TSNE(n_components=2).fit_transform(s.fit_transform(X_test))
    f1 = X_tsne[:,0]
    f2 = X_tsne[:,1]
    l = []
    for i in range(0, len(y_test)):
        if y_test[i] == "WEAK":
            l.append("blue")
        else:
            l.append("lime")
    labels = []
    for i in range(0, len(y_test)):
        if y_test[i] == pred[i] == "WEAK":
            labels.append('blue')
        elif y_test[i] == pred[i] == "STRONG":
            labels.append('lime')
        else:
            labels.append('red')
    plt.title("Actual")
    plt.scatter(f1, f2, c=l, s=4, cmap='brg')
    plt.show()
    plt.title("Predicted")
    plt.scatter(f1, f2, c=labels, s=4)
    plt.show()
    random_guess = max(precision_recall_fscore_support(y_test, pred)[3])/len(pred)
    accuracy = classifier.score(X_test, y_test)
    print ("RANDOM GUESS: ", random_guess)
    print ("TEST SET ACC: ", accuracy)
    print ("F-scores: ", precision_recall_fscore_support(y_test, pred)[2])

In [4]:
def get_acc_and_rand(df):
    X = df.iloc[:,:-1].values
    X = s.fit_transform(X)
    y = df.iloc[:,-1].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    classifier = svm.SVC(C=50)
    classifier.fit(X_train, y_train)
    pred = classifier.predict(X_test)
    random_guess = max(precision_recall_fscore_support(y_test, pred)[3])/len(pred)
    accuracy = classifier.score(X_test, y_test)
    return accuracy, random_guess

In [5]:
from scipy.stats import ttest_ind

def classify(championId):
    df = pd.DataFrame(list(db.matches_season11_72hrs.find({'participants': {'$elemMatch':{'championId': championId}},'gameMode':'CLASSIC'})))
    new_df = get_df(df, championId)
    df_weak = new_df[new_df['summonerLvl'] <= new_df.summonerLvl.quantile(0.25)]
    df_strong = new_df[new_df['summonerLvl'] >= new_df.summonerLvl.quantile(0.75)]
    df_weak['level'] = "WEAK"
    df_strong['level'] = "STRONG"
    df_agg = pd.concat([df_weak, df_strong])
    df_agg = df_agg.drop('summonerLvl', axis=1)
    accuracies = []
    random_guesses = []
    for i in range(50):
        acc, rand_guess = get_acc_and_rand(df_agg)
        accuracies.append(acc)
        random_guesses.append(rand_guess)
    p_val = ttest_ind(accuracies, random_guesses).pvalue
    return(p_val, np.mean(accuracies), np.mean(random_guesses), np.std(accuracies), np.std(random_guesses))

In [6]:
champions = pd.DataFrame(list(db.playtraces_season11_72hrs.find({'gameMode':'CLASSIC','gameVersion': '8.3.217.1022', 'queueId': 420}))).championId.unique()

In [8]:
classifier_results = []
for c in champions:
    c = int(c)
    result_obj = {
        'championId': c
    }
    p_val, mean_svm, mean_rand, std_svm, std_rand = classify(c)
    print ("Champion: ", c, " p_val", p_val)
    result_obj = {
        'championId': c,
        'p_val': p_val,
        'mean_svm': mean_svm,
        'mean_rand': mean_rand,
        'std_svm': std_svm,
        'std_rand': std_rand
    }
    classifier_results.append(result_obj)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Champion:  37  p_val 3.50896820446e-69
Champion:  267  p_val 2.63047573615e-105
Champion:  25  p_val 6.05590337267e-110
Champion:  81  p_val 2.49136699633e-104
Champion:  236  p_val 1.29768408649e-66
Champion:  91  p_val 1.32395944886e-50
Champion:  41  p_val 4.52868269926e-09
Champion:  77  p_val 2.34176312535e-48
Champion:  12  p_val 5.73706940045e-123
Champion:  21  p_val 1.82092515356e-132
Champion:  245  p_val 0.0447722053738
Champion:  24  p_val 2.04531808737e-65
Champion:  39  p_val 8.60450140828e-55
Champion:  110  p_val 1.93587989412e-28
Champion:  86  p_val 7.91168779344e-107
Champion:  18  p_val 2.85517412493e-64
Champion:  103  p_val 5.75181572319e-119
Champion:  11  p_val 1.18715416671e-94
Champion:  101  p_val 3.54132244899e-59
Champion:  61  p_val 5.63536231163e-49
Champion:  67  p_val 1.50205702531e-37
Champion:  164  p_val 2.44140122216e-14
Champion:  32  p_val 1.46927056862e-72
Champion:  134  p_val 0.540824754862
Champion:  19  p_val 5.58214859337e-105
Champion:  143

In [9]:
results_df = pd.DataFrame(classifier_results)

In [10]:
for col in results_df.columns[1:]:
    results_df[col] = results_df[col].round(decimals=5)

In [11]:
results_df['mean_diff'] = results_df['mean_svm'] - results_df['mean_rand']
results_df = results_df[['championId', 'p_val','mean_diff', 'mean_svm','mean_rand','std_svm', 'std_rand']]
results_df.sort_values(by=['p_val']).to_csv('results_levels_stats.csv', index=False)

In [12]:
df = pd.DataFrame(list(db.matches_season11_72hrs.find({'participants': {'$elemMatch':{'championId': 82}},'gameMode':'CLASSIC'})))

In [13]:
len(df)

532