In [3]:
from pymongo import MongoClient
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

# Default size (width, height in inches) applied to every matplotlib figure.
plt.rcParams.update({'figure.figsize': (5, 4)})

# MongoClient is created without arguments, so pymongo's default connection
# settings apply; `db` is the database handle used by every query below.
client = MongoClient()
db = client['league_of_legends']

In [4]:
# Every distinct value of the `type` field inside `playtrace` entries; these
# names are later used to build the "<type>_count_<period>" feature columns.
actionTypes = db['playtraces_season11_72hrs'].distinct('playtrace.type')

In [5]:
# NOTE(review): same query as the one run in the previous cell. `actionTypes`
# is read as a global by `divide` and `get_df`.
actionTypes = db.playtraces_season11_72hrs.distinct('playtrace.type')

def get_period_range_list(end,n):
    """Split the interval from 0 to `end` into `n` consecutive integer ranges.

    Parameters
    ----------
    end : int or float
        Upper bound of the interval (the callers pass the timestamp of the
        last action of a play trace).
    n : int
        Number of periods to produce; must be at least 1.

    Returns
    -------
    list of (int, int)
        `n` contiguous `(start, stop)` pairs. The first pair starts at 0, the
        last pair stops at `int(end)`, and each pair starts where the previous
        one stopped.

    Raises
    ------
    ValueError
        If `n` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    # Each boundary is derived from its index instead of by repeatedly adding
    # a float step: accumulated rounding error could otherwise leave the final
    # stop just below `end` and truncate it to the previous integer.
    boundaries = [int(i * end / n) for i in range(n)]
    boundaries.append(int(end))
    return list(zip(boundaries[:-1], boundaries[1:]))

# create a df with action counts for n periods
def divide(df,n):
    """Count every play trace's actions per action type within `n` periods.

    For each row of `df`, the span from 0 to the timestamp of the row's last
    `playtrace` entry is cut into `n` periods with `get_period_range_list`,
    and every action is tallied under the column "<type>_count_<period>".

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns `playtrace` (a list of dicts with
        `timestamp` and `type` keys) and `id`.
        NOTE(review): the last playtrace entry is assumed to carry the largest
        timestamp, i.e. the trace is ordered by time — confirm.
    n : int
        Number of periods.

    Returns
    -------
    pandas.DataFrame
        One row per input row: one count column for every
        (action type, period) pair built from the global `actionTypes`,
        followed by `id`.

    Raises
    ------
    KeyError
        If an action has a type that is not listed in `actionTypes`.
    """
    count_columns = ["%s_count_%s" % (a, p) for p in range(n) for a in actionTypes]
    records = []
    for _, row in df.iterrows():
        counts = dict.fromkeys(count_columns, 0)
        playtrace = row['playtrace']
        # Rows with an empty (or missing) trace keep all-zero counts.
        if isinstance(playtrace, (list, tuple)) and len(playtrace) > 0:
            period_range_list = get_period_range_list(playtrace[-1]['timestamp'], n)
            for action in playtrace:
                # The periods are half-open, so an action sitting exactly on
                # the upper bound (always true for the last action) matches
                # none of them; it is counted in the final period instead of
                # reusing whatever index the previous action produced.
                t = n - 1
                for idx, (lo, hi) in enumerate(period_range_list):
                    if lo <= action['timestamp'] < hi:
                        t = idx
                        break
                counts["%s_count_%s" % (action['type'], t)] += 1
        counts['id'] = row['id']
        records.append(counts)
    # Build the frame once from all records; growing a DataFrame row by row is
    # quadratic and `DataFrame.append` no longer exists in pandas 2.x.
    return pd.DataFrame(records, columns=count_columns + ['id'])

In [6]:
def get_df(df,n):
    """Build per-period action-count features together with the tier label.

    For each row of `df`, the span from 0 to the timestamp of the row's last
    `playtrace` entry is cut into `n` periods with `get_period_range_list`,
    and every action is tallied under the column "<type>_count_<period>".

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns `playtrace` (a list of dicts with
        `timestamp` and `type` keys) and `highestAchievedSeasonTier`.
        NOTE(review): the last playtrace entry is assumed to carry the largest
        timestamp, i.e. the trace is ordered by time — confirm.
    n : int
        Number of periods.

    Returns
    -------
    pandas.DataFrame
        One row per input row: one count column for every
        (action type, period) pair built from the global `actionTypes`, an
        `id` column that is always the empty string (the row id is not
        copied here), and `highestAchievedSeasonTier`.

    Raises
    ------
    KeyError
        If an action has a type that is not listed in `actionTypes`.
    """
    count_columns = ["%s_count_%s" % (a, p) for p in range(n) for a in actionTypes]
    records = []
    for _, row in df.iterrows():
        counts = dict.fromkeys(count_columns, 0)
        counts['id'] = ''
        playtrace = row['playtrace']
        # Rows with an empty (or missing) trace keep all-zero counts.
        if isinstance(playtrace, (list, tuple)) and len(playtrace) > 0:
            period_range_list = get_period_range_list(playtrace[-1]['timestamp'], n)
            for action in playtrace:
                # The periods are half-open, so an action sitting exactly on
                # the upper bound (always true for the last action) matches
                # none of them; it is counted in the final period instead of
                # reusing whatever index the previous action produced.
                t = n - 1
                for idx, (lo, hi) in enumerate(period_range_list):
                    if lo <= action['timestamp'] < hi:
                        t = idx
                        break
                counts["%s_count_%s" % (action['type'], t)] += 1
        counts['highestAchievedSeasonTier'] = row['highestAchievedSeasonTier']
        records.append(counts)
    # Build the frame once from all records; growing a DataFrame row by row is
    # quadratic and `DataFrame.append` no longer exists in pandas 2.x.
    # Explicit columns keep the label column present even for empty input.
    return pd.DataFrame(
        records, columns=count_columns + ['id', 'highestAchievedSeasonTier'])

In [7]:
from sklearn.preprocessing import MinMaxScaler
# Module-level scaler instance; `get_acc_and_rand` below uses it to rescale
# the feature matrix before training the classifier.
s = MinMaxScaler()
# PCA
from sklearn.decomposition import PCA
from sklearn import svm
from scipy.stats import ttest_ind
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

def get_acc_and_rand(df, test_size=0.3, random_state=None):
    """Train an SVM on one random split and compare it with a majority baseline.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns plus the label column `highestAchievedSeasonTier`;
        every other column is used as a numeric feature.
    test_size : float, optional
        Share of rows held out for evaluation (default 0.3).
    random_state : int or None, optional
        Seed forwarded to `train_test_split`; the default `None` keeps the
        split random on every call.

    Returns
    -------
    (accuracy, random_guess)
        `accuracy` is the SVM's score on the held-out rows. `random_guess` is
        the share of held-out rows whose label equals the most frequent
        training label, i.e. the accuracy of always predicting that label.

    Raises
    ------
    KeyError
        If `df` has no `highestAchievedSeasonTier` column.
    """
    label_col = 'highestAchievedSeasonTier'
    if label_col not in df.columns:
        # Fail here with a clear message instead of printing and then hitting
        # a NameError on the undefined label vector further down.
        raise KeyError("DataFrame has no %r column" % label_col)
    X = df.loc[:, df.columns != label_col].values
    y = df[label_col].values
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    # The module-level scaler `s` is fitted on the training rows only and then
    # applied to the held-out rows, so no test-set statistics leak into
    # training.
    X_train = s.fit_transform(X_train)
    X_test = s.transform(X_test)
    classifier = svm.SVC(C=50)
    classifier.fit(X_train, y_train)
    # Most frequent training label; np.unique returns sorted labels, so ties
    # resolve deterministically to the first label in sort order.
    labels, counts = np.unique(y_train, return_counts=True)
    most_popular_training = labels[np.argmax(counts)]
    random_guess = float(np.mean(y_test == most_popular_training))
    accuracy = classifier.score(X_test, y_test)
    return accuracy, random_guess

In [8]:
from scipy.stats import ttest_ind

def classify(championId, n_runs=50):
    """Test whether action counts separate BRONZE from PLATINUM players.

    Loads the CLASSIC games of one champion played by BRONZE or PLATINUM
    players, builds single-period action-count features with `get_df`, and
    repeats `get_acc_and_rand` `n_runs` times on fresh random splits.

    Parameters
    ----------
    championId : int
        Champion whose games are analysed.
    n_runs : int, optional
        Number of random train/test repetitions (default 50).

    Returns
    -------
    tuple
        `(p_val, mean_accuracy, mean_baseline, std_accuracy, std_baseline)`,
        where `p_val` comes from `ttest_ind` on the per-run SVM accuracies
        versus the per-run majority-class baselines.

    Raises
    ------
    ValueError
        If no matching game exists for `championId`.
    """
    query = {
        'gameMode': 'CLASSIC',
        'championId': championId,
        'highestAchievedSeasonTier': {'$in': ['BRONZE', 'PLATINUM']},
    }
    # `get_df` only reads these two fields, so the rest of each document is
    # not transferred from the database.
    projection = {'_id': 0, 'playtrace': 1, 'highestAchievedSeasonTier': 1}
    df = pd.DataFrame(list(db.playtraces_season11_72hrs.find(query, projection)))
    if df.empty:
        raise ValueError(
            "no CLASSIC BRONZE/PLATINUM games found for champion %r" % (championId,))
    df_1 = get_df(df,1).drop('id', axis=1)
    accuracies = []
    random_guesses = []
    for _ in range(n_runs):
        acc, rand_guess = get_acc_and_rand(df_1)
        accuracies.append(acc)
        random_guesses.append(rand_guess)
    # NOTE(review): both lists are measured on the same splits, so a paired
    # test (scipy.stats.ttest_rel) may be the better fit — confirm before
    # changing, since it alters the reported p-values.
    p_val = ttest_ind(accuracies, random_guesses).pvalue
    return(p_val, np.mean(accuracies), np.mean(random_guesses), np.std(accuracies), np.std(random_guesses))

In [11]:
# Distinct champion ids among the games matching the game mode, game version
# and queue filter below. The database computes the distinct values, so the
# full documents are never loaded into memory.
# The ids come back in whatever order the database returns them, which may
# differ from first-appearance order.
champions = np.array(
    db.playtraces_season11_72hrs.distinct(
        'championId',
        {'gameMode': 'CLASSIC', 'gameVersion': '8.3.217.1022', 'queueId': 420},
    )
)

In [12]:
# Run the classification experiment once per champion and collect one summary
# record (p-value plus mean/std of SVM accuracy and baseline) for each.
classifier_results = []
for c in map(int, champions):
    p_val, mean_svm, mean_rand, std_svm, std_rand = classify(c)
    print ("Champion: ", c, " p_val", p_val)
    result_obj = dict(zip(
        ('championId', 'p_val', 'mean_svm', 'mean_rand', 'std_svm', 'std_rand'),
        (c, p_val, mean_svm, mean_rand, std_svm, std_rand),
    ))
    classifier_results.append(result_obj)



Champion:  37  p_val 0.064453368816
Champion:  267  p_val 3.58087182058e-39
Champion:  25  p_val 0.00522249458075
Champion:  81  p_val 4.15136702532e-05
Champion:  236  p_val 0.00937049676509
Champion:  91  p_val 0.00340730403389
Champion:  41  p_val 0.0238510351216
Champion:  77  p_val 2.91149741795e-05
Champion:  12  p_val 4.05647063574e-24
Champion:  21  p_val 0.896501231797
Champion:  245  p_val 7.29538800282e-08
Champion:  24  p_val 1.62324033089e-11
Champion:  39  p_val 0.133210019343
Champion:  110  p_val 2.85524446283e-12
Champion:  86  p_val 1.0
Champion:  18  p_val 3.0327790831e-05
Champion:  103  p_val 3.34293056714e-10
Champion:  11  p_val 0.318053651576
Champion:  101  p_val 0.121713525325
Champion:  61  p_val 8.35554275799e-08
Champion:  67  p_val 5.70470060075e-17
Champion:  164  p_val 0.0247664903096
Champion:  32  p_val 0.739082587087
Champion:  134  p_val 0.18059344139
Champion:  19  p_val 0.644895456847
Champion:  143  p_val 3.03162976366e-22
Champion:  131  p_val 0.

In [13]:
# Summarise the per-champion results and export them, most significant first.
column_order = ['championId', 'p_val','mean_diff', 'mean_svm','mean_rand','std_svm', 'std_rand']
# Every statistic is shown with 5 decimals; the champion id is left untouched.
rounding = {col: 5 for col in column_order[1:]}

results_df = pd.DataFrame(classifier_results)
# The difference is taken from the unrounded means so it carries no extra
# rounding error, and is rounded together with the other statistics.
results_df['mean_diff'] = results_df['mean_svm'] - results_df['mean_rand']
results_df = results_df[column_order]
# Sort on the full-precision p-values before rounding; after rounding, every
# p-value below 0.000005 becomes 0.0 and their order would be arbitrary.
results_df.sort_values(by=['p_val']).round(rounding).to_csv('results_action_counts.csv', index=False)
results_df = results_df.round(rounding)