# Data Mining for Item Recommendation in MOBA Games
Code for Section 4.3, "Recommender System Based on Classifiers".

## Dependencies

In [None]:
import os
import json
import pickle
from tqdm import tqdm
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.externals import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn import linear_model
from sklearn.multiclass import OneVsRestClassifier

## Load Data

In [2]:
# Google Colab / PyDrive set-up: builds the `drive` client that the download
# cell uses to fetch the dataset files.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
# hand the application-default credentials to PyDrive before building the client
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [4]:
# Google Drive file ids of the dataset files; each one is downloaded under its
# Drive title by the download cell.
# NOTE(review): presumably the four baseline pickles plus items.json that are
# read later on — the titles are only known at download time, confirm.
file_id = ['1QyC2zho409ctBVSEgylOsqSGdfzPi7On',
          '1-4Pqyd1ivq-o33UdzuH4eIgjoq-QdTy5',
          '1GBWilejUnIatT7o0QvRUwMefo-ydPAjl',
          '1ipgQtMTc_Z-LJuQAtYHTi75jP3UQ1AT5',
          '139b8gxLRa02x29p13eNDrGlFC-zMG8hM']

In [None]:
# Download every dataset file from Google Drive, saving each one to a local
# file named after its Drive title.
# The loop variable is deliberately not called `id`: at notebook level that
# would overwrite the builtin `id` for every cell executed afterwards.
for drive_file_id in file_id:
    drive_file = drive.CreateFile({'id': drive_file_id})
    title = drive_file['title']
    print(title)
    drive_file.GetContentFile(title)

In [3]:
def open_pickle(path):
    """Load a pickle file and return element ``[0]`` of the stored object.

    Parameters
    ----------
    path: path of the pickle file to read

    Returns
    -------
    The item found at index/key ``0`` of the unpickled object (the callers
    in this notebook use it as a DataFrame).
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    # The context manager closes the file even if loading fails.
    with open(path, "rb") as pickle_in:
        example_dict = pickle.load(pickle_in)
    return example_dict[0]

In [None]:
# Load the train/test splits and the item catalogue into notebook globals.
# Each pickle is read through open_pickle and only the frame's .values array
# is kept; `df` is just a scratch name that is rebound for every file.

# load training set
print('Loading training set..')
df = open_pickle('baseline_train_labels_splits.pkl')
Y_train = df.values

df = open_pickle('baseline_train_inputs_splits.pkl')
X_train = df.values

# load test set
print('Loading test set..')
df = open_pickle('baseline_test_labels_splits.pkl')
Y_test = df.values
# column labels of the test label frame: the item id behind each label column
item_ids = df.columns.tolist()

df = open_pickle('baseline_test_inputs_splits.pkl')
X_test = df.values

# load items data
# NOTE(review): translate_onehot iterates data_items and reads item['id'] /
# item['name'], so this is presumably a list of dicts — confirm against items.json
print('Loading items information..')
with open('items.json') as items_json:
    data_items = json.load(items_json)

## Utils

### Transformations

In [8]:
# indexed dictionary: column position in the label vectors -> item id
id_dictionary = dict(enumerate(item_ids))

## create a function that receives a one-hot list and returns the respective ids
def translate_onehot(onehot, name=True):
    """Translate a one-hot encoded item vector into item ids (and names).

    Parameters
    ----------
    onehot: sequence whose positions equal to 1 mark the encoded items
    name: if True, also return the item names

    Returns
    -------
    ``(ids, names)`` when ``name`` is True, otherwise only ``ids``.

    Raises
    ------
    KeyError: a flagged position has no entry in ``id_dictionary``
    StopIteration: ``name`` is True and an id has no entry in ``data_items``
    """
    # positions flagged with 1 are the encoded items; map each one to its id
    ids = [id_dictionary[idx] for idx, flag in enumerate(onehot) if flag == 1]
    if not name:
        # names were not requested, so skip the linear search through data_items
        return ids

    names = []
    for item_id in ids:
        # data_items stores the ids as strings, hence the str() conversion
        item_info = next(item for item in data_items if item['id'] == str(item_id))
        names.append(item_info['name'])
    return ids, names

## create a function that receives the output of a classifier and returns a @k itemset
def decoding(prediction, itemset_size, id_items):
    """Return the ids of the ``itemset_size`` best-scored items, best first.

    Parameters
    ----------
    prediction: 1-D sequence of scores, one per entry of ``id_items``
    itemset_size: number of items wanted (the @k of the itemset)
    id_items: item ids, aligned position by position with ``prediction``

    Returns
    -------
    numpy array with the ids of the top-scored items in descending score
    order. When ``itemset_size`` exceeds the number of scores, all items are
    returned; a size of zero or less gives an empty array.
    """
    items = np.asarray(id_items)
    # indexes ordered from the highest score to the lowest
    ranking = np.argsort(prediction)[::-1]
    # clamp the size: an oversized request must not turn into a negative
    # slice index that wraps around and silently truncates the itemset
    size = max(0, min(itemset_size, len(ranking)))
    return items[ranking[:size]]

### Metrics

In [None]:
# Forked from https://gist.github.com/bwhite/3726239
!wget https://gist.githubusercontent.com/bwhite/3726239/raw/2c92e90259b01b4a657d20c0ad8390caadd59c8b/rank_metrics.py

In [10]:
from rank_metrics import *

## ANN RecSys Model

### Training

In [11]:
def trainANN(X_train, Y_train, input_size, target_size, save=True,
             epochs=1, batch_size=128, validation_split=0.1):
    """ Function to train a neural net with 2 hidden layers

        The network has two hidden layers of 150 ReLU units followed by a
        sigmoid output layer, and is trained with binary cross-entropy and
        the Adam optimizer.

        Parameters
        ----------
        X_train: inputs
        Y_train: targets
        input_size: input size of the network
        target_size: output size of the network
        save: if True, save the architecture to "model.json" and the weights
            to "model.h5"
        epochs: number of training epochs
        batch_size: number of samples per gradient update
        validation_split: fraction of the training data held out for validation

        Returns
        -------
        The fitted Keras model
    """
    # define the model and train it
    model = Sequential()
    model.add(Dense(150, input_dim=input_size, activation='relu'))
    model.add(Dense(150, activation='relu'))
    # one independent sigmoid per output unit, paired with binary cross-entropy
    model.add(Dense(target_size, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=1,
              validation_split=validation_split, shuffle=True)
    if save:
        # serialize the model architecture to JSON
        model_json = model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize the weights to HDF5
        model.save_weights("model.h5")
        print("Model saved")

    return model

In [None]:
# Fit the ANN recommender on the training split and save it
input_size = 136  # number of champions
target_size = 89  # number of items
model = trainANN(
    X_train,
    Y_train,
    input_size=input_size,
    target_size=target_size,
    save=True,
)

### Evaluation

In [13]:
# predictions of test set
# Y_p[i] holds the network output for test sample i; the evaluation cell
# decodes it into the recommended itemset
Y_p = model.predict(X_test)

In [None]:
# Evaluate the recommendations: for every test sample compare the top-k
# recommended itemset with the target items and collect the per-sample
# relevance vector, precision, recall and F1.
eval_itemset_size = 6 # size of itemset required

Relevance = []
Precision = []
Recall = []
F1 = []

# the range covers every row of the test set, including the last one
for i in tqdm(range(X_test.shape[0])):
    targ_items = translate_onehot(Y_test[i], name=False)
    targ_set = set(targ_items)
    # decode output of the model
    itemset = decoding(Y_p[i], itemset_size=eval_itemset_size, id_items=item_ids)
    # binary relevance of each recommended item, in ranking order
    rel_items = [int(j in targ_set) for j in itemset]
    Relevance.append(rel_items)
    # precision/recall/F1 are only defined for samples with at least one target item
    if len(targ_items) >= 1:
        hits = set(itemset) & targ_set
        precision = len(hits) / len(itemset)
        recall = len(hits) / len(targ_items)
        Precision.append(precision)
        Recall.append(recall)
        if precision == 0.0 and recall == 0.0:
            # no hit at all: avoid the 0/0 of the harmonic mean
            F1.append(0)
        else:
            F1.append(2 * ((precision * recall) / (precision + recall)))

In [None]:
# Ranking metrics over the per-sample relevance vectors
_map = mean_average_precision(Relevance)
mrr = mean_reciprocal_rank(Relevance)

# report every metric as "<label> <value>", one per line
summary = [
    ('MAP:', _map),
    ('MRR:', mrr),
    ('Precision:', np.mean(Precision)),
    ('Recall:', np.mean(Recall)),
    ('F1:', np.mean(F1)),
]
for label, value in summary:
    print(label, value)

## Logit or Decision Tree RecSys Model

### Training

In [None]:
def train(X_tr, Y_tr, model_type, save=True):
    """ Function to train a one-vs-rest classifier (logit or decision tree)

        Parameters
        ----------
        X_tr: inputs
        Y_tr: targets
        model_type: 'logit' (SGD classifier with log loss) or 'dtree'
            (decision tree)
        save: if True, dump the fitted classifier to "model.pkl"

        Returns
        -------
        The fitted OneVsRestClassifier

        Raises
        ------
        ValueError: ``model_type`` is neither 'logit' nor 'dtree'
    """
    if model_type == 'logit':
        # NOTE(review): recent scikit-learn releases spell this loss
        # 'log_loss' — confirm against the installed version
        base_estimator = linear_model.SGDClassifier(max_iter=1000, tol=1e-3, loss='log', verbose=True)
    elif model_type == 'dtree':
        base_estimator = DecisionTreeClassifier(random_state=0)
    else:
        # fail early with a clear message instead of an unbound `clf` later on
        raise ValueError(
            "model_type must be 'logit' or 'dtree', got {!r}".format(model_type))
    clf = OneVsRestClassifier(base_estimator)
    clf.fit(X_tr, Y_tr)
    print('Training finished')
    if save:
        # save model
        joblib.dump(clf, 'model.pkl')
        print('Model saved')
    return clf

In [None]:
# Fit the selected classifier on the training split and save it
model_type = 'logit'  # choose 'logit' or 'dtree'
clf = train(X_train, Y_train, model_type=model_type, save=True)

### Evaluation

In [None]:
# predictions of test set
# Y_p[i][c] is the predicted probability for the c-th entry of clf.classes_
Y_p = clf.predict_proba(X_test)
# classes known to the fitted classifier; the evaluation cell uses them to
# line the columns of Y_p up with the item positions
pred_classes=clf.classes_

In [None]:
# Evaluate the recommendations: for every test sample compare the top-k
# recommended itemset with the target items and collect the per-sample
# relevance vector, precision, recall and F1.
eval_itemset_size = 6 # size of itemset required
target_size = 89 # number of items

Relevance = []
Precision = []
Recall = []
F1 = []

n_classes = len(pred_classes)

# the range covers every row of the test set, including the last one
for i in tqdm(range(X_test.shape[0])):
    targ_items = translate_onehot(Y_test[i], name=False)
    targ_set = set(targ_items)
    # adjusting list size: expand the classifier output to one score per item
    # position, using 0 for the positions that are not in pred_classes
    real_pred = []
    aux = 0
    for j in range(target_size):
        # `aux < n_classes` stops the lookup from running past the end of
        # pred_classes once every known class has been placed
        if aux < n_classes and j == pred_classes[aux]:
            real_pred.append(Y_p[i][aux])
            aux += 1
        else:
            real_pred.append(0)
    # decode output of the model
    itemset = decoding(real_pred, itemset_size=eval_itemset_size, id_items=item_ids) #itemset_size = @
    # binary relevance of each recommended item, in ranking order
    rel_items = [int(j in targ_set) for j in itemset]
    Relevance.append(rel_items)
    # precision/recall/F1 are only defined for samples with at least one target item
    if len(targ_items) >= 1:
        hits = set(itemset) & targ_set
        precision = len(hits) / len(itemset)
        recall = len(hits) / len(targ_items)
        Precision.append(precision)
        Recall.append(recall)
        if precision == 0.0 and recall == 0.0:
            # no hit at all: avoid the 0/0 of the harmonic mean
            F1.append(0)
        else:
            F1.append(2 * ((precision * recall) / (precision + recall)))

In [None]:
# Ranking metrics over the per-sample relevance vectors
_map = mean_average_precision(Relevance)
mrr = mean_reciprocal_rank(Relevance)

# report every metric as "<label> <value>", one per line
summary = [
    ('MAP:', _map),
    ('MRR:', mrr),
    ('Precision:', np.mean(Precision)),
    ('Recall:', np.mean(Recall)),
    ('F1:', np.mean(F1)),
]
for label, value in summary:
    print(label, value)