In [1]:
import pandas as pd
import pickle

from classes.model_factory import ModelFactory

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the pickled training and test objects from the data directory.
# NOTE(review): pickle.load can run arbitrary code while deserializing, so
# these files must come from a trusted source.
with open("../data/data_prep.pkl", "rb") as train_file:
    df_train = pickle.load(train_file)

with open("../data/data_prep_test.pkl", "rb") as test_file:
    df_test = pickle.load(test_file)

In [3]:
# Preview the first five rows of the training data.
df_train.head(n=5)

Unnamed: 0,id,name,mana,attack,health,type,god,strategy,cat_strategy,cat_god,cat_type
0,1118,Firewine,5,0,0,spell,nature,early,0,4,2
2,244,Aetherfuel Alchemist,6,4,4,creature,neutral,late,1,5,0
3,215,Millenium Matryoshka,4,2,2,creature,neutral,late,1,5,0
4,87013,Poison Peddler,4,1,3,creature,neutral,late,1,5,0
5,230,Canopic Hoarder,5,2,1,creature,death,late,1,0,0


In [4]:
# Preview the first five rows of the test data.
df_test.head(n=5)

Unnamed: 0,id,name,mana,attack,health,type,god,cat_god,cat_type
0,100042,Walking Plant,1,1,1,creature,nature,4,0
1,1018,Black Jaguar,2,3,3,creature,nature,4,0
2,129,Avatar of Death,6,6,6,creature,death,0,0
3,87069,Mind Jolt,5,0,0,spell,magic,3,2
4,1090,Minotaur Phalanx,6,4,8,creature,neutral,5,0


In [5]:
# Columns used as model inputs.
features = [
    "mana",
    "attack",
    "health",
    "cat_god",
    "cat_type",
]

# Column the classifiers are trained to predict.
target = "cat_strategy"

In [6]:
# Label vector and feature matrix taken from the training frame.
y = df_train[target]
X = df_train[features]

# Train/test split: 20% of the rows are held out, with a fixed seed so the
# partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Empty table that the model cells fill with one row of scores each.
models = pd.DataFrame(data=None)

In [8]:
# Logistic regression: hand the estimator, the candidate hyper-parameters and
# the scoring name to ModelFactory, then evaluate on the held-out split.
model_name = 'Logistic Regression'
model = LogisticRegression(random_state=42)
params = {'C': [.01, 0.1, 1.0, 10], 'solver': ['liblinear', 'lbfgs']}
score = 'f1'

logistic_regression = ModelFactory(model_name, model, params, score)

lr_model, lr_pred = logistic_regression.classify(X_train, y_train, X_test, y_test)
metrics = logistic_regression.metrics(y_test, lr_pred['y_pred'])

# NOTE(review): every value below is read at the 'accuracy' key, so the three
# columns likely all store accuracy rather than distinct metrics — confirm
# against ModelFactory.metrics.
model_row = pd.DataFrame([{'model_name': model_name,
                           'f1-score': metrics['f1-score']['accuracy'],
                           'precision': metrics['precision']['accuracy'],
                           'recall': metrics['recall']['accuracy']}])

# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
models = pd.concat([models, model_row], ignore_index=True)

Matriz de confusão
[[78  0]
 [ 0 68]]
------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        78
           1       1.00      1.00      1.00        68

    accuracy                           1.00       146
   macro avg       1.00      1.00      1.00       146
weighted avg       1.00      1.00      1.00       146



In [9]:
# Random forest: hand the estimator, the candidate hyper-parameters and the
# scoring name to ModelFactory, then evaluate on the held-out split.
model_name = 'Random Forest'
model = RandomForestClassifier(n_estimators=50, random_state=42)
params = {'max_depth': [4, 5, 6, 7, 8, 9, 10, 15], 'criterion': ['gini', 'entropy']}
score = 'f1'

random_forest = ModelFactory(model_name, model, params, score)

model_random, pred = random_forest.classify(X_train, y_train, X_test, y_test)

metrics = random_forest.metrics(y_test, pred['y_pred'])

# NOTE(review): every value below is read at the 'accuracy' key, so the three
# columns likely all store accuracy rather than distinct metrics — confirm
# against ModelFactory.metrics.
model_row = pd.DataFrame([{'model_name': model_name,
                           'f1-score': metrics['f1-score']['accuracy'],
                           'precision': metrics['precision']['accuracy'],
                           'recall': metrics['recall']['accuracy']}])

# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
models = pd.concat([models, model_row], ignore_index=True)

Matriz de confusão
[[78  0]
 [ 0 68]]
------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        78
           1       1.00      1.00      1.00        68

    accuracy                           1.00       146
   macro avg       1.00      1.00      1.00       146
weighted avg       1.00      1.00      1.00       146



In [10]:
# k-nearest neighbours: hand the estimator, the candidate neighbour counts and
# the scoring name to ModelFactory, then evaluate on the held-out split.
model_name = 'knn'
model = KNeighborsClassifier(n_jobs=-1)
params = {'n_neighbors': [9, 10, 11, 12, 13]}
score = 'f1'

knn = ModelFactory(model_name, model, params, score)

model_knn, pred = knn.classify(X_train, y_train, X_test, y_test)
metrics = knn.metrics(y_test, pred['y_pred'])

# NOTE(review): every value below is read at the 'accuracy' key, so the three
# columns likely all store accuracy rather than distinct metrics — confirm
# against ModelFactory.metrics.
model_row = pd.DataFrame([{'model_name': model_name,
                           'f1-score': metrics['f1-score']['accuracy'],
                           'precision': metrics['precision']['accuracy'],
                           'recall': metrics['recall']['accuracy']}])

# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
models = pd.concat([models, model_row], ignore_index=True)

Matriz de confusão
[[78  0]
 [ 5 63]]
------------------------------------------
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        78
           1       1.00      0.93      0.96        68

    accuracy                           0.97       146
   macro avg       0.97      0.96      0.97       146
weighted avg       0.97      0.97      0.97       146



In [11]:
# Gaussian naive Bayes: no hyper-parameters are supplied (empty params dict);
# the estimator still goes through ModelFactory and the same evaluation.
model_name = 'gaussian'
model = GaussianNB()
params = {}
score = 'f1'

gaussian = ModelFactory(model_name, model, params, score)

model_gaussian, pred = gaussian.classify(X_train, y_train, X_test, y_test)
metrics = gaussian.metrics(y_test, pred['y_pred'])

# NOTE(review): every value below is read at the 'accuracy' key, so the three
# columns likely all store accuracy rather than distinct metrics — confirm
# against ModelFactory.metrics.
model_row = pd.DataFrame([{'model_name': model_name,
                           'f1-score': metrics['f1-score']['accuracy'],
                           'precision': metrics['precision']['accuracy'],
                           'recall': metrics['recall']['accuracy']}])

# DataFrame.append was removed in pandas 2.0; pd.concat adds the row instead.
models = pd.concat([models, model_row], ignore_index=True)

Matriz de confusão
[[75  3]
 [ 4 64]]
------------------------------------------
              precision    recall  f1-score   support

           0       0.95      0.96      0.96        78
           1       0.96      0.94      0.95        68

    accuracy                           0.95       146
   macro avg       0.95      0.95      0.95       146
weighted avg       0.95      0.95      0.95       146



In [12]:
# Rank the collected models from highest to lowest recorded f1-score.
models.sort_values('f1-score', ascending=False)

Unnamed: 0,model_name,f1-score,precision,recall
0,Logistic Regression,1.0,1.0,1.0
1,Random Forest,1.0,1.0,1.0
2,knn,0.965753,0.965753,0.965753
3,gaussian,0.952055,0.952055,0.952055


In [19]:
# Predict a strategy for every row of the test data and export it as CSV.
# NOTE(review): this cell's execution count (19) is out of sequence with the
# cells before it — re-run from a fresh kernel to rule out hidden state.
# NOTE(review): the k-NN model is used here although it is not the top scorer
# in the comparison table — confirm the choice is intentional.
preds = model_knn.predict(df_test[features])

# Label 0 is written as 'early'; any other label is written as 'late'.
predictions = pd.DataFrame({
    'id': df_test['id'],
    'strategy': ['early' if label == 0 else 'late' for label in preds],
})

predictions.to_csv('../data/predictions.csv', index=False)

In [21]:
# Persist the k-NN model to disk with the highest pickle protocol available.
with open('../data/model.pkl', 'wb') as model_file:
    pickle.dump(model_knn, model_file, protocol=pickle.HIGHEST_PROTOCOL)