# Modeling

This step involves modeling (SVM Model and Random Forest Model) for three different world:

>1.[World 1- Magma Peak](#World-1-Magma-Peak)<br>
>2.[World 2- TreeTop City](#World-2-TreeTop-City)<br>
>3.[World 3- Crystal Caves](#World-3-Crystal-Caves)<br>


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split,cross_val_score, cross_val_predict
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.ensemble import RandomForestClassifier


Using TensorFlow backend.


In [2]:
pd.set_option('display.max_columns', None)  
pd.set_option('display.max_rows', None) 
pd.set_option('display.max_colwidth', -1) 

# World 1 Magma Peak

In [3]:
df_mp=pd.read_csv('../data/magmapeak_final.csv')

In [4]:
df_mp.head()

Unnamed: 0,installation_id,Bottle Filler (Activity),Bubble Bath,Dino Dive,Dino Drink,Magma Peak - Level 1,Magma Peak - Level 2,Sandcastle Builder (Activity),Scrub-A-Dub,Slop Problem,Watering Hole (Activity),Cauldron Filler (Assessment)_t,accuracy_group
0,0006c192,6.6,1.61,2.53,0.0,2,2,1.887,2.71,0,0.36,0.24,3
1,002db7e3,8.24,4.32,4.03,3.49,2,3,0.717,2.51,0,1.57,0.57,3
2,003372b0,2.46,2.61,2.6,3.31,1,1,1.22,0.49,0,0.07,0.6,3
3,004c2091,1.197,0.0,0.0,0.0,1,2,0.0,0.0,0,0.12,0.3,3
4,00634433,0.0,0.0,0.0,0.0,1,1,0.0,0.0,0,0.0,1.13,3


In [5]:
df_mp.shape

(1839, 13)

In [6]:
df_mp.columns

Index(['installation_id', 'Bottle Filler (Activity)', 'Bubble Bath',
       'Dino Dive', 'Dino Drink', 'Magma Peak - Level 1',
       'Magma Peak - Level 2', 'Sandcastle Builder (Activity)', 'Scrub-A-Dub',
       'Slop Problem', 'Watering Hole (Activity)',
       'Cauldron Filler (Assessment)_t', 'accuracy_group'],
      dtype='object')

In [7]:
feature=['Bottle Filler (Activity)', 'Bubble Bath',
       'Dino Dive', 'Dino Drink', 'Magma Peak - Level 1',
       'Magma Peak - Level 2', 'Sandcastle Builder (Activity)', 'Scrub-A-Dub',
       'Slop Problem', 'Watering Hole (Activity)',
       'Cauldron Filler (Assessment)_t']

In [8]:
# define x and y
X = df_mp[feature]
y = df_mp['accuracy_group']

In [9]:
y.value_counts(normalize=True)

3    0.641109
2    0.147363
0    0.113105
1    0.098423
Name: accuracy_group, dtype: float64

In [10]:
# split test and train set
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.10,
                                                    stratify=y)
print(X_train.shape)
print(X_test.shape)

(1655, 11)
(184, 11)


In [11]:

#fitting a `StandardScaler` to `X_train` and use it to transform both `X_train` and `X_test`

ss = StandardScaler()
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

# SVM MODEL

In [12]:
clf = svm.SVC(C=10)
param_grid = {'gamma':np.logspace(-5, 2, 20)}
grid = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1)
grid.fit(X_train_sc, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   14.5s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'gamma': array([1.00000000e-05, 2.3357214...05, 1.27427499e-04,
       2.97635144e-04, 6.95192796e-04, 1.62377674e-03, 3.79269019e-03,
       8.85866790e-03, 2.06913808e-02, 4.83293024e-02, 1.12883789e-01,
       2.63665090e-01, 6.15848211e-01, 1.43844989e+00, 3.35981829e+00,
       7.84759970e+00, 1.83298071e+01, 4.28133240e+01, 1.00000000e+02])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=1)

In [13]:
grid.best_params_, grid.best_score_

({'gamma': 0.008858667904100823}, 0.6549848942598188)

In [14]:
svc = SVC(
    C = 10,
    kernel = "rbf",
    gamma = 0.008858667904100823
)


In [15]:
# Fit support vector machine to training data.
svc.fit(X_train_sc, y_train)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.008858667904100823,
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [16]:
# Generate predictions.
y_pred = svc.predict(X_test_sc)

In [17]:
svc.score(X_train_sc, y_train)

0.6761329305135951

In [18]:
svc.score(X_test_sc, y_test)

0.6413043478260869

In [19]:
# Measure performance based on accuracy.
accuracy_score(y_test, y_pred)

0.6413043478260869

In [20]:
metrics.cohen_kappa_score(y_test, y_pred,weights="quadratic" )

0.10071282377020263

In [21]:
# Generate a confusion matrix.
confusion_matrix(y_test, y_pred)

array([[  0,   2,   0,  19],
       [  0,   5,   0,  13],
       [  0,   1,   0,  26],
       [  1,   4,   0, 113]])

In [22]:
print(metrics.classification_report(y_test, y_pred, digits=3))

              precision    recall  f1-score   support

           0      0.000     0.000     0.000        21
           1      0.417     0.278     0.333        18
           2      0.000     0.000     0.000        27
           3      0.661     0.958     0.782       118

    accuracy                          0.641       184
   macro avg      0.269     0.309     0.279       184
weighted avg      0.465     0.641     0.534       184



  'precision', 'predicted', average, warn_for)


# Random Forest

In [23]:
rfc = RandomForestClassifier(n_estimators=400)
rfc.fit(X_train_sc,y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=400,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [24]:
rfc_prediction = rfc.predict(X_test_sc)

In [25]:
# Measure performance based on accuracy.
accuracy_score(y_test, rfc_prediction)

0.625

In [26]:
metrics.cohen_kappa_score(y_test, rfc_prediction,weights="quadratic" )

0.29004789781798834

In [27]:
# Generate a confusion matrix.
confusion_matrix(y_test, rfc_prediction)

array([[ 6,  2,  1, 12],
       [ 0,  7,  3,  8],
       [ 0,  5,  4, 18],
       [ 6,  7,  7, 98]])

In [28]:
print(metrics.classification_report(y_test,rfc_prediction))

              precision    recall  f1-score   support

           0       0.50      0.29      0.36        21
           1       0.33      0.39      0.36        18
           2       0.27      0.15      0.19        27
           3       0.72      0.83      0.77       118

    accuracy                           0.62       184
   macro avg       0.46      0.41      0.42       184
weighted avg       0.59      0.62      0.60       184



|Model|Score|Kappa Score|
|---|---|---|
|SVM|0.64130|0.1007
|Random Forest|0.625|0.2900|


# World 2 TreeTop City

In [29]:
df_tt=pd.read_csv('../data/treetopcity_final.csv')

In [30]:
df_tt.head()

Unnamed: 0,installation_id,12 Monkeys,Air Show,All Star Sorting,Bug Measurer (Activity),Costume Box,Crystals Rule,Fireworks (Activity),Flower Waterer (Activity),Ordering Spheres,PiratesTale,Rulers,Treasure Map,Tree Top City - Level 1,Tree Top City - Level 2,Tree Top City - Level 3,Mushroom Sorter (Assessment)_t,num_correct,num_incorrect,accuracy,accuracy_group,Bird Measurer (Assessment)_t
0,0006a69f,1,3.22,0.917,1.75,2,1.31,1.53,2.74,1,1,1,1,1,1,1,0.66,0,11,0.0,0,0.0
1,0006c192,1,0.0,0.0,1.32,0,0.2,0.0,4.46,1,1,3,1,3,1,1,1.53,1,1,0.5,2,0.0
2,001d0ed0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,2,1,2,1,1,1,1,1.38,0,5,0.0,0,0.0
3,002db7e3,0,3.11,1.24,1.33,0,0.0,0.6,0.71,1,0,0,0,2,2,2,0.87,1,10,0.091,1,0.0
4,003372b0,2,2.75,1.28,1.2,1,3.32,1.43,0.79,2,1,1,2,1,1,1,0.58,1,1,0.5,2,0.0


In [31]:
df_tt.shape

(2667, 22)

In [32]:
df_tt.columns

Index(['installation_id', '12 Monkeys', 'Air Show', 'All Star Sorting',
       'Bug Measurer (Activity)', 'Costume Box', 'Crystals Rule',
       'Fireworks (Activity)', 'Flower Waterer (Activity)', 'Ordering Spheres',
       'PiratesTale', 'Rulers', 'Treasure Map', 'Tree Top City - Level 1',
       'Tree Top City - Level 2', 'Tree Top City - Level 3',
       'Mushroom Sorter (Assessment)_t', 'num_correct', 'num_incorrect',
       'accuracy', 'accuracy_group', 'Bird Measurer (Assessment)_t'],
      dtype='object')

In [33]:
feature=['12 Monkeys', 'Air Show', 'All Star Sorting',
       'Bug Measurer (Activity)', 'Costume Box', 'Crystals Rule',
       'Fireworks (Activity)', 'Flower Waterer (Activity)', 'Ordering Spheres',
       'PiratesTale', 'Rulers', 'Treasure Map', 'Tree Top City - Level 1',
       'Tree Top City - Level 2', 'Tree Top City - Level 3',
       'Mushroom Sorter (Assessment)_t', 'Bird Measurer (Assessment)_t']

In [34]:
# define x and y
X = df_tt[feature]
y = df_tt['accuracy_group']

In [35]:
y.value_counts(normalize=True)

1    0.302962
0    0.271466
3    0.261342
2    0.164229
Name: accuracy_group, dtype: float64

In [36]:
# split test and train set
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.10,
                                                    stratify=y)
print(X_train.shape)
print(X_test.shape)

(2400, 17)
(267, 17)


In [37]:
# #fitting a `StandardScaler` to `X_train` and use it to transform both `X_train` and `X_test`

ss = StandardScaler()
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

# SVM

In [38]:
clf = svm.SVC(C=10)
param_grid = {'gamma':np.logspace(-5, 2, 20)}
grid = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1)
grid.fit(X_train_sc, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   34.2s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'gamma': array([1.00000000e-05, 2.3357214...05, 1.27427499e-04,
       2.97635144e-04, 6.95192796e-04, 1.62377674e-03, 3.79269019e-03,
       8.85866790e-03, 2.06913808e-02, 4.83293024e-02, 1.12883789e-01,
       2.63665090e-01, 6.15848211e-01, 1.43844989e+00, 3.35981829e+00,
       7.84759970e+00, 1.83298071e+01, 4.28133240e+01, 1.00000000e+02])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=1)

In [39]:
grid.best_params_, grid.best_score_

({'gamma': 0.26366508987303555}, 0.5758333333333333)

In [40]:
svc = SVC(
    C = 10,
    kernel = "rbf",
    gamma = 0.26366508987303555
)

In [41]:
# Fit support vector machine to training data.
svc.fit(X_train_sc, y_train)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.26366508987303555,
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [42]:
# Generate predictions.
y_pred = svc.predict(X_test_sc)

In [43]:
svc.score(X_train_sc, y_train)

0.93625

In [44]:
svc.score(X_test_sc, y_test)


0.6367041198501873

In [45]:
# Measure performance based on accuracy.
accuracy_score(y_test, y_pred)

0.6367041198501873

In [46]:
metrics.cohen_kappa_score(y_test, y_pred,weights='quadratic')

0.5473239775548822

In [47]:
# Generate a confusion matrix.

confusion_matrix(y_test, y_pred)

array([[44, 20,  2,  6],
       [10, 58,  5,  8],
       [ 9,  6, 23,  6],
       [ 9, 12,  4, 45]])

In [48]:
print(metrics.classification_report(y_test, y_pred, digits=3))

              precision    recall  f1-score   support

           0      0.611     0.611     0.611        72
           1      0.604     0.716     0.655        81
           2      0.676     0.523     0.590        44
           3      0.692     0.643     0.667        70

    accuracy                          0.637       267
   macro avg      0.646     0.623     0.631       267
weighted avg      0.641     0.637     0.636       267



# Random Forest

In [49]:
rfc = RandomForestClassifier(n_estimators=450)
rfc.fit(X_train_sc,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=450,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [50]:
rfc_prediction = rfc.predict(X_test_sc)

In [51]:
# Measure performance based on accuracy.
accuracy_score(y_test, rfc_prediction)

0.6479400749063671

In [52]:
metrics.cohen_kappa_score(y_test, rfc_prediction,weights='quadratic')

0.6348841821768949

In [53]:
# Generate a confusion matrix.

confusion_matrix(y_test, rfc_prediction)

array([[44, 18,  4,  6],
       [13, 52,  3, 13],
       [ 6,  6, 22, 10],
       [ 3,  9,  3, 55]])

In [54]:
print(metrics.classification_report(y_test, rfc_prediction, digits=3))

              precision    recall  f1-score   support

           0      0.667     0.611     0.638        72
           1      0.612     0.642     0.627        81
           2      0.688     0.500     0.579        44
           3      0.655     0.786     0.714        70

    accuracy                          0.648       267
   macro avg      0.655     0.635     0.639       267
weighted avg      0.650     0.648     0.645       267



|Model|Score|Kappa Score|
|---|---|---|
|SVM|0.6367|0.5473|
|Random Forest|0.6479|0.6348|

# World 3 Crystal Caves

In [55]:
df_cc=pd.read_csv('../data/crystalcaves_final.csv')

In [56]:
df_cc.head()

Unnamed: 0,installation_id,Balancing Act,Chicken Balancer (Activity),Chow Time,Crystal Caves - Level 1,Crystal Caves - Level 2,Crystal Caves - Level 3,Egg Dropper (Activity),Happy Camel,"Heavy, Heavier, Heaviest",Honey Cake,Leaf Leader,Lifting Heavy Things,Pan Balance,Cart Balancer (Assessment)_t,num_correct,num_incorrect,accuracy,accuracy_group,Chest Sorter (Assessment)_t
0,001d0ed0,1,0.0,1.25,1,1,0,0.0,0.98,0,2,0.0,2,0.0,0.35,1,0,1.0,3,0.0
1,002db7e3,0,3.28,4.45,2,2,1,0.0,1.01,0,1,0.0,2,1.89,0.71,1,0,1.0,3,0.0
2,003372b0,1,1.49,3.02,1,1,0,0.0,2.47,0,2,0.0,4,0.0,0.5,1,0,1.0,3,0.0
3,00634433,0,0.0,2.54,1,2,0,0.0,0.0,0,0,0.0,0,0.0,0.51,1,0,1.0,3,0.0
4,00667b88,0,0.0,0.0,3,4,3,0.0,0.0,0,0,0.0,0,0.0,1.11,1,0,1.0,3,0.0


In [57]:
df_cc.shape

(3615, 20)

In [58]:
df_cc.columns

Index(['installation_id', 'Balancing Act', 'Chicken Balancer (Activity)',
       'Chow Time', 'Crystal Caves - Level 1', 'Crystal Caves - Level 2',
       'Crystal Caves - Level 3', 'Egg Dropper (Activity)', 'Happy Camel',
       'Heavy, Heavier, Heaviest', 'Honey Cake', 'Leaf Leader',
       'Lifting Heavy Things', 'Pan Balance', 'Cart Balancer (Assessment)_t',
       'num_correct', 'num_incorrect', 'accuracy', 'accuracy_group',
       'Chest Sorter (Assessment)_t'],
      dtype='object')

In [59]:
feature=['Balancing Act', 'Chicken Balancer (Activity)',
       'Chow Time', 'Crystal Caves - Level 1', 'Crystal Caves - Level 2',
       'Crystal Caves - Level 3', 'Egg Dropper (Activity)', 'Happy Camel',
       'Heavy, Heavier, Heaviest', 'Honey Cake', 'Leaf Leader',
       'Lifting Heavy Things', 'Pan Balance',
         'Cart Balancer (Assessment)_t','Chest Sorter (Assessment)_t']

In [60]:
# define x and y
X = df_cc[feature]
y = df_cc['accuracy_group']

In [61]:
y.value_counts(normalize=True)

3    0.430429
0    0.363485
1    0.112586
2    0.093499
Name: accuracy_group, dtype: float64

In [62]:
# split test and train set
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.10,
                                                    stratify=y)
print(X_train.shape)
print(X_test.shape)

(3253, 15)
(362, 15)


In [63]:
#fitting a `StandardScaler` to `X_train` and use it to transform both `X_train` and `X_test`

ss = StandardScaler()
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

# SVM

In [64]:
clf = svm.SVC(C=10)
param_grid = {'gamma':np.logspace(-5, 2, 20)}
grid = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy', verbose=1)
grid.fit(X_train_sc, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:  1.0min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'gamma': array([1.00000000e-05, 2.3357214...05, 1.27427499e-04,
       2.97635144e-04, 6.95192796e-04, 1.62377674e-03, 3.79269019e-03,
       8.85866790e-03, 2.06913808e-02, 4.83293024e-02, 1.12883789e-01,
       2.63665090e-01, 6.15848211e-01, 1.43844989e+00, 3.35981829e+00,
       7.84759970e+00, 1.83298071e+01, 4.28133240e+01, 1.00000000e+02])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=1)

In [65]:
grid.best_params_, grid.best_score_

({'gamma': 0.008858667904100823}, 0.6504764832462342)

In [66]:
svc = SVC(
    C = 10,
    kernel = "rbf",
    gamma = 0.008858667904100823
)

In [67]:
# Fit support vector machine to training data.
svc.fit(X_train_sc, y_train)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.008858667904100823,
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [68]:
# Generate predictions.
y_pred = svc.predict(X_test_sc)

In [69]:
svc.score(X_train_sc, y_train)

0.6704580387334768

In [70]:
svc.score(X_test_sc, y_test)

0.638121546961326

In [71]:
# Measure performance based on accuracy.
accuracy_score(y_test, y_pred)

0.638121546961326

In [72]:
metrics.cohen_kappa_score(y_test,y_pred,weights='quadratic')

0.5724328906470755

In [73]:
# Generate a confusion matrix.

confusion_matrix(y_test, y_pred)

array([[103,   2,   0,  26],
       [ 32,   1,   0,   8],
       [ 14,   1,   0,  19],
       [ 27,   2,   0, 127]])

In [74]:
print(metrics.classification_report(y_test,y_pred, digits=3))

              precision    recall  f1-score   support

           0      0.585     0.786     0.671       131
           1      0.167     0.024     0.043        41
           2      0.000     0.000     0.000        34
           3      0.706     0.814     0.756       156

    accuracy                          0.638       362
   macro avg      0.364     0.406     0.367       362
weighted avg      0.535     0.638     0.573       362



  'precision', 'predicted', average, warn_for)


# Random Forest

In [75]:
rfc = RandomForestClassifier(n_estimators=500)
rfc.fit(X_train_sc,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [76]:
rfc_prediction = rfc.predict(X_test_sc)

In [77]:
# Measure performance based on accuracy.
accuracy_score(y_test, rfc_prediction)

0.6408839779005525

In [78]:
metrics.cohen_kappa_score(y_test,rfc_prediction,weights='quadratic')

0.5862297993937404

In [79]:
# Generate a confusion matrix.

confusion_matrix(y_test, rfc_prediction)

array([[ 98,   4,   4,  25],
       [ 31,   7,   1,   2],
       [  9,   2,   1,  22],
       [ 26,   3,   1, 126]])

In [80]:
print(metrics.classification_report(y_test, rfc_prediction, digits=3))

              precision    recall  f1-score   support

           0      0.598     0.748     0.664       131
           1      0.438     0.171     0.246        41
           2      0.143     0.029     0.049        34
           3      0.720     0.808     0.761       156

    accuracy                          0.641       362
   macro avg      0.474     0.439     0.430       362
weighted avg      0.589     0.641     0.601       362



|Model|Score|Kappa Score|
|---|---|---|
|SVM|0.6574|0.6117|
|Random Forest|0.6740|0.6316|