In [1]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from prettytable import PrettyTable

In [2]:
df = pd.read_csv("bioresponse.csv")
ydf = df['Activity']
xdf = df.drop(columns = ['Activity'],axis = 1)
X_train, X_test, y_train, y_test = train_test_split(
        xdf, ydf, test_size=0.33, random_state=42)
df.head(7)

Unnamed: 0,Activity,D1,D2,D3,D4,D5,D6,D7,D8,D9,...,D1767,D1768,D1769,D1770,D1771,D1772,D1773,D1774,D1775,D1776
0,1,0.0,0.497009,0.1,0.0,0.132956,0.678031,0.273166,0.585445,0.743663,...,0,0,0,0,0,0,0,0,0,0
1,1,0.366667,0.606291,0.05,0.0,0.111209,0.803455,0.106105,0.411754,0.836582,...,1,1,1,1,0,1,0,0,1,0
2,1,0.0333,0.480124,0.0,0.0,0.209791,0.61035,0.356453,0.51772,0.679051,...,0,0,0,0,0,0,0,0,0,0
3,1,0.0,0.538825,0.0,0.5,0.196344,0.72423,0.235606,0.288764,0.80511,...,0,0,0,0,0,0,0,0,0,0
4,0,0.1,0.517794,0.0,0.0,0.494734,0.781422,0.154361,0.303809,0.812646,...,0,0,0,0,0,0,0,0,0,0
5,0,0.133333,0.771035,0.2,0.25,0.122153,0.677398,0.267224,0.611112,0.701421,...,0,0,0,0,0,0,0,0,0,0
6,1,0.0667,0.567401,0.1,0.0,0.116578,0.689802,0.274886,0.548509,0.785429,...,0,0,0,0,0,0,0,0,0,0


In [3]:
table = PrettyTable()
table.field_names = ["Model", "Accuracy", "Precision", "Recall", "F1", "LogLoss"]
models = {
    "DT": DecisionTreeClassifier(),
    "ST": DecisionTreeClassifier(),
    "RFS": RandomForestClassifier(),
    "RFD": RandomForestClassifier()
}
for i in models.keys():
    model = models[i]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    prec = precision_score(y_test, y_pred)
    acc = accuracy_score(y_test, y_pred)
    table.add_row([str(models[i]), 
                    accuracy_score(y_test, y_pred), 
                    precision_score(y_test, y_pred), 
                    recall_score(y_pred, y_test), 
                    f1_score(y_test, y_pred), 
                    log_loss(y_test, y_pred)])
print(table)

+--------------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
|          Model           |      Accuracy      |     Precision      |       Recall       |         F1         |      LogLoss       |
+--------------------------+--------------------+--------------------+--------------------+--------------------+--------------------+
| DecisionTreeClassifier() | 0.7019386106623586 |      0.753125      |      0.753125      | 0.7231807951987997 | 10.294777727072708 |
| DecisionTreeClassifier() | 0.7124394184168013 | 0.7518684603886397 | 0.7518684603886397 | 0.7386196769456682 | 9.932097843103081  |
| RandomForestClassifier() | 0.7883683360258481 | 0.8202080237741456 | 0.8202080237741456 | 0.808199121522694  | 7.309576871369671  |
| RandomForestClassifier() | 0.7891760904684976 | 0.8243243243243243 | 0.8243243243243243 | 0.8079470198675496 | 7.2816754377795005 |
+--------------------------+--------------------+-------------