In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

train_data = pd.read_csv("./data/traininingdata.txt", sep=";")
test_data = pd.read_csv("./data/testdata.txt", sep=";")

filtered_column_name = ["day", "month"]

train_data = train_data.drop(filtered_column_name, axis=1)
test_data = test_data.drop(filtered_column_name, axis=1)

label_encoders = {}

for column in train_data.columns:
    if train_data[column].dtype == "object":
        le = LabelEncoder()
        train_data[column] = le.fit_transform(train_data[column])
        label_encoders[column] = le


for column in test_data.columns:
    if test_data[column].dtype == "object":
        le = label_encoders[column]
        test_data[column] = le.transform(test_data[column])


train_data = train_data.values
train_data = train_data.astype(np.float32)
X = train_data[:, :-1]
y = train_data[:, -1]
X = (X - X.mean(axis=0)) / X.std(axis=0)

test_data = test_data.values
test_data = test_data.astype(np.float32)
X_test = test_data[:, :-1]
# normalize data
X_test = (X_test - X_test.mean(axis=0)) / X_test.std(axis=0)
y_test = test_data[:, -1]

In [5]:
from sklearn import svm

clf = svm.SVC(kernel="poly", C=2.0, gamma="auto")
clf.fit(X, y)

# y_pred = clf.predict(X_test)

# from sklearn.metrics import accuracy_score

# print("accuracy: ", accuracy_score(y_test, y_pred))



In [8]:
from sklearn.model_selection import cross_validate
from sklearn import metrics

cross_validate(
    clf, X_test, y_test, cv=5, scoring=["accuracy", "precision", "recall", "f1"]
)

{'fit_time': array([0.3972187 , 0.3842051 , 0.51584792, 0.35351324, 0.39737296]),
 'score_time': array([0.03410029, 0.03367901, 0.03325796, 0.03432178, 0.03360605]),
 'test_accuracy': array([0.89883914, 0.88944168, 0.88225539, 0.88993363, 0.88606195]),
 'test_precision': array([0.73770492, 0.57317073, 0.49411765, 0.58333333, 0.53424658]),
 'test_recall': array([0.21226415, 0.22169811, 0.19811321, 0.19905213, 0.18483412]),
 'test_f1': array([0.32967033, 0.31972789, 0.28282828, 0.29681979, 0.27464789])}