In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import f1_score, confusion_matrix, log_loss, recall_score
from sklearn.linear_model import LogisticRegression

In [2]:
trainDataPath = '../datasets/iris-train.txt'
testDataPath = '../datasets/iris-test.txt'

In [3]:
trainData = pd.read_csv(trainDataPath, sep="\t")
testData = pd.read_csv(testDataPath, sep="\t")

In [4]:
X_train = trainData.drop(columns="#Label")
y_train = trainData["#Label"]

X_test = testData.drop(columns="#Label")
y_test = testData["#Label"]

In [5]:
X_train = preprocessing.normalize(X_train)
X_test = preprocessing.normalize(X_test)

In [6]:
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

LogisticRegression(max_iter=1000)

In [7]:
y_pred = clf.predict(X_test)

In [8]:
accuracy = clf.score(X_test, y_test)
f1score = f1_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
logLoss = log_loss(y_test, clf.predict_proba(X_test))

In [9]:
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"F1Score: {f1score*100:.2f}%")
print(f"Recall: {recall*100:.2f}%")
print(f"Log Loss: {logLoss*100:.2f}%\n")
print(confusion_matrix(y_test, y_pred))

Accuracy: 83.33%
F1Score: 82.22%
Recall: 83.33%
Log Loss: 63.13%

[[10  0  0]
 [ 0  5  5]
 [ 0  0 10]]
