In [1]:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, log_loss, recall_score
from sklearn.linear_model import LogisticRegression

In [2]:
dataPath = '../datasets/winequality-red.csv'
dataSet = pd.read_csv(dataPath, sep=";")

In [3]:
X = dataSet.drop(columns="quality")
y = dataSet["quality"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [4]:
X_train = preprocessing.normalize(X_train)
X_test = preprocessing.normalize(X_test)

In [5]:
clf = LogisticRegression(max_iter=10000)
clf.fit(X_train, y_train)

LogisticRegression(max_iter=10000)

In [6]:
y_pred = clf.predict(X_test)

In [7]:
accuracy = clf.score(X_test, y_test)
f1Score = f1_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
logLoss = log_loss(y_test, clf.predict_proba(X_test))

In [8]:
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"F1Score: {f1Score*100:.2f}%")
print(f"Recall: {recall*100:.2f}%")
print(f"Log Loss: {logLoss*100:.2f}%\n")
print(confusion_matrix(y_test, y_pred))

Accuracy: 48.12%
F1Score: 17.79%
Recall: 19.86%
Log Loss: 112.04%

[[ 0  0  1  0  0  0]
 [ 0  0  4  5  0  0]
 [ 0  0 77 58  0  0]
 [ 0  0 47 77  0  0]
 [ 0  0  8 39  0  0]
 [ 0  0  1  3  0  0]]
