In [193]:
import numpy as np
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
    mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# załadowanie pliku z przygotowanymi danymi
data = pd.read_csv('..\\data\\merged_season_stats.csv')

# funkcja skalująca
scaler = StandardScaler()
scaled_data = pd.DataFrame(scaler.fit_transform(data))

# podział danych na zbiory uczące i testowe
data_train, data_test = train_test_split(scaled_data, test_size=0.2)
x_train = pd.DataFrame(data_train[data_train.columns[:-1]])
y_train = pd.Series(data_train[data_train.columns[-1]])
x_test = pd.DataFrame(data_test[data_test.columns[:-1]])
y_test = pd.Series(data_test[data_test.columns[-1]])

# kodowanie wektora y
label_encoder = LabelEncoder()
y_train_encoded = pd.Series(label_encoder.fit_transform(y_train))
y_test_encoded = pd.Series(label_encoder.fit_transform(y_test))

In [194]:
# definicja modelu
model = LogisticRegression()
model_reg = LogisticRegression(penalty="l2", C=0.1, random_state=44)
model.fit(x_train, y_train_encoded)
model_reg.fit(x_train, y_train_encoded)

# predykcja modelu
y_predicted = model.predict(x_test)
y_predicted_reg = model_reg.predict(x_test)

# ewaluacja modelu
accuracy = accuracy_score(y_test_encoded, y_predicted)
precision = precision_score(y_test_encoded, y_predicted)
recall = recall_score(y_test_encoded, y_predicted)
f1 = f1_score(y_test_encoded, y_predicted)
accuracy_reg = accuracy_score(y_test_encoded, y_predicted_reg)
precision_reg = precision_score(y_test_encoded, y_predicted_reg)
recall_reg = recall_score(y_test_encoded, y_predicted_reg)
f1_reg = f1_score(y_test_encoded, y_predicted_reg)
rmse = np.sqrt(mean_squared_error(y_test, y_predicted))
rmse_reg = np.sqrt(mean_squared_error(y_test, y_predicted_reg))

print(f"Accuracy: \t\t\t\t\t\t\t{accuracy}")
print(f"Accuracy with ridge regularization: {accuracy_reg}")
print(f"Precision: \t\t\t\t\t\t\t{precision}")
print(f"Precision with ridge regularization:{precision_reg}")
print(f"Recall: \t\t\t\t\t\t\t{recall}")
print(f"Recall with ridge regularization: \t{recall_reg}")
print(f"F1-Score: \t\t\t\t\t\t\t{f1}")
print(f"F1-Score with ridge regularization: {f1_reg}")
print(f"RMSE: \t\t\t\t\t\t{rmse}")
print(f"RMSE with regularization: \t{rmse_reg}")


Accuracy: 							0.9722222222222222
Accuracy with ridge regularization: 0.9814814814814815
Precision: 							0.75
Precision with ridge regularization:1.0
Recall: 							0.6
Recall with ridge regularization: 	0.6
F1-Score: 							0.6666666666666666
F1-Score with ridge regularization: 0.75
RMSE: 						0.9496576622468815
RMSE with regularization: 	0.9427481247030245


In [195]:
# zestawienie danych testowych z predykcjami dla modelu bez regularyzacji
data_test_with_predictions = x_test.copy()
data_test_with_predictions['actual_value'] = y_test.values
data_test_with_predictions['predicted_value'] = label_encoder.inverse_transform(y_predicted)

data_test_with_predictions

Unnamed: 0,0,1,2,3,actual_value,predicted_value
501,-1.792739,-1.467861,0.062996,1.244196,-0.206085,-0.206085
534,0.506966,0.518532,-0.373128,0.384360,-0.206085,-0.206085
4,-1.655397,-1.624292,1.916521,0.662203,-0.206085,-0.206085
177,-0.915073,-1.519821,0.771697,-0.233402,-0.206085,-0.206085
171,-1.393199,0.221369,-0.373128,1.251401,-0.206085,-0.206085
...,...,...,...,...,...,...
225,-0.739972,-1.089645,-0.373128,0.476543,-0.206085,-0.206085
483,-0.691840,-0.394462,-0.373128,0.747654,-0.206085,-0.206085
186,-1.639974,-1.264446,-0.373128,1.251401,-0.206085,-0.206085
270,-1.901264,-1.712376,1.781836,1.017889,-0.206085,-0.206085


In [196]:
# zestawienie danych testowych z predykcjami dla modelu z regularyzacją
data_test_with_predictions = x_test.copy()
data_test_with_predictions['actual_value'] = y_test.values
data_test_with_predictions['predicted_value'] = label_encoder.inverse_transform(y_predicted_reg)

data_test_with_predictions

Unnamed: 0,0,1,2,3,actual_value,predicted_value
501,-1.792739,-1.467861,0.062996,1.244196,-0.206085,-0.206085
534,0.506966,0.518532,-0.373128,0.384360,-0.206085,-0.206085
4,-1.655397,-1.624292,1.916521,0.662203,-0.206085,-0.206085
177,-0.915073,-1.519821,0.771697,-0.233402,-0.206085,-0.206085
171,-1.393199,0.221369,-0.373128,1.251401,-0.206085,-0.206085
...,...,...,...,...,...,...
225,-0.739972,-1.089645,-0.373128,0.476543,-0.206085,-0.206085
483,-0.691840,-0.394462,-0.373128,0.747654,-0.206085,-0.206085
186,-1.639974,-1.264446,-0.373128,1.251401,-0.206085,-0.206085
270,-1.901264,-1.712376,1.781836,1.017889,-0.206085,-0.206085
