### Predict wine quality with Bayesian Linear Regression
*March 2025*

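Recall how Bayesian linear regression (as implemented by scikit-learn's `BayesianRidge`) works: the target is modelled as $y \mid X, w, \alpha \sim \mathcal{N}(Xw, \alpha^{-1})$, with a Gaussian prior $w \mid \lambda \sim \mathcal{N}(0, \lambda^{-1} I)$ on the weights. The noise precision $\alpha$ and the weight precision $\lambda$ each receive a Gamma hyperprior, whose shape and rate parameters are `alpha_1`, `alpha_2` and `lambda_1`, `lambda_2` respectively; $\alpha$ and $\lambda$ are estimated together with $w$ when the model is fitted.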

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import BayesianRidge
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, confusion_matrix, f1_score

# Random seed for reproducible runs; its value (42) is the one the random_state
# arguments in this notebook use.
random_seed = 42

I/ Load datasets

In [146]:
# Load the red-wine table and discard exact duplicate rows in one chained expression.
# NOTE(review): the white-wine file is read with sep=";" while this call relies on the
# default "," separator — confirm which delimiter winequality-red.csv actually uses.
red_wine = pd.read_csv("winequality-red.csv").drop_duplicates()

In [242]:
# Load the semicolon-delimited white-wine table and discard exact duplicate rows.
white_wine = pd.read_csv("winequality-white.csv", sep=";").drop_duplicates()

II/ White wine

With 7 classes

In [243]:
# Target is the quality column; the features are every remaining column.
# DataFrame.drop returns a new frame, so white_wine itself is left untouched.
y = white_wine["quality"]
X = white_wine.drop(columns=["quality"])

In [245]:
# Hold out 20% of the rows for evaluation; only the training part is resampled.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_seed
)

# Oversample the under-represented quality levels with plain SMOTE. No Tomek-link
# cleaning is applied: this is SMOTE, not SMOTETomek. Each synthetic sample is
# interpolated using 4 nearest neighbours.
# NOTE(review): the neighbour search runs on the features exactly as split, i.e. before
# any scaling — confirm this is intended.
smote = SMOTE(random_state=random_seed, k_neighbors=4)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [246]:
# Fit the robust scaler on the training features only, then apply that same fitted
# transform to both splits so no test-set statistics leak into the scaling.
rb_scaler = RobustScaler().fit(X_train)
X_train, X_test = rb_scaler.transform(X_train), rb_scaler.transform(X_test)

In [247]:
# The four Gamma-hyperprior parameters of BayesianRidge all share one candidate list.
hyperprior_values = [1e-6, 1e-5, 1e-4, 1e-3]
param_grid = {
    name: hyperprior_values
    for name in ('alpha_1', 'alpha_2', 'lambda_1', 'lambda_2')
}

# Exhaustive 5-fold search over the grid, ranked by negated mean squared error and
# run on all available cores.
bayesian_ridge = BayesianRidge()
grid_search = GridSearchCV(
    estimator=bayesian_ridge,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the refitted winner for the prediction cells and report its settings.
best_model = grid_search.best_estimator_
print(grid_search.best_params_)

{'alpha_1': 0.001, 'alpha_2': 1e-06, 'lambda_1': 1e-06, 'lambda_2': 0.001}


In [248]:
# Predict with best_model on the held-out features. The regressor's outputs are
# real-valued scores, not yet integer quality levels.
y_pred = best_model.predict(X_test)

In [249]:
# Turn the continuous outputs into integer quality levels by rounding to the nearest
# integer; the true labels get the same cast so both sides share an integer dtype.
y_pred, y_test = (np.round(values).astype(int) for values in (y_pred, y_test))

In [250]:
# Share of test wines whose rounded prediction equals the true quality level.
true_levels = np.round(y_test).astype(int)
predicted_levels = np.round(y_pred).astype(int)
accuracy = accuracy_score(true_levels, predicted_levels)
accuracy

0.39722572509457754

In [251]:
# Rows are true levels, columns are predicted levels. The label set is the sorted union
# of the values present in either array, so predictions outside the observed quality
# range add extra rows/columns.
conf_matrix = confusion_matrix(
    y_true=np.round(y_test).astype(int),
    y_pred=np.round(y_pred).astype(int),
)
conf_matrix

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  1,   0,   1,   1,   1,   0,   0,   1,   0],
       [  0,   3,   7,  10,   8,   2,   0,   0,   0],
       [  0,   8,  51, 108,  54,  10,   3,   0,   0],
       [  0,   3,  27, 112, 129,  65,  12,   1,   0],
       [  0,   0,   3,   7,  45,  64,  25,   0,   1],
       [  0,   0,   1,   2,  10,   9,   7,   1,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0]], dtype=int64)

With 3 classes (model trained on the 7 quality levels; predictions grouped into 3 classes afterwards)

In [2]:
# Re-read the semicolon-delimited white-wine table from disk and drop exact duplicate
# rows, so this section starts from a fresh frame.
white_wine = pd.read_csv("winequality-white.csv", sep=";").drop_duplicates()

In [3]:
# Target is the original quality level; the features are every other column.
# DataFrame.drop returns a new frame, so white_wine itself is left untouched.
y = white_wine["quality"]
X = white_wine.drop(columns=["quality"])

In [4]:
# Hold out 20% of the rows for evaluation; only the training part is resampled.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_seed
)

# Oversample the under-represented quality levels with plain SMOTE. No Tomek-link
# cleaning is applied: this is SMOTE, not SMOTETomek. Each synthetic sample is
# interpolated using 4 nearest neighbours.
# NOTE(review): the neighbour search runs on the features exactly as split, i.e. before
# any scaling — confirm this is intended.
smote = SMOTE(random_state=random_seed, k_neighbors=4)
X_train, y_train = smote.fit_resample(X_train, y_train)

In [5]:
# Learn the scaling statistics from the training features alone, then reuse the fitted
# scaler for both the training and the held-out features.
rb_scaler = RobustScaler().fit(X_train)
X_train, X_test = rb_scaler.transform(X_train), rb_scaler.transform(X_test)

In [6]:
# One shared candidate list for each of the four BayesianRidge hyperprior parameters.
hyperprior_values = [1e-6, 1e-5, 1e-4, 1e-3]
param_grid = {
    name: hyperprior_values
    for name in ('alpha_1', 'alpha_2', 'lambda_1', 'lambda_2')
}

# 5-fold cross-validated grid search scored by negated MSE, parallelised over all cores.
bayesian_ridge = BayesianRidge()
grid_search = GridSearchCV(
    estimator=bayesian_ridge,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# The refitted best estimator is what the prediction cells use.
best_model = grid_search.best_estimator_
print(grid_search.best_params_)

{'alpha_1': 0.001, 'alpha_2': 1e-06, 'lambda_1': 1e-06, 'lambda_2': 0.001}


In [7]:
# Predict with best_model on the held-out features. The regressor's outputs are
# real-valued scores on the original quality scale.
y_pred = best_model.predict(X_test)

In [8]:
# Collapse integer quality levels into three classes:
#   0 = poor (level <= 3), 1 = average (levels 4..6), 2 = excellent (level >= 7).
# np.digitize labels every value in a single pass from the original level, so the
# result cannot depend on the order of successive in-place masked writes whose
# replacement values (0, 1, 2) themselves fall inside the "poor" range.
# Left-closed bins: (-inf, 4) -> 0, [4, 7) -> 1, [7, +inf) -> 2.
class_edges = [4, 7]
y_pred = np.digitize(np.round(y_pred).astype(int), class_edges)
y_test = np.digitize(np.round(y_test).astype(int), class_edges)
# NOTE: this cell overwrites y_pred / y_test, so it must run exactly once after the
# prediction cell; running it a second time would map every class label to 0.

In [9]:
# Share of test wines whose grouped prediction matches the grouped true class.
true_classes = np.round(y_test).astype(int)
predicted_classes = np.round(y_pred).astype(int)
accuracy = accuracy_score(true_classes, predicted_classes)
accuracy

0.7742749054224464

In [10]:
# Rows are true classes, columns are predicted classes (0 = poor, 1 = average,
# 2 = excellent).
conf_matrix = confusion_matrix(
    y_true=np.round(y_test).astype(int),
    y_pred=np.round(y_pred).astype(int),
)
conf_matrix

array([[  1,   3,   1],
       [ 14, 506,  93],
       [  0,  68, 107]], dtype=int64)

In [12]:
# Weighted F1: the per-class F1 scores averaged with weights proportional to the number
# of true instances of each class.
f1_score(y_test, y_pred, average='weighted')

0.7836174361349272

With 3 classes, trained directly on the 3 classes: poor, average, excellent

In [232]:
# Re-read the semicolon-delimited white-wine table from disk and drop exact duplicate
# rows, so this section starts from a fresh frame.
white_wine = pd.read_csv("winequality-white.csv", sep=";").drop_duplicates()

In [233]:
# Build the three-class target without touching white_wine:
#   0 = poor (quality <= 3), 1 = average (quality 4..6), 2 = excellent (quality >= 7).
# The array behind a DataFrame column can be a view of the frame's data (and is
# read-only under pandas Copy-on-Write), so masked in-place writes on it would either
# silently overwrite white_wine["quality"] or raise. np.digitize returns a fresh array
# and labels every value in a single pass.
# Left-closed bins: (-inf, 4) -> 0, [4, 7) -> 1, [7, +inf) -> 2.
quality_levels = white_wine["quality"].to_numpy()
y = np.digitize(quality_levels, [4, 7])
X = white_wine.drop(columns=["quality"])

In [234]:
# Hold out 30% of the rows for evaluation; only the training part is resampled.
# NOTE(review): the other sections of this notebook hold out 20% — confirm the
# different split size is intentional before comparing scores across sections.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=random_seed
)

# Oversample the under-represented classes with plain SMOTE using its default settings.
# No Tomek-link cleaning is applied; SMOTETomek (imported at the top of the notebook)
# is the combined variant if that is wanted.
sampler = SMOTE(random_state=random_seed)
X_train, y_train = sampler.fit_resample(X_train, y_train)

In [235]:
# Scaling statistics come from the training features only; the fitted scaler is then
# applied unchanged to both splits.
rb_scaler = RobustScaler().fit(X_train)
X_train, X_test = rb_scaler.transform(X_train), rb_scaler.transform(X_test)

In [236]:
# Same candidate values for all four BayesianRidge hyperprior parameters.
hyperprior_values = [1e-6, 1e-5, 1e-4, 1e-3]
param_grid = {
    name: hyperprior_values
    for name in ('alpha_1', 'alpha_2', 'lambda_1', 'lambda_2')
}

# 5-fold cross-validated grid search, ranked by negated mean squared error against the
# 0/1/2 class codes, using every available core.
bayesian_ridge = BayesianRidge()
grid_search = GridSearchCV(
    estimator=bayesian_ridge,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the refitted winner and print the chosen hyperparameters.
best_model = grid_search.best_estimator_
print(grid_search.best_params_)

{'alpha_1': 1e-06, 'alpha_2': 0.001, 'lambda_1': 0.001, 'lambda_2': 0.001}


In [237]:
# Predict on the held-out features and round each real-valued output to the nearest
# integer class code in a single expression.
y_pred = np.round(best_model.predict(X_test)).astype(int)

In [238]:
# The regressor can overshoot the valid class codes; clamp anything below 0 to 0 and
# anything above 2 to 2.
y_pred = np.clip(y_pred, 0, 2)

In [239]:
# Share of test wines whose predicted class code equals the true class code.
true_classes = np.round(y_test).astype(int)
predicted_classes = np.round(y_pred).astype(int)
accuracy = accuracy_score(true_classes, predicted_classes)
accuracy

0.671152228763667

In [240]:
# Rows are true classes, columns are predicted classes (0 = poor, 1 = average,
# 2 = excellent).
conf_matrix = confusion_matrix(
    y_true=np.round(y_test).astype(int),
    y_pred=np.round(y_pred).astype(int),
)
conf_matrix

array([[  2,   5,   0],
       [127, 705, 104],
       [  5, 150,  91]], dtype=int64)

In [241]:
# Weighted F1: the per-class F1 scores averaged with weights proportional to the number
# of true instances of each class.
f1_score(y_test, y_pred, average='weighted')

0.7035789345855155