In [1]:
# model_training.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
import pickle

# Train a logistic-regression classifier that predicts whether a wine is
# "best quality" (quality score > 5) and persist the fitted model and scaler.

# Load the dataset
df = pd.read_csv('./winequalityN.csv')

# Remove spaces from column headers
df.columns = df.columns.str.replace(' ', '_')

# Encode 'type' as a numeric category; any other label becomes NaN here and
# is imputed together with the other features further down.
df['type'] = df['type'].map({'red': 0, 'white': 1})

# Preprocessing: coerce every column to numeric (unparseable cells -> NaN).
# Missing values are deliberately NOT replaced with 0 at this point: 0 is not
# a plausible value for most of these columns and it would also stretch the
# MinMaxScaler range, squashing the real observations.
df = df.apply(pd.to_numeric, errors='coerce')

# A row without a quality score has no label; drop it instead of silently
# labelling it as "not best quality".
df = df.dropna(subset=['quality'])

# Binary target: 1 when quality is above 5, otherwise 0.
df['best_quality'] = (df['quality'] > 5).astype(int)

# Prepare features and target
features = df.drop(['quality', 'best_quality'], axis=1)
target = df['best_quality']

# Split the data
xtrain, xtest, ytrain, ytest = train_test_split(features, target, test_size=0.2, random_state=40)

# Impute missing feature values with medians computed on the training split
# only, so no statistic from the held-out rows leaks into training. A column
# that is entirely missing in the training split has no median; fall back to
# 0 for it so the model can still be fitted.
train_medians = xtrain.median().fillna(0)
xtrain = xtrain.fillna(train_medians)
xtest = xtest.fillna(train_medians)

# Normalize the features (the scaler is fitted on the training split only)
scaler = MinMaxScaler()
xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)

# Train the model
model = LogisticRegression()
model.fit(xtrain, ytrain)

# Save the model and scaler as pickled files
with open("logreg_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

print("Model and scaler saved successfully.")


Model and scaler saved successfully.
