In [1]:

#Step-1: Import Libraries

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor




In [2]:
# Step-2: Load Dataset from UCI Link
#
# The file is read straight from the remote URL on every run (network access
# is required). It is parsed with ';' as the field separator.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Confirmation message plus the (rows, columns) shape of the loaded frame.
print("Dataset Loaded Successfully ✅", end="\n")
print("Shape:", df.shape)

Dataset Loaded Successfully ✅
Shape: (1599, 12)


In [6]:
# Step-4: Train-Test Split
#
# The target is the "quality" column; the features are every other column.
# Both are built here, so this cell does not depend on any other cell
# having defined X / y beforehand.
y = df["quality"]
X = df.drop(columns="quality")

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Step-3: Define Features and Target
#
# Positional selection: every column except the last becomes the feature
# matrix, and the last column becomes the target.
# NOTE(review): this cell sits after the train/test split cell, which builds
# its own X / y by column name, so the assignment below does not feed the
# split. Presumably both selections are equivalent (i.e. "quality" is the
# last column) -- confirm, and consider moving this cell above the split.
X, y = df.iloc[:, :-1], df.iloc[:, -1]


In [8]:
# Step-5: Scaling (only for Linear Regression)
#
# The scaler's statistics are learned from the training split only and then
# applied unchanged to both splits, so nothing from the test rows influences
# the fitted scaler.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [9]:
# Step-6: Individual Models
#
# Each model is fitted on the training split and then asked for predictions
# on the test split. `fit` returns the estimator itself, so construction and
# fitting are chained.

# Linear Regression -- uses the standardized feature arrays.
lr = LinearRegression().fit(X_train_scaled, y_train)
lr_pred = lr.predict(X_test_scaled)

# Decision Tree -- uses the unscaled features; seeded for repeatability.
dt = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
dt_pred = dt.predict(X_test)

# Random Forest -- 200 trees on the unscaled features; seeded for repeatability.
rf = RandomForestRegressor(n_estimators=200, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)


In [10]:
# Step-7: Ensemble Model (Voting Regressor)
#
# The ensemble averages the predictions of three freshly constructed
# estimators (VotingRegressor fits its own clones, so the models trained in
# Step-6 are not reused).
#
# Scaling is meant for the linear model only, so the scaler is bundled with
# that member in a pipeline instead of feeding pre-scaled arrays to every
# member. The tree-based members therefore see the same unscaled features as
# their stand-alone counterparts, and the fitted ensemble can be applied
# directly to raw feature rows without a separate scaling step.
voting = VotingRegressor([
    ('lr', make_pipeline(StandardScaler(), LinearRegression())),
    ('dt', DecisionTreeRegressor(random_state=42)),
    ('rf', RandomForestRegressor(n_estimators=200, random_state=42))
])

# Fit and predict on the raw splits; the 'lr' pipeline standardizes internally,
# learning its scaling statistics from the training data only.
voting.fit(X_train, y_train)
ensemble_pred = voting.predict(X_test)

In [11]:
# Step-8: Evaluation Function

def evaluate(name, y_true, y_pred):
    """Print a short regression report for one set of predictions.

    The report consists of the given heading, the mean squared error and the
    R2 score (both rounded to 4 decimal places), and a 30-character dashed
    divider. Nothing is returned.

    Parameters
    ----------
    name : label printed as the first line of the report.
    y_true : ground-truth target values.
    y_pred : predicted target values to score against ``y_true``.
    """
    print(f"{name}")

    mse = mean_squared_error(y_true, y_pred)
    print("MSE:", round(mse, 4))

    r2 = r2_score(y_true, y_pred)
    print("R2 :", round(r2, 4))

    divider = "-" * 30
    print(divider)

In [12]:
# Step-9: Compare Models
#
# Every model is scored against the same held-out targets; the reports are
# printed in the order listed here.
model_predictions = [
    ("Linear Regression", lr_pred),
    ("Decision Tree", dt_pred),
    ("Random Forest", rf_pred),
    ("Ensemble (Voting Regressor)", ensemble_pred),
]

for model_name, predictions in model_predictions:
    evaluate(model_name, y_test, predictions)


Linear Regression
MSE: 0.39
R2 : 0.4032
------------------------------
Decision Tree
MSE: 0.6062
R2 : 0.0723
------------------------------
Random Forest
MSE: 0.3059
R2 : 0.5319
------------------------------
Ensemble (Voting Regressor)
MSE: 0.3384
R2 : 0.4821
------------------------------
