In [5]:
# =====================================
# WINE QUALITY – CROSS VALIDATION MODEL
# =====================================

import pandas as pd
import joblib

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# -------------------------------
# LOAD DATASET
# -------------------------------
df = pd.read_csv(r"C:\Users\sachin-selvam\Desktop\task_10\winequality-red (1).csv")

print("Dataset Shape:", df.shape)

# -------------------------------
# FEATURES & TARGET
# -------------------------------
X = df.drop(columns=["quality"])
y = df["quality"]

# -------------------------------
# PIPELINE
# -------------------------------
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", RandomForestRegressor(
        n_estimators=200,
        random_state=42
    ))
])

# -------------------------------
# CROSS VALIDATION
# -------------------------------
cv_scores = cross_val_score(
    pipeline,
    X,
    y,
    cv=5,
    scoring="r2"
)

print("CV R2 Scores:", cv_scores)
print("Mean CV R2:", cv_scores.mean())

# -------------------------------
# TRAIN TEST SPLIT
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

pipeline.fit(X_train, y_train)

# -------------------------------
# EVALUATION
# -------------------------------
y_pred = pipeline.predict(X_test)

print("Test R2:", r2_score(y_test, y_pred))
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("RMSE:", rmse)

# -------------------------------
# SAVE MODEL
# -------------------------------
joblib.dump(pipeline, "wine_cv_model.pkl")

print("✅ Model saved: wine_cv_model.pkl")


Dataset Shape: (1599, 12)
CV R2 Scores: [0.25160886 0.34475525 0.36483552 0.32508931 0.26215033]
Mean CV R2: 0.30968785358444617
Test R2: 0.5312219250138227
RMSE: 0.5534882281946022
✅ Model saved: wine_cv_model.pkl
