SVM Classifier for the Wine Dataset



In [23]:
#Importing the required Libraries
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

In [11]:
# Load the Wine dataset through scikit-learn's load_wine and unpack the
# returned object into the feature matrix (X) and the target vector (y).
data = load_wine()
X = data.data
y = data.target

In [12]:
# Hold out 20% of the samples as a test set. The split is seeded so it is
# reproducible, and stratified on y so the split follows the label distribution.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [13]:
# Two-step estimator: standardize the features, then classify with an SVM.
# Wrapping both steps in one Pipeline means the scaler is fitted together
# with the classifier on whatever data the pipeline is fitted on.
pipeline_steps = [
    ("scaler", StandardScaler()),
    ("svm", SVC()),
]
pipe = Pipeline(pipeline_steps)

In [18]:
# Hyperparameter tuning
#
# The grid is split per kernel because `gamma` is only used by the RBF
# kernel. Crossing it with the linear kernel would refit identical models
# for every gamma value (a flat grid has 24 candidates, this one has
# 12 RBF + 4 linear = 16).
# Note: if a linear candidate wins, best_params_ will not contain
# "svm__gamma", since that key is not part of the linear sub-grid.
c_values = [0.1, 1, 10, 100]
param_grid = [
    {
        "svm__kernel": ["rbf"],
        "svm__C": c_values,
        "svm__gamma": ["scale", 0.01, 0.001],
    },
    {
        "svm__kernel": ["linear"],
        "svm__C": c_values,
    },
]

# 5-fold cross-validated search over the whole pipeline (scaler included),
# scored by accuracy, with n_jobs=-1 to run the fits in parallel.
grid = GridSearchCV(pipe, param_grid, cv=5, scoring="accuracy", n_jobs=-1)
grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)


Best params: {'svm__C': 1, 'svm__gamma': 0.01, 'svm__kernel': 'rbf'}


In [20]:
# Predict the held-out test split with the fitted grid search object and
# report the fraction of correctly classified samples.
y_pred = grid.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", test_accuracy)

Best Accuracy: 1.0


In [21]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the tuned pipeline over the complete dataset.
# NOTE(review): X and y include the samples the hyperparameters were tuned
# on, so this estimate may be slightly optimistic — confirm this is intended.
scores = cross_val_score(
    grid.best_estimator_,
    X,
    y,
    cv=10,
    scoring="accuracy",
)
cv_mean, cv_std = scores.mean(), scores.std()
print("Cross-validation mean accuracy:", cv_mean)
print("Cross-validation std:", cv_std)


Cross-validation mean accuracy: 0.9833333333333334
Cross-validation std: 0.02545875386086579


SVM Regressor for California Housing Prices

In [5]:
#Importing the required Libraries and loading the dataset
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV

# Fetch the California Housing data and unpack features and target.
# Note that this rebinds the names X and y.
housing = fetch_california_housing()
X = housing.data
y = housing.target  # target in $100,000s

In [7]:
# Hold out 20% of the rows as a test set; the seed makes the split
# reproducible.
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [8]:
# Standardize features: learn the scaling statistics from the training split
# only, then apply that same transformation to both splits.
# Note: X_train and X_test are overwritten with their scaled versions, so the
# unscaled arrays are no longer available under these names afterwards.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Hyperparameter tuning on a subset

# Only the first 2000 training rows are used for the search to keep it fast.
X_small = X_train[:2000]
y_small = y_train[:2000]

# Candidate values for each SVR hyperparameter explored by the search.
param_distributions = dict(
    C=[0.1, 1, 10, 100],
    gamma=["scale", "auto", 0.1, 0.01],
    epsilon=[0.01, 0.1, 0.2],
)

# Base estimator: the kernel is fixed to RBF, everything else is tuned.
svr = SVR(kernel="rbf")

In [10]:

# Randomized search: 10 sampled configurations, 3-fold cross-validation,
# scored by negative RMSE, seeded for reproducibility, n_jobs=-1 for
# parallel fits.
# NOTE(review): X_small was scaled with statistics computed on the whole
# training split before this search, so the CV validation folds are not fully
# unseen by the scaler — confirm this is acceptable.
search_options = dict(
    n_iter=10,
    scoring="neg_root_mean_squared_error",
    cv=3,
    random_state=42,
    n_jobs=-1,
)
rnd_search = RandomizedSearchCV(svr, param_distributions, **search_options)
rnd_search.fit(X_small, y_small)

print("Best params:", rnd_search.best_params_)

Best params: {'gamma': 'auto', 'epsilon': 0.01, 'C': 10}


In [11]:
# Evaluate the Best Model on Full Test Set

from sklearn.base import clone
from sklearn.metrics import mean_squared_error
import numpy as np

# Best model: an unfitted copy carrying the winning hyperparameters.
# Cloning leaves rnd_search.best_estimator_ (fitted on the tuning subset)
# untouched instead of refitting that very object in place, so the search
# object keeps describing the model it actually selected.
best_svr = clone(rnd_search.best_estimator_)

# Retrain on full train set
best_svr.fit(X_train, y_train)

# Evaluate on test set
y_pred = best_svr.predict(X_test)
# RMSE is the square root of the mean squared error, i.e. it is expressed
# in the same units as the target.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("Test RMSE:", rmse)

Test RMSE: 0.5694018403482203
