In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC, LinearSVR, SVR
from sklearn.metrics import accuracy_score, mean_squared_error

In [2]:
# Load the breast-cancer dataset and keep only two of its feature columns.
data_breast_cancer = datasets.load_breast_cancer()
bc_feature_idx = [3, 4]  # column 3: mean area, column 4: mean smoothness
X_bc = data_breast_cancer.data[:, bc_feature_idx]
y_bc = data_breast_cancer.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
bc_split = train_test_split(X_bc, y_bc, test_size=0.2, random_state=42)
X_bc_train, X_bc_test, y_bc_train, y_bc_test = bc_split


In [3]:
# Baseline: linear SVM with hinge loss trained on the raw (unscaled) features.
# random_state pins the solver's pseudo-random data shuffling so the
# accuracies below are identical on every Restart & Run All.
model_bc = LinearSVC(loss="hinge", max_iter=10000, random_state=42)
model_bc.fit(X_bc_train, y_bc_train)

# Accuracy on the training split and on the held-out test split.
acc_bc_train = accuracy_score(y_bc_train, model_bc.predict(X_bc_train))
acc_bc_test = accuracy_score(y_bc_test, model_bc.predict(X_bc_test))



In [4]:
# Standardize the features: the scaler is fitted on the training split only
# and the same transformation is then applied to the test split.
scaler_bc = StandardScaler()
X_bc_train_scaled = scaler_bc.fit_transform(X_bc_train)
X_bc_test_scaled = scaler_bc.transform(X_bc_test)

# Same classifier as the unscaled baseline, now on standardized inputs.
# random_state pins the solver's pseudo-random data shuffling so the
# stored accuracies are reproducible.
model_bc_scaled = LinearSVC(loss="hinge", max_iter=10000, random_state=42)
model_bc_scaled.fit(X_bc_train_scaled, y_bc_train)
acc_bc_train_scaled = accuracy_score(y_bc_train, model_bc_scaled.predict(X_bc_train_scaled))
acc_bc_test_scaled = accuracy_score(y_bc_test, model_bc_scaled.predict(X_bc_test_scaled))

# Persist the four accuracies in the order:
# [train unscaled, test unscaled, train scaled, test scaled].
bc_acc = [acc_bc_train, acc_bc_test, acc_bc_train_scaled, acc_bc_test_scaled]
with open("bc_acc.pkl", "wb") as f:
    pickle.dump(bc_acc, f)


In [5]:
# Load the iris dataset and keep only the two petal measurements.
data_iris = datasets.load_iris()
iris_feature_idx = [2, 3]  # column 2: petal length, column 3: petal width
X_iris = data_iris.data[:, iris_feature_idx]

# Binary target: 1 for class 2 (Virginica), 0 for the other two classes.
is_virginica = data_iris.target == 2
y_iris = is_virginica.astype(int)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
iris_split = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)
X_iris_train, X_iris_test, y_iris_train, y_iris_test = iris_split

In [6]:
# Baseline: linear SVM with hinge loss trained on the raw (unscaled) petal features.
# random_state pins the solver's pseudo-random data shuffling so the
# accuracies below are identical on every Restart & Run All.
model_iris = LinearSVC(loss="hinge", max_iter=10000, random_state=42)
model_iris.fit(X_iris_train, y_iris_train)

# Accuracy on the training split and on the held-out test split.
acc_iris_train = accuracy_score(y_iris_train, model_iris.predict(X_iris_train))
acc_iris_test = accuracy_score(y_iris_test, model_iris.predict(X_iris_test))

In [7]:
# Standardize the features: the scaler is fitted on the training split only
# and the same transformation is then applied to the test split.
scaler_iris = StandardScaler()
X_iris_train_scaled = scaler_iris.fit_transform(X_iris_train)
X_iris_test_scaled = scaler_iris.transform(X_iris_test)

# Same classifier as the unscaled baseline, now on standardized inputs.
# random_state pins the solver's pseudo-random data shuffling so the
# stored accuracies are reproducible.
model_iris_scaled = LinearSVC(loss="hinge", max_iter=10000, random_state=42)
model_iris_scaled.fit(X_iris_train_scaled, y_iris_train)
acc_iris_train_scaled = accuracy_score(y_iris_train, model_iris_scaled.predict(X_iris_train_scaled))
acc_iris_test_scaled = accuracy_score(y_iris_test, model_iris_scaled.predict(X_iris_test_scaled))

# Persist the four accuracies in the order:
# [train unscaled, test unscaled, train scaled, test scaled].
iris_acc = [acc_iris_train, acc_iris_test, acc_iris_train_scaled, acc_iris_test_scaled]
with open("iris_acc.pkl", "wb") as f:
    pickle.dump(iris_acc, f)


In [8]:
# Synthetic regression data: a 4th-degree polynomial of X plus Gaussian noise.
# A dedicated, seeded generator is created inside the cell so that re-running
# it (or the whole notebook) always yields the same samples and, downstream,
# the same error metrics.
size = 900
rng_poly = np.random.default_rng(42)

# X is uniform on [-2.5, 2.5).
X = rng_poly.random(size) * 5 - 2.5

# Polynomial coefficients, highest degree first.
w4, w3, w2, w1, w0 = 1, 2, 1, -4, 2

# Noise: standard normal scaled by 8 and shifted by -4.
y = w4*(X**4) + w3*(X**3) + w2*(X**2) + w1*X + w0 + rng_poly.standard_normal(size)*8 - 4

# scikit-learn estimators expect a 2-D feature matrix, hence the single column.
X_poly = X.reshape(-1, 1)
y_poly = y
X_poly_train, X_poly_test, y_poly_train, y_poly_test = train_test_split(X_poly, y_poly, test_size=0.2, random_state=42)

In [9]:
# Linear SVR on explicit polynomial features: expand X to degree 4,
# standardize the expanded columns, then fit the linear regressor.
# random_state pins the solver's pseudo-random data shuffling so the
# reported errors are reproducible.
pipe_linsvr = Pipeline([
    ("poly", PolynomialFeatures(degree=4)),
    ("scaler", StandardScaler()),
    ("lin_svr", LinearSVR(random_state=42))
])
pipe_linsvr.fit(X_poly_train, y_poly_train)

# Mean squared error on the training split and on the held-out test split.
mse_linsvr_train = mean_squared_error(y_poly_train, pipe_linsvr.predict(X_poly_train))
mse_linsvr_test = mean_squared_error(y_poly_test, pipe_linsvr.predict(X_poly_test))


In [10]:
# Kernelized alternative: standardize the single input column and let a
# degree-4 polynomial kernel SVR model the non-linearity.
svr_steps = [
    ("scaler", StandardScaler()),
    ("svr", SVR(kernel="poly", degree=4)),
]
pipe_svr = Pipeline(svr_steps)
pipe_svr.fit(X_poly_train, y_poly_train)

# Predict once per split, then score with mean squared error.
y_poly_train_pred_svr = pipe_svr.predict(X_poly_train)
y_poly_test_pred_svr = pipe_svr.predict(X_poly_test)
mse_svr_train = mean_squared_error(y_poly_train, y_poly_train_pred_svr)
mse_svr_test = mean_squared_error(y_poly_test, y_poly_test_pred_svr)


In [None]:
# Fresh scaler + degree-4 polynomial-kernel SVR pipeline to be tuned.
pipe_svr_grid = Pipeline([
    ("scaler", StandardScaler()),
    ("svr", SVR(kernel="poly", degree=4)),
])

# Candidate values for the "svr" step; the "svr__" prefix routes each
# parameter to that pipeline step.
param_grid = {
    "svr__C": [0.1, 1, 10],
    "svr__coef0": [0.1, 1, 10],
}

# 5-fold cross-validated search scored by negative mean squared error.
grid = GridSearchCV(
    pipe_svr_grid,
    param_grid,
    scoring="neg_mean_squared_error",
    cv=5,
)

# NOTE(review): the search is fitted on the full X_poly / y_poly, which
# includes the rows held out as the test split - confirm this is intended.
grid.fit(X_poly, y_poly)
