## **Cross validation**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the car-price CSV directly from its raw GitHub URL.
DATA_URL = "https://raw.githubusercontent.com/DianCA26/Dataset/main/CarPrice_Assignment.csv"
df = pd.read_csv(DATA_URL)

In [None]:
# Predictor columns fed to the model; `price` is the regression target.
feature_columns = [
    'horsepower',
    'wheelbase',
    'enginesize',
    'boreratio',
    'compressionratio',
    'highwaympg',
    'citympg',
    'curbweight',
    'carwidth',
    'carlength',
]
X = df[feature_columns]
y = df['price']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=26)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.svm import SVR

# Regressor used for the cross-validation run: SVR with a degree-3 polynomial kernel.
svr_settings = dict(kernel="poly", degree=3, C=1000, epsilon=0.01)
model = SVR(**svr_settings)

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation on the training split only (the test split stays untouched).
# The metric is named explicitly so this cell agrees with the scoring="r2" used by
# the grid searches later in this notebook instead of relying on the estimator's
# default scorer; for this single-output regressor the values are the same R² scores.
scores = cross_val_score(model, X_train, y_train, cv=5, scoring="r2")

In [None]:
# Display the per-fold scores returned by the cross-validation call (one entry per fold).
scores

array([0.83116366, 0.70381438, 0.58123855, 0.62578521, 0.58473012])

In [None]:
# Average score across the cross-validation folds.
np.mean(scores)

0.665346382529936

In [None]:
# Spread of the fold scores (population standard deviation, ddof=0).
np.std(scores)

0.09392907172928662

In [None]:
# Upper edge of the "mean ± one standard deviation" band of fold scores.
cv_mean, cv_std = scores.mean(), scores.std()
cv_mean + cv_std

0.7592754542592227

In [None]:
# Lower edge of the "mean ± one standard deviation" band of fold scores.
cv_mean, cv_std = scores.mean(), scores.std()
cv_mean - cv_std

0.5714173108006494

## **GridSearch**

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space for the SVR grid search, written as one sub-grid per kernel so
# that every kernel is only paired with the hyper-parameters listed for it.
_decades = [0.001, 0.01, 0.1, 1, 10, 100]

rbf_grid = {
    'kernel': ['rbf'],
    'C': list(_decades),
    'gamma': list(_decades),
}
linear_grid = {
    'kernel': ['linear'],
    'C': list(_decades),
}
poly_grid = {
    'kernel': ['poly'],
    'degree': list(range(2, 9)),
    'epsilon': [0.01, 0.02, 0.05, 1, 10, 100, 1000],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000],
}

param_grid = [rbf_grid, linear_grid, poly_grid]

In [None]:
# Grid search over `param_grid` using a default-constructed SVR,
# evaluated with 5-fold cross-validation and ranked by R².
grid_search = GridSearchCV(
    estimator=SVR(),
    param_grid=param_grid,
    cv=5,
    scoring="r2",
)

In [None]:
# Run the search on the training split, then report the winning configuration.
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_cv_score = grid_search.best_score_
print("Best parameters: {}".format(best_params))
print("Best cross-validation score: {:.2f}".format(best_cv_score))

Best parameters: {'C': 10, 'kernel': 'linear'}
Best cross-validation score: 0.73


In [None]:
# Despite the name, this holds the estimator selected by the grid search
# (not a parameter dict); it is used for the predictions that follow.
param_new=grid_search.best_estimator_

In [None]:
# Echo the selected estimator so its hyper-parameters appear in the cell output.
param_new

SVR(C=10, kernel='linear')

In [None]:
# Predict prices for both splits with the estimator chosen by the grid search.
y_train_pred, y_test_pred = (
    param_new.predict(features) for features in (X_train, X_test)
)

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score


def report_regression_metrics(train_true, train_pred, test_true, test_pred):
    """Print MAE, MSE, RMSE and R² for the train and test predictions.

    Metrics are printed in the order MAE, MSE, RMSE, r2score; each metric is
    shown for the train split first and the test split second, one per line.

    Parameters
    ----------
    train_true, train_pred : array-like
        Observed and predicted targets for the training split.
    test_true, test_pred : array-like
        Observed and predicted targets for the test split.

    Returns
    -------
    None
        Nothing is returned, so calling this as the last statement of a cell
        adds no extra cell output beyond the printed lines.
    """
    metrics = (
        ('MAE', mean_absolute_error),
        ('MSE', mean_squared_error),
        # RMSE is reported as the square root of the MSE.
        ('RMSE', lambda true, pred: np.sqrt(mean_squared_error(true, pred))),
        ('r2score', r2_score),
    )
    splits = (
        ('train', train_true, train_pred),
        ('test', test_true, test_pred),
    )
    for metric_name, metric_fn in metrics:
        for split_name, true, pred in splits:
            # Label and value are separate print arguments, giving "<label> =  <value>".
            print('{} {} = '.format(metric_name, split_name), metric_fn(true, pred))


report_regression_metrics(y_train, y_train_pred, y_test, y_test_pred)

MAE train =  2400.591157389554
MAE test =  2011.9361377299804
MSE train =  14916959.157999916
MSE test =  7329882.15487767
RMSE train =  3862.247941031222
RMSE test =  2707.375510504162
r2score train =  0.779725079607157
r2score test =  0.8380421089632877


## **Pipeline**

In [None]:
from sklearn.compose import ColumnTransformer

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Two column groups: the first is min-max scaled, the second is standardised.
numeric_features = [
    "horsepower",
    "wheelbase",
    "enginesize",
    "boreratio",
    "compressionratio",
]
numeric_features2 = [
    "highwaympg",
    "citympg",
    "curbweight",
    "carwidth",
    "carlength",
]

# One single-step pipeline per scaling strategy.
numeric_transformer_mms = Pipeline(steps=[("scaler", MinMaxScaler())])
numeric_transformer_sc = Pipeline(steps=[("scaler2", StandardScaler())])

# Route each column group to its scaler.
column_steps = [
    ("num1", numeric_transformer_mms, numeric_features),
    ("num2", numeric_transformer_sc, numeric_features2),
]
preprocessor = ColumnTransformer(transformers=column_steps)


In [None]:
# Chain the column preprocessing with a degree-2 polynomial-kernel SVR and fit
# the whole pipeline on the training split. The fit call stays last so the
# fitted pipeline is echoed as the cell output.
svr_step = SVR(kernel='poly', degree=2, C=1000, epsilon=0.01)
pipe = Pipeline(steps=[("prep", preprocessor), ("svr", svr_step)])
pipe.fit(X_train, y_train)

Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('num1',
                                                  Pipeline(steps=[('scaler',
                                                                   MinMaxScaler())]),
                                                  ['horsepower', 'wheelbase',
                                                   'enginesize', 'boreratio',
                                                   'compressionratio']),
                                                 ('num2',
                                                  Pipeline(steps=[('scaler2',
                                                                   StandardScaler())]),
                                                  ['highwaympg', 'citympg',
                                                   'curbweight', 'carwidth',
                                                   'carlength'])])),
                ('svr', SVR(C=1000, degree=2, epsilon=0.01, kernel='poly'))])

In [None]:
# Predict prices for the train and test splits with the fitted pipeline.
y_train_pred, y_test_pred = map(pipe.predict, (X_train, X_test))

In [None]:
# Train/test error report for the current predictions.
# Every metric is computed once up front; the RMSE lines reuse the stored MSE.
mae_train = mean_absolute_error(y_train, y_train_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

print('MAE train = ', mae_train)
print('MAE test = ', mae_test)

print('MSE train = ', mse_train)
print('MSE test = ', mse_test)

print('RMSE train = ', np.sqrt(mse_train))
print('RMSE test = ', np.sqrt(mse_test))

print('r2score train = ', r2_train)
print('r2score test = ', r2_test)

MAE train =  2063.934845272449
MAE test =  1879.1101134895523
MSE train =  15952959.589521378
MSE test =  7834191.501049096
RMSE train =  3994.115620449836
RMSE test =  2798.9625758571865
r2score train =  0.7644267262253985
r2score test =  0.8268991087880835


In [None]:
# Grid for the pipeline search. The `svr__` prefix routes each setting to the
# pipeline step named "svr"; only the polynomial degree actually varies.
svr_poly_grid = {
    'svr__kernel': ['poly'],
    'svr__degree': [2, 3, 4],
    'svr__epsilon': [0.01],
    'svr__C': [1000],
}
param = [svr_poly_grid]

In [None]:
# Grid search over the whole pipeline (preprocessing + SVR), 5-fold CV, ranked by R².
# Note: this rebinds the `grid_search` name used for the earlier, unscaled search.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param,
    scoring="r2",
    cv=5,
)

In [None]:
# Run the pipeline grid search on the training split; as the last expression
# in the cell, the returned search object is echoed as output.
grid_search.fit(X_train,y_train)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('prep',
                                        ColumnTransformer(transformers=[('num1',
                                                                         Pipeline(steps=[('scaler',
                                                                                          MinMaxScaler())]),
                                                                         ['horsepower',
                                                                          'wheelbase',
                                                                          'enginesize',
                                                                          'boreratio',
                                                                          'compressionratio']),
                                                                        ('num2',
                                                                         Pipeline(steps=[('scaler2',
               

In [None]:
# Keep a handle on the pipeline selected by the grid search and display it.
# A bare assignment produces no cell output, so the estimator is echoed on
# the last line to show its configuration.
best = grid_search.best_estimator_
best

Pipeline(steps=[('prep',
                 ColumnTransformer(transformers=[('num1',
                                                  Pipeline(steps=[('scaler',
                                                                   MinMaxScaler())]),
                                                  ['horsepower', 'wheelbase',
                                                   'enginesize', 'boreratio',
                                                   'compressionratio']),
                                                 ('num2',
                                                  Pipeline(steps=[('scaler2',
                                                                   StandardScaler())]),
                                                  ['highwaympg', 'citympg',
                                                   'curbweight', 'carwidth',
                                                   'carlength'])])),
                ('svr', SVR(C=1000, degree=2, epsilon=0.01, kernel='poly'))])

In [None]:
# Predict prices for both splits with the pipeline selected by the grid search.
tuned_pipeline = grid_search.best_estimator_
y_train_pred = tuned_pipeline.predict(X_train)
y_test_pred = tuned_pipeline.predict(X_test)

In [None]:
# Train/test error report for the current predictions, built as a
# (label, value) table and printed row by row.
mse_by_split = {
    'train': mean_squared_error(y_train, y_train_pred),
    'test': mean_squared_error(y_test, y_test_pred),
}
report_rows = [
    ('MAE train = ', mean_absolute_error(y_train, y_train_pred)),
    ('MAE test = ', mean_absolute_error(y_test, y_test_pred)),
    ('MSE train = ', mse_by_split['train']),
    ('MSE test = ', mse_by_split['test']),
    # RMSE is the square root of the MSE computed above.
    ('RMSE train = ', np.sqrt(mse_by_split['train'])),
    ('RMSE test = ', np.sqrt(mse_by_split['test'])),
    ('r2score train = ', r2_score(y_train, y_train_pred)),
    ('r2score test = ', r2_score(y_test, y_test_pred)),
]
for label, value in report_rows:
    print(label, value)

MAE train =  2063.934845272449
MAE test =  1879.1101134895523
MSE train =  15952959.589521378
MSE test =  7834191.501049096
RMSE train =  3994.115620449836
RMSE test =  2798.9625758571865
r2score train =  0.7644267262253985
r2score test =  0.8268991087880835
