In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV

In [2]:
automobile = pd.read_csv('datasets/auto-mpg_processed.csv')
automobile.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,20.5,6,200.0,95,3155,18.2,42
1,23.0,8,350.0,125,3900,17.4,41
2,23.0,4,122.0,86,2220,14.0,49
3,30.0,4,111.0,80,2155,14.8,43
4,12.0,8,350.0,180,4499,12.5,47


In [3]:
X= automobile.drop(['mpg', 'age'], axis= 1)
Y= automobile['mpg']
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size= 0.2)

### lasso regression

In [4]:
parameters = {'alpha': [0.1, 0.2, 0.4, 0.6, 0.7, 0.8, 0.9, 1.0]}

grid_search = GridSearchCV(Lasso(), parameters, cv= 3, return_train_score= True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'alpha': 0.9}

In [5]:
for i in range(len(parameters['alpha'])):
    print("parameters: ", grid_search.cv_results_['params'][i])
    
    print("mean test score: ", grid_search.cv_results_['mean_test_score'][i])
    
    print("rank: ", grid_search.cv_results_['rank_test_score'][i])

parameters:  {'alpha': 0.1}
mean test score:  0.6837901915221433
rank:  8
parameters:  {'alpha': 0.2}
mean test score:  0.6862453448317135
rank:  7
parameters:  {'alpha': 0.4}
mean test score:  0.6880989112879442
rank:  6
parameters:  {'alpha': 0.6}
mean test score:  0.6885905225660172
rank:  5
parameters:  {'alpha': 0.7}
mean test score:  0.6887519922451902
rank:  4
parameters:  {'alpha': 0.8}
mean test score:  0.6887535192377765
rank:  3
parameters:  {'alpha': 0.9}
mean test score:  0.6887542741221737
rank:  1
parameters:  {'alpha': 1.0}
mean test score:  0.688753786902588
rank:  2


In [6]:
lasso_model= Lasso(alpha= grid_search.best_params_['alpha']).fit(x_train, y_train)

In [7]:
y_pred = lasso_model.predict(x_test)

print('training score: ', lasso_model.score(x_train, y_train))
print('test score: ', r2_score(y_test, y_pred))

training score:  0.6910683778716977
test score:  0.7648906003055441


### knn regression

In [9]:
parameters = {'n_neighbors': [10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 50]}

grid_search = GridSearchCV(KNeighborsRegressor(), parameters, cv= 3, return_train_score= True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'n_neighbors': 25}

In [10]:
for i in range(len(parameters['n_neighbors'])):
    print("parameters: ", grid_search.cv_results_['params'][i])
    
    print("mean test score: ", grid_search.cv_results_['mean_test_score'][i])
    
    print("rank: ", grid_search.cv_results_['rank_test_score'][i])

parameters:  {'n_neighbors': 10}
mean test score:  0.6675315010766033
rank:  12
parameters:  {'n_neighbors': 12}
mean test score:  0.6687993153840169
rank:  11
parameters:  {'n_neighbors': 14}
mean test score:  0.6713343673786477
rank:  10
parameters:  {'n_neighbors': 16}
mean test score:  0.671457573710172
rank:  9
parameters:  {'n_neighbors': 18}
mean test score:  0.6779637736537177
rank:  8
parameters:  {'n_neighbors': 20}
mean test score:  0.6818826733871391
rank:  7
parameters:  {'n_neighbors': 25}
mean test score:  0.686021817798748
rank:  1
parameters:  {'n_neighbors': 30}
mean test score:  0.6844318039127716
rank:  3
parameters:  {'n_neighbors': 35}
mean test score:  0.6840250172116648
rank:  4
parameters:  {'n_neighbors': 40}
mean test score:  0.6857372708663699
rank:  2
parameters:  {'n_neighbors': 45}
mean test score:  0.6819218821713067
rank:  6
parameters:  {'n_neighbors': 50}
mean test score:  0.6835746457986215
rank:  5


In [11]:
knn_model= KNeighborsRegressor(n_neighbors= grid_search.best_params_['n_neighbors']).fit(x_train, y_train)

y_pred = knn_model.predict(x_test)

print('training score: ', knn_model.score(x_train, y_train))
print('test score: ', r2_score(y_test, y_pred))


training score:  0.7106841795839849
test score:  0.7937179863160635


### decision tree regression

In [12]:
parameters = {'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12]}

grid_search = GridSearchCV(DecisionTreeRegressor(), parameters, cv= 3, return_train_score= True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'max_depth': 3}

In [13]:
for i in range(len(parameters['max_depth'])):
    print("parameters: ", grid_search.cv_results_['params'][i])
    
    print("mean test score: ", grid_search.cv_results_['mean_test_score'][i])
    
    print("rank: ", grid_search.cv_results_['rank_test_score'][i])

parameters:  {'max_depth': 1}
mean test score:  0.5300277865754078
rank:  10
parameters:  {'max_depth': 2}
mean test score:  0.6715974925708866
rank:  3
parameters:  {'max_depth': 3}
mean test score:  0.7134432630948052
rank:  1
parameters:  {'max_depth': 4}
mean test score:  0.6969690583247917
rank:  2
parameters:  {'max_depth': 5}
mean test score:  0.6673691084839782
rank:  4
parameters:  {'max_depth': 6}
mean test score:  0.6295214143926017
rank:  5
parameters:  {'max_depth': 7}
mean test score:  0.5621704455006453
rank:  7
parameters:  {'max_depth': 8}
mean test score:  0.550083456206375
rank:  8
parameters:  {'max_depth': 9}
mean test score:  0.5754979947739165
rank:  6
parameters:  {'max_depth': 10}
mean test score:  0.5437574746005729
rank:  9
parameters:  {'max_depth': 12}
mean test score:  0.5228448504139426
rank:  11


In [14]:
decision_tree_model= DecisionTreeRegressor(max_depth= grid_search.best_params_['max_depth']).fit(x_train, y_train)

y_pred = decision_tree_model.predict(x_test)

print('training score: ', decision_tree_model.score(x_train, y_train))
print('test score: ', r2_score(y_test, y_pred))


training score:  0.7762537758177225
test score:  0.7012166808816707


### SVR

In [15]:
parameters = {'epsilon': [0.05, 0.1, 0.2, 0.3],
              'C': [0.2, 0.3, 0.4]}

grid_search = GridSearchCV(SVR(kernel= 'linear'), parameters, cv= 3, return_train_score= True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'C': 0.3, 'epsilon': 0.3}

In [16]:
svr_model= SVR(kernel= 'linear',
               epsilon = grid_search.best_params_['epsilon'],
               C = grid_search.best_params_['C']).fit(x_train, y_train)

y_pred = svr_model.predict(x_test)

print('training score: ', svr_model.score(x_train, y_train))
print('test score: ', r2_score(y_test, y_pred))


training score:  0.681773852493753
test score:  0.7626850967458925
