# Linear Regression - Auto Dataset

### Import Libraries

In [2]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score

import matplotlib.pyplot as plt
import seaborn as sns

## Problem Statement

## Data Gathering 

In [3]:
df = pd.read_csv('autos_dataset.csv')
df.head().T

Unnamed: 0,0,1,2,3,4
symboling,3,3,1,2,2
normalized-losses,?,?,?,164,164
make,alfa-romero,alfa-romero,alfa-romero,audi,audi
fuel-type,gas,gas,gas,gas,gas
aspiration,std,std,std,std,std
num-of-doors,two,two,two,four,four
body-style,convertible,convertible,hatchback,sedan,sedan
drive-wheels,rwd,rwd,rwd,fwd,4wd
engine-location,front,front,front,front,front
wheel-base,88.6,88.6,94.5,99.8,99.4


## Exploratory Data Analysis

In [4]:
df.isna().sum()

symboling            0
normalized-losses    0
make                 0
fuel-type            0
aspiration           0
num-of-doors         0
body-style           0
drive-wheels         0
engine-location      0
wheel-base           0
length               0
width                0
height               0
curb-weight          0
engine-type          0
num-of-cylinders     0
engine-size          0
fuel-system          0
bore                 0
stroke               0
compression-ratio    0
horsepower           0
peak-rpm             0
city-mpg             0
highway-mpg          0
price                0
dtype: int64

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          205 non-null    int64  
 1   normalized-losses  205 non-null    object 
 2   make               205 non-null    object 
 3   fuel-type          205 non-null    object 
 4   aspiration         205 non-null    object 
 5   num-of-doors       205 non-null    object 
 6   body-style         205 non-null    object 
 7   drive-wheels       205 non-null    object 
 8   engine-location    205 non-null    object 
 9   wheel-base         205 non-null    float64
 10  length             205 non-null    float64
 11  width              205 non-null    float64
 12  height             205 non-null    float64
 13  curb-weight        205 non-null    int64  
 14  engine-type        205 non-null    object 
 15  num-of-cylinders   205 non-null    object 
 16  engine-size        205 non

### 1. symboling          

In [6]:
df['symboling']

0      3
1      3
2      1
3      2
4      2
      ..
200   -1
201   -1
202   -1
203   -1
204   -1
Name: symboling, Length: 205, dtype: int64

### 2. normalized-losses

In [7]:
df['normalized-losses']

0        ?
1        ?
2        ?
3      164
4      164
      ... 
200     95
201     95
202     95
203     95
204     95
Name: normalized-losses, Length: 205, dtype: object

In [8]:
df['normalized-losses'].replace({'?':np.nan},inplace = True)

In [9]:
df.replace({'?':np.nan},inplace = True)

In [10]:
df.isna().sum()

symboling             0
normalized-losses    41
make                  0
fuel-type             0
aspiration            0
num-of-doors          2
body-style            0
drive-wheels          0
engine-location       0
wheel-base            0
length                0
width                 0
height                0
curb-weight           0
engine-type           0
num-of-cylinders      0
engine-size           0
fuel-system           0
bore                  4
stroke                4
compression-ratio     0
horsepower            2
peak-rpm              2
city-mpg              0
highway-mpg           0
price                 4
dtype: int64

In [11]:
df.isna().mean() * 100

symboling             0.00000
normalized-losses    20.00000
make                  0.00000
fuel-type             0.00000
aspiration            0.00000
num-of-doors          0.97561
body-style            0.00000
drive-wheels          0.00000
engine-location       0.00000
wheel-base            0.00000
length                0.00000
width                 0.00000
height                0.00000
curb-weight           0.00000
engine-type           0.00000
num-of-cylinders      0.00000
engine-size           0.00000
fuel-system           0.00000
bore                  1.95122
stroke                1.95122
compression-ratio     0.00000
horsepower            0.97561
peak-rpm              0.97561
city-mpg              0.00000
highway-mpg           0.00000
price                 1.95122
dtype: float64

In [12]:
df['normalized-losses'] = df['normalized-losses'].astype(float)
df['normalized-losses'].mean()

122.0

In [13]:
df['normalized-losses'].median()

115.0

In [14]:
df['normalized-losses'] = df['normalized-losses'].fillna(df['normalized-losses'].median()).astype(int)
df['normalized-losses']

0      115
1      115
2      115
3      164
4      164
      ... 
200     95
201     95
202     95
203     95
204     95
Name: normalized-losses, Length: 205, dtype: int32

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          205 non-null    int64  
 1   normalized-losses  205 non-null    int32  
 2   make               205 non-null    object 
 3   fuel-type          205 non-null    object 
 4   aspiration         205 non-null    object 
 5   num-of-doors       203 non-null    object 
 6   body-style         205 non-null    object 
 7   drive-wheels       205 non-null    object 
 8   engine-location    205 non-null    object 
 9   wheel-base         205 non-null    float64
 10  length             205 non-null    float64
 11  width              205 non-null    float64
 12  height             205 non-null    float64
 13  curb-weight        205 non-null    int64  
 14  engine-type        205 non-null    object 
 15  num-of-cylinders   205 non-null    object 
 16  engine-size        205 non

## 3. make

In [16]:
df['make']

0      alfa-romero
1      alfa-romero
2      alfa-romero
3             audi
4             audi
          ...     
200          volvo
201          volvo
202          volvo
203          volvo
204          volvo
Name: make, Length: 205, dtype: object

In [17]:
df['make'].nunique()

22

In [18]:
df['make'].value_counts()

toyota           32
nissan           18
mazda            17
mitsubishi       13
honda            13
volkswagen       12
subaru           12
peugot           11
volvo            11
dodge             9
mercedes-benz     8
bmw               8
audi              7
plymouth          7
saab              6
porsche           5
isuzu             4
jaguar            3
chevrolet         3
alfa-romero       3
renault           2
mercury           1
Name: make, dtype: int64

# 4. fuel-type

In [19]:
df['fuel-type'].value_counts()

gas       185
diesel     20
Name: fuel-type, dtype: int64

In [20]:
df['fuel-type'].replace({'gas':1,'diesel':0},inplace = True)
df['fuel-type'].value_counts()

1    185
0     20
Name: fuel-type, dtype: int64

In [96]:
fuel_type_values = {'gas':1,'diesel':0}

In [66]:
df['fuel-type']

0      1
1      1
2      1
3      1
4      1
      ..
200    1
201    1
202    1
203    0
204    1
Name: fuel-type, Length: 205, dtype: int64

## 5. aspiration

In [21]:
df['aspiration']

0        std
1        std
2        std
3        std
4        std
       ...  
200      std
201    turbo
202      std
203    turbo
204    turbo
Name: aspiration, Length: 205, dtype: object

In [22]:
df['aspiration'].value_counts()

std      168
turbo     37
Name: aspiration, dtype: int64

In [23]:
df['aspiration'].value_counts().to_dict()

{'std': 168, 'turbo': 37}

In [24]:
df['aspiration'].replace({'std': 0, 'turbo': 1},inplace = True)
# df.info()

## 6. num-of-doors

In [25]:
df['num-of-doors']

0       two
1       two
2       two
3      four
4      four
       ... 
200    four
201    four
202    four
203    four
204    four
Name: num-of-doors, Length: 205, dtype: object

In [26]:
df['num-of-doors'].value_counts()

four    114
two      89
Name: num-of-doors, dtype: int64

In [27]:
df['num-of-doors'].value_counts().to_dict()

{'four': 114, 'two': 89}

In [28]:
df['num-of-doors'].replace({'four': 4, 'two': 2},inplace= True)

In [29]:
df['num-of-doors'].fillna(df['num-of-doors'].mode()[0],inplace = True)
df['num-of-doors'].unique()

array([2., 4.])

In [97]:
num_of_doors_values = {'four': 4, 'two': 2}

## 7. body-style

In [30]:
df['body-style']

0      convertible
1      convertible
2        hatchback
3            sedan
4            sedan
          ...     
200          sedan
201          sedan
202          sedan
203          sedan
204          sedan
Name: body-style, Length: 205, dtype: object

In [31]:
df['body-style'].value_counts()

sedan          96
hatchback      70
wagon          25
hardtop         8
convertible     6
Name: body-style, dtype: int64

In [32]:
df = pd.get_dummies(df, columns=['body-style'])
df

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,drive-wheels,engine-location,wheel-base,length,...,horsepower,peak-rpm,city-mpg,highway-mpg,price,body-style_convertible,body-style_hardtop,body-style_hatchback,body-style_sedan,body-style_wagon
0,3,115,alfa-romero,1,0,2.0,rwd,front,88.6,168.8,...,111,5000,21,27,13495,1,0,0,0,0
1,3,115,alfa-romero,1,0,2.0,rwd,front,88.6,168.8,...,111,5000,21,27,16500,1,0,0,0,0
2,1,115,alfa-romero,1,0,2.0,rwd,front,94.5,171.2,...,154,5000,19,26,16500,0,0,1,0,0
3,2,164,audi,1,0,4.0,fwd,front,99.8,176.6,...,102,5500,24,30,13950,0,0,0,1,0
4,2,164,audi,1,0,4.0,4wd,front,99.4,176.6,...,115,5500,18,22,17450,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,95,volvo,1,0,4.0,rwd,front,109.1,188.8,...,114,5400,23,28,16845,0,0,0,1,0
201,-1,95,volvo,1,1,4.0,rwd,front,109.1,188.8,...,160,5300,19,25,19045,0,0,0,1,0
202,-1,95,volvo,1,0,4.0,rwd,front,109.1,188.8,...,134,5500,18,23,21485,0,0,0,1,0
203,-1,95,volvo,0,1,4.0,rwd,front,109.1,188.8,...,106,4800,26,27,22470,0,0,0,1,0


# 8. drive-wheels

In [33]:
df['drive-wheels'].value_counts()

fwd    120
rwd     76
4wd      9
Name: drive-wheels, dtype: int64

In [34]:
df['drive-wheels'].value_counts().to_dict()

{'fwd': 120, 'rwd': 76, '4wd': 9}

In [35]:
df['drive-wheels'].replace({'fwd': 0, 'rwd': 1, '4wd': 2},inplace = True)
df['drive-wheels']

0      1
1      1
2      1
3      0
4      2
      ..
200    1
201    1
202    1
203    1
204    1
Name: drive-wheels, Length: 205, dtype: int64

# 9. engine-location

In [36]:
df['engine-location'].value_counts()

front    202
rear       3
Name: engine-location, dtype: int64

In [37]:
df['engine-location'].replace({'front' :1,'rear':0},inplace = True)

## 10. wheel-base

In [38]:
df['wheel-base']

0       88.6
1       88.6
2       94.5
3       99.8
4       99.4
       ...  
200    109.1
201    109.1
202    109.1
203    109.1
204    109.1
Name: wheel-base, Length: 205, dtype: float64

## 11.engine-type

In [39]:
df['engine-type']

0      dohc
1      dohc
2      ohcv
3       ohc
4       ohc
       ... 
200     ohc
201     ohc
202    ohcv
203     ohc
204     ohc
Name: engine-type, Length: 205, dtype: object

In [40]:
df['engine-type'].value_counts()

ohc      148
ohcf      15
ohcv      13
dohc      12
l         12
rotor      4
dohcv      1
Name: engine-type, dtype: int64

In [41]:
df = pd.get_dummies(df,columns=['engine-type'])
df

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,drive-wheels,engine-location,wheel-base,length,...,body-style_hatchback,body-style_sedan,body-style_wagon,engine-type_dohc,engine-type_dohcv,engine-type_l,engine-type_ohc,engine-type_ohcf,engine-type_ohcv,engine-type_rotor
0,3,115,alfa-romero,1,0,2.0,1,1,88.6,168.8,...,0,0,0,1,0,0,0,0,0,0
1,3,115,alfa-romero,1,0,2.0,1,1,88.6,168.8,...,0,0,0,1,0,0,0,0,0,0
2,1,115,alfa-romero,1,0,2.0,1,1,94.5,171.2,...,1,0,0,0,0,0,0,0,1,0
3,2,164,audi,1,0,4.0,0,1,99.8,176.6,...,0,1,0,0,0,0,1,0,0,0
4,2,164,audi,1,0,4.0,2,1,99.4,176.6,...,0,1,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,95,volvo,1,0,4.0,1,1,109.1,188.8,...,0,1,0,0,0,0,1,0,0,0
201,-1,95,volvo,1,1,4.0,1,1,109.1,188.8,...,0,1,0,0,0,0,1,0,0,0
202,-1,95,volvo,1,0,4.0,1,1,109.1,188.8,...,0,1,0,0,0,0,0,0,1,0
203,-1,95,volvo,0,1,4.0,1,1,109.1,188.8,...,0,1,0,0,0,0,1,0,0,0


## 12. num-of-cylinders

In [42]:
df['num-of-cylinders'].value_counts()

four      159
six        24
five       11
eight       5
two         4
three       1
twelve      1
Name: num-of-cylinders, dtype: int64

In [43]:
print(df['num-of-cylinders'].value_counts().to_dict())

{'four': 159, 'six': 24, 'five': 11, 'eight': 5, 'two': 4, 'three': 1, 'twelve': 1}


In [44]:
df['num-of-cylinders'].replace({'four': 4, 'six': 6, 'five': 5, 'eight': 8, 
                                'two': 2, 'three': 3, 'twelve': 12},inplace = True)

In [109]:
# df.info()
num_of_cylinders_values = {'four': 4, 'six': 6, 'five': 5, 'eight': 8, 
                                'two': 2, 'three': 3, 'twelve': 12}

# 12. fuel-system

In [46]:
df['fuel-system'].value_counts()

mpfi    94
2bbl    66
idi     20
1bbl    11
spdi     9
4bbl     3
mfi      1
spfi     1
Name: fuel-system, dtype: int64

In [47]:
df = pd.get_dummies(df,columns=['fuel-system'])
df

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,drive-wheels,engine-location,wheel-base,length,...,engine-type_ohcv,engine-type_rotor,fuel-system_1bbl,fuel-system_2bbl,fuel-system_4bbl,fuel-system_idi,fuel-system_mfi,fuel-system_mpfi,fuel-system_spdi,fuel-system_spfi
0,3,115,alfa-romero,1,0,2.0,1,1,88.6,168.8,...,0,0,0,0,0,0,0,1,0,0
1,3,115,alfa-romero,1,0,2.0,1,1,88.6,168.8,...,0,0,0,0,0,0,0,1,0,0
2,1,115,alfa-romero,1,0,2.0,1,1,94.5,171.2,...,1,0,0,0,0,0,0,1,0,0
3,2,164,audi,1,0,4.0,0,1,99.8,176.6,...,0,0,0,0,0,0,0,1,0,0
4,2,164,audi,1,0,4.0,2,1,99.4,176.6,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,95,volvo,1,0,4.0,1,1,109.1,188.8,...,0,0,0,0,0,0,0,1,0,0
201,-1,95,volvo,1,1,4.0,1,1,109.1,188.8,...,0,0,0,0,0,0,0,1,0,0
202,-1,95,volvo,1,0,4.0,1,1,109.1,188.8,...,1,0,0,0,0,0,0,1,0,0
203,-1,95,volvo,0,1,4.0,1,1,109.1,188.8,...,0,0,0,0,0,1,0,0,0,0


## 13. bore

In [48]:
df.isna().sum()

symboling                 0
normalized-losses         0
make                      0
fuel-type                 0
aspiration                0
num-of-doors              0
drive-wheels              0
engine-location           0
wheel-base                0
length                    0
width                     0
height                    0
curb-weight               0
num-of-cylinders          0
engine-size               0
bore                      4
stroke                    4
compression-ratio         0
horsepower                2
peak-rpm                  2
city-mpg                  0
highway-mpg               0
price                     4
body-style_convertible    0
body-style_hardtop        0
body-style_hatchback      0
body-style_sedan          0
body-style_wagon          0
engine-type_dohc          0
engine-type_dohcv         0
engine-type_l             0
engine-type_ohc           0
engine-type_ohcf          0
engine-type_ohcv          0
engine-type_rotor         0
fuel-system_1bbl    

In [49]:
df['bore'] = df['bore'].fillna(df['bore'].median()).astype(float)
df['stroke'] = df['stroke'].fillna(df['stroke'].median()).astype(float)
df['horsepower'] = df['horsepower'].fillna(df['horsepower'].median()).astype(float)
df['peak-rpm'] = df['peak-rpm'].fillna(df['peak-rpm'].median()).astype(float)
df['price'] = df['price'].fillna(df['price'].median()).astype(float)

In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 43 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   symboling               205 non-null    int64  
 1   normalized-losses       205 non-null    int32  
 2   make                    205 non-null    object 
 3   fuel-type               205 non-null    int64  
 4   aspiration              205 non-null    int64  
 5   num-of-doors            205 non-null    float64
 6   drive-wheels            205 non-null    int64  
 7   engine-location         205 non-null    int64  
 8   wheel-base              205 non-null    float64
 9   length                  205 non-null    float64
 10  width                   205 non-null    float64
 11  height                  205 non-null    float64
 12  curb-weight             205 non-null    int64  
 13  num-of-cylinders        205 non-null    int64  
 14  engine-size             205 non-null    in

# Feature Selection

In [51]:
df.corr()

Unnamed: 0,symboling,normalized-losses,fuel-type,aspiration,num-of-doors,drive-wheels,engine-location,wheel-base,length,width,...,engine-type_ohcv,engine-type_rotor,fuel-system_1bbl,fuel-system_2bbl,fuel-system_4bbl,fuel-system_idi,fuel-system_mfi,fuel-system_mpfi,fuel-system_spdi,fuel-system_spfi
symboling,1.0,0.457484,0.194311,-0.059866,-0.663595,-0.11115,-0.212471,-0.531954,-0.357612,-0.232919,...,-0.013597,0.24595,-0.037911,-0.034069,0.212471,-0.194311,0.122067,0.012532,0.181939,0.065707
normalized-losses,0.457484,1.0,0.104668,-0.011273,-0.34885,0.133824,0.02151,-0.073709,-0.006837,0.058378,...,0.130717,0.130721,-0.122539,-0.123927,0.112927,-0.104668,0.053844,0.179458,0.052231,-0.012358
fuel-type,0.194311,0.104668,1.0,-0.401397,-0.188496,-0.051874,-0.04007,-0.308346,-0.212679,-0.23388,...,0.085556,0.046383,0.078293,0.226565,0.04007,-1.0,0.02302,0.302574,0.070457,0.02302
aspiration,-0.059866,-0.011273,-0.401397,1.0,0.052803,0.153897,0.057191,0.257611,0.234539,0.300567,...,-0.07007,-0.066203,-0.111748,-0.323378,-0.057191,0.401397,0.14919,-0.050041,0.394703,-0.032857
num-of-doors,-0.663595,-0.34885,-0.188496,0.052803,1.0,-0.00323,0.139129,0.439635,0.385675,0.197735,...,-0.054764,-0.161052,-0.09715,0.013769,-0.139129,0.188496,-0.079932,0.035746,-0.148565,-0.079932
drive-wheels,-0.11115,0.133824,-0.051874,0.153897,-0.00323,1.0,-0.113823,0.366828,0.416076,0.376554,...,0.139453,0.131758,-0.188341,-0.310901,0.113823,0.051874,-0.055378,0.369782,-0.128419,0.065393
engine-location,-0.212471,0.02151,-0.04007,0.057191,0.139129,-0.113823,1.0,0.18779,0.050989,0.051698,...,0.031711,0.017192,0.029019,0.083975,0.014851,0.04007,0.008532,-0.132429,0.026114,0.008532
wheel-base,-0.531954,-0.073709,-0.308346,0.257611,0.439635,0.366828,0.18779,1.0,0.874587,0.795144,...,0.166152,-0.081174,-0.191073,-0.396505,-0.070124,0.308346,-0.033294,0.348891,-0.117359,-0.032129
length,-0.357612,-0.006837,-0.212679,0.234539,0.385675,0.416076,0.050989,0.874587,1.0,0.841118,...,0.244053,-0.057877,-0.29733,-0.487237,-0.049998,0.212679,-0.004831,0.511374,-0.07979,-0.008245
width,-0.232919,0.058378,-0.23388,0.300567,0.197735,0.376554,0.051698,0.795144,0.841118,1.0,...,0.348869,-0.013699,-0.194078,-0.522594,-0.011834,0.23388,0.012832,0.461896,-0.046399,-0.023158


## Train Test Split

In [52]:
df = df.select_dtypes(exclude=object) 
x = df.drop('price',axis = 1)
y = df['price']

In [53]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=34)

In [54]:
x_train.shape

(164, 41)

# Model Training

In [55]:
model = LinearRegression()
model.fit(x_train, y_train)

LinearRegression()

In [56]:
model.intercept_

-5689.181169533058

# Model Evaluation

In [57]:
# Testing Data Evaluation
y_pred = model.predict(x_test)
y_pred[20:25]

array([11145.861593  , 20225.5525414 , 11181.58217364,  8038.91250302,
        7476.14731518])

In [58]:
y_test[20:25]

123     8921.0
178    16558.0
3      13950.0
76      5389.0
79      7689.0
Name: price, dtype: float64

In [59]:
mse = mean_squared_error(y_test, y_pred)
print("MSE :",mse)

rmse = np.sqrt(mse)
print("RMSE :",rmse)

mae = mean_absolute_error(y_test, y_pred)
print("MAE :",mae)

r2 = r2_score(y_test, y_pred)
print('R-Squared :',r2)

MSE : 14957825.67192796
RMSE : 3867.5348313787636
MAE : 2432.667890927147
R-Squared : 0.7148417940528924


In [60]:
# Training Data Evaluation

y_pred_train = model.predict(x_train)
mse = mean_squared_error(y_train, y_pred_train)
print("MSE :",mse)

rmse = np.sqrt(mse)
print("RMSE :",rmse)

mae = mean_absolute_error(y_train, y_pred_train)
print("MAE :",mae)

r2 = r2_score(y_train, y_pred_train)
print('R-Squared :',r2)

MSE : 4932870.406966661
RMSE : 2221.006620198747
MAE : 1583.590277898529
R-Squared : 0.923009311864952


## Testing on Single Row

In [62]:
model.n_features_in_

41

In [68]:
x.columns

Index(['symboling', 'normalized-losses', 'fuel-type', 'aspiration',
       'num-of-doors', 'drive-wheels', 'engine-location', 'wheel-base',
       'length', 'width', 'height', 'curb-weight', 'num-of-cylinders',
       'engine-size', 'bore', 'stroke', 'compression-ratio', 'horsepower',
       'peak-rpm', 'city-mpg', 'highway-mpg', 'body-style_convertible',
       'body-style_hardtop', 'body-style_hatchback', 'body-style_sedan',
       'body-style_wagon', 'engine-type_dohc', 'engine-type_dohcv',
       'engine-type_l', 'engine-type_ohc', 'engine-type_ohcf',
       'engine-type_ohcv', 'engine-type_rotor', 'fuel-system_1bbl',
       'fuel-system_2bbl', 'fuel-system_4bbl', 'fuel-system_idi',
       'fuel-system_mfi', 'fuel-system_mpfi', 'fuel-system_spdi',
       'fuel-system_spfi'],
      dtype='object')

In [124]:
symboling = 3.00
normalized_losses = 134.00
fuel_type = 'gas'
aspiration = 0.00
num_of_doors = 'two'
drive_wheels = 1.00
engine_location = 1.00
wheel_base = 88.60
length = 168.80
width = 64.10
height = 48.80
curb_weight = 2458.00
num_of_cylinders = 'four'
engine_size = 150.00
bore = 4.47
stroke = 2.78
compression_ratio = 9.00
horsepower = 121.00
peak_rpm = 5000.00
city_mpg = 20.00
highway_mpg = 26.00
body_style = "sedan"
engine_type= "ohc"
fuel_system = "mfi"

In [134]:
label_encoded_columns = {"Num_of_Doors" :num_of_doors_values,
                       "num_of_cylinders_values" :num_of_cylinders_values,
                        "fuel_type_values":fuel_type_values,
                        "Columns" : list(x.columns)}
label_encoded_columns

{'Num_of_Doors': {'four': 4, 'two': 2},
 'num_of_cylinders_values': {'four': 4,
  'six': 6,
  'five': 5,
  'eight': 8,
  'two': 2,
  'three': 3,
  'twelve': 12},
 'fuel_type_values': {'gas': 1, 'diesel': 0},
 'Columns': ['symboling',
  'normalized-losses',
  'fuel-type',
  'aspiration',
  'num-of-doors',
  'drive-wheels',
  'engine-location',
  'wheel-base',
  'length',
  'width',
  'height',
  'curb-weight',
  'num-of-cylinders',
  'engine-size',
  'bore',
  'stroke',
  'compression-ratio',
  'horsepower',
  'peak-rpm',
  'city-mpg',
  'highway-mpg',
  'body-style_convertible',
  'body-style_hardtop',
  'body-style_hatchback',
  'body-style_sedan',
  'body-style_wagon',
  'engine-type_dohc',
  'engine-type_dohcv',
  'engine-type_l',
  'engine-type_ohc',
  'engine-type_ohcf',
  'engine-type_ohcv',
  'engine-type_rotor',
  'fuel-system_1bbl',
  'fuel-system_2bbl',
  'fuel-system_4bbl',
  'fuel-system_idi',
  'fuel-system_mfi',
  'fuel-system_mpfi',
  'fuel-system_spdi',
  'fuel-system_s

In [135]:
import json

with open("Label_Encoded_Columns.json",'w') as f:
    json.dump(label_encoded_columns,f)

In [125]:
column_names = x.columns
no_of_doors = num_of_doors_values[num_of_doors]
no_of_cylinders = num_of_cylinders_values[num_of_cylinders]
fuel_type_new =fuel_type_values[fuel_type]
array = np.zeros(len(x.columns),dtype = int)

array[0] = symboling
array[1] =normalized_losses
array[2] =fuel_type_new
array[3] =aspiration
array[4] = no_of_doors
array[5] =drive_wheels 
array[6] =engine_location 
array[7] =wheel_base
array[8] =length 
array[9] =width 
array[10] =height 
array[11] =curb_weight 
array[12] =no_of_cylinders
array[13] =engine_size 
array[14] =bore 
array[15] =stroke
array[16] =compression_ratio 
array[17] =horsepower
array[18] =peak_rpm 
array[19] =city_mpg
array[20] =highway_mpg 

body_style = "body-style_"+"sedan"
engine_type= "engine-type_"+"ohc"
fuel_system = "fuel-system_"+"mfi"

body_style_index = np.where(column_names == body_style)[0][0]
engine_type_index = np.where(column_names == engine_type)[0][0]
fuel_system_index = np.where(column_names == fuel_system)[0][0]

array[body_style_index] = 1 
array[engine_type_index] = 1
array[fuel_system_index] = 1 

predicted_price = np.around(model.predict([array])[0],2)
print("predicted_price of car is :",predicted_price)

predicted_price of car is : 17171.42


In [130]:
import pickle

with open("Linear_Model.pkl",'wb') as f:
    pickle.dump(model,f)