# **Car Price Prediction**


## Importing Libraries


In [74]:
import numpy as np
import pandas as pd
from math import trunc

## Importing Dataset

In [75]:
# Read the raw car listings from the CSV that sits next to the notebook,
# then preview the first rows to check the columns look as expected.
csv_path = 'car_data.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner


In [76]:
# Count missing values in the target column (isna is the same check as isnull).
dataset["selling_price"].isna().sum()

0

In [77]:
# Frequency of each transmission category across all rows.
dataset.loc[:, "transmission"].value_counts()

Manual       3892
Automatic     448
Name: transmission, dtype: int64

In [78]:
# Frequency of each ownership category across all rows.
dataset.loc[:, "owner"].value_counts()

First Owner             2832
Second Owner            1106
Third Owner              304
Fourth & Above Owner      81
Test Drive Car            17
Name: owner, dtype: int64

## Encoding the Data

In [79]:
# Select the model inputs and the target by column NAME instead of by
# position. Positional indices silently pick the wrong columns if the CSV
# gains, loses or reorders a column; names fail loudly with a KeyError.
# Column order of X is relied on by later cells:
#   X[:, 0]=year, X[:, 1]=km_driven, X[:, 2]=fuel,
#   X[:, 3]=transmission, X[:, 4]=owner
FEATURE_COLUMNS = ["year", "km_driven", "fuel", "transmission", "owner"]
TARGET_COLUMN = "selling_price"

X = dataset[FEATURE_COLUMNS].values  # mixed numeric/string columns -> object array
y = dataset[TARGET_COLUMN].values

In [80]:
X

array([[2007, 70000, 'Petrol', 'Manual', 'First Owner'],
       [2007, 50000, 'Petrol', 'Manual', 'First Owner'],
       [2012, 100000, 'Diesel', 'Manual', 'First Owner'],
       ...,
       [2009, 83000, 'Petrol', 'Manual', 'Second Owner'],
       [2016, 90000, 'Diesel', 'Manual', 'First Owner'],
       [2016, 40000, 'Petrol', 'Manual', 'First Owner']], dtype=object)

In [81]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the three categorical feature columns of X.
#
# Each encoder is fitted on the original string column of `dataset`, not on
# X itself. X is overwritten in place by this cell, so fitting on X would make
# the cell non-idempotent: a second run would refit the encoders on the
# already-encoded integers and a later lb.transform(["LPG"]) would fail with
# an unseen-label error. Fitting on `dataset` gives the same encoders and the
# same X no matter how many times the cell is executed.
#
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels; OrdinalEncoder is the feature-side equivalent. LabelEncoder is kept
# because later cells call lb.transform([...])[0] on these objects.
lb = LabelEncoder()   # fuel
X[:,2] = lb.fit_transform(dataset["fuel"])
lb1 = LabelEncoder()  # transmission
X[:,3] = lb1.fit_transform(dataset["transmission"])
lb2 = LabelEncoder()  # owner
X[:,4] = lb2.fit_transform(dataset["owner"])

In [82]:
X


array([[2007, 70000, 4, 1, 0],
       [2007, 50000, 4, 1, 0],
       [2012, 100000, 1, 1, 0],
       ...,
       [2009, 83000, 4, 1, 2],
       [2016, 90000, 1, 1, 0],
       [2016, 40000, 4, 1, 0]], dtype=object)

## Splitting the Data into Training and Test Set

In [83]:
from sklearn.model_selection import train_test_split

# Hold out 5% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
split_options = dict(test_size=0.05, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Quick look at the encoded training features.
print(X_train)

[[2016 36000 1 1 2]
 [2014 70000 4 1 0]
 [2016 23000 4 1 0]
 ...
 [2016 22000 4 1 0]
 [2015 70000 1 1 0]
 [2013 62000 4 1 2]]


## Training the model

In [84]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 300 trees with a fixed seed so training is reproducible.
forest_options = {"n_estimators": 300, "random_state": 0}
regressor = RandomForestRegressor(**forest_options)
regressor.fit(X=X_train, y=y_train)

In [85]:
# For a regressor, .score() returns the coefficient of determination (R^2)
# on the held-out set, not a classification accuracy; it is shown here
# scaled to a percentage.
accuracy = regressor.score(X_test, y_test)
score_percent = accuracy*100
print(score_percent, '%')

85.7140885824745 %


## Performing a Test on a Given Input

In [86]:
# One hand-written car in the same column order as X:
# year, km_driven, fuel, transmission, owner.
raw_car = [2017, 7000, "LPG", "Automatic", "Test Drive Car"]

# Keep the two numeric fields as they are and push each categorical field
# through the encoder that was fitted on its column.
new_data = raw_car[:2] + [
    encoder.transform([label])[0]
    for encoder, label in zip((lb, lb1, lb2), raw_car[2:])
]


In [87]:
# Show the encoded feature row, then predict its price.
print(new_data)
predicted_price = regressor.predict([new_data])  # predict() expects a 2-D batch

# Drop the fractional part of the single prediction before reporting it.
whole_price = trunc(predicted_price[0])
print(f"the predicted price for the specified car is : {whole_price} birr")

[2017, 7000, 3, 0, 3]
the predicted price for the specified car is : 455646 birr


## Saving the Model and Encoder using Pickle

In [88]:
import pickle

# Persist the trained model. The context manager guarantees the file is
# flushed and closed even if pickling raises; the original left the handle
# returned by open() unclosed.
with open('regressor.pkl', 'wb') as model_file:
    pickle.dump(regressor, model_file)

# Persist the fitted label encoders as well. The model only understands the
# integer codes these encoders produced, so anything that loads
# regressor.pkl needs the same encoders to prepare its inputs.
# SECURITY: only unpickle these files from a trusted source; pickle.load can
# execute arbitrary code.
with open('encoders.pkl', 'wb') as encoder_file:
    pickle.dump({'fuel': lb, 'transmission': lb1, 'owner': lb2}, encoder_file)