## Import the libraries

In [1]:
import numpy as np
import pandas as pd
from joblib import load

## Read the dataset used to compare against the model's predictions

In [2]:
# Reference table; displayed here and used later to look up the recorded price of row 0.
dataset = pd.read_csv(filepath_or_buffer='./dataSet/main_data.csv')
dataset

Unnamed: 0,model,year,transmission,mileage,fuelType,tax,mpg,engineSize,price
0,GT86,2016,Manual,24089,Petrol,265.0,36.2,2.0,16000
1,GT86,2017,Manual,18615,Petrol,145.0,36.2,2.0,15995
2,GT86,2015,Manual,27469,Petrol,265.0,36.2,2.0,13998
3,GT86,2017,Manual,14736,Petrol,150.0,36.2,2.0,18998
4,GT86,2017,Manual,36284,Petrol,145.0,36.2,2.0,17498
...,...,...,...,...,...,...,...,...,...
6733,IQ,2011,Automatic,30000,Petrol,20.0,58.9,1.0,5500
6734,Urban Cruiser,2011,Manual,36154,Petrol,125.0,50.4,1.3,4985
6735,Urban Cruiser,2012,Manual,46000,Diesel,125.0,57.6,1.4,4995
6736,Urban Cruiser,2011,Manual,60700,Petrol,125.0,50.4,1.3,3995


## Read test data and convert to numpy 2D array

In [3]:
# Test inputs (X) and true values (y): each CSV is read and converted to a NumPy array in one step.
X_test = np.array(pd.read_csv('./Test_data/X_test_deploy.csv'))
y_test = np.array(pd.read_csv('./Test_data/y_test_deploy.csv'))

## Read train data and convert to numpy 2D array

In [4]:
# Training inputs (X) and true values (y): each CSV is read and converted to a NumPy array in one step.
# NOTE(review): neither array is referenced by the later cells visible in this notebook.
X_train = np.array(pd.read_csv('./Train_data/X_train_deploy.csv'))
y_train = np.array(pd.read_csv('./Train_data/y_train_deploy.csv'))

## Load the model and score it on the test data

In [5]:
# Restore the persisted estimator from disk, then print its score on the test arrays.
# NOTE(review): joblib.load unpickles arbitrary objects; only load files from a trusted source.
final_model = load(filename='./regressor_random_forest.joblib')
print(final_model.score(X_test, y_test))

0.9660568891880978


## Read X features and convert to numpy 2D array

In [6]:
# Un-encoded feature table (strings and numbers mixed); the encoder is fitted on it in a later cell.
X_deploy = np.array(pd.read_csv('./Models/X_deploy.csv'))

print(X_deploy)

[[' GT86' 2016 'Manual' ... 265.0 36.2 2.0]
 [' GT86' 2017 'Manual' ... 145.0 36.2 2.0]
 [' GT86' 2015 'Manual' ... 265.0 36.2 2.0]
 ...
 [' Urban Cruiser' 2012 'Manual' ... 125.0 57.6 1.4]
 [' Urban Cruiser' 2011 'Manual' ... 125.0 50.4 1.3]
 [' Urban Cruiser' 2011 'Manual' ... 125.0 50.4 1.3]]


## Encode the categorical variables of the input

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the columns at positions 0, 2 and 4 and pass every other column through.
# Judging by the printed X_deploy rows these are the string columns (model, transmission,
# fuelType) -- TODO confirm against the CSV header.
ct = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(), [0, 2, 4]),
    ],
    remainder='passthrough',
)

# Fitting is the purpose here: `ct` is reused later to transform new input.
# The transformed matrix itself is only displayed as the cell output.
ct.fit_transform(X_deploy)

<6738x31 sparse matrix of type '<class 'numpy.float64'>'
	with 52108 stored elements in Compressed Sparse Row format>

## Predict function 

In [8]:
def predict(model, input_df):
    """Return the predictions of ``model`` for ``input_df``.

    Parameters
    ----------
    model
        Any object exposing a ``predict`` method.
    input_df
        The input passed unchanged to ``model.predict``.

    Returns
    -------
    Whatever ``model.predict(input_df)`` returns.
    """
    return model.predict(input_df)

## Take the data for comparison 

In [9]:
# Show the first row of the reference table; its values are re-entered by hand as the test input below.
dataset.iloc[0]

model             GT86
year              2016
transmission    Manual
mileage          24089
fuelType        Petrol
tax                265
mpg               36.2
engineSize           2
price            16000
Name: 0, dtype: object

## Create new input for testing 

In [10]:
# Feature values for the single car to price. They mirror row 0 of `dataset`;
# the model name keeps its leading space, as in the printed X_deploy rows.
model = ' GT86'
year = 2016
transmission = 'Manual'
mileage = 24089
fuelType = 'Petrol'
tax = 265.0
mpg = 36.2
engineSize = 2.0

# Keyword order fixes the column order of the one-row frame built below.
# Presumably this matches the column order of X_deploy that `ct` was fitted on -- TODO confirm.
input_dict = dict(
    model=model,
    year=year,
    transmission=transmission,
    mileage=mileage,
    fuelType=fuelType,
    tax=tax,
    mpg=mpg,
    engineSize=engineSize,
)

# One-row DataFrame -> NumPy array, then apply the already-fitted encoder and densify the result.
input_df = np.array(pd.DataFrame([input_dict]))
X_input = ct.transform(input_df).toarray()

## Check that the new input has the same number of features as the test data

In [11]:
# The encoded input must have as many columns as the matrix the model was scored on.
print(X_test.shape)
print(X_input.shape)

# Enforce the check instead of relying on a visual comparison of the two printed shapes.
assert X_input.shape[1] == X_test.shape[1], "feature count mismatch between X_input and X_test"

(1348, 31)
(1, 31)


## Display the result

In [12]:
# Predict for the single encoded row and take the first (only) element of the result.
output = predict(model=final_model, input_df=X_input)[0]
# Fixed two-decimal formatting: str(round(x, 2)) drops trailing zeros (e.g. "$15998.5").
output = '${:.2f}'.format(output)

# Recorded price of the same row, printed next to the prediction for comparison.
print("Actual price: ", '$'+str(dataset.iloc[0].price))
print("predict price: ", output)

Actual price:  $16000
predict price:  $15998.5


In [13]:
# Record the NumPy version this notebook was run with.
np.__version__

'1.21.5'

In [14]:
# Record the pandas version this notebook was run with.
pd.__version__

'1.1.5'

In [1]:
# Record the scikit-learn version.
# NOTE(review): this cell's execution count is In [1], which suggests it was run after a kernel
# restart -- confirm it reflects the same environment as the cells above.
import sklearn
sklearn.__version__

'1.0.2'