## Car Evaluation

#### Importing Packages

#### The data in this project was obtained from Kaggle.

In [137]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder


In [138]:
data = pd.read_csv("car_evaluation.csv")
data.head()

Unnamed: 0,buying_price,maintenance_cost,number_of_doors,number_of_persons,lug_boot,safety,decision
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [139]:
df = pd.DataFrame(data)

###### I converted some of our data to numerical data type because a portion of it was of object type. I used Label Encoding for this purpose because the categories I wanted to convert indicated an order

In [140]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['buying_price'])


print(encoded_data)

[3 3 3 ... 1 1 1]


In [141]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['maintenance_cost'])


print(encoded_data)

[3 3 3 ... 1 1 1]


In [142]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['number_of_doors'])

print(encoded_data)

[0 0 0 ... 3 3 3]


In [143]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['number_of_persons'])

print(encoded_data)

[0 0 0 ... 2 2 2]


In [144]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['safety'])


print(encoded_data)

[1 2 0 ... 1 2 0]


In [145]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['lug_boot'])


print(encoded_data)

[2 2 2 ... 0 0 0]


In [146]:
encoder = LabelEncoder()
encoded_data = encoder.fit_transform(data['decision'])


print(encoded_data)

[2 2 2 ... 2 1 3]


In [147]:
encoded_buying_price = encoder.fit_transform(data['buying_price'])
encoded_maintenance_cost = encoder.fit_transform(data['maintenance_cost'])
encoded_number_of_doors = encoder.fit_transform(data['number_of_doors'])
encoded_number_of_persons = encoder.fit_transform(data['number_of_persons'])
encoded_lug_boot = encoder.fit_transform(data['lug_boot'])
encoded_safety = encoder.fit_transform(data['safety'])
encoded_decision = encoder.fit_transform(data['decision'])

In [148]:
#Adding the transformed data to a new DataFrame.
encoded_data = pd.DataFrame({
    'buying_price_encoded': encoded_buying_price,
    'maintenance_cost_encoded': encoded_maintenance_cost,
    'number_of_doors': encoded_number_of_doors,
    'number_of_persons': encoded_number_of_persons,
    'lug_boot': encoded_lug_boot,
    'safety_encoded': encoded_safety,
    'decision_encoded': encoded_decision
})   

In [149]:
final_data = pd.concat([encoded_data], axis = 1)
print(final_data)

      buying_price_encoded  maintenance_cost_encoded  number_of_doors  \
0                        3                         3                0   
1                        3                         3                0   
2                        3                         3                0   
3                        3                         3                0   
4                        3                         3                0   
...                    ...                       ...              ...   
1723                     1                         1                3   
1724                     1                         1                3   
1725                     1                         1                3   
1726                     1                         1                3   
1727                     1                         1                3   

      number_of_persons  lug_boot  safety_encoded  decision_encoded  
0                     0         2               1    

In [150]:
y = final_data.decision_encoded.values
x_raw_data = final_data.drop(["decision_encoded"], axis =1) 
#I eliminated dependent variables

x.head()

Unnamed: 0,buying_price_encoded,maintenance_cost_encoded,number_of_doors,number_of_persons,lug_boot,safety_encoded
0,1.0,1.0,0.0,0.0,1.0,0.5
1,1.0,1.0,0.0,0.0,1.0,1.0
2,1.0,1.0,0.0,0.0,1.0,0.0
3,1.0,1.0,0.0,0.0,0.5,0.5
4,1.0,1.0,0.0,0.0,0.5,1.0


#### normalization

In [151]:
x = (x_raw_data - np.min(x_raw_data)) / (np.max(x_raw_data) - np.min(x_raw_data))

print("Raw data before normalization:\n")
print(x_raw_data.head())

print("Data for artificial intelligence training after normalization:\n")
print(x.head())

Raw data before normalization:

   buying_price_encoded  maintenance_cost_encoded  number_of_doors  \
0                     3                         3                0   
1                     3                         3                0   
2                     3                         3                0   
3                     3                         3                0   
4                     3                         3                0   

   number_of_persons  lug_boot  safety_encoded  
0                  0         2               1  
1                  0         2               2  
2                  0         2               0  
3                  0         1               1  
4                  0         1               2  
Data for artificial intelligence training after normalization:

   buying_price_encoded  maintenance_cost_encoded  number_of_doors  \
0                   1.0                       1.0              0.0   
1                   1.0                       1.0

  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)


In [152]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 1)

In [153]:
counter = 1 
for k in range(1,15):
    knn_new = KNeighborsClassifier(n_neighbors = k)
    knn_new.fit(x_train, y_train)
    print(counter, " ", "Accuracy rate: %", knn_new.score(x_test, y_test)*100)
    counter += 1

1   Accuracy rate: % 89.01734104046243
2   Accuracy rate: % 88.4393063583815
3   Accuracy rate: % 93.64161849710982
4   Accuracy rate: % 90.7514450867052
5   Accuracy rate: % 90.7514450867052
6   Accuracy rate: % 90.17341040462428
7   Accuracy rate: % 91.90751445086705
8   Accuracy rate: % 89.01734104046243
9   Accuracy rate: % 91.90751445086705
10   Accuracy rate: % 91.90751445086705
11   Accuracy rate: % 92.48554913294798
12   Accuracy rate: % 92.48554913294798
13   Accuracy rate: % 92.48554913294798
14   Accuracy rate: % 90.7514450867052


In [154]:
knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(x_train, y_train)
prediction = knn.predict(x_test)
print("For k = 3, the result of our validation test on the test data is", knn.score(x_test, y_test))


For k = 3, the result of our validation test on the test data is 0.9364161849710982


In [158]:
# When I provide the data in the format '4,1,3,0,0,4', it returns a result.
from sklearn.preprocessing import MinMaxScaler


sc = MinMaxScaler()
sc.fit_transform(x_ham_veri)

new_prediction = knn.predict(sc.transform(np.array([[4,1, 3, 0, 0, 4]])))
new_prediction[0]



2

In [None]:
# I'm encountering an error at this step because the machine also needs to perform 'encoding' on the input data I provided. 
#I'm currently working on improving this stage

from sklearn.preprocessing import MinMaxScaler

sc = MinMaxScaler()
sc.fit_transform(x_raw_data)


new_prediction = pd.DataFrame({
    'buying_price_encoded': ['vhigh'],
    'maintenance_cost_encoded': ['high'],
    'number_of_doors': [3],
    'number_of_persons': ['more'],
    'lug_boot': ['big'],
    'safety_encoded':['low']
})   

encoded_new_prediction = encoder.transform(new_prediction['vhigh','high',3, 'more', 'big', 'low'])
new_prediction = knn.predict(sc.transform(np.array([[vhigh, high, 3, more, big, low]])))

print(predictions)