In [1]:
import cv2
import os
import base64
import requests
import pickle
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
train_dir = r'Cars Dataset\train'
test_dir = r'Cars Dataset\test'
Dataset = r'Cars Dataset'

In [3]:
url = "http://localhost:8080/api/gethog"

# car brand name
carBrandName = ['Audi', 'Hyundai Creta', 'Mahindra Scorpio', 'Rolls Royce','Swift', 'Tata Safari', 'Toyota Innova']

def img2Hog(img):
    v, buffer = cv2.imencode(".jpg", img)
    img_str = base64.b64encode(buffer)
    data = "image data,"+str.split(str(img_str),"'")[1]
    response = requests.get(url, json={"img":data})
    
    return response.json()

In [4]:
# Read data
def readData(path):
    response = []
    amount = 0
    for brandFile in os.listdir(path):
        count = 0
        for imgName in os.listdir(path + '/' + brandFile):
            img_file_name = path + '/' + brandFile + "/" + imgName
            img = cv2.imread(img_file_name)
            res = img2Hog(img)
            hog = list(res["hog"])
            hog.append(carBrandName.index(brandFile))
            response.append(hog)
            count = count + 1
        print(str((brandFile))+ ': have "'+str(count)+'" cars.')
        amount = amount + count
    print("all amount : ",amount)
    return response

In [5]:
listDataTrain = readData(train_dir)

Audi: have "814" cars.
Hyundai Creta: have "271" cars.
Mahindra Scorpio: have "316" cars.
Rolls Royce: have "311" cars.
Swift: have "424" cars.
Tata Safari: have "441" cars.
Toyota Innova: have "775" cars.
all amount :  3352


In [6]:
listDataTest = readData(test_dir)

Audi: have "199" cars.
Hyundai Creta: have "67" cars.
Mahindra Scorpio: have "75" cars.
Rolls Royce: have "74" cars.
Swift: have "102" cars.
Tata Safari: have "106" cars.
Toyota Innova: have "190" cars.
all amount :  813


In [7]:
# save data to pkl
def savePkl(filename ,path):
    write_path = filename + ".pkl"
    pickle.dump(path, open(write_path,"wb"))
    print("data preparation is done")

# load data to pkl   
def loadPkl(filename):
    dataset = pickle.load(open(filename + '.pkl','rb'))
    return dataset

In [8]:
# savePkl โดยใช้ path ของโฟลเดอร์ train
savePkl('train_data',listDataTrain)

data preparation is done


In [9]:
# savePkl โดยใช้ path ของโฟลเดอร์ train
savePkl('test_data',listDataTest)

data preparation is done


In [10]:
# loadPkl โดยส่งชื่อ file ไป
dataset_train = loadPkl('train_data')
print("Data train : ",len(dataset_train))
dataset_test = loadPkl('test_data')
print("Data test : ",len(dataset_test))

Data train :  3352
Data test :  813


In [11]:
# convert list data train to array
train_arr = np.array(dataset_train)
x_train = train_arr[:,0:-1]
y_train = train_arr[:,-1]

In [12]:
# convert list data test to array
test_arr = np.array(dataset_test)
x_test = test_arr[:,0:-1]
y_test = test_arr[:,-1]

In [13]:
# train data with Decision Tree
clf = DecisionTreeClassifier()
clf.fit(x_train,y_train)

In [14]:
# test efficiency data
y_pred = clf.predict(x_test)
print("Accuracy:",accuracy_score(y_test, y_pred)*100)
print("Confusion Matrix : \n",confusion_matrix(y_test,y_pred))

Accuracy: 47.601476014760145
Confusion Matrix : 
 [[ 73  12  16  24  25  21  28]
 [ 10  38   6   4   0   3   6]
 [ 14   3  27   9   2   7  13]
 [ 24   4   9  12   2   8  15]
 [  9   9   8   6  50   4  16]
 [ 11   2   3  10   6  66   8]
 [ 19  13   6   7  13  11 121]]


In [15]:
# save model file with pickle
path_model ='carbrandmodel.pkl'
pickle.dump(clf, open(path_model,'wb'))