In [1]:
#pip install opencv-python
import numpy as np
import cv2 as cv
import sys
from matplotlib import pyplot as plt
from img_gist_feature.utils_gist import *
from tqdm import tqdm


print("opencv version:", cv.__version__ )


'''
# load image test
img = cv.imread(r'training/bedroom/0.jpg', flags=cv.IMREAD_UNCHANGED)
cv.imshow("Display window", img)
cv.waitKey()
cv.destroyAllWindows()

print(img.shape) # (200, 267) images have different sizes 
'''

opencv version: 4.5.1


'\n# load image test\nimg = cv.imread(r\'training/bedroom/0.jpg\', flags=cv.IMREAD_UNCHANGED)\ncv.imshow("Display window", img)\ncv.waitKey()\ncv.destroyAllWindows()\n\nprint(img.shape) # (200, 267) images have different sizes \n'

In [2]:
def read_data(path):
    file_name=os.listdir(path)  # get the file names of all the files
    data=[]
    labels=[]
    label_dict = {}
    for idx,fn in enumerate(file_name): 
        label_dict[idx] = fn.lower()  # save the corresponding name of each class 
        im_dirs=path+'/'+fn
        im_path=os.listdir(im_dirs)  # get the image names of all the images 
        for n in im_path:
            im=cv2.imread(im_dirs+'/'+n, flags=cv.IMREAD_UNCHANGED)
            data.append(im)
            labels.append(idx)
    return np.asarray(data), np.asarray(labels), label_dict

training_data, training_label, label_dict = read_data(r'dataset/training')
print(training_data.shape, training_label.shape)
print()
print(label_dict)

(1500,) (1500,)

{0: 'bedroom', 1: 'coast', 2: 'forest', 3: 'highway', 4: 'industrial', 5: 'insidecity', 6: 'kitchen', 7: 'livingroom', 8: 'mountain', 9: 'office', 10: 'opencountry', 11: 'store', 12: 'street', 13: 'suburb', 14: 'tallbuilding'}


In [3]:
def gist_feature(data):
    # generate the GIST features, every image is transformed to a vector with 512 elements
    gist_helper = GistUtils()
    features = []
    for image in tqdm(data):
        features.append(gist_helper.get_gist_vec(image, mode="gray"))
    return np.asarray(features).squeeze()

training_data_gist = gist_feature(training_data)
print(training_data_gist.shape)

100%|██████████████████████████████████████████████████████████████████████████████| 1500/1500 [02:29<00:00, 10.00it/s]

(1500, 512)





In [5]:
# linear SVM
from sklearn.linear_model import SGDClassifier
sgd_clf = SGDClassifier(n_jobs=-1, random_state=0)

# cross validation
from sklearn.model_selection import cross_validate
scoring = ['f1_weighted']
scores = cross_validate(sgd_clf, training_data_gist, training_label, scoring=scoring, cv=5, return_train_score=False)
for key, value in scores.items():
    print(key, ":", value, "\taverage:", value.mean())  

# accuracy [0.51333333 0.54333333 0.56       0.54333333 0.48333333]
# f1_weighted [0.50216976, 0.53358042, 0.54905343, 0.53437037, 0.49624456]

fit_time : [0.30402637 0.34632015 0.32825804 0.24799967 0.23500514] 	average: 0.29232187271118165
score_time : [0.12332201 0.00274587 0.00374389 0.0030005  0.00199294] 	average: 0.026961040496826173
test_f1_weighted : [0.50216976 0.53358042 0.54905343 0.53437037 0.49624456] 	average: 0.523083708065992


In [6]:
# KNN
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier(n_jobs=-1)

# cross validation
from sklearn.model_selection import cross_validate
scoring = ['f1_weighted']
scores = cross_validate(knn_clf, training_data_gist, training_label, scoring=scoring, cv=5, return_train_score=False)
for key, value in scores.items():
    print(key, ":", value, "\taverage:", value.mean())  
# f1_weighted [0.42249245 0.4197886  0.42915389 0.46078416 0.41589014]
# accuracy [0.45666667 0.45       0.46       0.48333333 0.43333333]

fit_time : [0.00399804 0.00300002 0.00300169 0.00400233 0.00600052] 	average: 0.004000520706176758
score_time : [0.12600017 0.12100029 0.12400222 0.11999488 0.12199903] 	average: 0.12259931564331054
test_f1_weighted : [0.42249245 0.4197886  0.42915389 0.46078416 0.41589014] 	average: 0.4296218465347928


In [7]:
# SVM-RBF
from sklearn.svm import SVC
svm_clf = SVC(random_state=0)

# cross validation
from sklearn.model_selection import cross_validate
scoring = ['f1_weighted']
scores = cross_validate(svm_clf, training_data_gist, training_label, scoring=scoring, cv=5, return_train_score=False)
for key, value in scores.items():
    print(key, ":", value, "\taverage:", value.mean())  
# f1_weighted [0.56240514 0.64320757 0.62095    0.63658535 0.56061762]
# accuracy [0.57333333 0.64666667 0.62666667 0.63666667 0.55666667]

fit_time : [0.31899977 0.31999874 0.33400083 0.34000063 0.36200023] 	average: 0.33500003814697266
score_time : [0.18201375 0.17495346 0.20000172 0.20899868 0.1720407 ] 	average: 0.1876016616821289
test_f1_weighted : [0.56240514 0.64320757 0.62095    0.63658535 0.56061762] 	average: 0.6047531364713434


In [8]:
# decision tree
from sklearn.tree import DecisionTreeClassifier
dt_clf = DecisionTreeClassifier(random_state=0)

# cross validation
from sklearn.model_selection import cross_validate
scoring = ['f1_weighted']
scores = cross_validate(dt_clf, training_data_gist, training_label, scoring=scoring, cv=5, return_train_score=False)
for key, value in scores.items():
    print(key, ":", value, "\taverage:", value.mean())  

# f1_weighted [0.27217444 0.29825757 0.2960041  0.33553851 0.23707538]
# accuracy [0.27       0.30666667 0.3        0.33666667 0.24      ]

fit_time : [0.94899726 0.84703493 0.8650322  0.8910346  0.86295891] 	average: 0.8830115795135498
score_time : [0.0010016  0.0010047  0.00199938 0.00100446 0.002002  ] 	average: 0.001402425765991211
test_f1_weighted : [0.27217444 0.29825757 0.2960041  0.33553851 0.23707538] 	average: 0.2878100013424772


In [9]:
# random forest
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier(n_jobs=-1, random_state=0)

# cross validation
from sklearn.model_selection import cross_validate
scoring = ['f1_weighted']
scores = cross_validate(dt_clf, training_data_gist, training_label, scoring=scoring, cv=5, return_train_score=False)
for key, value in scores.items():
    print(key, ":", value, "\taverage:", value.mean())  
# f1_weighted [0.45054782 0.49540636 0.54606433 0.49499881 0.45866629]
# accuracy [0.47       0.51       0.56       0.51333333 0.47333333]

fit_time : [0.87796021 0.848001   0.79900217 0.89397025 0.82999873] 	average: 0.8497864723205566
score_time : [0.00200224 0.00200224 0.00200033 0.00299382 0.00300193] 	average: 0.002400112152099609
test_f1_weighted : [0.27217444 0.29825757 0.2960041  0.33553851 0.23707538] 	average: 0.2878100013424772


In [10]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB

from sklearn.ensemble import RandomForestClassifier
gnb_clf = GaussianNB()

# cross validation
from sklearn.model_selection import cross_validate
scoring = ['f1_weighted', 'accuracy']
scores = cross_validate(gnb_clf, training_data_gist, training_label, scoring=scoring, cv=5, return_train_score=False)
for key, value in scores.items():
    print(key, ":", value, "\taverage:", value.mean())  
# f1_weighted [0.50312962, 0.52984398, 0.56528844, 0.54532902, 0.48004291]
# accuracy [0.51      , 0.53      , 0.57      , 0.55      , 0.47333333]

fit_time : [0.01400065 0.01600027 0.01399827 0.01199985 0.01399803] 	average: 0.013999414443969727
score_time : [0.03499961 0.03300166 0.03400159 0.03704023 0.03200221] 	average: 0.034209060668945315
test_f1_weighted : [0.50312962 0.52984398 0.56528844 0.54532902 0.48004291] 	average: 0.5247267921218752
test_accuracy : [0.51       0.53       0.57       0.55       0.47333333] 	average: 0.5266666666666666


fine tune the best model: RBF SVM

In [11]:
# SVM-RBF
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold


svm_clf = SVC(random_state=0)

parameters = {
    'C': [1, 2, 5, 10],
    'gamma':["scale", "auto"],
    'decision_function_shape': ['ovo', 'ovr']
}

splitting_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
grid_search = GridSearchCV(svm_clf, parameters, cv=splitting_strategy, scoring='f1_weighted', n_jobs=-1, verbose=4)
grid_search.fit(training_data_gist, training_label)
print(grid_search.best_estimator_)
print(grid_search.best_score_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
SVC(C=5, decision_function_shape='ovo', random_state=0)
0.644572240260597


In [12]:
best_clf = grid_search.best_estimator_  # best classifier from grid search

In [14]:
def read_test_data(path):
    data = []
    index = []
    image_path=os.listdir(path)
    for image_name in image_path:
        image = cv2.imread(path + '/'+ image_name, flags=cv.IMREAD_UNCHANGED)
        data.append(image)
        index.append(image_name)
    return np.asarray(data), index

test_data, test_data_index = read_test_data(r'dataset/testing')

In [15]:
test_data_gist = gist_feature(test_data)

100%|██████████████████████████████████████████████████████████████████████████████| 2985/2985 [05:29<00:00,  9.06it/s]


In [16]:
pred = best_clf.predict(test_data_gist)

# save the prediciton results
with open("test.txt","w") as f:
    for i in range(len(test_data_index)):
        f.write(test_data_index[i] + ' ' + label_dict[pred[i]] + '\n')