In [8]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier  # MLP is an NN
from sklearn.model_selection import train_test_split
from sklearn import svm
from skimage.feature import hog
import numpy as np
import argparse
import cv2
import os
import matplotlib.pyplot as plt


In [9]:
path_to_a_dataset = r'letters_data_set/A'
path_to_b_dataset = r'letters_data_set/B'
path_to_c_dataset = r'letters_data_set/C'
path_to_d_dataset = r'letters_data_set/D'
path_to_e_dataset = r'letters_data_set/E'
KNN = KNeighborsClassifier(n_neighbors=15,weights='distance')
random_seed=20

In [10]:
def extract_hog_features(img):
    
    img = cv2.resize(img, (32, 32))
    win_size = (32, 32)
    cell_size = (4, 4)
    block_size_in_cells = (2, 2)
    
    block_size = (block_size_in_cells[1] * cell_size[1], block_size_in_cells[0] * cell_size[0])
    block_stride = (cell_size[1], cell_size[0])
    nbins = 12 
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(img)
    return h.flatten()

In [11]:
def load_dataset():
    features = []
    labels = []
    img_filenames_a = os.listdir(path_to_a_dataset)
    img_filenames_b = os.listdir(path_to_b_dataset)
    img_filenames_c = os.listdir(path_to_c_dataset)
    img_filenames_d = os.listdir(path_to_d_dataset)
    img_filenames_e = os.listdir(path_to_e_dataset)


    for i, fn in enumerate(img_filenames_a):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'A'
        labels.append(label)

        path = os.path.join(path_to_a_dataset, fn)
        img = cv2.imread(path)
        
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_a)))
    print("A dataset processing done")
    for i, fn in enumerate(img_filenames_b):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'B'
        labels.append(label)

        path = os.path.join(path_to_b_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_b)))     
    print("B dataset processing done")   
    
    for i, fn in enumerate(img_filenames_c):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'C'
        labels.append(label)

        path = os.path.join(path_to_c_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_c)))     
    print("C dataset processing done")   
    
    for i, fn in enumerate(img_filenames_d):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'D'
        labels.append(label)

        path = os.path.join(path_to_d_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_d)))     
    print("D dataset processing done")   
    
    # for i, fn in enumerate(img_filenames_e):
    #     if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
    #         continue

    #     label = 'E'
    #     labels.append(label)

    #     path = os.path.join(path_to_e_dataset, fn)
    #     img = cv2.imread(path)
    #     features.append(extract_hog_features(img))
        
    #     # show an update every 10 images
    #     if i > 0 and i % 10 == 0:
    #         print("[INFO] processed {}/{}".format(i, len(img_filenames_e)))     
    # print("E dataset processing done")   
    

    return features, labels        

In [12]:
def train():
    
    # Load dataset with extracted features
    print('Loading dataset. This will take time ...')
    features, labels = load_dataset()
    print('Finished loading dataset.')
    print(len(labels))

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=random_seed)
    
    #print(labels)
       
    KNN.fit(train_features, train_labels)
        
    
    accuracy = KNN.score(test_features, test_labels)
        
    print('accuracy: ', accuracy*100, '%')

In [13]:
train()

Loading dataset. This will take time ...
[INFO] processed 10/45
[INFO] processed 20/45
[INFO] processed 30/45
[INFO] processed 40/45
A dataset processing done
[INFO] processed 10/45
[INFO] processed 20/45
[INFO] processed 30/45
[INFO] processed 40/45
B dataset processing done
[INFO] processed 10/45
[INFO] processed 20/45
[INFO] processed 30/45
[INFO] processed 40/45
C dataset processing done
[INFO] processed 10/45
[INFO] processed 20/45
[INFO] processed 30/45
[INFO] processed 40/45
D dataset processing done
Finished loading dataset.
180
accuracy:  100.0 %


In [14]:
letters=['A','B','C','D','E','F']
def natural_sort_key(s):
    """Key function for natural sorting."""
    import re
    return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', s)]
output_file_path = 'results.txt'
path_to_testset = r'testset3'
filenames = sorted(os.listdir(path_to_testset), key=natural_sort_key)
with open(output_file_path, 'w') as output_file:
    for i, fn in enumerate(filenames):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue
        features = extract_hog_features(cv2.imread(os.path.join(path_to_testset, fn)))
        pred = KNN.predict([features])
        result = f"{fn} {letters[np.argmax(pred)]}\n"
        print(pred)
        print(result)  
        output_file.write(result)
    
# features=extract_hog_features(cv2.imread('test.png'))
# pred=KNN.predict_proba([features])
# print(np.argmax(pred))

['A']
test0.png A

['A']
test1.png A

['A']
test2.png A

['C']
test3.png A

['B']
test4.png A

['D']
test5.png A

['D']
test6.png A

['B']
test7.png A

['C']
test8.png A

['C']
test192.png A

['B']
test193.png A

['B']
test194.png A

['B']
test195.png A

['C']
test196.png A

['C']
test197.png A

