In [24]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier  # MLP is an NN
from sklearn.model_selection import train_test_split
from sklearn import svm
from skimage.feature import hog
import numpy as np
import argparse
import cv2
import os
import matplotlib.pyplot as plt


In [25]:
path_to_b_dataset = r'letters_data_set/B'
path_to_e_dataset = r'letters_data_set/E'
path_to_m_dataset = r'letters_data_set/M'
path_to_s_dataset = r'letters_data_set/S'
path_to_y_dataset = r'letters_data_set/Y'
path_to_xor_dataset = r'letters_data_set/XOR'
path_to_and_dataset = r'letters_data_set/AND'

KNN = KNeighborsClassifier(n_neighbors=5,weights='distance')
random_seed=20

In [26]:
def extract_hog_features(img):
    
    img = cv2.resize(img, (32, 32))
    win_size = (32, 32)
    cell_size = (4, 4)
    block_size_in_cells = (2, 2)
    
    block_size = (block_size_in_cells[1] * cell_size[1], block_size_in_cells[0] * cell_size[0])
    block_stride = (cell_size[1], cell_size[0])
    nbins = 12 
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    h = hog.compute(img)
    return h.flatten()

In [27]:
def load_dataset():
    features = []
    labels = []
    img_filenames_b = os.listdir(path_to_b_dataset)
    img_filenames_e = os.listdir(path_to_e_dataset)
    img_filenames_m = os.listdir(path_to_m_dataset)
    img_filenames_s = os.listdir(path_to_s_dataset)
    img_filenames_y = os.listdir(path_to_y_dataset)


    for i, fn in enumerate(img_filenames_b):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'B'
        labels.append(label)

        path = os.path.join(path_to_b_dataset, fn)
        img = cv2.imread(path)
        
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_b)))
    print("B dataset processing done")
    for i, fn in enumerate(img_filenames_e):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'E'
        labels.append(label)

        path = os.path.join(path_to_e_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_e)))     
    print("E dataset processing done")   
    
    for i, fn in enumerate(img_filenames_m):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'M'
        labels.append(label)

        path = os.path.join(path_to_m_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_m)))     
    print("M dataset processing done")   
    
    for i, fn in enumerate(img_filenames_s):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'S'
        labels.append(label)

        path = os.path.join(path_to_s_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_s)))     
    print("S dataset processing done")   
    
    for i, fn in enumerate(img_filenames_y):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'Y'
        labels.append(label)

        path = os.path.join(path_to_y_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_y)))     
    print("Y dataset processing done")   
      
    for i, fn in enumerate(img_filenames_xor):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'XOR'
        labels.append(label)

        path = os.path.join(path_to_xor_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_xor)))     
    print("XOR dataset processing done")   
      
    for i, fn in enumerate(img_filenames_and):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue

        label = 'AND'
        labels.append(label)

        path = os.path.join(path_to_and_dataset, fn)
        img = cv2.imread(path)
        features.append(extract_hog_features(img))
        
        # show an update every 10 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(img_filenames_and)))     
    print("AND dataset processing done")   
    
    

    return features, labels        

In [28]:
def train():
    
    # Load dataset with extracted features
    print('Loading dataset. This will take time ...')
    features, labels = load_dataset()
    print('Finished loading dataset.')
    print(len(labels))

    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2, random_state=random_seed)
    
    #print(labels)
       
    KNN.fit(train_features, train_labels)
        
    
    accuracy = KNN.score(test_features, test_labels)
        
    print('accuracy: ', accuracy*100, '%')

In [29]:
train()

Loading dataset. This will take time ...
[INFO] processed 10/149
[INFO] processed 20/149
[INFO] processed 30/149
[INFO] processed 40/149
[INFO] processed 50/149
[INFO] processed 60/149
[INFO] processed 70/149
[INFO] processed 80/149
[INFO] processed 90/149
[INFO] processed 100/149
[INFO] processed 110/149
[INFO] processed 120/149
[INFO] processed 130/149
[INFO] processed 140/149
B dataset processing done
[INFO] processed 10/146
[INFO] processed 20/146
[INFO] processed 30/146
[INFO] processed 40/146
[INFO] processed 50/146
[INFO] processed 60/146
[INFO] processed 70/146
[INFO] processed 80/146
[INFO] processed 90/146
[INFO] processed 100/146
[INFO] processed 110/146
[INFO] processed 120/146
[INFO] processed 130/146
[INFO] processed 140/146
E dataset processing done
[INFO] processed 10/143
[INFO] processed 20/143
[INFO] processed 30/143
[INFO] processed 40/143
[INFO] processed 50/143
[INFO] processed 60/143
[INFO] processed 70/143
[INFO] processed 80/143
[INFO] processed 90/143
[INFO] pr

In [30]:
letters=['B','E','M','S','Y']
def natural_sort_key(s):
    """Key function for natural sorting."""
    import re
    return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', s)]
output_file_path = 'results.txt'
path_to_testset = r'testset3'
filenames = sorted(os.listdir(path_to_testset), key=natural_sort_key)
with open(output_file_path, 'w') as output_file:
    for i, fn in enumerate(filenames):
        if fn.split('.')[-1] != 'png' and fn.split('.')[-1]!='jpg':
            continue
        features = extract_hog_features(cv2.imread(os.path.join(path_to_testset, fn)))
        pred = KNN.predict_proba([features])
        result = f"{fn} {letters[np.argmax(pred)]}\n"
        print(pred)
        print(result)  
        output_file.write(result)
    
# features=extract_hog_features(cv2.imread('test.png'))
# pred=KNN.predict_proba([features])
# print(np.argmax(pred))

[[0. 0. 1. 0. 0.]]
test1.png M

[[1. 0. 0. 0. 0.]]
test2.png B

[[1. 0. 0. 0. 0.]]
test3.png B

[[0. 0. 1. 0. 0.]]
test4.png M

[[0. 0. 1. 0. 0.]]
test5.png M

[[1. 0. 0. 0. 0.]]
test7.png B

[[1. 0. 0. 0. 0.]]
test8.png B

[[1. 0. 0. 0. 0.]]
test9.png B

[[0. 0. 0. 0. 1.]]
test15.png Y

