In [28]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier  # MLP is an NN
from sklearn.model_selection import train_test_split
from sklearn import svm
from skimage.feature import hog
import numpy as np
import argparse
import cv2
import os
import matplotlib.pyplot as plt


In [29]:
# --- Notebook configuration -------------------------------------------------

# Seed passed to train_test_split so the train/test partition is repeatable.
random_seed = 2

# Classifier shared by the training and inference cells (5 nearest neighbours).
KNN = KNeighborsClassifier(n_neighbors=5)

# One directory of training images per letter class.
path_to_a_dataset = 'letters_data_set/A'
path_to_b_dataset = 'letters_data_set/B'
path_to_c_dataset = 'letters_data_set/C'
path_to_d_dataset = 'letters_data_set/D'
path_to_e_dataset = 'letters_data_set/E'
path_to_f_dataset = 'letters_data_set/F'

In [30]:
def extract_hog_features(img):
    """Compute a flattened HOG descriptor for a single image.

    The image is resized to 32x32 pixels and described with
    ``cv2.HOGDescriptor`` using 4x4-pixel cells, blocks of 2x2 cells,
    a block stride of one cell and 9 orientation bins.

    Args:
        img: Image array, e.g. the value returned by ``cv2.imread``.

    Returns:
        The result of ``HOGDescriptor.compute`` flattened to one dimension.

    Raises:
        ValueError: If ``img`` is ``None``. ``cv2.imread`` returns ``None``
            for a missing or undecodable file, and passing that on to
            ``cv2.resize`` would otherwise fail with an opaque OpenCV
            assertion.
    """
    if img is None:
        raise ValueError(
            "extract_hog_features received None instead of an image "
            "(cv2.imread returns None when a file cannot be read)")

    win_size = (32, 32)
    cell_size = (4, 4)
    block_size_in_cells = (2, 2)
    nbins = 9
    # Sixth positional constructor argument; OpenCV names it derivAperture.
    deriv_aperture = 2

    img = cv2.resize(img, win_size)

    # Index 1 is used before index 0 when deriving block size and stride from
    # the cell tuples; both tuples are square here, so the order is immaterial.
    block_size = (block_size_in_cells[1] * cell_size[1],
                  block_size_in_cells[0] * cell_size[0])
    block_stride = (cell_size[1], cell_size[0])

    # Named `descriptor` so the local does not shadow the `hog` function
    # imported from skimage.feature at the top of the notebook.
    descriptor = cv2.HOGDescriptor(win_size, block_size, block_stride,
                                   cell_size, nbins, deriv_aperture)
    return descriptor.compute(img).flatten()

In [31]:
def load_dataset():
    """Load every PNG of the six letter directories as HOG features.

    Each directory (``path_to_a_dataset`` ... ``path_to_f_dataset``) is
    scanned in turn; every entry whose last dot-separated component is
    ``png`` is read with ``cv2.imread`` and converted with
    ``extract_hog_features``.

    Returns:
        A ``(features, labels)`` pair of equally long lists: ``features``
        holds one HOG vector per image, ``labels`` the matching letter
        (``'A'`` .. ``'F'``).
    """
    letter_dirs = (
        ('A', path_to_a_dataset),
        ('B', path_to_b_dataset),
        ('C', path_to_c_dataset),
        ('D', path_to_d_dataset),
        ('E', path_to_e_dataset),
        ('F', path_to_f_dataset),
    )

    features = []
    labels = []

    for label, directory in letter_dirs:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore the seeded train/test split) stable
        # from run to run and machine to machine.
        img_filenames = sorted(os.listdir(directory))

        for i, fn in enumerate(img_filenames):
            if fn.split('.')[-1] != 'png':
                continue

            path = os.path.join(directory, fn)
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None for an unreadable file. Skip it
                # before anything is appended so features and labels stay
                # aligned.
                print("[WARN] could not read {}; skipping".format(path))
                continue

            features.append(extract_hog_features(img))
            labels.append(label)

            # show an update every 10 images
            if i > 0 and i % 10 == 0:
                print("[INFO] processed {}/{}".format(i, len(img_filenames)))
        print("{} dataset processing done".format(label))

    return features, labels

In [32]:
def train():
    """Fit the shared KNN classifier and report its hold-out accuracy.

    Loads the full dataset via ``load_dataset``, splits it 70/30 with
    ``train_test_split`` (seeded by ``random_seed``), fits the module-level
    ``KNN`` on the larger part and prints its score on the smaller part as a
    percentage. Nothing is returned.
    """
    print('Loading dataset. This will take time ...')
    dataset = load_dataset()
    print('Finished loading dataset.')

    samples, targets = dataset
    split = train_test_split(
        samples, targets, test_size=0.3, random_state=random_seed)
    fit_samples, holdout_samples, fit_targets, holdout_targets = split

    KNN.fit(fit_samples, fit_targets)
    score = KNN.score(holdout_samples, holdout_targets)

    print('accuracy: ', score*100, '%')

In [33]:
# Run the pipeline: load the images, fit KNN, print the hold-out accuracy.
train()

Loading dataset. This will take time ...
[INFO] processed 10/76
[INFO] processed 20/76
[INFO] processed 30/76
[INFO] processed 40/76
[INFO] processed 50/76
[INFO] processed 60/76
[INFO] processed 70/76
A dataset processing done
[INFO] processed 10/65
[INFO] processed 20/65
[INFO] processed 30/65
[INFO] processed 40/65
[INFO] processed 50/65
[INFO] processed 60/65
B dataset processing done
[INFO] processed 10/79
[INFO] processed 20/79
[INFO] processed 30/79
[INFO] processed 40/79
[INFO] processed 50/79
[INFO] processed 60/79
[INFO] processed 70/79
C dataset processing done
[INFO] processed 10/76
[INFO] processed 20/76
[INFO] processed 30/76
[INFO] processed 40/76
[INFO] processed 50/76
[INFO] processed 60/76
[INFO] processed 70/76
D dataset processing done
[INFO] processed 10/70
[INFO] processed 20/70
[INFO] processed 30/70
[INFO] processed 40/70
[INFO] processed 50/70
[INFO] processed 60/70
E dataset processing done
[INFO] processed 10/78
[INFO] processed 20/78
[INFO] processed 30/78
[

In [34]:
# The six letter classes, 'A' through 'F', as a list of one-character strings.
letters = list('ABCDEF')
def natural_sort_key(s):
    """Return a sort key that orders embedded numbers by value.

    The string is split into alternating text and digit runs; digit runs
    become ``int`` and text runs are lower-cased, so ``'test2.png'`` sorts
    before ``'test10.png'``.

    Args:
        s: The string (e.g. a filename) to build a key for.

    Returns:
        A list alternating lower-cased ``str`` pieces and ``int`` values.
    """
    import re
    # isdecimal() rather than isdigit(): isdigit() is also true for characters
    # such as superscript digits, which r'\d' does not capture and int()
    # rejects, so a name like 'a1\u00b22' used to raise ValueError.
    return [int(piece) if piece.isdecimal() else piece.lower()
            for piece in re.split(r'(\d+)', s)]
# Classify every PNG in the test-set directory and record one
# "<filename> <letter>" line per image, both on stdout and in the results file.
output_file_path = 'results.txt'
path_to_testset = r'testset'
# Natural sort so that e.g. test2.png is handled before test10.png.
filenames = sorted(os.listdir(path_to_testset), key=natural_sort_key)
with open(output_file_path, 'w') as output_file:
    for fn in filenames:
        if fn.split('.')[-1] != 'png':
            continue
        img = cv2.imread(os.path.join(path_to_testset, fn))
        if img is None:
            # cv2.imread returns None for an unreadable file; skip it rather
            # than abort with a half-written results file.
            print(f"[WARN] could not read {fn}; skipping")
            continue
        features = extract_hog_features(img)
        pred = KNN.predict_proba([features])
        # predict_proba columns follow KNN.classes_, so the arg-max column is
        # mapped through the fitted classes rather than a hard-coded list
        # that only matches if all six letters were seen in training.
        result = f"{fn} {KNN.classes_[np.argmax(pred)]}\n"
        print(result)
        output_file.write(result)

test2.png B

test5.png B

test6.png D

test7.png B

test8.png B

test9.png B

test10.png B

test11.png B

test16.png E

test17.png B

test18.png B

test19.png B

test20.png D

test22.png B

test23.png B

test24.png B

test25.png B

test26.png B

test27.png E

test29.png D

test30.png E

test33.png B

test34.png B

test35.png B

test38.png B

test39.png B

test40.png D

test41.png B

test42.png B

test43.png E

test49.png E

test50.png E

test51.png B

test53.png E

test55.png B

test61.png B

test62.png E

test63.png B

test64.png E

test65.png B

test66.png B

test76.png B

test77.png B

test78.png E

test79.png D

test80.png B

test81.png E

test82.png B

test83.png B

test84.png B

test85.png B

test86.png E

test92.png B

test93.png B

test94.png B

test95.png B

test96.png E

test97.png B

test98.png E

test100.png E

test102.png A

test103.png D

test104.png E

test106.png D

test107.png B

test108.png B

test111.png B

test113.png B

test114.png B

test117.png B

test118.png B

