In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier  # MLP is an NN
from sklearn.model_selection import train_test_split
from sklearn import svm
from skimage.feature import hog
import numpy as np
import argparse
import cv2
import os
import matplotlib.pyplot as plt
import pickle


In [2]:
# Seed handed to train_test_split so the train/test partition is repeatable.
random_seed = 2

# Directories holding the training images, one folder per class label.
path_to_0_dataset = '0_1_DataSets/0'
path_to_1_dataset = '0_1_DataSets/1'

# Shared classifier instance: fitted by train(), reused by later cells.
KNN = KNeighborsClassifier(n_neighbors=5)

In [3]:
def extract_hog_features(img):
    """Compute a flattened HOG descriptor for a single image.

    The image is resized to 32x32 pixels and described with
    ``cv2.HOGDescriptor`` using 4x4-pixel cells, blocks of 2x2 cells,
    a block stride of one cell and 9 orientation bins.

    Args:
        img: Image array, e.g. the value returned by ``cv2.imread``.

    Returns:
        The HOG descriptor flattened to a 1-D NumPy array.

    Raises:
        ValueError: If ``img`` is ``None``. ``cv2.imread`` returns ``None``
            for a missing or unreadable file, and passing that on to
            ``cv2.resize`` would otherwise fail with an opaque OpenCV error.
    """
    if img is None:
        raise ValueError(
            "extract_hog_features received None instead of an image "
            "(cv2.imread returns None when a file cannot be read)")

    img = cv2.resize(img, (32, 32))
    win_size = (32, 32)
    cell_size = (4, 4)
    block_size_in_cells = (2, 2)

    # Block size and stride are expressed in pixels, derived from the cell size.
    block_size = (block_size_in_cells[1] * cell_size[1], block_size_in_cells[0] * cell_size[0])
    block_stride = (cell_size[1], cell_size[0])
    nbins = 9

    # Named `descriptor` rather than `hog` so the `hog` function imported from
    # skimage.feature is not shadowed inside this function.
    # NOTE(review): the trailing positional 2 appears to be OpenCV's
    # derivAperture parameter -- confirm against the HOGDescriptor signature.
    descriptor = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins, 2)
    return descriptor.compute(img).flatten()

In [4]:
def _load_class_features(directory, label):
    """Extract HOG features for every ``.png`` file in one class directory.

    Args:
        directory: Folder containing the images of a single class.
        label: Label string attached to every image found in ``directory``.

    Returns:
        Tuple ``(features, labels)`` of equal-length lists.

    Raises:
        ValueError: If a ``.png`` file cannot be decoded by ``cv2.imread``.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/test split) reproducible.
    filenames = sorted(os.listdir(directory))

    for i, fn in enumerate(filenames):
        if fn.split('.')[-1] != 'png':
            continue

        path = os.path.join(directory, fn)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("Could not read image file: {}".format(path))

        features.append(extract_hog_features(img))
        labels.append(label)

        # show an update every 10 directory entries
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(filenames)))

    print("{} dataset processing done".format(label))
    return features, labels


def load_dataset():
    """Load HOG features and labels for both classes.

    Reads the ``.png`` images under ``path_to_0_dataset`` (label ``'0'``)
    followed by those under ``path_to_1_dataset`` (label ``'1'``).

    Returns:
        Tuple ``(features, labels)``: a list of HOG feature vectors and the
        parallel list of label strings, class ``'0'`` samples first.
    """
    features = []
    labels = []

    for directory, label in ((path_to_0_dataset, '0'), (path_to_1_dataset, '1')):
        class_features, class_labels = _load_class_features(directory, label)
        features.extend(class_features)
        labels.extend(class_labels)

    return features, labels

In [5]:
def train():
    """Fit the shared ``KNN`` classifier and print its hold-out accuracy.

    Features and labels come from ``load_dataset``. 30% of the samples are
    held out (split seeded with ``random_seed``), ``KNN`` is fitted on the
    remaining samples, and the score on the held-out part is printed as a
    percentage.
    """
    print('Loading dataset. This will take time ...')
    samples, targets = load_dataset()
    print('Finished loading dataset.')

    # train_test_split returns: train X, test X, train y, test y.
    split = train_test_split(
        samples, targets, test_size=0.3, random_state=random_seed)
    fit_samples, eval_samples, fit_targets, eval_targets = split

    KNN.fit(fit_samples, fit_targets)

    score = KNN.score(eval_samples, eval_targets)
    print('accuracy: ', score*100, '%')

In [6]:
# Load the dataset, fit the KNN classifier and print the hold-out accuracy.
train()

Loading dataset. This will take time ...
[INFO] processed 10/74
[INFO] processed 20/74
[INFO] processed 30/74
[INFO] processed 40/74
[INFO] processed 50/74
[INFO] processed 60/74
0 dataset processing done
[INFO] processed 10/79
[INFO] processed 20/79
[INFO] processed 30/79
[INFO] processed 40/79
[INFO] processed 50/79
[INFO] processed 60/79
[INFO] processed 70/79
1 dataset processing done
Finished loading dataset.
accuracy:  100.0 %


In [7]:
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers by value.

    The string is split into alternating text and digit runs; digit runs
    become ints and text runs are lower-cased, so 'test2' sorts before
    'test10'.
    """
    import re

    key = []
    for chunk in re.split(r'(\d+)', s):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk.lower())
    return key
# Classify every .png in the test-set folder (in natural filename order) and
# write one "<filename> <predicted label>" line per image to the results file.
output_file_path = 'results.txt'
path_to_testset = r'testset'
filenames = sorted(os.listdir(path_to_testset), key=natural_sort_key)
with open(output_file_path, 'w') as output_file:
    for fn in filenames:
        if fn.split('.')[-1] != 'png':
            continue

        img_path = os.path.join(path_to_testset, fn)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"Could not read image file: {img_path}")

        features = extract_hog_features(img)
        pred = KNN.predict_proba([features])
        # predict_proba columns are ordered like KNN.classes_, so the argmax is
        # a column index; map it back to the class label instead of relying on
        # the labels happening to equal their own positions.
        predicted_label = KNN.classes_[np.argmax(pred)]
        result = f"{fn} {predicted_label}\n"
        print(result)
        output_file.write(result)

test2.png 1

test5.png 1

test6.png 0

test7.png 1

test8.png 0

test9.png 0

test10.png 0

test11.png 0

test16.png 1

test17.png 0

test18.png 1

test19.png 1

test20.png 1

test22.png 0

test23.png 0

test24.png 1

test25.png 1

test26.png 0

test27.png 1

test29.png 1

test30.png 1

test33.png 0

test34.png 1

test35.png 0

test38.png 1

test39.png 0

test40.png 0

test41.png 0

test42.png 1

test43.png 1

test49.png 1

test50.png 1

test51.png 1

test53.png 1

test55.png 0

test61.png 0

test62.png 1

test63.png 0

test64.png 1

test65.png 0

test66.png 0

test76.png 1

test77.png 0

test78.png 1

test79.png 0

test80.png 0

test81.png 1

test82.png 0

test83.png 0

test84.png 0

test85.png 0

test86.png 1

test92.png 0

test93.png 0

test94.png 0

test95.png 0

test96.png 1

test97.png 0

test98.png 1

test100.png 1

test102.png 1

test103.png 0

test104.png 1

test106.png 1

test107.png 0

test108.png 0

test111.png 0

test113.png 0

test114.png 0

test117.png 0

test118.png 0



In [8]:
# Destination file for the serialized classifier.
model_pkl_file = "numbers_model.pkl"

# Persist the KNN object so it can be reloaded without retraining.
with open(model_pkl_file, mode='wb') as file:
    pickle.dump(obj=KNN, file=file)