In [1]:
import numpy as np
import cv2
from PIL import Image
import pickle
from sklearn.svm import SVC

%matplotlib notebook
import matplotlib.pyplot as plt

### Image load

In [2]:
# Load the sample image with OpenCV (channel order is BGR) and derive a
# single-channel grayscale version from it.
# (An earlier draft used PIL's Image.open here instead of cv2.imread.)
baboon = cv2.imread("images/baboon.png")
if baboon is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an obscure cvtColor error.
    raise FileNotFoundError("could not read image: images/baboon.png")
baboon_gray = cv2.cvtColor(baboon, cv2.COLOR_BGR2GRAY)

In [3]:
# Downscale the grayscale image to 28x28 pixels and display the resulting shape
baboon_28 = cv2.resize(baboon_gray, dsize=(28, 28))
baboon_28.shape

(28, 28)

In [8]:
# Scale the 28x28 thumbnail back up to the size of the grayscale original.
# cv2.resize expects dsize as (width, height) while ndarray.shape is
# (rows, cols) == (height, width), so the shape tuple must be reversed.
baboon_back = cv2.resize(baboon_28, baboon_gray.shape[::-1])

In [4]:
# Dimensions of the colour image as loaded by OpenCV
np.shape(baboon)

(512, 512, 3)

In [5]:
# Dimensions of the grayscale image (the channel axis is gone)
np.shape(baboon_gray)

(512, 512)

In [6]:
# OpenCV stores channels as BGR; reverse the last axis so matplotlib gets RGB
plt.figure()
plt.imshow(baboon[..., ::-1])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x15d7fcfeac8>

In [7]:
# Grayscale version of the image, drawn on the axes of a new figure
plt.figure().gca().imshow(baboon_gray, cmap="gray")

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x15d035f3908>

In [9]:
# The 28x28 thumbnail, shown in a new figure
plt.figure()
plt.gca().imshow(baboon_28, cmap="gray")

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x24142b92a88>

In [9]:
# The thumbnail after being scaled back up to the original size
plt.figure()
plt.imshow(X=baboon_back, cmap="gray")

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x15d03a23808>

### Neural network (NN)

In [10]:
class NNMultiClass:
    """Multiclass neural network classifier.

    Fully connected network with sigmoid hidden layers and a softmax output
    layer, trained by full-batch gradient descent. Trained weights can be
    written with `save` and restored with `load`.
    """
    def __init__(self, hidden_layers=None, learning_rate=0.001, max_iter=1000):
        """Store hyper-parameters; weights are created by `fit` or `load`.

        Parameters
        ----------
        hidden_layers: iterable of int or None
            sizes of the hidden layers; None or empty means no hidden layer
        learning_rate: float
            gradient step size (stored multiplied by 1000, see note below)
        max_iter: int
            number of gradient steps performed by `fit`
        """
        self.hidden_layers = hidden_layers
        # NOTE(review): the stored rate is 1000x the argument. back_prop divides
        # gradients by X.size (samples * features), so this factor presumably
        # compensates for that -- confirm before changing either one.
        self.learning_rate = learning_rate * 1000
        self.max_iter = max_iter
        self.W_list = None
        self.b_list = None
        self.a_list = None
        self.z_list = None
        self.J_W_list = None
        self.J_b_list = None
        self.names = None
        self.labels = None
        self.name_to_label = None
        self.label_to_name = None
        self.y_shape = None

    def fit(self, X, y):
        """train neural network

        Parameters
        ----------
        X: input dataset
            (n_samples, n_features)
        y: output data
            (n_samples) or (n_samples, 1): raw class names, one-hot encoded
            internally; (n_samples, n_classes): used as given (transposed)

        Returns
        -------
        self"""
        # samples are stored column-wise: X is (n_features, n_samples)
        self.X = X.T
        self.size = X.size
        self.y_shape = y.shape

        if y.ndim == 1:
            self.y = y.reshape(-1, 1)
            self.y = self.update_y(self.y)
        elif y.shape[1] == 1:
            self.y = self.update_y(y)
        else:
            self.y = y.T

        if self.hidden_layers:
            self.layers = [self.X.shape[0]] + list(self.hidden_layers) + [self.y.shape[0]]
        else:
            self.layers = [self.X.shape[0], self.y.shape[0]]

        self.costs = []
        self.initialize_variables()
        self.initialize_weights()

        for i in range(self.max_iter):
            self.feed_forward(self.X)
            self.back_prop(self.y)

            # record the cost every 10th iteration
            if i % 10 == 0:
                self.costs.append(self.calculate_cost(self.y))

        return self

    def load(self, filename):
        """loads weights and biases from a pickle file

        ".pickle" is appended to filename when it is missing.

        format-> [weights_list, biases_list, layer_sizes, label_data]
            label_data: [names, labels, name_to_label, label_to_name]

        Returns
        -------
        self"""
        file = filename
        if not file.endswith(".pickle"):
            file += ".pickle"
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load model files from a trusted source.
        with open(file, "rb") as f:
            data = pickle.load(f)

        # update variables in class
        self.W_list, self.b_list, self.layers, label_data = data[0], data[1], data[2], data[3]
        self.names = label_data[0]
        self.labels = label_data[1]
        self.name_to_label = label_data[2]
        self.label_to_name = label_data[3]

        self.initialize_variables()

        return self

    def save(self, filename):
        """saves weights and biases to a pickle file

        ".pickle" is appended to filename when it is missing.

        format-> [weights_list, biases_list, layer_sizes, label_data]
            label_data: [names, labels, name_to_label, label_to_name]"""
        label_data = [self.names, self.labels, self.name_to_label, self.label_to_name]
        data = [self.W_list, self.b_list, self.layers, label_data]
        file = filename
        if not file.endswith(".pickle"):
            file += ".pickle"
        with open(file, "wb") as f:
            pickle.dump(data, f)

    def update_y(self, y):
        """Build the name/label mappings and one-hot encode y.

        Parameters
        ----------
        y: raw class names
            (n_samples, 1)

        Returns
        -------
        one-hot array of shape (n_classes, n_samples)"""
        # names: outputs in raw y (["dog", "cat", ...])
        # labels: new labels of outputs ([0, 1, ...])
        self.names = list(np.unique(y))
        self.labels = list(range(len(self.names)))
        self.name_to_label = dict(zip(self.names, self.labels))
        self.label_to_name = dict(zip(self.labels, self.names))
        # one column per sample, with a single 1 in the row of its class
        output = np.zeros((len(self.labels), y.shape[0]))
        for col, row in enumerate(y):
            output[self.name_to_label[row[0]], col] = 1

        return output

    def initialize_variables(self):
        """Allocate per-layer slots for activations, pre-activations and gradients."""
        n_layers = len(self.layers)
        self.a_list = [None] * n_layers
        self.z_list = [None] * n_layers
        self.J_W_list = [None] * (n_layers - 1)
        self.J_b_list = [None] * (n_layers - 1)

    def initialize_weights(self):
        """Draw every weight matrix and bias vector from a standard normal."""
        self.W_list = [None] * (len(self.layers) - 1)
        self.b_list = [None] * (len(self.layers) - 1)
        for i in range(len(self.W_list)):
            self.W_list[i] = np.random.randn(self.layers[i+1], self.layers[i])
            self.b_list[i] = np.random.randn(self.layers[i+1], 1)

    def feed_forward(self, inputs):
        """feed forward and compute predicted outputs

        inputs is (n_features, n_samples); results are kept in a_list/z_list."""
        self.a_list[0] = inputs
        # hidden layers use the sigmoid activation
        for i in range(1, len(self.layers) - 1):
            self.z_list[i] = self.W_list[i-1] @ self.a_list[i-1] + self.b_list[i-1]
            self.a_list[i] = self.activation(self.z_list[i])
        # output layer uses softmax
        self.z_list[-1] = self.W_list[-1] @ self.a_list[-2] + self.b_list[-1]
        self.a_list[-1] = self.softmax(self.z_list[-1])

    def back_prop(self, y):
        """backpropagate and update weights and biases

        Must be called after feed_forward; y is (n_classes, n_samples)."""
        # error term at the output layer (the factor 2 only rescales the step)
        delta = 2 * (self.a_list[-1] - y)
        # Walk the layers once from output to input, carrying the error term
        # along instead of rebuilding the whole chain for every layer.
        for i in range(len(self.W_list) - 1, -1, -1):
            # J for weights
            self.J_W_list[i] = delta @ self.a_list[i].T / self.size
            # J for biases
            self.J_b_list[i] = np.sum(delta, axis=1, keepdims=True) / self.size
            if i > 0:
                # push the error through W[i] and the sigmoid of layer i
                delta = (self.W_list[i].T @ delta) * self.d_activation(self.z_list[i])

        # update weights and biases (only after all gradients are computed)
        for i in range(len(self.W_list) - 1, -1, -1):
            self.W_list[i] -= self.learning_rate * self.J_W_list[i]
            self.b_list[i] -= self.learning_rate * self.J_b_list[i]

    def predict(self, X):
        """predicts output

        Returns the class name of every row of X, or the raw softmax output
        (n_samples, n_classes) when no label mapping is available."""
        self.feed_forward(X.T)
        if self.labels is None:
            return self.a_list[-1].T

        # index of the most probable class for every sample
        max_args = np.argmax(self.a_list[-1], axis=0)
        # translate label indices back to the original class names
        return np.array([self.label_to_name[label] for label in max_args])

    def predict_proba(self, X):
        """predict probabilities of each class, shape (n_samples, n_classes)"""
        self.feed_forward(X.T)
        return self.a_list[-1].T

    def score(self, X, y):
        """fraction of samples in X whose prediction equals y"""
        y_pred = self.predict(X).reshape(y.shape)

        return (y_pred == y).sum() / len(y)

    def calculate_cost(self, y):
        """cross-entropy cost (summed over samples) of the last forward pass"""
        # clip away exact zeros so log() cannot produce -inf (and 0 * -inf = nan)
        probs = np.clip(self.a_list[-1], np.finfo(float).tiny, None)
        return np.sum(-y * np.log(probs))

    def activation(self, z):
        """sigmoid, written via logaddexp so large |z| cannot overflow"""
        # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z)))
        return np.exp(-np.logaddexp(0, -z))

    def d_activation(self, z):
        """derivative of the sigmoid: s * (1 - s), equal to sech(z/2)**2 / 4"""
        s = self.activation(z)
        return s * (1.0 - s)

    def softmax(self, z):
        """column-wise softmax; the column maximum is subtracted for stability"""
        exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))
        return exp_z / exp_z.sum(axis=0)

In [11]:
# Build an untrained network, then restore the stored digit model into it
# (load appends ".pickle" to the name and returns the same instance).
nn = NNMultiClass()
nn.load("digit_model")

#### SVM classifier

In [12]:
# Restore the pickled SVM classifier.
# NOTE: unpickling runs code stored in the file -- only load trusted files.
with open("svm2.pickle", mode="rb") as svm_file:
    svm = pickle.load(svm_file)

### Find contours (digits) in image and predict

In [13]:
# read image and convert to grayscale
# other sample inputs: "images/digits.png", "images/digits2.png"
img = cv2.imread("images/digits3.png")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with a clear message instead of inside cvtColor.
    raise FileNotFoundError("could not read image: images/digits3.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

In [16]:
# Inverse binary threshold at 100: pixels above the threshold become 0,
# all others become 255, so dark strokes end up as white foreground.
ret, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY_INV)

In [20]:
# Binarised image, drawn on the axes of a new figure
plt.figure().gca().imshow(thresh, cmap="gray")

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x15d05f5d608>

In [21]:
# find the outer contours of the white regions in the binary image
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); the contour list is the second-to-last item in both.
contours = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

In [30]:
# Cut every contour's bounding box out of the binary image and turn it into a
# 28x28 float image: an 18x18 digit surrounded by a 5-pixel black border.
preprocessed_digits = []

for c in contours:
    x, y, w, h = cv2.boundingRect(c)

    # mark the detected region on the colour image (green, thickness tied to height)
    cv2.rectangle(img, (x, y), (x + w, y + h), color=(0, 255, 0), thickness=h // 20)

    # the bounding-box region of the thresholded image
    digit = thresh[y:y + h, x:x + w]

    # shrink/stretch to 18x18 with OpenCV's default interpolation
    resized_digit = cv2.resize(digit, (18, 18))

    # 5 zero-valued pixels on every side -> 28x28
    padded_digit = np.pad(resized_digit, 5, mode="constant", constant_values=0)

    # map pixel values from 0-255 to 0-1
    scaled_digit = padded_digit / 255.0

    preprocessed_digits.append(scaled_digit)

In [31]:
# img is a 3-channel BGR array from OpenCV: reverse the channel axis so
# matplotlib shows true colours (a colormap has no effect on RGB data).
plt.figure()
plt.imshow(img[:, :, ::-1])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x15d0669a848>

In [32]:
# Show every preprocessed digit in its own figure, titled with the SVM's
# prediction for that digit.
# (The commented-out alternative used nn.predict on the same flattened row.)
for digit_image in preprocessed_digits:
    plt.figure()
    # flatten the 28x28 image into a single-row feature matrix
    features = digit_image.reshape(1, -1)
    prediction = svm.predict(features)[0]
    plt.title("predicted as: {}".format(prediction))
    plt.imshow(digit_image)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>