# A and F Classifier: The Brogrammers
Vinay Chitepu<br>
Delaney Gomen<br>
Alexandra Isaly


<br>
<br>

**NOTE:** Make sure to unzip the files in the directory before running.



### Importing Modules and Libraries

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

### Global Variables

In [4]:
TRAIN_DATADIR = 'Training_Data'
SCORE_DATADIR = 'TestingData'

CATEGORIES = ['A', 'F']

### Preprocessing

In [5]:
def process_image(img):
    #Gaussian blur
    blur = cv2.GaussianBlur(img, (11,11), cv2.BORDER_DEFAULT)
    
    #Threshhold + Dilate + Erode
    ret, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    kernel = np.ones((5,5),np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    sure_bg = cv2.dilate(opening, kernel, iterations=3)
    
    #Apply background to make background white
    imgRGB= cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    mask = sure_bg
    imgCopy = img.copy()
    imgCopy[mask==0] = 255
    
    return imgCopy

### Loading Data

In [6]:
def load_data(data = 'Training', img_size = 100, preprocess = False):
    
    if data.upper() == 'TRAINING': d = os.path.join(os.getcwd(), TRAIN_DATADIR)
    elif data.upper() == 'TESTING': d = os.path.join(os.getcwd(), SCORE_DATADIR)
    else: print("Incorrect paramter")
    
    data = []
    labels = []
    for cat in CATEGORIES:
        path = os.path.join(d, cat)
        label = CATEGORIES.index(cat)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path,img), cv2.IMREAD_GRAYSCALE)
                img_array = cv2.resize(img_array, (img_size,img_size))
                if preprocess:
                    img_array = process_image(img_array)
                data.append(img_array)
                labels.append(label)
            except Exception as e:
                pass
    
    data = np.array(data)
    data = data.reshape(data.shape[0], img_size, img_size, 1)
    
    return np.array(data), np.array(labels)

In [21]:
X, y = load_data(data='Training', img_size=64)

### Checking Data

In [22]:
# 100x100 image but has like
X[45].shape

(64, 64, 1)

### Normalize

In [23]:
X = X/255

### Building CNN in Tensorflow

In [25]:
# Initializing model
model = Sequential()

# Layer 1
model.add(Conv2D(64, (3,3), activation = 'relu', input_shape = X.shape[1:]))
model.add(MaxPooling2D(pool_size=(2,2)))

# Layer 2
model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

# Layer 3
model.add(Conv2D(64, (3,3), activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

# Layer 4
model.add(Flatten())  # Converts 2-D to 1-D
model.add(Dense(128, activation = 'relu'))

# Output Layer
model.add(Dropout(0.5))
model.add(Dense(1, activation = 'sigmoid'))

# Compiling Model
model.compile(loss = 'binary_crossentropy',   # There are only 2 classes
             optimizer = 'adam',              # Optimization Function
             metrics = ['accuracy'])          # Using accuracy

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


### Training Model

In [26]:
model.fit(X, y, batch_size=3, epochs=20, validation_split=0.3)

Train on 140 samples, validate on 61 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x19d0ca7d128>

### Exporting Model

In [27]:
model.save('easy_model.h5')
# model.save('easy_model_backup.h5')

### Importing Model

In [24]:
model = load_model('easy_model.h5')

### Testing Model

In [28]:
X_test, y_test = load_data(data='Testing', img_size=64, preprocess=True)
model.evaluate(X_test, y_test)



[1.096046740182525e-07, 1.0]

### Looking at Predictions

In [12]:
preds = model.predict_classes(X_test)

In [13]:
preds = preds.reshape(-1)

In [14]:
preds

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [15]:
accuracy_score(y_test, preds)

1.0

In [16]:
a = []; b = []; yell = [];
for i in range(len(y_test)):
    a.append(y_test[i])
    b.append(preds[i])
    if y_test[i] == preds[i]:
        yell.append(True)
    else:
        yell.append(False)

predictions = pd.DataFrame(data={'Actual': a, 'Prediction': b, 'Correct': yell })

def conv_to_letters(ent):
    i = ent
    if i == 0:
        i = 'a'
    else:
        i = 'f'
    return i
        
predictions.Actual = predictions.Actual.apply(conv_to_letters)
predictions.Prediction = predictions.Prediction.apply(conv_to_letters)

#### Sorting out wrong predictions

In [17]:
predictions[predictions.Correct == False]

Unnamed: 0,Actual,Prediction,Correct


### Saving labels

In [18]:
est_labels = np.array(predictions.Prediction)

In [19]:
est_labels

array(['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
       'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
       'a', 'a', 'a', 'a', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f',
       'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f',
       'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f'], dtype=object)

In [20]:
with open('test.txt', 'w') as file:
    file.write(str(est_labels))