In [22]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sobel_canny_detector import *
import pickle
import matplotlib.pyplot as plt

# Load dataset & Preprocessing
Load the ASL train dataset from the local directory, already preprocessed images by using sobel-canny edge detection techniques.

In [23]:
import random
import cv2

dataset_directory = './preprocessed/sobel-canny-combination'
sign_types = ['A', 'B', 'C', 'D', 'del', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'nothing', 'O', 'P', 'Q', 'R', 'S', 'space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'] 
label_and_image_paths = [(sign_type, f'{dataset_directory}/{sign_type}/{sign_type}{i}.jpg') for sign_type in sign_types
                                                                     for i in range(1, 3001)]
random.seed(180)
random.shuffle(label_and_image_paths)

split_point = int(len(label_and_image_paths) * 0.8)
train_list = label_and_image_paths[:split_point]
test_list = label_and_image_paths[split_point:]

# SGD Classifier

In [24]:
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

batch_size = 32
n_neighbors = 5

# Initialize Classifier model
model = SGDClassifier(loss='hinge', alpha=0.01, max_iter=1000, random_state=180)

print("----[Model Train Start]----")
# Train model in batches
for batch_start in range(0, len(train_list), batch_size):
  batch_end = batch_start + batch_size
  batch_list = train_list[batch_start:batch_end]

  print(f'Batch {batch_start}/{len(train_list)}')

  # Load images
  images = []
  labels = []
  for label, path in batch_list:
    image = cv2.imread(path)
    images.append(image.flatten())
    labels.append(label)
  images = np.array(images)
  labels = np.array(labels)

  # Partially train the model
  model.partial_fit(images, labels, np.array(sign_types))

print("----[Model Train End]----")

----[Model Train Start]----
Batch 0/69600
Batch 32/69600
Batch 64/69600
Batch 96/69600
Batch 128/69600
Batch 160/69600
Batch 192/69600
Batch 224/69600
Batch 256/69600
Batch 288/69600
Batch 320/69600
Batch 352/69600
Batch 384/69600
Batch 416/69600
Batch 448/69600
Batch 480/69600
Batch 512/69600
Batch 544/69600
Batch 576/69600
Batch 608/69600
Batch 640/69600
Batch 672/69600
Batch 704/69600
Batch 736/69600
Batch 768/69600
Batch 800/69600
Batch 832/69600
Batch 864/69600
Batch 896/69600
Batch 928/69600
Batch 960/69600
Batch 992/69600
Batch 1024/69600
Batch 1056/69600
Batch 1088/69600
Batch 1120/69600
Batch 1152/69600
Batch 1184/69600
Batch 1216/69600
Batch 1248/69600
Batch 1280/69600
Batch 1312/69600
Batch 1344/69600
Batch 1376/69600
Batch 1408/69600
Batch 1440/69600
Batch 1472/69600
Batch 1504/69600
Batch 1536/69600
Batch 1568/69600
Batch 1600/69600
Batch 1632/69600
Batch 1664/69600
Batch 1696/69600
Batch 1728/69600
Batch 1760/69600
Batch 1792/69600
Batch 1824/69600
Batch 1856/69600
Batch 

In [26]:
# Test and Evaluate the model
accuracies = []
for batch_start in range(0, len(test_list), batch_size):
  batch_end = batch_start + batch_size
  batch_list = test_list[batch_start:batch_end]

  # Load images
  images = []
  labels = []
  for label, path in batch_list:
    image = cv2.imread(path)
    images.append(image.flatten())
    labels.append(label)
  images = np.array(images)
  labels = np.array(labels)

  # Partially train the model
  y_pred = model.predict(images)
  accuracy = accuracy_score(labels, y_pred)
  accuracies.append(accuracy)
  print(f"Accuracy on test batch {batch_start} set: {accuracy}")

Accuracy on test batch 0 set: 0.8125
Accuracy on test batch 32 set: 0.84375
Accuracy on test batch 64 set: 0.90625
Accuracy on test batch 96 set: 0.84375
Accuracy on test batch 128 set: 0.90625
Accuracy on test batch 160 set: 0.8125
Accuracy on test batch 192 set: 0.8125
Accuracy on test batch 224 set: 0.84375
Accuracy on test batch 256 set: 0.75
Accuracy on test batch 288 set: 0.75
Accuracy on test batch 320 set: 0.875
Accuracy on test batch 352 set: 0.71875
Accuracy on test batch 384 set: 0.78125
Accuracy on test batch 416 set: 0.875
Accuracy on test batch 448 set: 0.75
Accuracy on test batch 480 set: 0.9375
Accuracy on test batch 512 set: 0.8125
Accuracy on test batch 544 set: 0.75
Accuracy on test batch 576 set: 0.84375
Accuracy on test batch 608 set: 0.8125
Accuracy on test batch 640 set: 0.75
Accuracy on test batch 672 set: 0.625
Accuracy on test batch 704 set: 0.8125
Accuracy on test batch 736 set: 0.8125
Accuracy on test batch 768 set: 0.65625
Accuracy on test batch 800 set: 0.

In [27]:
# Save the trained model
model_name = 'sobel-canny_SGD_32batch'
model.save(f'./models/{model_name}.h5')

# Save the history
with open(f'./training_history/{model_name}.pkl', 'wb') as file:
  pickle.dump(accuracies, file)

AttributeError: 'SGDClassifier' object has no attribute 'save'

In [None]:
# # Plot training loss
# plt.figure(figsize=(10, 5))
# plt.subplot(1, 2, 1)
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label="Validation Loss")
# plt.title('Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()

# # Plot training accuracy
# plt.subplot(1, 2, 2)
# plt.plot(history.history['accuracy'], label='Training Accuracy')
# plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
# plt.title('Training and Validation Accuracy')
# plt.xlabel('Epoch')
# plt.ylabel('Accuracy')
# plt.legend()

# plt.show()