## README

I followed the video linked below to learn how PyTorch neural networks work and how to create a playing card classifier.
Note: Python 3.12 didn't work for me; if you run into problems, try downgrading to an older Python version.

https://www.youtube.com/watch?v=tHL5STNJKag

In [None]:
!pip3 install torch torchvision timm matplotlib pandas numpy tqdm ipywidgets

In [None]:
import torch
import torch.nn as nn # provides neural network functions like convolution layers
import torch.optim as optim # provides optimizer
from torch.utils.data import Dataset, DataLoader
import torchvision # helps working with images easier
import torchvision.transforms as transforms 
from torchvision.datasets import ImageFolder
import timm # pytorch image model library, pretrained weights optimized for image classification

import matplotlib.pyplot as plt # data visualization
import pandas as pd
import numpy as np
import sys
from tqdm import tqdm # for progress bar
import os
from PIL import Image
import random


# Switch between the two modes of this notebook:
#   True  -> regenerate the synthetic dataset, train a new model and save it
#   False -> skip generation/training and load the existing weights (model.pth)
train = False

## STEP 1: Set up dataset and data loader

In [None]:
# Generate synthetic images

def _decode_rgb565_pair(word):
    """Decode one hex word holding two RGB565 pixels into two RGB888 pixels.

    The upper 16 bits hold pixel 0 and the lower 16 bits hold pixel 1. Each
    channel is shifted left so it fills the top bits of an 8-bit value.

    Returns ``(p1, p0)``, i.e. the low half-word first, which is the order in
    which ``log2img`` stores them.
    """
    int_val = int(word, 16)

    r0 = ((int_val >> 27) & 0b11111) << 3
    g0 = ((int_val >> 21) & 0b111111) << 2
    b0 = ((int_val >> 16) & 0b11111) << 3
    r1 = ((int_val >> 11) & 0b11111) << 3
    g1 = ((int_val >> 5) & 0b111111) << 2
    b1 = (int_val & 0b11111) << 3

    return [r1, g1, b1], [r0, g0, b0]


def log2img(class_name, height=148, width=172, log_dir="log_data"):
    """Rebuild a PIL image from the pixel dump in ``<log_dir>/<class_name>.log``.

    Every comma-separated field containing ``data:`` after the start marker is
    read as a hex word carrying two RGB565 pixels.

    Args:
        class_name: Log file stem; also used as the class label elsewhere.
        height: Image height in pixels (defaults to the original 148).
        width: Image width in pixels (defaults to the original 172).
        log_dir: Directory that contains the ``.log`` files.

    Returns:
        A ``height`` x ``width`` RGB ``PIL.Image``. Missing pixels stay black
        and surplus pixels are dropped, so a short or over-long log no longer
        fails with a NumPy broadcasting error.

    Raises:
        FileNotFoundError: If the log file does not exist.
        ValueError: If a ``data:`` field does not contain valid hex.
    """
    pixel_data = []
    found_start = False
    log_path = f"{log_dir}/{class_name}.log"
    with open(log_path, 'r', encoding='utf-8', errors='ignore') as file:
        for line in file:
            if not found_start:
                # Skip straight to the data. The marker line itself is not
                # parsed (unchanged behavior).
                # NOTE(review): if that line also carries a data word, the
                # first two pixels are lost - confirm against the log format.
                found_start = "index: 0" in line
                continue

            for part in line.split(','):
                if 'data:' not in part:
                    continue
                # Take everything after 'data:' so a missing space after the
                # colon or an extra "key: value" prefix cannot break parsing.
                word = part.split('data:', 1)[1].strip().zfill(8)
                pixel_data.extend(_decode_rgb565_pair(word))

    num_pixels = height * width
    image_data = np.zeros((num_pixels, 3), dtype=np.uint8)
    if pixel_data:
        # Truncate to the frame size; anything shorter leaves a black tail.
        data = np.array(pixel_data[:num_pixels], dtype=np.uint8)
        image_data[:len(data)] = data

    return Image.fromarray(image_data.reshape((height, width, 3)))

def random_rotate_180(img):
    """Turn ``img`` upside down roughly half of the time.

    One number is drawn from ``random.random()``. Below 0.5 the result of
    ``img.rotate(180)`` is returned, otherwise ``img`` itself is returned.
    """
    if random.random() < 0.5:
        return img.rotate(180)
    return img

# Augmentation pipeline used to synthesize extra images from one captured card.
# It has its own name because `transform` is rebound later in this file to a
# plain resize pipeline; create_class_dataset must keep using this one even
# when cells are re-run out of order.
augment_transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.02),
    transforms.RandomPerspective(distortion_scale=0.25, p=0.5),
    transforms.Lambda(random_rotate_180), # handle card orientation
    transforms.RandomRotation(degrees=(-6,6)), # to handle camera rotation
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor()
])
# Backward-compatible alias for code that still refers to `transform` here.
transform = augment_transform


# creates all the datasets for a specific class_name
def create_class_dataset(class_name, dataset_size=30):
    """Write the train/valid/test image folders for one card class.

    The source image is rebuilt from the class's log via ``log2img``. Every
    folder ``dataset/<split>/<class_name>`` ends up with ``dataset_size`` JPEGs
    named ``<class_name>-<i>.jpg``:

    * ``train``: index 0 is the unmodified source image, 1.. are augmented.
    * ``valid`` / ``test``: all indices 0.. are augmented. These folders used
      to start at 1 and so held one image fewer than ``dataset_size``.

    Args:
        class_name: Card class, i.e. the stem of its log file.
        dataset_size: Number of images per folder (default 30, as before).
    """
    image = log2img(class_name)
    # ToPILImage is stateless, so build it once instead of once per image.
    to_pil = transforms.ToPILImage()

    for folder in ("train", "valid", "test"):
        # make the directory if it doesn't exist
        image_dir = f"dataset/{folder}/{class_name}"
        os.makedirs(image_dir, exist_ok=True)

        if folder == "train":
            # save the original image into the training set
            image.save(os.path.join(image_dir, f"{class_name}-0.jpg"))
            first_index = 1
        else:
            first_index = 0

        # create and save synthetic images
        for i in range(first_index, dataset_size):
            synthetic_image_pil = to_pil(augment_transform(image))
            synthetic_image_pil.save(os.path.join(image_dir, f"{class_name}-{i}.jpg"))

    

In [None]:
# Create all datasets by iterating through the log files in log_data

if train:
    # sorted() gives a reproducible order. The extension check skips stray
    # entries (e.g. .DS_Store or sub-folders) for which log2img would try to
    # open a non-existent "<name>.log" and raise FileNotFoundError.
    for log in sorted(os.listdir("log_data")):
        class_name, extension = os.path.splitext(log)
        if extension != ".log":
            continue
        create_class_dataset(class_name)

In [None]:
class PlayingCardDataset(Dataset):
    """Thin ``Dataset`` wrapper around a torchvision ``ImageFolder``.

    Length, indexing and the class list are all forwarded to the wrapped
    ``ImageFolder`` kept in ``self.data``.
    """

    def __init__(self, data_dir, transform=None):
        # ImageFolder creates the classes from the folder names under data_dir
        image_folder = ImageFolder(data_dir, transform=transform)
        self.data = image_folder

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        wrapped = self.data
        return len(wrapped)

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset yields at ``idx``."""
        sample = self.data[idx]
        return sample

    @property
    def classes(self):
        """Class names as reported by the wrapped dataset."""
        wrapped = self.data
        return wrapped.classes

# Transform applied to every image when it is read from disk.
# NOTE: this rebinds the module-level name `transform`, which earlier in the
# file referred to the augmentation pipeline.
transform = transforms.Compose([
    transforms.Resize((128, 128)), # resize input image to desired pixel dimensions
    transforms.ToTensor(), # converts pixel RGB val from [0,255] -> [0,1]
])

# One sub-folder per class inside each split directory.
train_dir = "dataset/train"
valid_dir = "dataset/valid"
test_dir = "dataset/test"

# All three splits share the same resize + to-tensor transform.
train_dataset = PlayingCardDataset(train_dir, transform)
valid_dataset = PlayingCardDataset(valid_dir, transform)
test_dataset = PlayingCardDataset(test_dir, transform)

# Data loading: batches of 32; only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
## STEP 2: Design Neural Network

In [None]:
class CardClassifer(nn.Module):
    """EfficientNet-B0 feature extractor followed by a linear classifier.

    Args:
        num_classes: Number of output classes (default 54).
        pretrained: Passed to ``timm.create_model``. Keep the default ``True``
            when training from the pretrained weights. Pass ``False`` when a
            checkpoint is loaded right afterwards, so that no pretrained
            weights have to be fetched first.
    """

    def __init__(self, num_classes=54, pretrained=True):
        super().__init__()

        # note: we could design the image classification architecture
        # ourselves layer by layer; instead we reuse an optimized image
        # classification model from timm.
        # base_model stays an attribute so the state_dict keys remain the
        # same as in checkpoints saved by earlier versions of this class.
        self.base_model = timm.create_model('efficientnet_b0', pretrained=pretrained)

        # Every child module of the base model except the last one.
        self.features = nn.Sequential(*list(self.base_model.children())[:-1])

        # Feature size of efficientnet_b0; mapped to num_classes below.
        enet_out_size = 1280

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(enet_out_size, num_classes)
        )

    def forward(self, x):
        """Run the feature extractor, then the classifier, and return the result."""
        x = self.features(x)
        return self.classifier(x)


## STEP 2: Train + Validate

In [None]:
# Note: we want to validate the model on data it has not been trained on,
# so the data is split into a training and a validation dataset.
# Terms: epoch = one run through the entire training dataset, step = one batch of data.

# General idea: feed the data to the model in batches, calculate the loss, and
# perform backpropagation to adjust the weights so that the loss is minimized.

# training parameters
num_epochs = 6
# Per-epoch loss histories, filled by the training cell.
train_losses, val_losses = [], []
model = CardClassifer()
criterion = nn.CrossEntropyLoss() # loss function (what model optimizes to minimize loss)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Either train and validate for num_epochs, or load previously saved weights.
if train:
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader, desc='Training loop'):
            optimizer.zero_grad()
            outputs = model(images) # call forward on the images
            loss = criterion(outputs, labels)
            loss.backward() # backpropagation to update model weight
            optimizer.step()
            # Weight the batch loss by the batch size so the division below
            # gives a per-sample average.
            running_loss += loss.item() * labels.size(0)
        train_loss = running_loss / len(train_loader.dataset)
        train_losses.append(train_loss)
        
        # Validation phase
        model.eval()
        running_loss = 0.0
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc='Validation loop'):
                outputs = model(images)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * labels.size(0)
        val_loss = running_loss / len(val_loader.dataset)
        val_losses.append(val_loss)
        print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss}, Validation loss: {val_loss}")
else: # load in already trained model
    # model = timm.create_model('efficientnet_b0', pretrained=False)  # define architecture of model
    model_weights_path = "model.pth"
    model.load_state_dict(torch.load(model_weights_path))
    model.eval() # set to eval mode

In [None]:
# Visualize Loss

# The loss histories are only filled when the training cell actually trained
# (train = True). Skip the figure otherwise instead of showing an empty plot.
if train_losses or val_losses:
    plt.plot(train_losses, label='Training loss')
    plt.plot(val_losses, label='Validation loss')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.title("Loss over epochs")
    plt.show()
else:
    print("No loss history to plot (the model was loaded, not trained).")

## Assessing the model

In [None]:
# Load and preprocess the image

# Same resize + to-tensor pipeline as the one used for the datasets; rebinding
# `transform` here keeps this cell usable on its own.
transform = transforms.Compose([
    transforms.Resize((128, 128)), # resize input image to desired pixel dimensions
    transforms.ToTensor(), # converts pixel RGB val from [0,255] -> [0,1]
])

def preprocess_image(image_path, transform):
    """Load an image file and prepare it as a single-image batch.

    Args:
        image_path: Path of the image file to open.
        transform: Callable applied to the RGB image; its result must support
            ``unsqueeze(0)``.

    Returns:
        ``(image, batch)``: the RGB PIL image and the transformed image with a
        leading batch dimension added.
    """
    # convert() returns its own copy of the pixel data, so the file opened by
    # Image.open can be closed right away instead of leaking the handle.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    return image, transform(image).unsqueeze(0)

# Predict using the model
def predict(model, image_tensor, class_names=None):
    """Classify one batch with ``model`` and return label and probabilities.

    Args:
        model: Module called as ``model(image_tensor)``; it is switched to
            eval mode first.
        image_tensor: Input batch. The flattened probabilities are indexed
            directly with the argmax, so a batch of exactly one image is
            expected.
        class_names: Optional sequence mapping class index to label. Defaults
            to ``train_dataset.classes`` (the previous, implicit behavior), so
            existing two-argument calls keep working.

    Returns:
        ``(predicted_class, probabilities)`` where ``probabilities`` is a flat
        NumPy array of the softmax over dimension 1.
    """
    if class_names is None:
        class_names = train_dataset.classes

    model.eval()
    with torch.no_grad():
        outputs = model(image_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        probabilities = probabilities.cpu().numpy().flatten()

    max_idx = int(np.argmax(probabilities))
    predicted_class = class_names[max_idx]
    return predicted_class, probabilities

# Visualization
def visualize_predictions(original_image, predicted_class, probabilities, class_names):
    """Show the image next to a horizontal bar chart of class probabilities.

    Args:
        original_image: Image shown in the left panel.
        predicted_class: Label used in the left panel's title.
        probabilities: Bar lengths, one per entry of ``class_names``.
        class_names: Bar labels.
    """
    _, (image_ax, bar_ax) = plt.subplots(1, 2, figsize=(14, 7))

    # Left panel: the picture itself, without axes.
    image_ax.imshow(original_image)
    image_ax.axis("off")
    image_ax.set_title(f"Prediction: {predicted_class}")

    # Right panel: one bar per class, x axis fixed to the range [0, 1].
    bar_ax.barh(class_names, probabilities)
    bar_ax.set_xlabel("Probability")
    bar_ax.set_title("Class Predictions")
    bar_ax.set_xlim(0, 1)

    plt.tight_layout()
    plt.show()

In [None]:
# Get accuracy using test data
label_to_class = {v: k for k, v in ImageFolder(test_dir).class_to_idx.items()}
# Must be bound BEFORE the loop: it is needed as soon as the first wrong
# prediction is visualized. It used to be assigned only at the end of each
# iteration, which raised a NameError in a fresh kernel.
class_names = train_dataset.classes

# random.sample raises if asked for more items than the population holds.
sample_size = min(10, len(test_dataset))
num_test_images = 0
num_correct = 0
random_idxes = random.sample(range(len(test_dataset)), sample_size)

for random_idx in random_idxes:
    image, label = test_dataset[random_idx]
    image_tensor = image.unsqueeze(0) # makes it a batch of size 1; the model expects a batch of images as input
    predicted_class, probabilities = predict(model, image_tensor)
    num_test_images += 1
    if predicted_class == label_to_class[label]:
        num_correct += 1
    else: # display if incorrect
        original_image = transforms.ToPILImage()(image)
        visualize_predictions(original_image, predicted_class, probabilities, class_names)

if num_test_images:
    print(f"Accuracy: {num_correct} / {num_test_images} Correct ({100 * num_correct / num_test_images}%)")
else:
    print("Accuracy: no test images available")

In [None]:
# Pick one random file from the test split and visualize its prediction.
from glob import glob
# Every file inside every class folder of the test split.
test_images = glob("dataset/test/*/*")
test_examples = np.random.choice(test_images, 1)

for example in test_examples:
    original_image, image_tensor = preprocess_image(example, transform)
    predicted_class, probabilities = predict(model, image_tensor)

    # Bar labels for the probability chart.
    class_names = train_dataset.classes 
    visualize_predictions(original_image, predicted_class, probabilities, class_names)

# manual test

def preprocess_user_image(image_path, transform):
    """Load a user-supplied image and prepare it as a single-image batch.

    Args:
        image_path: Path of the image file to open.
        transform: Callable applied to the RGB image; its result must support
            ``unsqueeze(0)``.

    Returns:
        ``(image, batch)``: the RGB PIL image and the transformed image with a
        leading batch dimension added.
    """
    # convert() returns its own copy of the pixel data, so the file opened by
    # Image.open can be closed right away instead of leaking the handle.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    # NOTE: a 90-degree rotation (for photos that must be sideways) was tried
    # at this point and is currently disabled.
    return image, transform(image).unsqueeze(0)

user_image_name = "DA-2.jpg"
user_image_path = f"dataset/usertest/{user_image_name}"
# Use the helper written for user images. The generic preprocess_image was
# called here before, which left preprocess_user_image unused and would
# silently skip any user-specific preprocessing added to it.
original_image, image_tensor = preprocess_user_image(user_image_path, transform)
print(image_tensor.shape)
predicted_class, probabilities = predict(model, image_tensor)

# train_dataset.classes gives the class names used as bar labels
class_names = train_dataset.classes
visualize_predictions(original_image, predicted_class, probabilities, class_names)



## Step 3: Save the state_dict of the model as a pth

In [None]:
# Save the trained weights (state_dict) as a .pth file.
# Only done after training, so an existing model.pth is not rewritten when the
# weights were merely loaded from it.
if train:
    model_path = "model.pth"
    model.eval()
    torch.save(model.state_dict(), model_path)

## Step 4: To use on STM32, convert to tflite model and quantize

In [None]:
!pip3 install nvidia-pyindex 

In [None]:
!pip3 install onnx_graphsurgeon onnx2tf sng4onnx tensorflow==2.15

In [None]:

import tensorflow as tf
import torch
import onnx2tf


# One 3-channel image of 128 x 128, the size the Resize transform in this
# file produces, in PyTorch's (batch, channels, height, width) order.
input_shape = (1, 3, 128, 128)

# Step 1: Convert PyTorch model to ONNX
dummy_input = torch.randn(input_shape)  # Example input tensor
torch.onnx.export(model, dummy_input, "model.onnx", export_params=True)

In [None]:
# Step 2:Convert ONNX -> TF
!onnx2tf -i model.onnx -b 1 -osd -cotof

In [None]:
# test tf model
import tensorflow as tf
import numpy as np
from PIL import Image

def preprocess_image(image_path):
    """Load an image file as a float32 array of shape (1, 128, 128, 3).

    NOTE: this rebinds the module-level name ``preprocess_image`` with a
    different signature than the PyTorch helper defined earlier in the file.

    Args:
        image_path: Path of the image file to open.

    Returns:
        NumPy float32 array with pixel values scaled to [0, 1] and a leading
        batch dimension.
    """
    # Close the file as soon as the pixels are read. Forcing RGB keeps the
    # last axis at 3 even for grayscale, palette or RGBA files.
    with Image.open(image_path) as source:
        img = source.convert("RGB").resize((128, 128))  # Resize the image to match model input size
    img = (np.array(img) / 255.0).astype(np.float32)  # Normalize pixel values to [0, 1], and is type float32
    img = np.expand_dims(img, axis=0)  # Add batch dimension # tf tensor = (1, 128, 128, 3)
    return img

# Test the model with an input image
def test_model_with_image(image_path, model):
    """Preprocess the image at ``image_path``, call ``model`` on it, return the output."""
    batch = preprocess_image(image_path)
    # Inference is a plain call of the loaded model on the batch.
    return model(batch)


# Load tf model
# NOTE: this rebinds `model`, which referred to the PyTorch model until here.
model = tf.saved_model.load("saved_model")

image_name = "C8/C8-2.jpg"
image_path = f"dataset/test/{image_name}"
output_data = test_model_with_image(image_path, model)
print("Output:", output_data)

# Index of the largest output value, mapped back to its class folder name.
max_index = np.argmax(output_data)
label_to_class = {v: k for k, v in ImageFolder(test_dir).class_to_idx.items()}

print(f"prediction: {label_to_class[max_index]} ")


In [185]:
# convert tf -> tflite
from torch.utils.data import DataLoader, Subset

def rep_dataset():
    """Generator function to produce representative dataset for post-training quantization.

    Yields up to 100 samples taken from the start of ``valid_dataset``. Each
    one is a single-element list holding a float32 tensor in TensorFlow's
    channels-last layout with a leading batch dimension of 1.
    """
    # Cap at the dataset size: a fixed 100 raised IndexError on smaller sets.
    num_samples = min(100, len(valid_dataset))
    subset = Subset(valid_dataset, list(range(num_samples)))
    for image_tensor, _label in subset:  # each sample is one image tensor without a batch dimension
        image_tensor = image_tensor.unsqueeze(0)  # add batch dimension
        image_tensor = image_tensor.permute(0, 2, 3, 1)  # channels-first -> channels-last, the order TensorFlow expects
        # Hand TensorFlow a NumPy array rather than a torch tensor.
        yield [tf.dtypes.cast(image_tensor.numpy(), tf.float32)]


# Quantize the TF model = 8-bit linear quantization of an NN model
converter = tf.lite.TFLiteConverter.from_saved_model("saved_model")
converter.signature_key = "serving_default"
converter.optimizations = [tf.lite.Optimize.DEFAULT] # ST might have warned not to use this; try commenting it out if conversion doesn't work
# Sample inputs for the converter, produced by the rep_dataset generator.
converter.representative_dataset = rep_dataset

# Ensure that if ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

# The model's input and output stay float32.
converter.inference_input_type = tf.float32 # note: uint8 is far too inaccurate
converter.inference_output_type = tf.float32

# Convert TF -> TFLITE and write the result to model.tflite
tflite_quantized_model = converter.convert()
with open("model.tflite", "wb") as f:
    f.write(tflite_quantized_model)

Summary on the non-converted ops:
---------------------------------
 * Accepted dialects: tfl, builtin, func
 * Non-Converted Ops: 167, Total Ops 413, % non-converted = 40.44 %
 * 167 ARITH ops

- arith.constant:  167 occurrences  (f32: 164, i32: 3)



  (f32: 9)
  (f32: 65)
  (f32: 16)
  (f32: 1)
  (f32: 65)
  (f32: 17)
  (f32: 65)
  (f32: 5)
fully_quantize: 0, inference_type: 6, input_inference_type: FLOAT32, output_inference_type: FLOAT32


In [187]:
import tensorflow as tf
from PIL import Image
import numpy as np
# test tflite model

def preprocess_image(image_path, input_details):
    """Load an image file as a float32 batch sized for the TFLite model input.

    Args:
        image_path: Path of the image file to open.
        input_details: Result of ``interpreter.get_input_details()``. The
            ``'shape'`` of entry 0 is read as (batch, height, width, channels).

    Returns:
        NumPy float32 array with pixel values scaled to [0, 1] and a leading
        batch dimension.
    """
    input_shape = input_details[0]['shape']
    # PIL's resize takes (width, height); plain ints avoid NumPy scalar types.
    target_size = (int(input_shape[2]), int(input_shape[1]))

    # Close the file as soon as the pixels are read. Forcing RGB keeps the
    # last axis at 3 even for grayscale, palette or RGBA files.
    with Image.open(image_path) as source:
        img = source.convert("RGB").resize(target_size)  # Resize image to model input shape (128, 128)
    img = (np.array(img) / 255.0).astype(np.float32)  # Normalize pixel values to [0, 1], and is type float32 # note: uint8 is far too inaccurate
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img

def test_model_with_image(image_path, interpreter):
    """Run the TFLite ``interpreter`` on one image file and return its output.

    The first input tensor receives the preprocessed image; after ``invoke()``
    the first output tensor is read back and returned.
    """
    in_details = interpreter.get_input_details()
    out_details = interpreter.get_output_details()

    # Feed the image into the first input tensor.
    batch = preprocess_image(image_path, in_details)
    in_index = in_details[0]['index']
    interpreter.set_tensor(in_index, batch)

    # Run inference, then read back the first output tensor.
    interpreter.invoke()
    out_index = out_details[0]['index']
    return interpreter.get_tensor(out_index)

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path="model.tflite")
# Tensors must be allocated before set_tensor / invoke can be used.
interpreter.allocate_tensors()

image_name = "H4/H4-10.jpg"
image_path = f"dataset/test/{image_name}"
output_data = test_model_with_image(image_path, interpreter,)

print("Output:", output_data)
# Index of the largest output value, mapped back to its class folder name.
max_index = np.argmax(output_data)
label_to_class = {v: k for k, v in ImageFolder(test_dir).class_to_idx.items()}
print(label_to_class[max_index])



Output: [[-2.4195955 -1.5281656 -2.1649013 -7.5134807 -4.7118435 -6.7493978
  -5.2212324 -7.0040917 -4.966538  -5.3485794 -6.3673563 -4.5844965
  -6.622051  -7.8955216  0.6367356  4.966538  -6.1126623 -6.3673563
  -4.966538  -5.093885  -4.3298025 -7.6408277 -5.985315  -3.3110254
  -3.8204138 -5.857968  -4.839191  -2.6742897 -2.1649013 -7.386133
  -8.659605  -5.985315  -4.7118435 -4.5844965 -6.1126623 -4.7118435
  -6.1126623 -5.2212324 -6.7493978 -5.093885  -3.8204138 -2.1649013
   7.131439  -6.622051  -2.928984  -5.6032734 -4.839191  -5.4759264
  -6.7493978 -3.3110254 -2.8016367 -3.5657196 -3.9477608 -5.6032734]]
S3
