<a href="https://colab.research.google.com/github/willjhliang/traffic-sign-recognition/blob/main/models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Download the dataset from the GitHub repo (IPython shell commands).
# Remove the sample_data directory (presumably Colab's default sample folder; not used below).
!rm -r sample_data
# Clone the project repository.
!git clone https://github.com/willjhliang/traffic-sign-recognition.git
# Move its data/ directory into the working directory, where the loading cells expect it.
!mv traffic-sign-recognition/data .
# Delete the rest of the clone.
!rm -r traffic-sign-recognition

In [None]:
import itertools
import os
from copy import deepcopy

import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
K = 58                  # Number of classes; class labels are the integers 0..K-1
S = 32                  # Side length images are resized to; images are grayscale, so each is (S, S)
validation_ratio = 0.1  # Proportion of training data to set aside for validation

random_seed = 19104     # Passed as random_state to the AdaBoost classifiers

# Dataset

In [None]:
def load_data(datapath):
    """Load every image in ``datapath`` as a grayscale array, grouped by class.

    The class label is parsed from the first three characters of each file
    name. Entries whose prefix is not an integer (for example
    ``.ipynb_checkpoints``) are skipped.

    Args:
        datapath: Directory containing the image files.

    Returns:
        dict mapping every class index in ``range(K)`` to a list of 2-D float
        arrays with pixel values scaled to [0, 1].

    Raises:
        KeyError: If a file's label falls outside ``range(K)``.
    """
    data = {k: [] for k in range(K)}
    # Sort so the per-class ordering does not depend on the filesystem.
    for fname in sorted(os.listdir(datapath)):
        try:
            label = int(fname[:3])
        except ValueError:
            # Not a labelled image file; ignore it.
            continue
        # The context manager closes the file handle once the pixels have
        # been copied into a new array by the division.
        with Image.open(os.path.join(datapath, fname)) as img:
            pixels = np.asarray(img.convert('L')) / 255
        data[label].append(pixels)
    return data

In [None]:
# Read labels.csv from the dataset directory (presumably maps class ids to
# sign names; it is not referenced again in the visible cells).
labels = pd.read_csv("data/labels.csv")

# Per-class lists of grayscale images for the train and test splits.
train_data = load_data('data/images/train')
test_data = load_data('data/images/test')

## Data Exploration

We'll explore the dataset by displaying an example image from each class. We also plot the number of images belonging to each class and find that the class sizes vary widely.

In [None]:
# Show the first training image of every class on a 10-column grid.
plt.gray()  # make grayscale the default colormap for imshow
n_cols = 10
n_rows = -(-K // n_cols)  # ceiling division so every class gets a cell
fig, axs = plt.subplots(n_rows, n_cols, squeeze=False, figsize=(15, 15))
for k, ax in enumerate(axs.flat):
    ax.axis('off')
    # Cells past the last class, or classes without images, stay blank.
    if k < K and train_data[k]:
        ax.imshow(train_data[k][0])


In [None]:
# Number of training images per class before augmentation; reused by the
# later before/after comparison plot.
class_dist = [len(train_data[k]) for k in range(K)]
# Bar chart with the class index on the x-axis and the image count on the y-axis.
plt.bar(list(range(K)), class_dist)

## Data Preprocessing

To preprocess our data, we'll first augment the classes with fewer image examples. Our augmentation scheme includes cropping, rotation, and brightness changes; note that we don't apply any flips, since mirroring would change the meaning of the symbols on traffic signs (for example, left and right arrows).

After augmentation, we reshape the data to an array format and store labels as integers.

In [None]:
import cv2

def center_crop(img, center_percentage):
    """Crop ``img`` to its central region.

    Args:
        img: Image array whose first two axes are (rows, columns); any
            trailing channel axis is left untouched.
        center_percentage: Fraction of each spatial dimension to keep,
            e.g. 0.8 removes a 10% margin from every side.

    Returns:
        The slice of ``img`` with an equal margin removed from both ends of
        each spatial axis.
    """
    n_rows, n_cols = img.shape[:2]
    # Round away floating-point noise before truncating: 1 - 0.8 evaluates to
    # 0.19999999999999996, which would otherwise turn an exact 1 px margin
    # into 0. Genuinely fractional margins are still truncated.
    row_margin = int(round(n_rows * (1 - center_percentage) / 2, 9))
    col_margin = int(round(n_cols * (1 - center_percentage) / 2, 9))
    return img[row_margin:n_rows - row_margin, col_margin:n_cols - col_margin]

def rotate_img(img, angle):
    """Rotate a 2-D image about its centre without clipping the corners.

    The image is rotated onto a canvas large enough to hold the whole
    rotated frame, then resized back to the input's width and height.

    Args:
        img: 2-D image array.
        angle: Rotation angle in degrees, passed to cv2.getRotationMatrix2D.

    Returns:
        The rotated image with the same width and height as ``img``.
    """
    rows, cols = img.shape
    pivot = (cols // 2, rows // 2)

    transform = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    abs_cos, abs_sin = np.abs(transform[0, 0]), np.abs(transform[0, 1])

    # Bounding box of the rotated frame.
    bound_w = int((rows * abs_sin) + (cols * abs_cos))
    bound_h = int((rows * abs_cos) + (cols * abs_sin))

    # Translate so the rotated image is centred in the enlarged canvas.
    transform[0, 2] += (bound_w / 2) - pivot[0]
    transform[1, 2] += (bound_h / 2) - pivot[1]

    rotated = cv2.warpAffine(img, transform, (bound_w, bound_h))
    return cv2.resize(rotated, (cols, rows))

def shift_brightness(img, shift):
    """Add ``shift`` to every pixel and clamp the result to [0, 1].

    Images in this notebook are scaled to [0, 1] when loaded, so the result
    is clipped to that range; an upper bound of 255 would never clip.

    Args:
        img: Image array with intensities in [0, 1].
        shift: Amount added to every pixel; negative values darken.

    Returns:
        A new array with the shifted, clipped intensities.
    """
    return np.clip(img + shift, 0.0, 1.0)


In [None]:
from random import randint

def augment_img(img):
    """Return a randomly perturbed version of ``img``.

    Steps, in order: rotate by a random integer angle in [-30, 30] degrees,
    keep the central 80%, shift brightness by a random amount in
    [-0.2, 0.2], then keep the central 80% again.

    Args:
        img: 2-D image array.

    Returns:
        The augmented image; it is smaller than the input because of the crops.
    """
    rot_angle = randint(-30, 30)
    # The crop fraction is a fixed 0.8. A random draw in [0.7, 0.9] used to
    # precede this line but was overwritten immediately, so it had no effect.
    # NOTE(review): if random cropping is intended, sample the fraction here.
    crop_center_percentage = 0.8
    brightness_shift = randint(-20, 20) / 100
    img = rotate_img(img, rot_angle)
    img = center_crop(img, crop_center_percentage)
    img = shift_brightness(img, brightness_shift)
    # NOTE(review): second fixed crop; together the two crops keep roughly
    # 64% of each side. Confirm this is intentional.
    img = center_crop(img, 0.8)
    return img

In [None]:
# Oversample every class up to the size of the largest one by appending
# augmented copies of its original images.
largest_class_size = max(len(train_data[k]) for k in range(K))
for k in range(K):
    # Freeze the original count: the list grows while we append, and cycling
    # over the growing length would re-augment already augmented images.
    n_original = len(train_data[k])
    if n_original == 0:
        continue  # no source image to augment from
    size_diff = largest_class_size - n_original
    for i in range(size_diff):
        train_data[k].append(augment_img(train_data[k][i % n_original]))

In [None]:
# Class sizes after augmentation, plotted next to the original sizes
# (left: before, right: after).
aug_class_dist = [len(train_data[c]) for c in range(K)]

fig, axs = plt.subplots(1, 2)
fig.set_size_inches(15, 15)
for ax, dist in zip(axs, (class_dist, aug_class_dist)):
    ax.bar(list(range(K)), dist)

In [None]:
def prepare_data(data):
    """Turn the per-class image lists into shuffled arrays.

    Every image is resized to (S, S) and given a leading channel axis.

    Args:
        data: dict mapping each class index in ``range(K)`` to a list of
            2-D image arrays.

    Returns:
        Tuple ``(X, X_flattened, y)``: the images with shape (N, 1, S, S),
        the same images flattened to one row per sample, and the integer
        class labels, all in the same random order.
    """
    images, targets = [], []
    for label in range(K):
        for sample in data[label]:
            images.append(cv2.resize(sample, (S, S))[np.newaxis, ...])
            targets.append(label)
    X, y = np.array(images), np.array(targets)

    # Apply one shared permutation so images and labels stay aligned.
    order = np.random.permutation(len(X))
    X, y = X[order], y[order]

    return X, np.reshape(X, (X.shape[0], -1)), y

In [None]:
# Build image arrays, flattened feature matrices and label vectors for the
# (augmented) training data and for the test data.
X_train, X_train_flattened, y_train = prepare_data(train_data)
X_test, X_test_flattened, y_test = prepare_data(test_data)

In [None]:
def get_validation(X_train, y_train):
    """Hold out the leading ``validation_ratio`` fraction as a validation set.

    Args:
        X_train: Sample array, indexed along the first axis.
        y_train: Labels aligned with ``X_train``.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)`` where the validation part
        is the first ``int(len * validation_ratio)`` samples and the training
        part is the remainder.
    """
    n_val = int(X_train.shape[0] * validation_ratio)
    X_val, X_rest = X_train[:n_val], X_train[n_val:]
    y_val, y_rest = y_train[:n_val], y_train[n_val:]
    return X_rest, X_val, y_rest, y_val

# Models

The following is a set of models we run on the data. Starting with the simplest baseline, K-Nearest Neighbors, we move toward more complex models.
1. Baseline KNN
2. Adaboost
3. Logistic Regression
4. Kernelized SVM
5. Dense Neural Network
6. Convolutional Neural Network

We also test two more advanced strategies.
1. Autoencoder dimensionality reduction allows us to embed the images in a lower dimensional space, which may lead to stronger classification performance by simpler models.
2. Transfer learning with a CNN allows us to adapt weights from pre-trained networks to our traffic sign recognition problem.

## Baseline KNN

Train a baseline K-Nearest Neighbors model to classify traffic sign images. Use 10-fold cross-validation to choose the number of neighbors k (not to be confused with `K`, the number of classes).

In [None]:
# Shared 10-fold splitter used by runKFold; no shuffling is requested here.
kf = KFold(n_splits=10)

def runKFold(model_base, X_train, y_train, one_only=False):
    """Estimate validation accuracy of a model with the shared ``kf`` splitter.

    Args:
        model_base: Unfitted estimator; a fresh clone is trained per fold.
        X_train: Feature array, indexed along the first axis.
        y_train: Labels aligned with ``X_train``.
        one_only: If True, evaluate only the first fold and return its
            accuracy.

    Returns:
        Mean validation accuracy over the folds that were evaluated.
    """
    fold_scores = []
    for train_index, val_index in kf.split(X_train):
        # Clone so that no fitted state leaks from one fold to the next.
        model = clone(model_base)
        model.fit(X_train[train_index], y_train[train_index])
        fold_scores.append(model.score(X_train[val_index], y_train[val_index]))
        if one_only:
            break
    # Divide by the number of folds actually run, not a hard-coded 10, so the
    # average stays correct if the splitter's n_splits changes.
    return sum(fold_scores) / len(fold_scores)

In [None]:
def knnBaseline(X_train, y_train, X_test, y_test):
    """Tune k for K-Nearest Neighbors by cross-validation and score on test data.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        Test-set accuracy of the model refit on all training data with the
        best k.
    """
    # Odd neighbor counts 1, 3, ..., 29.
    candidate_ks = list(range(1, 30, 2))
    val_accuracies = [
        runKFold(KNeighborsClassifier(n_neighbors=k_neighbors), X_train, y_train)
        for k_neighbors in candidate_ks
    ]

    # Keep the first k that reaches the highest cross-validated accuracy.
    best_k, best_acc = -1, 0
    for k_neighbors, avg_acc in zip(candidate_ks, val_accuracies):
        if avg_acc > best_acc:
            best_k, best_acc = k_neighbors, avg_acc

    # Validation accuracy as a function of k.
    plt.plot(candidate_ks, val_accuracies)
    plt.show()
    print("Best k: ", best_k)

    # Refit on the full training set and report accuracy on the test set.
    model = KNeighborsClassifier(n_neighbors=best_k)
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

In [None]:
# Run the KNN baseline on the flattened pixel vectors and print its test accuracy.
print(knnBaseline(X_train_flattened, y_train, X_test_flattened, y_test))

## Adaboost

In [None]:
def adaboost(X_train, y_train, X_test, y_test):
    """Tune AdaBoost's learning rate on a hold-out split and score on test data.

    A validation set is split off the training data, one model is trained
    per candidate learning rate, and the one with the highest validation
    accuracy is evaluated on the test set.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        Test-set accuracy of the best model.
    """
    def build(lr):
        # Decision stumps as weak learners; fixed seed for repeatable fits.
        # NOTE(review): algorithm="SAMME.R" is believed to be deprecated in
        # recent scikit-learn releases; confirm against the installed version.
        return AdaBoostClassifier(
            DecisionTreeClassifier(max_depth=1), n_estimators=200,
            algorithm="SAMME.R", learning_rate=lr, random_state=random_seed)

    X_train, X_val, y_train, y_val = get_validation(X_train, y_train)
    learning_rates = np.linspace(0.1, 0.8, 8)

    best_acc, clf = -1.0, None
    for lr in learning_rates:
        model = build(lr)
        model.fit(X_train, y_train)
        acc = model.score(X_val, y_val)
        # Strict comparison keeps the first learning rate on ties.
        if acc > best_acc:
            best_acc, clf = acc, model

    # The winner is already fitted on the training split; with the fixed
    # seed a refit would be expected to give the same model.
    return clf.score(X_test, y_test)
 
# Run the AdaBoost experiment on the flattened pixel vectors; the call returns test accuracy.
adaboost(X_train_flattened, y_train, X_test_flattened, y_test)


## Logistic Regression

In [None]:
def logistic_regression(X_train, y_train, X_test, y_test):
    """Grid-search penalty type and C for logistic regression, then test.

    Every (penalty, C) pair is scored with ``runKFold``. The best pair is
    refit on the full training data and evaluated on the test set.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        Test-set accuracy of the model trained with the best combination.
    """
    # C is the inverse regularization strength and must be strictly
    # positive, so 0.0 is not a valid candidate.
    Cvalues = [0.0001, 0.001, 0.01, 0.1, 1.0]
    # NOTE(review): newer scikit-learn versions are believed to expect
    # penalty=None instead of the string 'none'; confirm for your version.
    penalties = ['none', 'elasticnet', 'l1', 'l2']

    def make_model(penalty, c):
        # elasticnet needs an explicit l1_ratio; 0.5 weights L1 and L2 equally.
        extra = {'l1_ratio': 0.5} if penalty == 'elasticnet' else {}
        # saga is used throughout because it supports all four penalties.
        return LogisticRegression(penalty=penalty, C=c, multi_class='multinomial',
                                  solver='saga', **extra)

    best_penalty = ''
    best_C = -1
    best_acc_ovr = -1.0

    for penalty in penalties:
        val_accuracies = []
        best_C_pen = -1
        best_acc_inside = -1.0
        for c in Cvalues:
            # Cross-validated accuracy for this combination
            avg_acc = runKFold(make_model(penalty, c), X_train, y_train)
            val_accuracies.append(avg_acc)
            if avg_acc > best_acc_inside:
                best_acc_inside = avg_acc
                best_C_pen = c
            if avg_acc > best_acc_ovr:
                best_acc_ovr = avg_acc
                best_C = c
                best_penalty = penalty

        # Plot the accuracies for the different C values for each penalty
        plt.plot(Cvalues, val_accuracies)
        plt.show()
        print("Best C value for ", penalty, "is ", best_C_pen)

    # Report the best combination
    print("Best C value and penalty overall ", best_C, " ", best_penalty)

    # Refit with exactly the configuration that was cross-validated. The
    # default solver cannot fit l1 or elasticnet penalties.
    bestlogModel = make_model(best_penalty, best_C)
    bestlogModel.fit(X_train, y_train)
    return bestlogModel.score(X_test, y_test)

# Run the logistic-regression grid search on the flattened pixel vectors; the call returns test accuracy.
logistic_regression(X_train_flattened, y_train, X_test_flattened, y_test)

## Kernelized SVM

In [None]:
from sklearn.svm import SVC
 
def kernel_svm(X_train, y_train, X_test, y_test, kernel="linear"):
    """Fit a support vector classifier and score it on the test data.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.
        kernel: Kernel passed to ``SVC``. Defaults to "linear"; pass e.g.
            "rbf" or "poly" to try a non-linear kernel.

    Returns:
        Test-set accuracy of the fitted classifier.
    """
    clf = SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    return clf.score(X_test, y_test)
 
# Fit the SVM on the flattened pixel vectors; the call returns test accuracy.
kernel_svm(X_train_flattened, y_train, X_test_flattened, y_test)

## Dense Neural Network

In [None]:
class DenseNeuralNet(torch.nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        input: Number of input features per sample.
        hiddensize: Width of both hidden layers.
        num_classes: Number of output logits. The default of 58 matches the
            number of traffic-sign classes used in this notebook.
    """

    def __init__(self, input, hiddensize = 128, num_classes = 58):
        super().__init__()
        self.layer1 = torch.nn.Linear(input, hiddensize)
        self.layer2 = torch.nn.Linear(hiddensize, hiddensize)
        self.out_layer = torch.nn.Linear(hiddensize, num_classes)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Map a (batch, input) tensor to unnormalized (batch, num_classes) logits."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        # No activation on the output: the layer yields raw logits.
        return self.out_layer(x)

## Convolutional Neural Network

In [None]:
import torch

class CNN(torch.nn.Module):
    """Small convolutional classifier for single-channel S x S images.

    Two conv + ReLU + 2x2 max-pool stages are followed by a hidden linear
    layer and a linear output layer producing K class logits.
    """

    def __init__(self):
        super().__init__()
        self.relu = torch.nn.ReLU()
        self.conv_1 = torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv_2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        # The convolutions keep the spatial size (3x3 kernel, padding 1) and
        # each pool halves it, so the feature map is (S // 4) per side.
        # Deriving the size from S avoids hard-coding 8 * 8 for S == 32.
        pooled = (S // 2) // 2
        self.linear_1 = torch.nn.Linear(pooled * pooled * 64, 128)
        self.linear_2 = torch.nn.Linear(128, K)

    def forward(self, x):
        """Map a (batch, 1, S, S) tensor to unnormalized (batch, K) logits."""
        x = self.max_pool2d(self.relu(self.conv_1(x)))
        x = self.max_pool2d(self.relu(self.conv_2(x)))
        # Flatten everything except the batch dimension.
        x = x.reshape(x.size(0), -1)
        x = self.relu(self.linear_1(x))
        return self.linear_2(x)
        

In [None]:
# Wrap the image arrays and labels as tensors. Images become float32;
# labels are stored as int64 class indices, which CrossEntropyLoss expects.
train_set = torch.utils.data.TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train).long())
test_set = torch.utils.data.TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test).long())
train_loader = torch.utils.data.DataLoader(train_set, batch_size=16, shuffle=True)
# Accuracy does not depend on batch order, so the test set is not shuffled.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=16, shuffle=False)

epochs = 10
model = CNN()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train the CNN, recording the summed batch loss of every epoch.
train_loss = []
model.train()  # make sure the model is in training mode
for epoch in range(epochs):
    running_loss = 0
    for image, label in train_loader:
        optimizer.zero_grad()
        y_predicted = model(image)
        # CrossEntropyLoss needs integer class indices.
        loss = criterion(y_predicted, label.long())
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    # running_loss is the sum over batches, not the mean.
    train_loss.append(running_loss)
    print(f'epoch: {epoch+1}, loss: {running_loss:.4f}')

In [None]:
# Measure classification accuracy of the trained CNN on the test set.
correct = 0
total = 0
model.eval()  # switch to inference mode
with torch.no_grad():
    for image, label in test_loader:
        outputs = model(image)
        # Index of the largest logit is the predicted class.
        _, predicted = torch.max(outputs, 1)
        # Accumulate plain Python ints rather than tensors.
        correct += predicted.eq(label.reshape(len(label),).long()).sum().item()
        total += len(label)
# Guard against an empty test set.
accuracy = correct / total if total else 0.0
print(f'Accuracy of Neural Network is {accuracy:.4f}')

## Autoencoder Dimensionality Reduction

### Adaboost

### Logistic Regression

### Kernelized SVM

## Transfer Learning with CNN