In [1]:
import pickle
import numpy as np
import pandas as pd
import cv2
from sklearn.utils import class_weight

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import seaborn as sns
sns.set_style("whitegrid")

In [12]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Flatten, Dropout, Conv2D, MaxPooling2D
from keras import utils
from keras.callbacks import Callback, LambdaCallback, EarlyStopping, ModelCheckpoint
from keras.losses import categorical_crossentropy
from keras.optimizers import Adadelta, Adam
from keras import backend as K
from keras.preprocessing import image
from keras.layers.normalization import BatchNormalization

Using TensorFlow backend.


In [3]:
import pickle

training_file = './data/train.p'
validation_file= './data/valid.p'
testing_file = './data/test.p'

with open(training_file, mode='rb') as f:
    train = pickle.load(f)
with open(validation_file, mode='rb') as f:
    valid = pickle.load(f)
with open(testing_file, mode='rb') as f:
    test = pickle.load(f)
    
X_train, y_train = train['features'], train['labels']
X_valid, y_valid = valid['features'], valid['labels']
X_test, y_test = test['features'], test['labels']

In [5]:
n_classes = 43
y_train_cat = utils.to_categorical(y_train, n_classes)
y_valid_cat = utils.to_categorical(y_valid, n_classes)
y_test_cat = utils.to_categorical(y_test, n_classes)

In [6]:
mean_train = np.mean(X_train)
std_train = np.std(X_train)
min_train = np.min(X_train)
max_train = np.max(X_train)

In [7]:
std_train

67.850888426332318

In [4]:
def histogramEqualisation(img):
    original_type = img.dtype
    if img.dtype != 'uint8':
        img = img.astype('uint8')
    img_yuv = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])
    equalised = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2RGB)
    return equalised.astype(original_type)


In [5]:
def hist(X):
    out = []
    for i in range(len(X)):
        out.append(histogramEqualisation(X[i]))
    return np.array(out)
X_train_histogram =  hist(X_train)
X_valid_histogram = hist(X_valid)
X_test_histogram =  hist(X_test)

In [6]:
mean_hist_train = np.mean(X_train_histogram)
std_hist_train = np.std(X_train_histogram)
min_hist_train = np.min(X_train_histogram)
max_hist_train = np.max(X_train_histogram)

In [15]:
mean_hist_values = {
    'mean_hist_train': mean_hist_train,
    'std_hist_train': std_hist_train,
    'min_hist_train': min_hist_train,
    'max_hist_train': max_hist_train
}
np.save("data/mean_hist_values.npy", mean_hist_values)

In [7]:
def normalise1(x, is_hist = True):
    if is_hist == True:        
        return (x - min_hist_train) / (max_hist_train - min_hist_train)
    else:
        return (x - min_train) / (max_train - min_train)

In [12]:
def normalise2(x, is_hist = True):
    if is_hist == True:        
        return (x - mean_hist_train) /std_hist_train
    else:
        return (x - mean_train) /std_train

In [13]:
X_train_norm1 = normalise1(X_train, False)
X_valid_norm1 = normalise1(X_valid, False)
X_test_norm1 = normalise1(X_test, False)

In [14]:
X_train_hist_norm1 = normalise1(X_train_histogram)
X_valid_hist_norm1 = normalise1(X_valid_histogram)
X_test_hist_norm1 = normalise1(X_test_histogram)

In [32]:
np.save('data/X_train_hist_norm1.npy', X_train_hist_norm1)
np.save('data/X_valid_hist_norm1.npy', X_valid_hist_norm1)
np.save('data/X_test_hist_norm1.npy', X_test_hist_norm1)

In [15]:
class_weight_arr = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
class_weight_dict = {}
for i, val in enumerate(class_weight_arr) :
    class_weight_dict[i] = val
    
print(class_weight_dict)

{0: 4.4959948320413439, 1: 0.40872680291284941, 2: 0.40262640286937407, 3: 0.64228497600590628, 4: 0.45721981342793327, 5: 0.49047216349541933, 6: 2.2479974160206719, 7: 0.62734811609879215, 8: 0.64228497600590628, 9: 0.61309020436927408, 10: 0.44959948320413434, 11: 0.69169151262174522, 12: 0.42818998400393748, 13: 0.42149951550387599, 14: 1.1728682170542635, 15: 1.4986649440137811, 16: 2.2479974160206719, 17: 0.81745360582569881, 18: 0.74933247200689057, 19: 4.4959948320413439, 20: 2.6975968992248061, 21: 2.9973298880275623, 22: 2.4523608174770963, 23: 1.7983979328165374, 24: 3.3719961240310079, 25: 0.59946597760551246, 26: 1.4986649440137811, 27: 3.8537098560354375, 28: 1.685998062015504, 29: 3.3719961240310079, 30: 2.0750745378652353, 31: 1.1728682170542635, 32: 3.8537098560354375, 33: 1.3510501999456459, 34: 2.2479974160206719, 35: 0.74933247200689057, 36: 2.4523608174770963, 37: 4.4959948320413439, 38: 0.43509627406851714, 39: 2.9973298880275623, 40: 2.6975968992248061, 41: 3.853

In [16]:
def logger(epoch, logs):
    if epoch %2== 0: 
        print(epoch, logs['loss'], logs['acc'], logs['val_acc'])
logging_callback = LambdaCallback(
    on_epoch_end=logger)

stopping_callback = EarlyStopping(monitor='val_acc', min_delta= 0.001, patience=5)

In [17]:
def vggLike(ksize=(3,3), dropout=0.25): #taking my inspiration from vgg, a deeper network
    input_shape = (32,32,3)
    model = Sequential()
    model.add(Conv2D(32, kernel_size=ksize, activation='relu', padding='same', name='set1_conv1',input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Conv2D(32, kernel_size=ksize, activation='relu', padding='same', name='set1_conv2'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2),strides=(2, 2), name='set1_pool'))
    
    model.add(Conv2D(64, kernel_size=ksize, activation='relu', padding='same', name='set2_conv1'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=ksize, activation='relu', padding='same', name='set2_conv2'))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=ksize, activation='relu', padding='same', name='set2_conv3'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2),name='set2_pool'))

    model.add(Flatten())
    model.add(Dense(1024, activation='relu', name='fc1'))
    model.add(BatchNormalization())
    model.add(Dropout(dropout))
    model.add(Dense(512, activation='relu' , name='fc2'))
    model.add(BatchNormalization())
    model.add(Dropout(dropout))
    model.add(Dense(n_classes, activation='softmax', name='final'))
    model.compile(loss=categorical_crossentropy,
              optimizer=Adam(),
              metrics=['accuracy'])
    return model

In [35]:
del vgglike
K.clear_session()
image_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3] )
batch_size = 32
epochs = 80
vgglike = vggLike(ksize=(3,3))
vgglikeHistory = vgglike.fit(X_train_hist_norm1, y_train_cat, batch_size=batch_size,
              epochs=epochs, verbose=0,shuffle=True, validation_data=(X_valid_hist_norm1, y_valid_cat), 
                class_weight=class_weight_dict, callbacks=[logging_callback])

0 0.597767194567 0.822006379494 0.864172335601
2 0.0731623331159 0.973476249347 0.847392290439
4 0.0520341100796 0.981896031495 0.962811791762
6 0.0535931652948 0.983936320009 0.861678004751
8 0.0176450696367 0.993131986551 0.921315192933
10 0.0153261176096 0.993936607374 0.963718820916
12 0.0322363908751 0.989769820999 0.974603174684
14 0.0238003399633 0.992700939682 0.972562358277
16 0.00594484336654 0.99775855628 0.976417233749
18 0.00721026885841 0.997011408401 0.984126984127
20 0.0189481068547 0.994741228196 0.977551020597
22 0.0104353533608 0.996609097963 0.97596371901
24 0.0101688605366 0.99678151671 0.985034013605
26 0.00821094157116 0.997126354206 0.974149659864


KeyboardInterrupt: 

In [36]:
vgglike.save('models/vgglike_hist_normed1')

In [39]:
score = vgglike.evaluate(X_test_hist_norm1, y_test_cat, verbose=0)
print(score)

[0.20368701582887203, 0.96476642915585265]


In [18]:
# datagenerator = image.ImageDataGenerator(
#     rotation_range=30.,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.5,
#     zoom_range=0.3,
#     fill_mode='nearest',
#     horizontal_flip=False,
#     vertical_flip=False
# )
# valid_generator = image.ImageDataGenerator(
# )


datagen = image.ImageDataGenerator(
    featurewise_center=False,
    featurewise_std_normalization=False,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    shear_range=0.1,
    rotation_range=10.)



In [52]:
train_generator = datagenerator.flow(X_train_hist_norm1, y_train_cat)
validation_generator = valid_generator.flow(X_valid_hist_norm1, y_valid_cat)


In [28]:
# del vgglike
K.clear_session()
image_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3] )
batch_size = 32
epochs = 20
vgglike = vggLike(ksize=(3,3))

vgglike.fit_generator(datagen.flow(X_train_hist_norm1, y_train_cat, batch_size=batch_size),
                    steps_per_epoch=X_train_hist_norm1.shape[0],
                    epochs=epochs,
                      verbose=0,
                    validation_data=(X_valid_hist_norm1, y_valid_cat),
                    callbacks=[logging_callback,
                               ModelCheckpoint('models/vgglike_image_gen2.h5', save_best_only=True)
                              ])

# vgglike.fit_generator(
#     train_generator,
#     steps_per_epoch=len(X_train_hist_norm1) // batch_size,
#     epochs=epochs, 
#     verbose=0,shuffle=True, 
#     class_weight=class_weight_dict,
#     validation_data=validation_generator,
#     validation_steps=800 // batch_size,
#     callbacks=[logging_callback])

0 1.06402726292 0.689416362551 0.883446712397
2 0.183936847253 0.941061524784 0.965079365079
4 0.12060290602 0.962182821345 0.978458050076
6 0.0866339539031 0.972901520186 0.989115646259
8 0.0686426073648 0.978677548205 0.960317460507
10 0.0586404841519 0.981436248168 0.977097505669
12 0.0547620543993 0.983735164804 0.98752834486
14 0.044346064035 0.986752492888 0.987528344671
16 0.03654052597 0.988275525159 0.991383219955
18 0.0392438928912 0.987585850197 0.992743764172


<keras.callbacks.History at 0x7f96ba128c50>

In [29]:
print(vgglike.evaluate(X_test_hist_norm1, y_test_cat, verbose=0))

[0.11079416088137381, 0.97205067301966896]


In [30]:
epochs = 60
vgglike.fit_generator(datagen.flow(X_train_hist_norm1, y_train_cat, batch_size=batch_size),
                    steps_per_epoch=X_train_hist_norm1.shape[0],
                    epochs=epochs,
                      verbose=0,
                    validation_data=(X_valid_hist_norm1, y_valid_cat),
                    callbacks=[logging_callback,
                               ModelCheckpoint('models/vgglike_image_gen3.h5', save_best_only=True)
                              ])


0 0.0303572981815 0.990085922037 0.985487528345
2 0.0256590855101 0.991321589701 0.991836734694
4 0.0244580753975 0.992499784477 0.988888888889
6 0.0298349802263 0.990689387653 0.990022675737
8 0.0289730039021 0.991235380327 0.993424036281
10 0.0234103601955 0.992758412598 0.992290249622
12 0.0242696679092 0.992471048046 0.989115646259
14 0.0211164894733 0.993907870916 0.993650793651
16 0.0184709816037 0.993907870916 0.993197279101
18 0.0177420368183 0.994568809449 0.995238095238
20 0.0198395129052 0.994281444869 0.994557823129
22 0.0176456950333 0.994482600075 0.995464852608
24 0.0174676853557 0.994597545936 0.996145124717
26 0.0149277747416 0.99566079485 0.984580499055
28 0.0134316799115 0.995976895888 0.994557823129
30 0.0152622894274 0.995430903187 0.994784580499
32 0.0140344789819 0.995373430271 0.992290249433
34 0.012915414671 0.99609184172 0.994557823129
36 0.0132369597519 0.995833213598 0.992063492063
38 0.0148237850656 0.995143538635 0.99387755102
40 0.0130026365038 0.99623552

<keras.callbacks.History at 0x7f96b9f44eb8>

In [31]:
print(vgglike.evaluate(X_test_hist_norm1, y_test_cat, verbose=0))

[0.078267840725359139, 0.98456057011845177]


In [8]:
# test images from the wild
from scipy.misc import imread, imresize

img1 = imread('data/traffic_sign1.jpg')
img2 = imread('data/traffic_sign14.jpg')
img3 = imread('data/traffic_sign0.jpg')
img4 = imread('data/traffic_sign17.jpg')
img5 = imread('data/traffic_sign35.jpg')
img6 = imread('data/traffic_sign2.jpg')
img7 = imread('data/traffic_sign22.jpg')
img8 = imread('data/traffic_sign4.png')

In [11]:

imglist = [img1, img2, img3, img4, img5, img6, img7, img8]
test_imgs = []
for i, img in enumerate(imglist):
    test_imgs.append(imresize(img,( 32,32)))
test_imgs_arr = np.array(test_imgs)
test_imgs_classes = np.array([1,14,0, 17,35, 2, 22,4])

In [13]:
X_test2_hist = normalise1(test_imgs_arr)
n_classes = 43
y_test2_cat = utils.to_categorical(test_imgs_classes, n_classes)

np.save('data/X_test2_hist_normed.npy',X_test2_hist )
np.save('data/y_test2_cat.npy', y_test2_cat)