# SIMPSONS DATASET

https://www.kaggle.com/alexattia/the-simpsons-characters-dataset/home

In [1]:
!ls ../datasets/the-simpsons-characters-dataset/simpsons_dataset/

abraham_grampa_simpson	  homer_simpson        otto_mann
agnes_skinner		  kent_brockman        patty_bouvier
apu_nahasapeemapetilon	  krusty_the_clown     principal_skinner
barney_gumble		  lenny_leonard        professor_john_frink
bart_simpson		  lionel_hutz	       rainier_wolfcastle
carl_carlson		  lisa_simpson	       ralph_wiggum
charles_montgomery_burns  maggie_simpson       selma_bouvier
chief_wiggum		  marge_simpson        sideshow_bob
cletus_spuckler		  martin_prince        sideshow_mel
comic_book_guy		  mayor_quimby	       simpsons_dataset
disco_stu		  milhouse_van_houten  snake_jailbird
edna_krabappel		  miss_hoover	       troy_mcclure
fat_tony		  moe_szyslak	       waylon_smithers
gil			  ned_flanders
groundskeeper_willie	  nelson_muntz


#### import libraries

In [2]:
from __future__ import print_function
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.activations import elu, 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.optimizers import Adam 


import os
import pickle
import cv2
import random 
import imutils
import numpy as np
from imutils import paths

### BUILD MODEL

In [2]:
class SmallVGGNet:
    @staticmethod
    def build(width, heigth, depth, classes):
        # initialize the model along with the input shape to be
        # "channels last" and the channels dimension itself
        model = Sequential()
        inputShape = (width, heigth, depth)
        chanDim = -1

        # if we are using "channels first", update the input shape and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, width, heigth)
            chanDim = 1

        # CONV => RELU => POOL
        model.add(Conv2D(32, (3,3), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(3,3)))
        model.add(Dropout(0.25))

        # (CONV => RELU) * 2 => POOL
        model.add(Conv2D(64, (3,3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(64, (3,3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))

        # (CONV => RELU) * 2 => POOL
        model.add(Conv2D(128, (3,3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(128, (3,3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.25))

        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        # return the constructed network architecture
        return model

#### Initialize configuration parameters 

In [3]:
#### initial configuration parameters
EPOCHS = 10
INIT_LR = 1e-3
BS = 32
IMAGE_DIMS = (96, 96, 3)


#### initialize data and labels

In [4]:
data = []
labels = []

#### initialize images 

In [8]:
print("[INFO] loading images...")
imagePaths = sorted(list(paths.list_images('../datasets/the-simpsons-characters-dataset/simpsons_dataset/')))
random.seed(42)
random.shuffle(imagePaths)

[INFO] loading images...


AttributeError: module 'pathlib' has no attribute 'list_images'

#### loop over input images 

In [7]:
for imagePath in imagePaths:
    # load the image, pre-process it, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = img_to_array(image)
    data.append(image)

    # extract the class label from the image path and update the labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

In [8]:
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
print("[INFO] data matrix: {:.2f}MB".format(data.nbytes / (1024 * 1000.0)))

[INFO] data matrix: 9043.06MB


#### BINARIZE THE LABELS

In [9]:
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

## PARTITION INTO TRAIN TEST AND VALIDATION 80-20 

In [None]:
(train_X, val_X, train_Y, val_Y) = train_test_split(data, labels, test_size=0.2, random_state=42)

### CONSTRUCT THE IMAGE GENERATOR FOR DATA AUGMENTATION 

In [None]:
aug = ImageDataGenerator(rotation_range=25, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.2,
                         horizontal_flip=True, fill_mode="nearest")

### INITIALIZE MODEL

In [None]:
print("[INFO] compiling model...")
model = SmallVGGNet.build(width=IMAGE_DIMS[1], heigth=IMAGE_DIMS[0],
                          depth=IMAGE_DIMS[2], classes=len(lb.classes_))
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])