# Scene Classifier

This is my attempt to create a scene classifier in TensorFlow using Convolutional Neural Networks.


## Data Loading and Preprocessing

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import glob

def load_dataset():
    train_path = "Dataset/seg_train"
    cv_path = "Dataset/seg_test"
    pred_path = "Dataset/seg_pred"

    classes = ["buildings", "forest", "glacier", "mountain", "sea", "street"]

    train_paths = []
    train_labels = []
    for i in range(len(classes)):
        fname = train_path + "/" + classes[i] #Dataset/seg_train/class[i]
        for x in glob.iglob(fname + "/*.jpg"):
            train_paths.append(x)
            train_labels.append(i)
  
    cv_paths = []
    cv_labels = []
    for i in range(len(classes)):
        fname = cv_path + "/" + classes[i] 
        for x in glob.iglob(fname + "/*.jpg"):
            cv_paths.append(x)
            cv_labels.append(i)

    pred_paths = []
    for x in glob.iglob(pred_path + "/*.jpg"):
        pred_paths.append(x)

    df_train = pd.DataFrame(list(zip(train_paths, train_labels)), columns=['X', 'Y']).sample(frac=1).reset_index(drop=True)
    df_cv = pd.DataFrame(list(zip(cv_paths, cv_labels)), columns=['X', 'Y']).sample(frac=1).reset_index(drop=True)
    df_pred = pd.DataFrame(pred_paths).sample(frac=1).reset_index(drop=True)

    return (df_train, df_cv, df_pred)


    
    



In [3]:
df_train, df_cv, df_test = load_dataset()
df_train


Unnamed: 0,X,Y
0,Dataset/seg_train/mountain/14437.jpg,3
1,Dataset/seg_train/forest/6578.jpg,1
2,Dataset/seg_train/buildings/19969.jpg,0
3,Dataset/seg_train/forest/11072.jpg,1
4,Dataset/seg_train/forest/8236.jpg,1
...,...,...
14029,Dataset/seg_train/glacier/3003.jpg,2
14030,Dataset/seg_train/buildings/1630.jpg,0
14031,Dataset/seg_train/sea/1394.jpg,4
14032,Dataset/seg_train/glacier/12358.jpg,2


In [4]:
from PIL import Image

def data_generator(dataframe, batch_size=16):
    m = dataframe.shape[0]
    i = 0
    while True:
        if i == m:
            i = 0
        X_batch = []
        Y_batch = []
        for j in range(batch_size):
            fname = dataframe.iloc[i, 0]
            label = dataframe.iloc[i, 1]
            y = np.zeros(6)
            y[label] = 1
            x = Image.open(fname)
            x = x.resize((128, 128))
            x = np.array(x)
            X_batch.append(x)
            Y_batch.append(y)
            i += 1
            if i == m:
                i = 0

        X_batch_np = np.array(X_batch)
        Y_batch_np = np.array(Y_batch)
        yield (X_batch_np, Y_batch_np)

      


## Building Our Model

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Activation, Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization, Concatenate
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import mixed_precision

%load_ext tensorboard
import datetime


policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only use the first GPU
    try:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)



2021-06-30 13:37:33.315756: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


2021-06-30 13:37:33.775515: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2021-06-30 13:37:33.794333: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: system has unsupported display driver / cuda driver combination
2021-06-30 13:37:33.794349: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: gpu-server
2021-06-30 13:37:33.794352: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: gpu-server
2021-06-30 13:37:33.794388: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 465.31.0
2021-06-30 13:37:33.794399: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.42.1
2021-06-30 13:37:33.794402: E tensorflow/stream_executor/cuda/cuda_diagnostics.cc:313] kernel version 470.42.1 does not match DSO version 465.31.0 -- c

In [6]:
def inception(x, filters_1x1, filters_3x3, filters_5x5, reg):
    x1 = Conv2D(filters=filters_1x1, kernel_size=(1, 1), strides=(1, 1), padding="same", kernel_initializer="he_uniform", kernel_regularizer=l2(reg))(x)
    x1 = BatchNormalization()(x1)
    x1 = Activation("relu")(x1)
    
    x3 = Conv2D(filters=filters_3x3, kernel_size=(3, 3), strides=(1, 1), padding="same", kernel_initializer="he_uniform", kernel_regularizer=l2(reg))(x)
    x3 = BatchNormalization()(x3)
    x3 = Activation("relu")(x3)
    
    x5 = Conv2D(filters=filters_5x5, kernel_size=(5, 5), strides=(1, 1), padding="same", kernel_initializer="he_uniform", kernel_regularizer=l2(reg))(x)
    x5 = BatchNormalization()(x5)
    x5 = Activation("relu")(x5)
    
    output = Concatenate(axis=3)([x1, x3, x5])
    return output

def scene_classifier(width, height, depth, batch_size, reg=1e-8, drop=0.5):
    input = Input(shape=(width, height, depth))
    x = inception(input, filters_1x1=64, filters_3x3=64, filters_5x5=64, reg=reg)
    x = inception(x, filters_1x1=64, filters_3x3=64, filters_5x5=64, reg=reg)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(drop)(x)

    x = inception(x, filters_1x1=128, filters_3x3=128, filters_5x5=128, reg=reg)
    x = inception(x, filters_1x1=128, filters_3x3=128, filters_5x5=128, reg=reg)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(drop)(x)

    x = inception(x, filters_1x1=256, filters_3x3=256, filters_5x5=256, reg=reg)
    x = inception(x, filters_1x1=256, filters_3x3=256, filters_5x5=256, reg=reg)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(drop)(x)

    x = inception(x, filters_1x1=512, filters_3x3=512, filters_5x5=512, reg=reg)
    x = inception(x, filters_1x1=512, filters_3x3=512, filters_5x5=512, reg=reg)
    x = inception(x, filters_1x1=512, filters_3x3=512, filters_5x5=512, reg=reg)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(drop)(x)

    x = Flatten()(x)
    x = Dense(2048, kernel_regularizer=l2(reg), kernel_initializer="he_uniform")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Dropout(drop)(x)
    
    x = Dense(2048, kernel_regularizer=l2(reg), kernel_initializer="he_uniform")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Dropout(drop)(x)

    x = Dense(2048, kernel_regularizer=l2(reg), kernel_initializer="he_uniform")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Dropout(drop)(x)

    x = Dense(6, kernel_regularizer=l2(reg), kernel_initializer="glorot_uniform")(x)
    output = Activation("softmax")(x)

    model = Model(inputs=input, outputs=output)
    return model

In [None]:
BATCH_SIZE = 32
EPOCHS = 2#20
LEARNING_RATE = 10**np.random.uniform(-8, 0, 1)
REG = 10**np.random.uniform(-8, 0, 1)
DROP = 10**np.random.uniform(0, 1, 1)


tsteps = 10#int(df_train.shape[0]/BATCH_SIZE)
cvsteps = 10#int(df_cv.shape[0]/BATCH_SIZE)

t_gen = data_generator(df_train, batch_size=BATCH_SIZE)
cv_gen = data_generator(df_cv, batch_size=BATCH_SIZE)


log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)


hparams = pd.DataFrame()


with tf.device('/device:GPU:0'):

    for lr in LEARNING_RATE:
        for reg in REG:
            for dp in DROP:
                

                opt = Adam(learning_rate=8e-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08, amsgrad=False)
                model = scene_classifier(128, 128, 3, BATCH_SIZE, reg=0, drop=0)
                model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=['accuracy'])
                training_hist = model.fit(x=t_gen, 
                    batch_size=BATCH_SIZE, 
                    epochs=EPOCHS, 
                    steps_per_epoch=tsteps,  
                    validation_data=cv_gen,
                    validation_steps=cvsteps,
                    validation_batch_size=BATCH_SIZE, 
                    verbose=1,
                    callbacks=[tensorboard_callback])
    
                row = pd.Series(data=[training_hist.history['val_accuracy'][-1], training_hist.history['val_loss'][-1], lr, reg, dp], 
                               index=["val_acc", "val_loss", "learning rate", "L2", "dropout"])
                hparams.append(row, ignore_index=True)
                
                del model
                del opt

                    

2021-06-30 13:36:29.042175: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session initializing.
2021-06-30 13:36:29.042194: I tensorflow/core/profiler/lib/profiler_session.cc:141] Profiler session started.
2021-06-30 13:36:29.042913: I tensorflow/core/profiler/lib/profiler_session.cc:159] Profiler session tear down.
2021-06-30 13:36:29.043421: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-06-30 13:36:29.796256: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2021-06-30 13:36:29.813022: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3693130000 Hz


Epoch 1/2


In [None]:
hparams


In [None]:
%load_ext tensorboard
%tensorboard --logdir logs/fit