# Machine Learning - Hw 2

# Multi-class Weather Classification


##Import 

Import libraries and print some versions.

To use GPU, set `Edit / Notebook settings / Hardware accelerator` to **GPU**.

In [0]:
%tensorflow_version 1.x

import numpy as np
import tensorflow as tf
import keras
import warnings

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten,\
                         Conv2D, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D,\
                         UpSampling2D
from keras.layers.normalization import BatchNormalization
from keras import regularizers
from keras import optimizers
from keras import callbacks

warnings.filterwarnings('ignore')

print("Tensorflow version %s" %tf.__version__)

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Tensorflow version 1.15.0
Found GPU at: /device:GPU:0


## Load data

Get data from  https://drive.google.com/drive/folders/1UzH28Q8xki8_DMYdDgHxi40-CJ800Kaq





In [29]:
import os
from google.colab import drive
from keras.preprocessing.image import ImageDataGenerator
drive.mount('/content/drive')

datadir = '/content/drive/My Drive/Hw2-ML'
trainingset = datadir + '/MWI-Dataset-1.1_3200/'
testset = datadir + '/MWI-testset/'

batch_size = 32
input_shape = ()
train_datagen = ImageDataGenerator(
    rescale = 1. / 255,\
    zoom_range=0.1,\
    rotation_range=45,\
    width_shift_range=0.1,\
    height_shift_range=0.1,\
    horizontal_flip=True,\
    vertical_flip=False)

train_generator = train_datagen.flow_from_directory(
    directory=trainingset,
    target_size=(200, 200),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True
)

test_datagen = ImageDataGenerator(
    rescale = 1. / 255)

test_generator = test_datagen.flow_from_directory(
    directory=testset,
    target_size=(200, 200),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False
)

num_samples = train_generator.n
num_classes = train_generator.num_classes
input_shape = train_generator.image_shape

classnames = [k for k,v in train_generator.class_indices.items()]

print("Image input %s" %str(input_shape))
print("Classes: %r" %classnames)

print('Loaded %d training samples from %d classes.' %(num_samples,num_classes))
print('Loaded %d test samples from %d classes.' %(test_generator.n,test_generator.num_classes))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 3200 images belonging to 4 classes.
Found 800 images belonging to 4 classes.
Image input (200, 200, 3)
Classes: ['HAZE', 'RAINY', 'SNOWY', 'SUNNY']
Loaded 3200 training samples from 4 classes.
Loaded 800 test samples from 4 classes.


## Show random image



In [0]:
import matplotlib.pyplot as plt

n = 3
x,y = train_generator.next()
# x,y size is train_generator.batch_size

for i in range(0,n):
    image = x[i]
    label = y[i].argmax()  # categorical from one-hot-encoding
    print(classnames[label])
    plt.imshow(image)
    plt.show()


## Pre-Trained model


In [0]:
import os
from keras.models import load_model

models_dir = datadir + '/models/'

ValerioNet1 = 'MWI_ValerioNet1'
ValerioNet2 = 'MWI_ValerioNet2'
Tansfer_ValerioNet1 = 'MWI_ValerioNet1_Transfer'
Tansfer_ValerioNet2 = 'MWI_ValerioNet2_Transfer'

def loadmodel(problem):
    filename = os.path.join(models_dir, '%s.h5' %problem)
    try:
        model = load_model(filename)
        print("\nModel loaded successfully from file %s\n" %filename)
    except OSError:    
        print("\nModel file %s not found!!!\n" %filename)
        model = None
    return model

def loadtransfermodel(problem):
    filename = os.path.join(models_dir, '%s.h5' %problem)
    try:
        transfer_model = load_model(filename)
        print("\nModel loaded successfully from file %s\n" %filename)
    except OSError:    
        print("\nModel file %s not found!!!\n" %filename)
        transfer_model = None
    return transfer_model

model = loadmodel(ValerioNet2)
#transfer_model = loadtransfermodel(Tansfer_ValerioNet1)


Model loaded successfully from file /content/drive/My Drive/Hw2-ML/models/MWI_ValerioNet2.h5



##Transfer Learning

In [0]:
from keras import applications
from keras.models import Model, Input


def load_backbone_net(input_shape):
    
    # define input tensor
    input0 = Input(shape=input_shape)

    # load a pretrained model on imagenet without the final dense layer
    feature_extractor = applications.vgg16.VGG16(include_top=False, weights='imagenet', input_tensor=input0)
    
    
    feature_extractor = feature_extractor.output
    feature_extractor = Model(input=input0, output=feature_extractor)
    optimizer = 'adam' #alternative 'SGD'

    feature_extractor.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])

    return feature_extractor


def transferNet(feature_extractor, num_classes, output_layer_name, trainable_layers):
    
    # get the original input layer tensor
    input_t = feature_extractor.get_layer(index=0).input

    # set the feture extractor layers as non-trainable
    for idx,layer in enumerate(feature_extractor.layers):
      if layer.name in trainable_layers:
        layer.trainable = True
      else:
        layer.trainable = False

    # get the output tensor from a layer of the feature extractor
    output_extractor = feature_extractor.get_layer(name = output_layer_name).output
    
    #output_extractor = MaxPooling2D(pool_size=(4,4))(output_extractor)

    # flat the output of a Conv layer
    flatten = Flatten()(output_extractor) 
    flatten_norm = BatchNormalization()(flatten)

    # add a Dense layer
    dense = Dropout(0.4)(flatten_norm)
    dense = Dense(200, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    
    # add a Dense layer
    dense = Dropout(0.4)(dense)
    dense = Dense(100, activation='relu')(dense)
    dense = BatchNormalization()(dense)

    # add the final output layer
    dense = BatchNormalization()(dense)
    dense = Dense(num_classes, activation='softmax')(dense)
    

    model = Model(input=input_t, output=dense, name="transferNet")
    
    optimizer = 'adam' #alternative 'SGD'
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=optimizer, metrics=['accuracy'])

    return model

# load the pre-trained model
feature_extractor = load_backbone_net(input_shape)
feature_extractor.summary()


# choose the layer from which you can get the features (block5_pool the end, glob_pooling to get the pooled version of the output)
name_output_extractor = "block5_pool"
trainable_layers = ["block5_conv3"]

# build the transfer model
transfer_model = transferNet(feature_extractor, num_classes, name_output_extractor, trainable_layers)
transfer_model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 200, 200, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 200, 200, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 200, 200, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 100, 100, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 100, 100, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 100, 100, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 50, 50, 128)       0   

In [22]:
# fit the transferNet on the training data
#stopping = callbacks.EarlyStopping(monitor='val_acc', patience=3)

steps_per_epoch = train_generator.n//train_generator.batch_size
val_steps = test_generator.n//test_generator.batch_size+1

try:
    history_transfer = transfer_model.fit_generator(train_generator, epochs=50, verbose=1,\
                    steps_per_epoch=steps_per_epoch,\
                    validation_data=test_generator,\
                    validation_steps=val_steps)
except KeyboardInterrupt:
    pass


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Save the transfer model

In [24]:
import os

models_dir = '/content/drive/My Drive/Hw2-ML/models/'

def savemodel(model,problem):
    filename = os.path.join(models_dir, '%s.h5' %problem)
    transfer_model.save(filename)
    print("\nModel saved successfully on file %s\n" %filename)

# Save the model
savemodel(transfer_model,'MWI_ValerioNet2_Transfer')


Model saved successfully on file /content/drive/My Drive/Hw2-ML/models/MWI_ValerioNet2_Transfer.h5



## MWI-Dataset Evaluation Scores


Accuracy

In [25]:
test_generator = test_datagen.flow_from_directory(
    directory=testset,
    target_size=(200, 200),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False,
)
val_steps=test_generator.n//test_generator.batch_size+1
loss, acc = transfer_model.evaluate_generator(test_generator,verbose=1,steps=val_steps)
print('Test loss: %f' %loss)
print('Test accuracy: %f' %acc)

Found 800 images belonging to 4 classes.
Test loss: 0.658900
Test accuracy: 0.835337


Confusion matrix analysis

In [26]:
import sklearn.metrics 
from sklearn.metrics import classification_report, confusion_matrix

val_steps=(test_generator.n//test_generator.batch_size+1)-1

test_generator = test_datagen.flow_from_directory(
    directory=testset,
    target_size=(200, 200),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False
)

# predictions from transferNet
preds = transfer_model.predict_generator(test_generator,verbose=1,steps=val_steps)

Ypred = np.argmax(preds, axis=1)
Ytest = test_generator.classes  # shuffle=False in test_generator

cm = confusion_matrix(Ytest, Ypred)
print(cm)

conf = [] # data structure for confusions: list of (i,j,cm[i][j])
for i in range(0,cm.shape[0]):
  for j in range(0,cm.shape[1]):
    if (i!=j and cm[i][j]>0):
      conf.append([i,j,cm[i][j]])

col=2
conf = np.array(conf)
conf = conf[np.argsort(-conf[:,col])]  # decreasing order by 3-rd column (i.e., cm[i][j])

print('%-16s     %-16s  \t%s \t%s ' %('True','Predicted','errors','err %'))
print('------------------------------------------------------------------')
for k in conf:
  print('%-16s ->  %-16s  \t%d \t%.2f %% ' %(classnames[k[0]],classnames[k[1]],k[2],k[2]*100.0/test_generator.n))
  

Found 800 images belonging to 4 classes.
[[174   3   9  14]
 [  7 152  33   8]
 [  9  17 174   0]
 [ 12   9  13 166]]
True                 Predicted         	errors 	err % 
------------------------------------------------------------------
RAINY            ->  SNOWY             	33 	4.12 % 
SNOWY            ->  RAINY             	17 	2.12 % 
HAZE             ->  SUNNY             	14 	1.75 % 
SUNNY            ->  SNOWY             	13 	1.62 % 
SUNNY            ->  HAZE              	12 	1.50 % 
HAZE             ->  SNOWY             	9 	1.12 % 
SNOWY            ->  HAZE              	9 	1.12 % 
SUNNY            ->  RAINY             	9 	1.12 % 
RAINY            ->  SUNNY             	8 	1.00 % 
RAINY            ->  HAZE              	7 	0.88 % 
HAZE             ->  RAINY             	3 	0.38 % 


Precision, Recall, F-score

In [27]:
import sklearn.metrics 
from sklearn.metrics import classification_report, confusion_matrix

val_steps=(test_generator.n//test_generator.batch_size+1)

test_generator = test_datagen.flow_from_directory(
    directory=testset,
    target_size=(200 , 200),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False,
)

preds = transfer_model.predict_generator(test_generator,verbose=1,steps=val_steps-1)

Ypred = np.argmax(preds, axis=1)
Ytest = test_generator.classes  # shuffle=False in test_generator

print(classification_report(Ytest, Ypred, labels=None, target_names=classnames, digits=3))

Found 800 images belonging to 4 classes.
              precision    recall  f1-score   support

        HAZE      0.861     0.870     0.866       200
       RAINY      0.840     0.760     0.798       200
       SNOWY      0.760     0.870     0.811       200
       SUNNY      0.883     0.830     0.856       200

    accuracy                          0.833       800
   macro avg      0.836     0.833     0.833       800
weighted avg      0.836     0.833     0.833       800



## SMART-I Evaluation Scores

In [28]:
import sklearn.metrics 
from sklearn.metrics import classification_report, confusion_matrix

smarti_test = datadir + '/SMART-I_Dataset'

test_generator = test_datagen.flow_from_directory(
    directory=smarti_test,
    target_size=(200, 200),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False,
)

val_steps=test_generator.n//test_generator.batch_size+1


############## ACCURACY ##############
loss, acc = transfer_model.evaluate_generator(test_generator,verbose=1,steps=val_steps)
print('SMART-I loss: %f' %loss)
print('SMART-I accuracy: %f' %acc)

preds = transfer_model.predict_generator(test_generator,verbose=1,steps=val_steps)

Ypred = np.argmax(preds, axis=1)
Ytest = test_generator.classes  # shuffle=False in test_generator

############## PRECISION, RECALL, F-SCORE ##############
print(classification_report(Ytest, Ypred, labels=None, target_names=classnames, digits=3))

############## CONFUSION MATRIX ##############

cm = confusion_matrix(Ytest, Ypred)

print(cm)

conf = [] # data structure for confusions: list of (i,j,cm[i][j])
for i in range(0,cm.shape[0]):
  for j in range(0,cm.shape[1]):
    if (i!=j and cm[i][j]>0):
      conf.append([i,j,cm[i][j]])

col=2
conf = np.array(conf)
conf = conf[np.argsort(-conf[:,col])]  # decreasing order by 3-rd column (i.e., cm[i][j])

print('%-16s     %-16s  \t%s \t%s ' %('True','Predicted','errors','err %'))
print('------------------------------------------------------------------')
for k in conf:
  print('%-16s ->  %-16s  \t%d \t%.2f %% ' %(classnames[k[0]],classnames[k[1]],k[2],k[2]*100.0/test_generator.n))

Found 3038 images belonging to 4 classes.
SMART-I loss: 4.196744
SMART-I accuracy: 0.437788
              precision    recall  f1-score   support

        HAZE      0.000     0.000     0.000         0
       RAINY      0.262     0.424     0.324       521
       SNOWY      0.587     0.716     0.645      1421
       SUNNY      0.708     0.084     0.150      1096

    accuracy                          0.438      3038
   macro avg      0.389     0.306     0.280      3038
weighted avg      0.575     0.438     0.411      3038

[[   0    0    0    0]
 [  83  221  208    9]
 [ 194  181 1017   29]
 [  54  442  508   92]]
True                 Predicted         	errors 	err % 
------------------------------------------------------------------
SUNNY            ->  SNOWY             	508 	16.72 % 
SUNNY            ->  RAINY             	442 	14.55 % 
RAINY            ->  SNOWY             	208 	6.85 % 
SNOWY            ->  HAZE              	194 	6.39 % 
SNOWY            ->  RAINY             	181 