<a href="https://colab.research.google.com/github/vahid-khazaei-nezhad/Data_Science_homework/blob/main/My_Task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing libraries

In [None]:
pip install plot_keras_history

In [None]:
pip install keras.utils

In [None]:
pip install keras_tuner

# Importing Libraries

In [None]:
import numpy as np

import cv2

#from keras import regularizers
from keras.models import Sequential
#from keras.preprocessing import image
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from keras.layers import BatchNormalization
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import optimizers
from keras_tuner import HyperParameters, BayesianOptimization
from keras import Input

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from plot_keras_history import show_history, plot_history

print('Libraries imported successfully')

Libraries imported successfully


# Loading dataset

In [None]:
# Mount Google Drive at /content/drive; the dataset files loaded in the next cell live
# under that mount point. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Both arrays sit in the same Drive folder; keeping the folder in one constant means the
# notebook can be re-pointed at another location by editing a single line.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks'

# Image stack: one 2-D array per sample.
data = np.load(f'{DATA_DIR}/mnist_2digits_10K.npy')

# Labels: one row per sample; the first two entries of each row are used further down.
labels = np.load(f'{DATA_DIR}/mnist_2digits_10K_labels.npy')

# Fail fast if images and labels are misaligned, instead of training on shifted targets.
assert len(data) == len(labels), (
    f'data/labels length mismatch: {len(data)} images vs {len(labels)} label rows'
)

# Checking the dataset shape and dtype

In [None]:
# Dimensions of the image stack.
np.shape(data)

(10000, 32, 32)

In [None]:
# Element type of the image array.
data.dtype

dtype('float32')

In [None]:
# Pixel value range, shown as (largest, smallest).
data.max(), data.min()

(1.0, 0.0)

# Visualizing a sample from dataset with its label

# Doing Machine Learning (CNN)

## Concatenating labels (two columns) into one target column

In [None]:
# Order-insensitive key per sample: sort the two digits of a label row, then glue their
# string forms together.
sorted_pairs = (sorted([row[0], row[1]]) for row in labels)
combined_labels = [str(low) + str(high) for low, high in sorted_pairs]

# Distinct keys as returned by np.unique; the position of a key becomes its class index.
target_label = np.unique(combined_labels)
target_map = {key: position for position, key in enumerate(target_label)}

# Replace every string key by its integer class index.
combined_labels = [target_map[key] for key in combined_labels]

# Class names, listed in class-index order.
target = list(target_map)

## Splitting dataset into test & train with its labels

In [None]:
test_percent = 0.1

number_of_classes = len(target)

data_length = len(data)

# Compute the boundary once instead of repeating `-int(...)` in four slices. An explicit
# front index also stays correct when the test share rounds down to 0: the old
# `data[:-0]` form would silently produce an EMPTY training set in that case.
test_size = int(data_length * test_percent)
split_index = data_length - test_size

# The trailing `test_size` samples are held out; sample order is preserved (no shuffling).
x_train, x_test = data[:split_index], data[split_index:]
y_train_ids, y_test_ids = combined_labels[:split_index], combined_labels[split_index:]

# One-hot encode the integer class indices for the categorical cross-entropy loss.
y_train = to_categorical(y_train_ids, num_classes=number_of_classes, dtype='float32')
y_test = to_categorical(y_test_ids, num_classes=number_of_classes, dtype='float32')

print('Train: ', x_train.shape, end= ', labels: ')
print(y_train.shape)
print('Test:  ', x_test.shape, end= ', labels: ')
print(y_test.shape)

Train:  (9000, 32, 32), labels: (9000, 45)
Test:   (1000, 32, 32), labels: (1000, 45)


## Building CNN Model with keras tuner options

In [None]:
def build_model(hp):
    """Build and compile a CNN classifier whose architecture is drawn from `hp`.

    Layout: an input of shape (32, 32, 1), one Conv2D/MaxPooling2D stage, one or two
    further Conv2D/MaxPooling2D stages (each stage optionally followed by
    BatchNormalization), Flatten, two to five Dense layers (each optionally followed by
    Dropout), and a softmax output with one unit per entry of the module-level `target`.

    Args:
        hp: hyperparameter container handed in by the tuner; only `hp.Int`, `hp.Float`,
            `hp.Choice` and `hp.Boolean` are called on it.

    Returns:
        The compiled model (SGD optimizer, categorical cross-entropy loss, accuracy
        metric). The parameter count is printed as a side effect on every call.
    """
    number_of_classes = len(target)

    model = Sequential()

    # Declare the input with an explicit Input object rather than the `input_shape=`
    # keyword on the first layer, which newer Keras releases warn against.
    model.add(Input(shape=(32, 32, 1)))

    # --- first convolutional stage -------------------------------------------------
    model.add(
        Conv2D(
            hp.Int("filters0", min_value=32, max_value=128, step=8),
            (3, 3),
            activation='relu',
            padding=hp.Choice('Padding0', ['same', 'valid']),
        )
    )
    model.add(MaxPooling2D(pool_size=(2, 2)))

    if hp.Boolean("batchNormalization"):
        model.add(BatchNormalization())

    # --- one or two additional convolutional stages --------------------------------
    for i in range(hp.Int("cnn_layers", min_value=1, max_value=2)):
        model.add(
            Conv2D(
                hp.Int(f"filters1_{i}", min_value=32, max_value=128, step=8),
                (3, 3),
                activation='relu',
                # NOTE(review): 'Padding1' carries no layer index, so every additional
                # conv layer requests the same hyperparameter name, unlike the per-layer
                # filters / batch-norm switches. Confirm a shared padding is intended.
                padding=hp.Choice('Padding1', ['same', 'valid']),
            )
        )
        model.add(MaxPooling2D(pool_size=(2, 2)))

        if hp.Boolean(f"batchNormalization_{i}"):
            model.add(BatchNormalization())

    model.add(Flatten())

    # --- fully connected head ------------------------------------------------------
    num_of_layers = hp.Int('num_layers', min_value=2, max_value=5)

    for i in range(num_of_layers):
        model.add(
            Dense(
                units=hp.Int(f"units_{i}", min_value=1024, max_value=4096, step=1024),
                activation=hp.Choice(f'activation2_{i}', ['relu', 'tanh', 'sigmoid']),
            )
        )

        # The rate is requested before the on/off switch, so it is part of the search
        # space even for layers that end up without a Dropout layer.
        drop_rate = hp.Float(f'dropout_value_{i}', min_value=0.2, max_value=0.5, sampling="log")

        if hp.Boolean(f"dropout_{i}"):
            model.add(Dropout(rate=drop_rate))

    model.add(Dense(number_of_classes, activation="softmax"))

    # --- optimizer -----------------------------------------------------------------
    learning_rate = hp.Float("lr", min_value=1e-3, max_value=1e-2, sampling="log")
    momentum = hp.Float("momentum", min_value=0.7, max_value=0.9, sampling="log")
    nesterov = hp.Boolean("nesterov")

    model.compile(
        optimizer=optimizers.SGD(learning_rate=learning_rate, momentum=momentum, nesterov=nesterov),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    print('# paramters: ', model.count_params())

    return model

## Tuning model

In [None]:
# Bayesian hyperparameter search over `build_model`, scored on validation loss.
# Arguments are spelled out by keyword so the objective and the trial budget are
# recognisable without looking up the constructor signature.
tuner = BayesianOptimization(
    hypermodel=build_model,
    objective='val_loss',
    max_trials=20,
    num_initial_points=2,
    alpha=0.0001,
    beta=2.6,
    seed=23,
    hyperparameters=HyperParameters(),
    overwrite=True,
    project_name="InaitTask",
)

# Every trial trains for 30 epochs, with 20% of the training data set aside for validation.
tuner.search(x_train, y_train, epochs=30, validation_split=0.2, verbose=2)

Number of paramters:  3203469

Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
120               |?                 |filters0
valid             |?                 |Padding0
False             |?                 |batchNormalization
2                 |?                 |cnn_layers
96                |?                 |filters1_0
same              |?                 |Padding1
True              |?                 |batchNormalization_0
4                 |?                 |num_layers
1024              |?                 |units_0
relu              |?                 |activation2_0
0.33728           |?                 |dropout_value_0
False             |?                 |dropout_0
3072              |?                 |units_1
tanh              |?                 |activation2_1
0.37362           |?                 |dropout_value_1
False             |?                 |dropout_1
0.0016213         |?                 |lr
0.78551           |?                 |momentu

## Choose the two best models

In [None]:
# Fetch the two best models found by the search, best first.
# The original cell bound the result to `models` but then used the undefined names
# `model` and `best_model`, so it raised NameError on a fresh kernel.
models = tuner.get_best_models(num_models=2)
best_model = models[0]

# `build` expects the full batch shape, so the leading batch dimension is given as None.
best_model.build(input_shape=(None, 32, 32, 1))

best_model.summary()

## Get results

In [None]:
# Report the outcome of the hyperparameter search recorded by the tuner.
tuner.results_summary()

# Plotting model performance

In [None]:
import datetime

# `history` is never assigned anywhere in this notebook (the search cell discards
# whatever `tuner.search` returns), so the plots below failed with NameError. Obtain a
# History by rebuilding the model from the best hyperparameters and fitting it once more
# with the same settings the search used.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
final_model = build_model(best_hp)
history = final_model.fit(x_train, y_train, epochs=30, validation_split=0.2, verbose=2)

# Timestamp in the file name so repeated runs do not overwrite earlier plots.
ct = datetime.datetime.now()

show_history(history)

plot_history(history, path=f'{int(ct.timestamp())}.png')

plt.close()