In [1]:
import numpy as np
import pandas as pd
import cv2
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization 
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping
import os
from keras.layers.convolutional import MaxPooling2D

In [2]:
# Attach Google Drive to the Colab runtime; the dataset CSV and the saved
# weights used further down all live under '/content/drive/My Drive/'.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [3]:
!pip freeze

absl-py==1.1.0
alabaster==0.7.12
albumentations==0.1.12
altair==4.2.0
appdirs==1.4.4
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
arviz==0.12.1
astor==0.8.1
astropy==4.3.1
astunparse==1.6.3
atari-py==0.2.9
atomicwrites==1.4.0
attrs==21.4.0
audioread==2.1.9
autograd==1.4
Babel==2.10.2
backcall==0.2.0
beautifulsoup4==4.6.3
bleach==5.0.0
blis==0.7.7
bokeh==2.3.3
branca==0.5.0
bs4==0.0.1
CacheControl==0.12.11
cached-property==1.5.2
cachetools==4.2.4
catalogue==2.0.7
certifi==2022.6.15
cffi==1.15.0
cftime==1.6.0
chardet==3.0.4
charset-normalizer==2.0.12
click==7.1.2
cloudpickle==1.3.0
cmake==3.22.5
cmdstanpy==0.9.5
colorcet==3.0.0
colorlover==0.3.0
community==1.0.0b1
contextlib2==0.5.5
convertdate==2.4.0
coverage==3.7.1
coveralls==0.5
crcmod==1.7
cufflinks==0.17.3
cupy-cuda111==9.4.0
cvxopt==1.2.7
cvxpy==1.0.31
cycler==0.11.0
cymem==2.0.6
Cython==0.29.30
daft==0.0.4
dask==2.12.0
datascience==0.10.6
debugpy==1.0.0
decorator==4.4.2
defusedxml==0.7.1
descartes==1.1.0
dill==0.3.5.1
distribu

In [4]:
# Number of one-hot label columns produced by img_label and the width of the
# classifier's softmax layer.
# NOTE(review): the original inline note here read "actually 28" -- confirm
# which class count the dataset really uses.
num_classes = 62
# Side length, in pixels, of each square single-channel image.
img_size = 28


def img_label(data_path, num_classes=62):
    """Load an image CSV and return scaled images plus one-hot labels.

    Each row of the CSV (no header row) must hold the integer class label in
    column 0 followed by ``img_size * img_size`` pixel values.

    Parameters
    ----------
    data_path : str
        Path of the CSV file to read.
    num_classes : int, optional
        Width of the one-hot label vectors (default 62).

    Returns
    -------
    imgs : numpy.ndarray
        float32 array of shape ``(rows, img_size, img_size, 1)`` holding the
        pixel values divided by 255.
    labels : numpy.ndarray
        ``(rows, num_classes)`` one-hot matrix built by ``to_categorical``.

    Raises
    ------
    ValueError
        If a row does not contain exactly ``1 + img_size * img_size`` columns.
    """
    data = pd.read_csv(data_path, header=None)
    values = data.values
    data_rows = len(data)

    expected_cols = 1 + img_size * img_size
    if values.shape[1] != expected_cols:
        raise ValueError(
            "expected %d columns (label + %dx%d pixels) but %r has %d"
            % (expected_cols, img_size, img_size, data_path, values.shape[1])
        )

    # Cast to float32 before scaling: dividing the integer frame directly
    # yields float64, doubling memory for data Keras trains on as float32.
    pixels = values[:, 1:].astype(np.float32).reshape(data_rows, img_size, img_size, 1)

    # Per the original author's note, the CSV stores each image such that it
    # must be transposed before training, so swap the two spatial axes.
    imgs = np.transpose(pixels, axes=[0, 2, 1, 3]) / 255

    labels = to_categorical(values[:, 0], num_classes)

    return imgs, labels

In [5]:
# Convolutional classifier for the single-channel character images.
# The input size and the number of output classes come from the `img_size` and
# `num_classes` constants defined with img_label, so the network cannot drift
# out of sync with the data loader.
#
# BatchNormalization follows every convolution: dividing the pixels by 255 only
# scales the network *input*, whereas BatchNormalization re-normalises the
# activations each layer produces during training.
model = Sequential([
    # Stage 1: two 3x3 convolutions, then a strided 5x5 convolution that halves
    # the spatial size in place of a pooling layer (28 -> 26 -> 24 -> 12).
    Conv2D(32, kernel_size=3, activation='relu', input_shape=(img_size, img_size, 1)),
    BatchNormalization(),
    Conv2D(32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'),
    BatchNormalization(),
    Dropout(0.4),

    # Stage 2: same pattern with 64 filters (12 -> 10 -> 8 -> 4 for 28x28 input).
    Conv2D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'),
    BatchNormalization(),
    Dropout(0.4),

    # Head: a 4x4 convolution over the remaining map, flattened and fed to the
    # softmax classifier.
    Conv2D(128, kernel_size=4, activation='relu'),
    BatchNormalization(),
    Flatten(),
    Dropout(0.4),
    Dense(num_classes, activation='softmax'),
])

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 batch_normalization (BatchN  (None, 26, 26, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 32)        9248      
                                                                 
 batch_normalization_1 (Batc  (None, 24, 24, 32)       128       
 hNormalization)                                                 
                                                                 
 conv2d_2 (Conv2D)           (None, 12, 12, 32)        25632     
                                                                 
 batch_normalization_2 (Batc  (None, 12, 12, 32)       1

In [6]:
# Augmenting generator for the training subset: applies random rotations
# within a 30-degree range and reserves 10% of the data for validation
# (leaving 90% for training).
data_generator_aug = ImageDataGenerator(
    validation_split=0.1,
    rotation_range=30,
)

In [7]:
# Plain generator with no augmentation, using the same 90% / 10%
# training / validation split fraction.
data_generator = ImageDataGenerator(
    validation_split=0.1,
)

In [8]:
# Read the training CSV from Drive: X receives the scaled images and
# Y the one-hot labels over 62 classes.
X, Y = img_label(
    data_path='/content/drive/My Drive/mosaic/Emoji_Dataset/emnist-byclass-train-to-use.csv',
    num_classes=62,
)

In [9]:
# Save the model to Drive after every epoch; `{epoch:03d}` in the file name
# gives each epoch its own file (save_best_only=False keeps all of them).
# The monitored key must match what Keras logs: with metrics=['accuracy'] the
# validation metric is reported as 'val_accuracy', not 'val_acc'. The wrong key
# is harmless while every epoch is saved, but would make the callback skip all
# saves if save_best_only were ever switched on.
checkpoint = ModelCheckpoint(
    '/content/drive/My Drive/modified_weights-{epoch:03d}.h5',
    monitor='val_accuracy',
    verbose=0,
    save_best_only=False,
    mode='auto',
)

In [10]:
# Original note: total rows = 17*22013 = 374221.
# Training batches come from the augmenting generator and validation batches
# from the plain one; both are created with validation_split=0.1, so the two
# subsets partition the same arrays.
batch_size = 4860
training_data_generator = data_generator_aug.flow(X, Y, batch_size=batch_size, subset='training')
validation_data_generator = data_generator.flow(X, Y, batch_size=batch_size, subset='validation')

# Derive the step counts from the generators instead of hard-coding them:
# len() of a flow iterator is its number of batches per pass, so every epoch
# covers the whole training subset and validation is scored on the complete
# validation subset rather than on one shuffled batch.
history = model.fit(
    training_data_generator,
    steps_per_epoch=len(training_data_generator),
    epochs=10,
    validation_data=validation_data_generator,
    validation_steps=len(validation_data_generator),
    callbacks=[checkpoint],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# Persist the trained model (architecture, weights and compile state) to
# Drive as a single HDF5 file.
model.save(
    filepath='/content/drive/My Drive/model_only_letters_bymerge.h5',
)

In [12]:
# model = load_model('/content/drive/My Drive/model_only_letters.h5',compile=True)

In [13]:
# from PIL import Image
# # test_image=cv2.imread('/content/drive/MyDrive/mosaic/sample_A.jpg',cv2.IMREAD_GRAYSCALE)
# # test_image = test_image.resize(28,28)
# test_image = Image.open('/content/drive/MyDrive/mosaic/sample_7_2.jpg').convert('L')
# test_image = test_image.resize((28,28), Image.ANTIALIAS)
# # test_image.save('greyscale.png')

# test_image

In [14]:
# # test_image=cv2.resize(test_image,(28,28))
# test_image=cv2.bitwise_not(test_image)

In [15]:
# plt.imshow(test_image,cmap='Greys_r')
# plt.show()

In [16]:
# class_mapping='0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
# x = np.reshape(test_image,(28,28,1))/255
# y = [x]
# y = np.array(y)
# result = np.argmax(model.predict(y))
# print(result)
# print(class_mapping[result])
# print(np.amax(model.predict(y)))

In [17]:
# i_predict = model.predict(y)
# for i in i_predict:
#     print(str(i))