In [6]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

In [2]:
# Load the spam/ham corpus and discard the leftover CSV index column in one step.
df = pd.read_csv('data/spam_ham_dataset.csv').drop(columns=['Unnamed: 0'])

In [3]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
dset_train, dset_test, gt_train, gt_test = train_test_split(
    df['text'].values,
    df['label_num'].values,
    test_size=0.2,
    random_state=42,
)

In [4]:
def convert_data(strData, dtype=np.float64):
    """One-hot encode each text onto a 256 x 256 grid with 256 channels.

    Character ``j`` of a text is written at row ``j // 256``, column
    ``j % 256``; its code point selects which of the 256 channels is set to 1.

    Parameters
    ----------
    strData : sequence of str
        Texts to encode (for example a 1-D NumPy array of strings).
    dtype : numpy dtype, optional
        Element type of the returned array. Defaults to ``np.float64``, which
        is what the array used before this parameter existed. Every sample
        holds 256**3 values (128 MiB at float64), so passing a narrower dtype
        such as ``np.float32`` reduces memory use proportionally.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(strData), 256, 256, 256)`` containing 0s and 1s.

    Notes
    -----
    * Only the first 256 * 256 characters of a text fit on the grid; anything
      beyond that is dropped (previously this raised ``IndexError``).
    * Characters whose code point is 256 or higher have no channel; their grid
      position is left all-zero (previously this raised ``IndexError``).
    """
    side = 256
    capacity = side * side  # number of character slots on one grid

    x_nums = np.zeros((len(strData), side, side, side), dtype=dtype)
    for i, text in enumerate(strData):
        clipped = text[:capacity]
        codes = np.fromiter(map(ord, clipped), dtype=np.int64, count=len(clipped))
        positions = np.arange(codes.size)

        # Drop characters that do not map onto one of the 256 channels, while
        # keeping the grid positions of the remaining characters unchanged.
        keep = codes < side
        rows = positions[keep] // side
        cols = positions[keep] % side
        x_nums[i, rows, cols, codes[keep]] = 1
    return x_nums

In [5]:
# Encode both splits as dense one-hot tensors. convert_data allocates a
# (256, 256, 256) array per sample, so this is the memory-heavy step.
x_train = convert_data(strData=dset_train)
x_test = convert_data(strData=dset_test)

In [15]:
def create_model():
    """Build and compile the convolutional binary classifier.

    The network takes a (256, 256, 256) input and applies five convolutional
    stages with 32, 64, 128, 256 and 512 filters. Each stage is a stride-2
    3x3 convolution followed by a stride-1 3x3 convolution (both ReLU,
    'same' padding) and batch normalization, so the spatial size goes
    256 -> 128 -> 64 -> 32 -> 16 -> 8. The 8x8 map is average-pooled,
    flattened, passed through two 256-unit ReLU dense layers, and ends in a
    single sigmoid unit.

    Returns:
        A keras Model compiled with binary cross-entropy loss and
        Adam at a learning rate of 1e-3.
    """
    inputs = keras.layers.Input(shape=(256, 256, 256))

    features = inputs
    for n_filters in (32, 64, 128, 256, 512):
        # The stride-2 conv halves height and width; the stride-1 conv keeps them.
        for step in (2, 1):
            features = keras.layers.Conv2D(
                n_filters,
                (3, 3),
                strides=step,
                activation='relu',
                padding='same',
            )(features)
        features = keras.layers.BatchNormalization()(features)

    # An 8x8 pool over the final 8x8 map leaves one value per channel.
    pooled = keras.layers.AveragePooling2D(8)(features)
    head = keras.layers.Flatten()(pooled)

    for _ in range(2):
        head = keras.layers.Dense(256, activation='relu')(head)

    probability = keras.layers.Dense(1, activation='sigmoid')(head)

    model = keras.models.Model(inputs=[inputs], outputs=[probability])
    model.compile(
        loss=keras.losses.binary_crossentropy,
        optimizer=keras.optimizers.Adam(1e-3),
    )

    return model

In [16]:
# Build the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 256, 256, 256)]   0         
                                                                 
 conv2d_40 (Conv2D)          (None, 128, 128, 32)      73760     
                                                                 
 conv2d_41 (Conv2D)          (None, 128, 128, 32)      9248      
                                                                 
 batch_normalization_20 (Bat  (None, 128, 128, 32)     128       
 chNormalization)                                                
                                                                 
 conv2d_42 (Conv2D)          (None, 64, 64, 64)        18496     
                                                                 
 conv2d_43 (Conv2D)          (None, 64, 64, 64)        36928     
                                                           