In [1]:
############################################### IMPORTS ####################################
############ ALGEBRA #################
import numpy as np  # linear algebra
import pandas as pd  # data processing
import os
import json

############### SKLEARN ###############
from sklearn.model_selection import train_test_split 

############## TENSORFLOW ############
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import regularizers,layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TextVectorization,Conv1D, MaxPooling1D, LSTM, Bidirectional, Dense, BatchNormalization, Dropout

ModuleNotFoundError: No module named 'sklearn'

In [72]:
tf.__version__

'2.12.0'

In [73]:
def map_emotion(value):
    """Translate a numeric class id into its emotion name.

    Ids 0 through 4 map, in order, to 'sadness', 'joy', 'love', 'anger'
    and 'fear'. Every other value falls through to 'Surprise'.
    """
    names_by_id = ('sadness', 'joy', 'love', 'anger', 'fear')
    # Compare with == (rather than indexing) so anything that merely
    # compares equal to an id is accepted, and unknown values reach the fallback.
    for class_id, name in enumerate(names_by_id):
        if value == class_id:
            return name
    return 'Surprise'

In [74]:
######## DATA ############
# Directory that holds the three CSV files read below.
FILE_PATH = '/emotion-dataset'

# Read the train / test / validation CSVs in one pass; the generator is
# consumed in order by the tuple unpacking on the left.
df_train, df_test, df_val = (
    pd.read_csv(os.path.join(FILE_PATH, csv_name))
    for csv_name in ('training.csv', 'test.csv', 'validation.csv')
)

In [75]:
# Pool all three splits into one frame; a fresh evaluation split is drawn
# from this pool later. ignore_index=True gives the result a clean 0..n-1
# index instead of repeating each source file's own row labels.
# NOTE(review): this overwrites df_train with a superset of itself, so
# re-running the cell without re-loading the CSVs appends the test and
# validation rows again.
df_train = pd.concat([df_train, df_test, df_val], axis=0, ignore_index=True)

In [76]:
############### PREPARING DATASET ###################
# Features and labels are kept as single-column DataFrames.
X = df_train[['text']]
y = df_train[['label']]

# Hold out 10% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_eval, y_train, y_eval = train_test_split(X, y, test_size=0.1, random_state=42)

# Multi-category (one-hot) transformation of the integer labels.
# The width is pinned to the 6 emotion classes so both splits always match
# the 6-unit softmax output; without num_classes the width is inferred from
# the largest label present, which would differ if a split lacked that label.
NUM_CLASSES = 6
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_eval = tf.keras.utils.to_categorical(y_eval, num_classes=NUM_CLASSES)

In [77]:
#PREPROCESSING OF DATASET
###########################################################################################
from nltk.corpus import stopwords

# Extra tokens (URL prefix and contraction fragments) to treat as stopwords
# on top of NLTK's English list.
my_stopwords = {"http", "'s", "n't", "'m", "'re", "'ve"}
stopwords_english = set(stopwords.words('english')) | my_stopwords

def preprocess_review(text):
    """Normalise a string tensor before it reaches the vectorisation layer.

    Lower-cases the text, deletes runs of digits, deletes every character
    that is neither a word character nor whitespace, then splits on
    whitespace and re-joins the tokens with single spaces.

    Args:
        text: string tensor; inside the model this is the output of the
            ``(1,)``-shaped string ``Input`` layer.

    Returns:
        String tensor holding the cleaned text.
    """
    text = tf.strings.lower(text)
    text = tf.strings.regex_replace(text, r'\d+', '')
    # Raw string on purpose: in a normal literal '\w' and '\s' are invalid
    # escape sequences, which newer Python versions warn about.
    text = tf.strings.regex_replace(text, r'[^\w\s]', '')

    # Split on whitespace and join back with one space, which collapses any
    # whitespace runs left behind by the deletions above.
    tokens = tf.strings.split(text)
    clean_text = tf.strings.reduce_join(tokens, separator=' ', axis=-1)

    return clean_text
###########################################################################

#Custom TextVectorization Layer
# Despite its name, max_len is the rounded *mean* number of
# whitespace-separated tokens per text. It is used below as the fixed
# output sequence length, so longer texts are cut to this length.
token_counts = [len(sentence.split()) for sentence in X['text']]
max_len = round(sum(token_counts) / len(X))

vectorize_layer = TextVectorization(
    max_tokens=50000,
    standardize='lower_and_strip_punctuation',
    split='whitespace',
    ngrams=None,
    output_mode='int',
    output_sequence_length=max_len,
    pad_to_max_tokens=True,
)
# Learn the vocabulary from the text column, then read it back as a list.
vectorize_layer.adapt(X)
words_vocab = vectorize_layer.get_vocabulary()

###############################################################################
#Custom Embedding layer
# One 128-dimensional vector per vocabulary entry; the input length matches
# the vectoriser's output sequence length.
embed_layer = layers.Embedding(
    input_dim=len(words_vocab),
    output_dim=128,
    embeddings_initializer='uniform',
    input_length=max_len,
)

################################################################################

In [78]:
###############################################################################################
#Custom CallBack and Scheduler
# Stop once the monitored quantity (Keras default) has failed to improve by
# at least 0.001 for 15 epochs in a row, and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=15,
    restore_best_weights=True,
)

# Smooth (non-staircase) inverse-time decay starting from 0.001; the decay
# horizon is 20 times the number of rows in X, counted in optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=X.shape[0] * 20,
    decay_rate=1,
    staircase=False,
)


##################################################################################
# Custom MODEL ASSEMBLING
def tf_model(vectorize_layer, embed_layer):
    """Build and compile the raw-text emotion classifier.

    The network takes raw strings and runs them through, in order:
    the ``preprocess_review`` Lambda, the supplied vectorisation and
    embedding layers, a Conv1D + max-pooling stage, two bidirectional
    LSTMs, a 32-unit dense layer and a 6-way softmax. Each recurrent/dense
    stage is followed by batch normalisation and 20% dropout.

    Args:
        vectorize_layer: layer that turns cleaned strings into integer
            token sequences.
        embed_layer: embedding layer applied to those sequences.

    Returns:
        A compiled ``Sequential`` model (Adam with the module-level
        ``lr_schedule``, categorical cross-entropy, accuracy metric).
    """
    # Declare the whole stack up front, in the order it is applied.
    stack = [
        # Text front end: raw string -> cleaned string -> token ids -> vectors.
        layers.Input(shape=(1,), dtype=tf.string),
        tf.keras.layers.Lambda(preprocess_review),
        vectorize_layer,
        embed_layer,
        # Local n-gram features, then halve the sequence length.
        layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
        layers.MaxPooling1D(pool_size=2),
        # First recurrent stage keeps the full sequence for the next LSTM.
        layers.Bidirectional(layers.LSTM(units=64, return_sequences=True)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        # Second recurrent stage reduces the sequence to a single vector.
        layers.Bidirectional(layers.LSTM(units=128)),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        # Classification head.
        layers.Dense(units=32, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.2),
        layers.Dense(units=6, activation='softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss='categorical_crossentropy',
        metrics=['acc'],
    )
    return model

In [79]:
#Model fitting.
model = tf_model(vectorize_layer, embed_layer)
# validation_data is given as the (x_val, y_val) tuple that Model.fit
# documents, rather than as a list.
# Training runs for at most 30 epochs; early_stopping may end it sooner.
model.fit(
    X_train,
    y_train,
    validation_data=(X_eval, y_eval),
    epochs=30,
    batch_size=50,
    callbacks=[early_stopping],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


<keras.callbacks.History at 0x7a628e902110>

In [80]:
# Emotion names in class-id order (same order as map_emotion).
Emotions = ['sadness', 'joy', 'love', 'anger', 'fear', 'Surprise']
# Class scores for a single sample sentence.
sample_texts = ['I am happy about my life']
Scores = model.predict(sample_texts)



In [81]:
# Indices of the two highest-scoring classes per row, best first.
# argsort yields a fully ordered ranking; argpartition only guarantees
# *which* indices land in the first two slots, not their relative order.
top2_scores = model.predict(['I am happy about my life'])
Result = np.argsort(-top2_scores, axis=1)[:, :2]
[[map_emotion(value) for value in row] for row in Result]



[['joy', 'love']]

In [82]:
# Saving and loading the model with pickle: write the trained model to the
# file 'model_pickle', then read it straight back into model_pkl.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this notebook / a trusted source.
import pickle
with open('model_pickle','wb') as f:
    pickle.dump(model,f)
with open('model_pickle','rb') as f:
    model_pkl=pickle.load(f)

In [83]:
# Save the model through Keras' own save API under the path 'model.tf'.
# NOTE(review): presumably this produces a TensorFlow SavedModel directory,
# since the path has no .h5/.keras suffix — confirm against the TF version in use.
model.save('model.tf')

In [84]:
# Save the model a third way, with joblib, to 'model_job.pkl'.
# joblib.dump returns the list of file names it wrote, which is what the
# cell displays.
import joblib
joblib.dump(model,'model_job.pkl')

['model_job.pkl']

In [85]:
# Score one sample sentence with the unpickled model and write a
# {emotion name: score} card to card.json.
Scores = model_pkl.predict(['I am happy about my life'])

# Pair the first row of scores with the emotion names, position by position;
# scores are stored as strings.
Card = {emotion: str(score) for score, emotion in zip(Scores[0], Emotions)}

data = Card
with open('card.json', 'w') as file:
    json.dump(data, file)

