In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Embedding, MaxPooling1D, GlobalMaxPooling1D, LSTM, Dropout,SimpleRNN,Bidirectional,Attention
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import re

In [None]:
# Text-preprocessing / embedding hyperparameters used by the cells below.
EMBEDDING_DIM = 200        # width of each learned word vector in the Embedding layer
MAX_VOCAB_SIZE = 20000     # vocabulary cap passed to the Tokenizer as num_words
MAX_SEQUENCE_LENGTH = 300  # length every token sequence is padded to (pad_sequences maxlen)

In [None]:
# Location of the tab-separated corpus; each record is read as a (Class, Text) pair.
DATA_PATH = '/content/drive/MyDrive/MentalHealthProject/Balanced/MentalHealthTextual.txt'

# `names` is passed explicitly, so pandas treats the first line of the file as data
# rather than as a header row.
df = pd.read_csv(DATA_PATH, sep='\t', names=['Class', 'Text'])

# Rows per class, shown as the cell output to check class balance.
df.groupby('Class').count()

Unnamed: 0_level_0,Text
Class,Unnamed: 1_level_1
__label__anger,50000
__label__disgust,50000
__label__fear,50000
__label__joy,50000
__label__sadness,50000
__label__surprise,50000


In [None]:
# Raw texts as a NumPy array of str (astype(str) also stringifies any missing value).
sentences = df['Text'].values.astype(str)

# Integer id assigned to each label string; keep in sync with original_values().
LABEL_TO_ID = {
    '__label__anger': 0,
    '__label__fear': 1,
    '__label__disgust': 2,
    '__label__joy': 3,
    '__label__sadness': 4,
    '__label__surprise': 5,
}
class_ids = df['Class'].map(LABEL_TO_ID)

# Series.map() yields NaN for any label missing from the mapping; fail here instead of
# letting NaN flow into to_categorical().
if class_ids.isna().any():
    unknown = sorted(df.loc[class_ids.isna(), 'Class'].astype(str).unique())
    raise ValueError("Unmapped class labels in df['Class']: {}".format(unknown))

target = class_ids.values

In [None]:
# One-hot encode the integer class ids (one column per class).
NUM_CLASSES = 6  # the label mapping defines ids 0 through 5
target_cat = to_categorical(target, num_classes=NUM_CLASSES)

In [None]:
# Fit a word-level tokenizer on the whole corpus, capped at MAX_VOCAB_SIZE via num_words.
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
tokenizer.fit_on_texts(sentences)

# word -> integer id for every unique word seen while fitting
word2idx = tokenizer.word_index

# Each sentence becomes a list of word ids.
sequences = tokenizer.texts_to_sequences(sentences)

In [None]:
# Bring every id sequence to MAX_SEQUENCE_LENGTH so they stack into a single array.
data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

# Embedding input size: the vocabulary cap, or the number of distinct words plus one
# when the corpus has fewer words than the cap.
vocab_plus_one = len(word2idx) + 1
num_words = min(MAX_VOCAB_SIZE, vocab_plus_one)

# Same two-line output as printing each value separately.
print(MAX_SEQUENCE_LENGTH, num_words, sep='\n')

300
20000


In [None]:
# Shuffled 80/20 split with a fixed seed, stratified on the one-hot label rows.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target_cat,
    test_size=0.2,
    random_state=98,
    shuffle=True,
    stratify=target_cat,
)


In [None]:
# LSTM classifier: model2 starts out as an empty Sequential container.
model2 = Sequential()

In [None]:
# Build the whole network in one expression so that re-running this cell yields a
# fresh model instead of appending a second copy of every layer to the existing model2.
model2 = Sequential([
    # Learned word embeddings: MAX_SEQUENCE_LENGTH ids in, one EMBEDDING_DIM vector per id out.
    Embedding(num_words, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH,
              trainable=True, input_shape=(MAX_SEQUENCE_LENGTH,)),
    # return_sequences=True keeps the per-timestep outputs for the pooling layer below.
    LSTM(1024, return_sequences=True),
    # Max over the time axis, leaving one 1024-wide vector per sample.
    GlobalMaxPooling1D(),
    # One softmax probability per class.
    Dense(6, activation='softmax'),
])

# categorical_crossentropy matches the one-hot targets produced by to_categorical().
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Stop training once val_loss has gone `patience` epochs without improving.
# NOTE(review): with epochs=1 below this callback can never fire; it only matters
# if the epoch count is raised.
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=3)

# NOTE(review): the held-out test split doubles as the validation set here.
model2.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=1,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
)



<tensorflow.python.keras.callbacks.History at 0x7f382a563b38>

In [None]:
# Sequential.predict_classes() was deprecated and later removed from tf.keras; the
# argmax over the softmax outputs yields the same predicted class ids.
pred2 = np.argmax(model2.predict(X_test), axis=-1)



In [None]:
def original_values(num):
    """Translate an integer class id back into its ``__label__*`` string.

    Ids 0-5 map to anger, fear, disgust, joy, sadness and surprise respectively;
    any other value falls through and yields ``None``.
    """
    labels = (
        '__label__anger',
        '__label__fear',
        '__label__disgust',
        '__label__joy',
        '__label__sadness',
        '__label__surprise',
    )
    # Compare with == (rather than indexing) so NumPy integers and other int-like
    # values are matched the same way an if/elif chain would match them.
    for class_id, label in enumerate(labels):
        if num == class_id:
            return label
    return None

In [None]:
# Convert integer class ids (0-5) back to their original label strings.
pred = np.array([original_values(num) for num in pred2])

# y_test leaves train_test_split one-hot encoded. Collapse it to class ids only while
# it is still 2-D, so running this cell a second time does not apply argmax to the
# already-collapsed vector (which would produce a single scalar and break the
# comprehension below).
if np.ndim(y_test) > 1:
    y_test = np.argmax(y_test, axis=-1)
y_true = np.array([original_values(num) for num in y_test])

print(classification_report(y_true, pred))

                   precision    recall  f1-score   support

   __label__anger       0.88      0.83      0.85     10000
 __label__disgust       0.91      0.93      0.92     10000
    __label__fear       0.93      0.92      0.92     10000
     __label__joy       0.86      0.93      0.89     10000
 __label__sadness       0.95      0.92      0.93     10000
__label__surprise       0.93      0.93      0.93     10000

         accuracy                           0.91     60000
        macro avg       0.91      0.91      0.91     60000
     weighted avg       0.91      0.91      0.91     60000

