In [1]:
# models/Deeplearning
#----------------------------------------------------------
# vishnuam300@gmail.com
# VISHNU A M

### Data Collection and Preprocessing

In [2]:
# Load data set function 
import pandas as pd

def load_data(filename, label):
    """Read a UTF-8 text file into a DataFrame with one row per non-blank line.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    label : str
        Value stored in the ``label`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``text`` (each line without its trailing line
        terminator) and ``label`` (the given label, repeated for every row).
    """
    with open(filename, 'r', encoding='utf-8') as file:
        # Strip the line terminator and skip blank / whitespace-only lines so
        # that no empty sample ends up carrying an emotion label.
        lines = [line.rstrip('\r\n') for line in file if line.strip()]
    # NOTE(review): every line is kept whole. If the rating files store several
    # fields per line (e.g. an id, the emotion name, a score), those fields
    # become part of `text` and may leak the label to the model -- confirm the
    # file layout and keep only the text field if so.
    return pd.DataFrame({'text': lines, 'label': label})
# Load one labelled frame per emotion (same order as before: sadness, anger, joy, fear)
sadness, anger, joy, fear = (
    load_data(path, emotion)
    for path, emotion in [
        ('sadness-ratings-0to1.train.txt', 'sadness'),
        ('anger-ratings-0to1.train.txt', 'anger'),
        ('joy-ratings-0to1.train.txt', 'joy'),
        ('fear-ratings-0to1.train.txt', 'fear'),
    ]
)

In [3]:
# Combine the four per-emotion frames into one, renumbering the index
data = pd.concat([sadness, anger, joy, fear], ignore_index=True)
# Shuffle all rows; the fixed seed makes the row order (and therefore every
# downstream step) reproducible after Restart Kernel -> Run All
data = data.sample(frac=1, random_state=42).reset_index(drop=True)

### Exploratory Data Analysis (EDA)

In [4]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Text cleaning function
def clean_text(text):
    """Normalize a raw text string before tokenization by the model pipeline.

    The input is lowercased, every character other than ``a``-``z`` and
    whitespace is removed, the result is tokenized with NLTK's
    ``word_tokenize`` and English stopwords are dropped.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        The remaining tokens joined by single spaces.
    """
    # Build the stopword set once per call instead of once per token: the
    # lookup below is then a constant-time set membership test.
    english_stopwords = set(stopwords.words('english'))
    # Convert to lowercase
    text = text.lower()
    # Remove special characters (digits and punctuation included)
    text = re.sub(r'[^a-z\s]', '', text)
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords
    tokens = [word for word in tokens if word not in english_stopwords]
    return ' '.join(tokens)

# Run every row of the text column through clean_text and store the result back
data['text'] = data['text'].map(clean_text)

### Deep learning libraries

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences




### Train-test split

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Hold out 20% of the rows for testing; the seed fixes which rows are chosen
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# Learn the label -> integer mapping from the training labels only,
# then apply that same mapping to both splits
le = LabelEncoder().fit(y_train)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [7]:
# Fit the tokenizer vocabulary on the training texts only
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Training split: texts -> integer sequences -> arrays padded/truncated to length 100
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_train_pad = pad_sequences(X_train_seq, maxlen=100)

# Test split: same fitted tokenizer, same target length
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_test_pad = pad_sequences(X_test_seq, maxlen=100)

In [8]:
# Deep learning model: embedding -> spatial dropout -> LSTM -> 4-way softmax
model_dl = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(4, activation='softmax'),
])

# Compile: integer class labels, so the sparse cross-entropy loss is used
model_dl.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 5 epochs; the test split is also reported as validation data
model_dl.fit(
    X_train_pad,
    y_train,
    validation_data=(X_test_pad, y_test),
    epochs=5,
    batch_size=64,
)

# Evaluate on the test split and report accuracy
loss, accuracy = model_dl.evaluate(X_test_pad, y_test)
print('Accuracy:', accuracy)




Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 1.0


In [9]:
# Hand-written sentences used as a quick sanity check of the trained model
sample_texts = [
    "I am feeling very sad and down today.",
    "I am so angry about what happened!",
    "I am absolutely overjoyed with the news!",
    "I am really scared about the future."
]

# Apply the same cleaning, tokenization and padding used for the training data
cleaned_samples = list(map(clean_text, sample_texts))
sample_seq = tokenizer.texts_to_sequences(cleaned_samples)
sample_pad = pad_sequences(sample_seq, maxlen=100)

# Take the highest-scoring class per sample and map it back to its label name
sample_predictions = model_dl.predict(sample_pad)
predicted_class_ids = sample_predictions.argmax(axis=1)
decoded_predictions = le.inverse_transform(predicted_class_ids)

# Show each original sentence next to its predicted emotion
for index, text in enumerate(sample_texts):
    emotion = decoded_predictions[index]
    print(f"Text: {text} => Predicted Emotion: {emotion}")

Text: I am feeling very sad and down today. => Predicted Emotion: sadness
Text: I am so angry about what happened! => Predicted Emotion: anger
Text: I am absolutely overjoyed with the news! => Predicted Emotion: fear
Text: I am really scared about the future. => Predicted Emotion: fear
