<a href="https://colab.research.google.com/github/vnavya2004/BTP/blob/main/Paper(3569580)_spanish.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# Step 2: Install required libraries
!pip install pandas nltk torch torchvision

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

# Initialize NLTK resources
import nltk
nltk.download('punkt')
nltk.download('stopwords')




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [20]:

# Step 3: Data Cleaning and Data Pre-Processing
def data_cleaning(df):
    ps = PorterStemmer()
    stop_words = set(stopwords.words('english'))

    def preprocess_text(text):
        words =  word_tokenize(str(text).lower())
        cleaned_words = []
        for word in words:
            if word.isalpha():  # Keep only alphabetic words
                word = word.lower()  # Convert to lowercase
                if word not in stop_words:  # Remove stop words
                    word = ps.stem(word)  # Stemming
                    cleaned_words.append(word)
        return ' '.join(cleaned_words)

    df['cleaned_text'] = df['Tweets_english'].apply(preprocess_text)  # Adjust column name if necessary
    return df['cleaned_text']


In [3]:

# Step 5: FCL method to detect depression (PyTorch model)
class FCLModel(nn.Module):
    def __init__(self, vocab_size, embed_dim):
        super(FCLModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.dropout = nn.Dropout(0.5)
        self.lstm = nn.LSTM(embed_dim, 100, batch_first=True)
        self.fc = nn.Linear(100, 1)  # Binary classification

    def forward(self, x):
        x = self.embedding(x).unsqueeze(1)  # Add channel dimension
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x, _ = self.lstm(x.unsqueeze(1))  # Pass through LSTM
        x = self.fc(x[:, -1, :])  # Get the output from the last time step
        return torch.sigmoid(x)

In [23]:
!pip install gensim
import gensim

from google.colab import files
uploaded = files.upload()  # Manually upload your .xlsx file

# Change the file name as per the uploaded file
df = pd.read_excel(next(iter(uploaded.keys()))) # Provide the path to your Excel file

# Data Cleaning and Pre-Processing
cleaned_data = data_cleaning(df)




Saving spanish_translated.xlsx to spanish_translated (1).xlsx


In [22]:
df.head()

Unnamed: 0.1,Unnamed: 0,Tweets,Labels,Tweets_english,cleaned_text
741,,"No encuentro forma de sentirme bien, jamas enc...",1,"I can't find a way to feel good, I never find ...",ca find way feel good never find moment smile ...
1315,,"Honestamente, lo mejor que he hecho en toda mi...",0,Honestly the best thing I've ever done in my e...,honestli best thing ever done entir life past ...
2051,,Cualquiera quiere charlar felicitaciones por l...,0,Anyone want to chat congratulations on ranking...,anyon want chat congratul rank know gener post...
930,,intento tragar el nudo que tengo en la gargant...,1,I try to swallow the lump in my throat...but i...,tri swallow lump throat make cri
1476,,Los maestros que llaman en frío son los peores...,0,"Teachers who cold call are the worst, so I bui...",teacher cold call worst built extens alert nam...


In [8]:
# Load FastText model
ft_model = gensim.models.KeyedVectors.load('/content/drive/MyDrive/embeddings/cc.en.300.model')  # Change to your FastText model path

In [25]:

# Step 4: Creation of Vocabulary for FCL method
# Step 4: Creation of Vocabulary for FCL method
def create_embedding_layer(cleaned_data, ft_model):
    embeddings = []
    for tweet in cleaned_data:
        # Access word vectors using bracket notation
        tweet_embeddings = [ft_model[word] for word in tweet.split() if word in ft_model]
        # Check if the word is in the model's vocabulary to avoid KeyError
        if tweet_embeddings:  # Check if tweet_embeddings is not empty
            embeddings.append(np.mean(tweet_embeddings, axis=0))  # Average embeddings
        else:  # If tweet_embeddings is empty (no known words)
            embeddings.append(np.zeros(ft_model.vector_size))  # Append a zero vector
    return np.array(embeddings)
# Create Embedding Layer
embeddings = create_embedding_layer(cleaned_data, ft_model)


In [26]:
# Prepare labels
labels = df['Labels'].values  # Adjust based on your dataset

# Split the data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(embeddings, labels, test_size=0.2, random_state=42)
# Step 5: FCL method to detect depression (PyTorch model)
# Step 5: FCL method to detect depression (PyTorch model)
class FCLModel(nn.Module):
    def __init__(self, input_dim):  # Modified constructor
        super(FCLModel, self).__init__()
        # Removed unnecessary embedding layer
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.dropout = nn.Dropout(0.5)
        self.lstm = nn.LSTM(64, 100, batch_first=True)  # Changed input_dim to 64
        self.fc = nn.Linear(100, 1)  # Binary classification

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.pool(torch.relu(self.conv1(x)))
        # Remove view, LSTM expects 3D input (batch_size, seq_len, features)
        # The output of pool has (batch_size, out_channels, seq_len)
        # Permute to get (batch_size, seq_len, out_channels)
        x = x.permute(0, 2, 1)
        x, _ = self.lstm(x)  # Pass through LSTM
        x = self.fc(x[:, -1, :])  # Get the output from the last time step
        return torch.sigmoid(x)
# Build FCL model
model = FCLModel(input_dim=embeddings.shape[1])






In [27]:
# progress bar
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score
from tqdm import tqdm  # Import tqdm for progress bar

def evaluate_model(model, x_train, y_train, x_test, y_test, epochs=10, batch_size=32):
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        model.train()
        # Use tqdm to create a progress bar for training
        for i in tqdm(range(0, len(x_train), batch_size), desc=f"Epoch {epoch + 1}/{epochs}", unit="batch"):
            # Change dtype to torch.float32 for inputs
            inputs = torch.tensor(x_train[i:i + batch_size], dtype=torch.float32)
            labels = torch.tensor(y_train[i:i + batch_size], dtype=torch.float32).view(-1, 1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate
    model.eval()
    with torch.no_grad():
        # Change dtype to torch.float32 for x_test
        y_pred = model(torch.tensor(x_test, dtype=torch.float32)).numpy()
        y_pred = (y_pred > 0.5).astype(int)

    # Metrics
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred))
    print("Accuracy:", accuracy_score(y_test, y_pred))


In [28]:
evaluate_model(model, x_train, y_train, x_test, y_test)


Epoch 1/10: 100%|██████████| 55/55 [00:11<00:00,  4.81batch/s]
Epoch 2/10: 100%|██████████| 55/55 [00:07<00:00,  7.50batch/s]
Epoch 3/10: 100%|██████████| 55/55 [00:04<00:00, 11.89batch/s]
Epoch 4/10: 100%|██████████| 55/55 [00:04<00:00, 11.69batch/s]
Epoch 5/10: 100%|██████████| 55/55 [00:05<00:00, 10.51batch/s]
Epoch 6/10: 100%|██████████| 55/55 [00:04<00:00, 12.90batch/s]
Epoch 7/10: 100%|██████████| 55/55 [00:04<00:00, 11.49batch/s]
Epoch 8/10: 100%|██████████| 55/55 [00:05<00:00, 10.69batch/s]
Epoch 9/10: 100%|██████████| 55/55 [00:04<00:00, 12.71batch/s]
Epoch 10/10: 100%|██████████| 55/55 [00:04<00:00, 11.45batch/s]


Confusion Matrix:
[[196  35]
 [101 106]]
Precision: 0.75177304964539
Recall: 0.5120772946859904
F1 Score: 0.6091954022988506
Accuracy: 0.6894977168949772
