<a href="https://colab.research.google.com/github/viyas52/stock-market-prediction-using-twitter-sentiment-analysis/blob/main/Tweet_Sentiment_PRED.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import torch
import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertModel
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


class BertClassifier(nn.Module):
    """BERT encoder followed by a small feed-forward head producing 2 logits.

    The attribute names ``bert`` and ``classifier`` are part of the saved
    checkpoint's state-dict keys, so they must not be renamed.
    """

    def __init__(self, freeze=False):
        """Build the encoder and the classification head.

        Args:
            freeze: when True, gradients are disabled for every parameter of
                the BERT encoder so that only the head remains trainable.
        """
        super().__init__()

        # Widths of the head: encoder output -> hidden -> number of classes.
        embed_dim, hidden_dim, num_classes = 768, 50, 2

        self.bert = BertModel.from_pretrained('bert-base-uncased')

        self.classifier = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )

        if freeze:
            # Same effect as setting requires_grad = False on each parameter.
            self.bert.requires_grad_(False)

    def forward(self, input_ids, attention_mask):
        """Return the classifier logits for a batch of tokenized inputs.

        Args:
            input_ids: token ids, forwarded to the encoder as its first argument.
            attention_mask: padding mask, forwarded as the second argument.

        Returns:
            The output of ``self.classifier`` applied to the vector found at
            sequence position 0 of the encoder's first output.
        """
        # First encoder output is indexed as (batch, position, feature);
        # position 0 is presumably the [CLS] token -- confirm against the tokenizer.
        encoder_states = self.bert(input_ids, attention_mask)[0]
        cls_vector = encoder_states[:, 0, :]
        return self.classifier(cls_vector)

# Load model
# Pick the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertClassifier()
# NOTE(security): torch.load unpickles the file, and unpickling an untrusted
# checkpoint can execute arbitrary code. weights_only=True restricts loading
# to tensors and plain containers, which is all a state dict needs.
model.load_state_dict(
    torch.load("stock_sentiment_model.pt", map_location=device, weights_only=True)
)
model.to(device)
# Inference only: switch off dropout and other training-time behaviour.
model.eval()


# Token budget per tweet, used as max_length when tokenizing.
MAX_LEN = 64

# Tokenizer for the same 'bert-base-uncased' checkpoint name the model class loads.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tweets to score; later code reads its 'Text_Cleaned' and 'createdAt' columns.
df = pd.read_csv("cleaned_twitter_data.csv")

def tokenize_tweets(texts):
    """Tokenize tweets into fixed-length BERT inputs.

    Args:
        texts: iterable of tweet strings (e.g. a pandas Series). Missing
            values (NaN / None) are replaced by the empty string so that the
            output keeps one row per input element.

    Returns:
        Tuple ``(input_ids, attention_mask)`` taken from the tokenizer output,
        requested as PyTorch tensors with every sequence padded or truncated
        to ``MAX_LEN`` tokens.
    """
    # read_csv turns empty cells into float NaN, which the tokenizer rejects;
    # map them to "" instead of dropping rows so predictions stay aligned.
    clean_texts = ["" if pd.isna(text) else str(text) for text in texts]

    # `pad_to_max_length` is deprecated; `padding="max_length"` is the
    # supported way to request the same fixed-width padding.
    tokens = tokenizer(
        clean_texts,
        max_length=MAX_LEN,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    return tokens['input_ids'], tokens['attention_mask']

input_ids, attention_masks = tokenize_tweets(df['Text_Cleaned'])

# Score the tweets in mini-batches. Pushing the whole dataset through BERT in
# a single forward pass makes peak memory grow with the number of tweets.
BATCH_SIZE = 32
inference_loader = DataLoader(
    TensorDataset(input_ids, attention_masks),
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep row order so predictions line up with df
)

batch_logits = []
with torch.no_grad():
    for batch_ids, batch_mask in inference_loader:
        # Only the current batch lives on the device; results go back to CPU.
        batch_out = model(batch_ids.to(device), batch_mask.to(device))
        batch_logits.append(batch_out.cpu())

logits = torch.cat(batch_logits)
# Predicted class = index of the larger logit for each tweet.
preds = torch.argmax(logits, dim=1).numpy()

df['Sentiment'] = preds


df[['createdAt','Text_Cleaned', 'Sentiment']].to_csv("predicted_tweets.csv", index=False)
print("Predictions saved to predicted_tweets.csv")



Predictions saved to predicted_tweets.csv


In [6]:

# Reload the per-tweet predictions and replace the timestamp with a day key.
# Unparseable timestamps become NaT and are therefore left out of the groupby.
df_predicted = (
    pd.read_csv("predicted_tweets.csv")
    .assign(
        Date=lambda frame: (
            pd.to_datetime(frame["createdAt"], errors="coerce")
            .dt.tz_localize(None)  # drop any UTC offset, keeping the wall-clock date
            .dt.normalize()        # truncate to midnight -> one value per calendar day
        )
    )
    .drop(columns=["createdAt"])
)

# Majority vote per day: 1 when at least half of that day's tweets are class 1.
aggregated_senti = (
    df_predicted.groupby("Date")["Sentiment"]
    .mean()
    .ge(0.5)
    .astype(int)
    .reset_index()
)
aggregated_senti_path = "aggregated_senti.csv"
aggregated_senti.to_csv(aggregated_senti_path, index=False)


aggregated_senti.head()

Unnamed: 0,Date,Sentiment
0,2025-03-12,1
1,2025-03-13,1
2,2025-03-14,1
3,2025-03-15,1
4,2025-03-16,1
