In [53]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import DistilBertModel, DistilBertTokenizer
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split

In [58]:
df = pd.read_csv("FOMC_data_no_token_BERT.csv")
df = df.drop(columns = ["Unnamed: 0.1", "Unnamed: 0"], axis = 1)

In [59]:
df

Unnamed: 0,date,minutes,tar_rate_new,next_tar_rate,change,does_change,label_delta
0,1993-02-03,A meeting of the Federal Open Market Committee...,3.00,3.00,0.00,0,no change
1,1993-03-23,A meeting of the Federal Open Market Committee...,3.00,3.00,0.00,0,no change
2,1993-05-18,A meeting of the Federal Open Market Committee...,3.00,3.00,0.00,0,no change
3,1993-07-07,A meeting of the Federal Open Market Committee...,3.00,3.00,0.00,0,no change
4,1993-08-17,A meeting of the Federal Open Market Committee...,3.00,3.00,0.00,0,no change
...,...,...,...,...,...,...,...
235,2022-09-21,"The Federal Reserve, the central bank of the U...",3.25,4.00,0.75,1,Increase
236,2022-11-02,"The Federal Reserve, the central bank of the U...",4.00,4.50,0.50,1,Increase
237,2022-12-14,"The Federal Reserve, the central bank of the U...",4.50,4.75,0.25,1,Increase
238,2023-02-01,"The Federal Reserve, the central bank of the U...",4.75,5.00,0.25,1,Increase


In [64]:
X = df["minutes"]
y = df["label_delta"].astype("category").cat.codes 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=10 )

### Set up DistilBert model and dataloader

In [65]:
class FedRatePredictor(nn.Module):
    def __init__(self, num_classes=3, pretrained_model_name='distilbert-base-uncased'):
        super(FedRatePredictor, self).__init__()
        self.tokenizer = DistilBertTokenizer.from_pretrained(pretrained_model_name)
        self.encoder = DistilBertModel.from_pretrained(pretrained_model_name)
        self.fc = nn.Linear(self.encoder.config.hidden_size, num_classes)

    def forward(self, input_text):
        tokens = self.tokenizer(input_text, padding=True, truncation=True, return_tensors='pt')
        embeddings = self.encoder(**tokens).last_hidden_state
        pooled_output = embeddings[:, 0, :]
        output = self.fc(pooled_output)
        return output
    
class FOMCDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {"Text": self.X.iloc[idx], "Diff": self.y.iloc[idx]}
    
train_dataset = FOMCDataset(X_train, y_train)
test_dataset = FOMCDataset(X_test, y_test)

train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

### Training the DistilBert model

In [67]:
model = FedRatePredictor(num_classes=3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    epoch_loss = 0
    model.train()
    for batch in train_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        # Forward pass
        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)
        loss = criterion(output, target)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_dataloader)}')


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/10, Loss: 1.5980125181376934
Epoch 2/10, Loss: 1.1332616278280814
Epoch 3/10, Loss: 1.0016082382450502
Epoch 4/10, Loss: 1.0022477805614471
Epoch 5/10, Loss: 0.710025509664168
Epoch 6/10, Loss: 0.813838025710235
Epoch 7/10, Loss: 0.9894269929112246
Epoch 8/10, Loss: 1.0540623317162197


In [21]:
model.eval()  # set the model to evaluation mode
total_correct = 0
total_samples = 0

with torch.no_grad():
    for batch in train_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        # Forward pass
        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)

        # Get predictions
        _, predictions = torch.max(output, dim=1)

        # Update totals
        total_correct += (predictions == target).sum().item()
        total_samples += target.size(0)

accuracy = total_correct / total_samples
print(f'Training Accuracy: {accuracy:.2f}')


Training Accuracy: 0.68


In [22]:
model.eval()  # set the model to evaluation mode
total_correct = 0
total_samples = 0

with torch.no_grad():
    for batch in test_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        # Forward pass
        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)

        # Get predictions
        _, predictions = torch.max(output, dim=1)

        # Update totals
        total_correct += (predictions == target).sum().item()
        total_samples += target.size(0)

accuracy = total_correct / total_samples
print(f'Test Accuracy: {accuracy:.2f}')

Training Accuracy: 0.69


Roberta

In [23]:
from transformers import RobertaTokenizer, RobertaModel

In [27]:
class FedRatePredictorROBERTA(nn.Module):
    def __init__(self, num_classes=3, pretrained_model_name='roberta-base'):
        super(FedRatePredictorROBERTA, self).__init__()
        self.tokenizer = RobertaTokenizer.from_pretrained(pretrained_model_name)
        self.encoder = RobertaModel.from_pretrained(pretrained_model_name)
        self.fc = nn.Linear(self.encoder.config.hidden_size, num_classes)

    def forward(self, input_text):
        tokens = self.tokenizer(input_text, padding=True, truncation=True, return_tensors='pt', max_length=512)
        embeddings = self.encoder(**tokens).last_hidden_state
        pooled_output = embeddings[:, 0, :]
        output = self.fc(pooled_output)
        return output

In [28]:
model = FedRatePredictorROBERTA(num_classes=3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


num_epochs = 5
for epoch in range(num_epochs):
    epoch_loss = 0
    for batch in train_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        # Forward pass
        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)
        loss = criterion(output, target)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_dataloader)}')

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/5, Loss: 1.1096748577781455
Epoch 2/5, Loss: 0.8899901729697982
Epoch 3/5, Loss: 0.8802334690311303
Epoch 4/5, Loss: 0.8634929419495165
Epoch 5/5, Loss: 0.8855544488178566


In [29]:

# Get accuracy on training data
model.eval()
with torch.no_grad():
    correct_predictions = 0
    for batch in train_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)

        _, predicted_label = torch.max(output, dim=1)
        correct_predictions += (predicted_label == target).sum().item()

    accuracy = correct_predictions / len(train_dataset)

print('Training Accuracy: ', accuracy)

Training Accuracy:  0.6822916666666666


In [33]:
# Evaluation mode
model.eval()

# Initialize a variable to store the number of correct predictions
correct_predictions = 0

# No gradient calculation in this block
with torch.no_grad():
    for batch in test_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        # Get the model's output
        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)

        # Get the predicted label
        _, predicted_label = torch.max(output, dim=1)

        # Check if the prediction is correct and update the correct_predictions counter
        correct_predictions += (predicted_label == target).sum().item()

# Calculate the accuracy
accuracy = correct_predictions / len(test_dataset)

print('Test Accuracy: ', accuracy)


Test Accuracy:  0.6875


## FinBERT + adding dense layers with ReLU + dropout

In [37]:
from transformers import AutoTokenizer, AutoModel, AdamW
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F

# Create a new class for the model
class FinRatePredictorFINBERT(nn.Module):
    def __init__(self, num_classes=3, pretrained_model_name='ProsusAI/finbert'):
        super(FinRatePredictorFINBERT, self).__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
        self.encoder = AutoModel.from_pretrained(pretrained_model_name)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(self.encoder.config.hidden_size, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, input_text):
        inputs = self.tokenizer(' '.join(input_text), padding=True, truncation=True, return_tensors='pt')
        embeddings = self.encoder(**inputs).last_hidden_state
        pooled_output = embeddings[:, 0, :]
        x = self.fc1(pooled_output)
        x = F.relu(x)
        x = self.dropout(x)
        output = self.fc2(x)
        return output


In [42]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

In [38]:

# Instantiate the model
model = FinRatePredictorFINBERT()

# Set the optimizer and the learning rate scheduler
optimizer = AdamW(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.9)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    epoch_loss = 0
    model.train()
    for batch in train_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        # Forward pass
        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)
        loss = criterion(output, target)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update learning rate
        scheduler.step()

        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_dataloader)}')


Downloading (…)okenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of the model checkpoint at ProsusAI/finbert were not used when initializing BertModel: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch 1/5, Loss: 0.9558581778546795
Epoch 2/5, Loss: 0.8726615972506503
Epoch 3/5, Loss: 0.9159736119521161
Epoch 4/5, Loss: 0.9227104634822657
Epoch 5/5, Loss: 0.8853018715356787


In [40]:
# Get accuracy on training data
model.eval()
with torch.no_grad():
    correct_predictions = 0
    for batch in train_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)

        _, predicted_label = torch.max(output, dim=1)
        correct_predictions += (predicted_label == target).sum().item()

    accuracy = correct_predictions / len(train_dataset)

print('Training Accuracy: ', accuracy)

Training Accuracy:  0.6822916666666666


In [46]:
# Get accuracy on training data
model.eval()
with torch.no_grad():
    correct_predictions = 0
    for batch in test_dataloader:
        minutes, label = batch['Text'][0], batch['Diff'][0].item()

        output = model(minutes)
        target = torch.tensor([label], dtype=torch.long)

        _, predicted_label = torch.max(output, dim=1)
        correct_predictions += (predicted_label == target).sum().item()

    accuracy = correct_predictions / len(test_dataset)

print('Training Accuracy: ', accuracy)

Training Accuracy:  0.6875


In [44]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

def report_metrics(y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted')
    rec = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Confusion matrix:\n{cm}")
    
    
report_metrics(target, predicted_label)

Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000
Confusion matrix:
[[1]]


In [45]:
predicted_label

tensor([2])