# End model to predict financial incentives and disincentives

John Brandt

Last updated: Aug 19, 2019


This notebook contains a gold standard baseline (LSTM with gold standard labels) as well as a noisy implementation of snorkel labels with roBERTa encoding.

In [1]:
import numpy as np
import torch
import pandas as pd
from torch import nn

## Gold standard baseline

Shallow RNN with RoBERTa encoded words.

In [11]:
encodings = np.load('../data/interim/encodings.npy')
df = pd.read_csv("../data/processed/gold_standard.csv")

In [12]:
encodings.shape

(50, 20, 1024)

In [99]:
class Model(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(Model, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.output_size = output_size

        # Defining the layers
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, self.output_size)
    
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(hidden)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
         # We'll send the tensor holding the hidden state to the device we specified earlier as well
        return hidden

In [56]:
model = Model(input_size = 1024, output_size = 3, hidden_dim = 100, n_layers = 1)
model.to("cpu")

n_epochs = 100
lr = 0.001

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = lr)

In [69]:
from torch.utils.data import DataLoader, TensorDataset
Y = df['class'][:50] - 1

train_data = TensorDataset(torch.from_numpy(encodings),
                          torch.from_numpy(np.array(Y)))

valid_data = TensorDataset(torch.from_numpy(encodings),
                          torch.from_numpy(np.array(Y)))

train_loader = DataLoader(train_data, shuffle = True, batch_size = 5)
valid_loader = DataLoader(valid_data, shuffle = True, batch_size = 5)

In [68]:
# obtain one batch of training data
dataiter = iter(train_loader)
sample_x, sample_y = dataiter.next()
print('Sample input size: ', sample_x.size()) # batch_size, seq_length
print('Sample label size: ', sample_y.size()) # batch_size

Sample input size:  torch.Size([5, 20, 1024])
Sample label size:  torch.Size([5])


In [77]:
counter = 0
device = "cpu"
print_every = 100
batch_size = 10


for epoch in range(1, n_epochs + 1):
    h = model.init_hidden(10)
    for inputs, labels in train_loader:
        counter += 1
        h = tuple([each.data for each in h])
        optimizer.zero_grad()
        inputs = inputs.type(torch.FloatTensor)
        output, h = model(inputs)
        loss = criterion(output.squeeze(), labels.long())
        loss.backward()
        optimizer.step()
        if counter % print_every == 0:
            # Get validation loss
            val_h = model.init_hidden(batch_size)
            val_losses = []
            model.eval()
            for inputs, labels in valid_loader:

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                inputs = inputs.type(torch.FloatTensor)
                output, val_h = model(inputs)
                val_loss = criterion(output.squeeze(), labels.long())
                val_losses.append(val_loss.item())

            model.train()
            print("Epoch: {}/{}...".format(epoch+1, n_epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))

Epoch: 11/100... Step: 100... Loss: 0.000110... Val Loss: 0.000128
Epoch: 21/100... Step: 200... Loss: 0.000095... Val Loss: 0.000116
Epoch: 31/100... Step: 300... Loss: 0.000081... Val Loss: 0.000104
Epoch: 41/100... Step: 400... Loss: 0.000078... Val Loss: 0.000095
Epoch: 51/100... Step: 500... Loss: 0.000112... Val Loss: 0.000088
Epoch: 61/100... Step: 600... Loss: 0.000066... Val Loss: 0.000082
Epoch: 71/100... Step: 700... Loss: 0.000075... Val Loss: 0.000076
Epoch: 81/100... Step: 800... Loss: 0.000090... Val Loss: 0.000071
Epoch: 91/100... Step: 900... Loss: 0.000065... Val Loss: 0.000067
Epoch: 101/100... Step: 1000... Loss: 0.000065... Val Loss: 0.000062


## Snorkel end model with RoBERTA-LSTM input module and noise-aware output head

In [7]:
probas = np.load('../data/interim/snorkel_proba.npy')

In [43]:
class Middle(nn.Module):
    def __init__(self, input_size, hidden_dim, n_layers):
        super(Middle, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Defining the layers
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
    
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)

        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)    
        return hidden.reshape((50, 150))
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
         # We'll send the tensor holding the hidden state to the device we specified earlier as well
        return hidden
    
mid_mod = Middle(input_size = 1024,
                        hidden_dim = 150,
                        n_layers = 1)

In [44]:
from metal.end_model import EndModel

if torch.cuda.is_available():
    device = 'cuda'
else:
    device='cpu'
end_model = EndModel(layer_out_dims = [1024,150,3],
                     middle_modules = [mid_mod], 
                     seed=123, device=device)


Network architecture:
Sequential(
  (0): IdentityModule()
  (1): Sequential(
    (0): Middle(
      (rnn): RNN(1024, 150, batch_first=True)
    )
    (1): ReLU()
  )
  (2): Linear(in_features=150, out_features=3, bias=True)
)



In [None]:
X = encodings

end_model.train_model((X, probas[:50]), 
                      valid_data = (X, df['class'][:50]), 
                      lr=0.01, l2=0.01, batch_size=256, 
                      n_epochs=50,
                      checkpoint_metric='accuracy',
                      checkpoint_metric_mode='max')

## To do: Snorkel end model with RoBERTa-LSTM input module and multi-task output head

## To do: Snorkel end model with RoBERTa-LSTM input module and multi-task output head, with concatenation of additional feature engineering

## To do: Snorkel end model with RoBERTa-LSTM input module and multi-task output head, with concatenation of additional feature engineering and synonym augmentation