In [3]:
import torch

!pip install pytorch-pretrained-bert
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM


Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [0]:
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load pre-trained model (weights)
BERT = BertModel.from_pretrained('bert-base-uncased')

def getLatentVector(input):

  BERT.eval()

  # Tokenized input
  tokenized_text = tokenizer.tokenize(input)

  # Convert token to vocabulary indices
  indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
  # Define sentence A and B indices associated to 1st and 2nd sentences (see paper)
  # print(indexed_tokens)
  segments_ids = [0]*(len(indexed_tokens))

  # Convert inputs to PyTorch tensors
  tokens_tensor = torch.tensor([indexed_tokens])
  segments_tensors = torch.tensor([segments_ids])

  # Predict hidden states features for each layer
  encoded_layers, _ = BERT(tokens_tensor, segments_tensors)
  # We have a hidden states for each of the 12 layers in model bert-base-uncased
  assert len(encoded_layers) == 12
  return _

In [0]:
import torch.nn as nn
import torch.nn.functional as F

class TwoLayerNet(nn.Module):
  def __init__(self, D_in, H, D_out):
    """
    In the constructor we instantiate two nn.Linear modules and assign them as
    member variables.
    
    D_in: input dimension
    H: dimension of hidden layer
    D_out: output dimension
    """
    super(TwoLayerNet, self).__init__()
    self.linear1 = nn.Linear(D_in, H) 
    self.linear2 = nn.Linear(H, D_out)
  
  def forward(self, x):
    """
    In the forward function we accept a Variable of input data and we must 
    return a Variable of output data. We can use Modules defined in the 
    constructor as well as arbitrary operators on Variables.
    """
    h_relu = F.elu(self.linear1(x))
    y_pred = F.softmax(self.linear2(h_relu))
    return y_pred

In [0]:
import pandas as pd
import numpy as np

dataset = pd.read_csv('tmobile_data.csv')
dataset = np.array(dataset)

X = dataset[0:25, 0:1]
y = dataset[0:24, 1]

X_encoded = []
for i in range(len(X)):
  X_encoded.append(getLatentVector(str(X[i][0])))

y_encoded = []
for i in range(len(y)): 
  tensor = torch.LongTensor([y[i]])
  y_encoded.append(tensor)
  
#split input and output
X_train = X_encoded[0:24]
y_train = y_encoded
X_test = X_encoded[24:]
y_test = dataset[24:, 1] 


In [27]:
N, D_in, H, D_out = 2, 768, 500, 5

# Construct our model by instantiating the class defined above
model = TwoLayerNet(D_in, H, D_out)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)  

iter = 0
for epoch in range(N):
  for i in range(len(X_train)):
    
    inputs = X_train[i]
    labels = y_train[i]
        
    # Forward pass to get output/logits
    outputs = model(inputs)
    
    print(outputs)
    print(labels)
    
    # Calculate Loss: softmax --> cross entropy loss
    loss = criterion(outputs, labels)
    # Getting gradients w.r.t. parameters
    loss.backward(retain_graph=True)

    # Updating parameters
    optimizer.step()


    
    print('Iteration: {}. Loss: {}'.format(iter, loss.item()))
  
  print("Next Epoch")



tensor([[0.1646, 0.2200, 0.1974, 0.2295, 0.1883]], grad_fn=<SoftmaxBackward>)
tensor([4])
Iteration: 0. Loss: 1.6213830709457397
tensor([[0.1548, 0.2193, 0.2434, 0.1989, 0.1837]], grad_fn=<SoftmaxBackward>)
tensor([3])
Iteration: 0. Loss: 1.6110379695892334
tensor([[0.1436, 0.2008, 0.2029, 0.2304, 0.2223]], grad_fn=<SoftmaxBackward>)
tensor([2])
Iteration: 0. Loss: 1.6069830656051636
tensor([[0.1320, 0.1782, 0.2133, 0.2532, 0.2233]], grad_fn=<SoftmaxBackward>)
tensor([0])
Iteration: 0. Loss: 1.6782821416854858
tensor([[0.0956, 0.1525, 0.1996, 0.2775, 0.2747]], grad_fn=<SoftmaxBackward>)
tensor([2])
Iteration: 0. Loss: 1.6122926473617554
tensor([[0.1187, 0.1276, 0.2431, 0.2782, 0.2324]], grad_fn=<SoftmaxBackward>)
tensor([2])
Iteration: 0. Loss: 1.5683858394622803
tensor([[0.0914, 0.1101, 0.2389, 0.2720, 0.2876]], grad_fn=<SoftmaxBackward>)
tensor([0])
Iteration: 0. Loss: 1.7214200496673584
tensor([[0.1343, 0.1096, 0.3115, 0.2463, 0.1982]], grad_fn=<SoftmaxBackward>)
tensor([0])
Iterati

In [28]:
#Testing our toy example: "What kind of phone plans do you have for small businesses?"
print(model(X_test[0]))

tensor([[9.9218e-01, 1.7513e-05, 7.7347e-03, 4.6146e-05, 2.3258e-05]],
       grad_fn=<SoftmaxBackward>)


