Test Rahul's Recurrent models with current code

In [11]:
import torch
import torch
import torch.nn as nn
 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [12]:
# Check that a CUDA device is usable; other cells in this notebook move the
# model and tensors to 'cuda'.
torch.cuda.is_available()

True

In [1]:
import sys

In [2]:
# Add ../src/ to the module search path so project modules can be imported.
sys.path.append('../src/')

In [3]:
from dialog_environment import *

In [5]:
# Build the environment used by the test cells in this notebook.
# NOTE(review): DialogEnvironment presumably comes from the
# `dialog_environment` star-import — confirm.
env = DialogEnvironment()

In [107]:
import torch
import torch.nn as nn

# Width of every decoder output step: DecoderRNN passes this as the hidden
# size of its recurrent cell.
OUTPUT_SIZE = 300 ## can be changed for integration with mlp or whatever else

def subsample(data, target, n=15):
    """Keep every ``n``-th element of each sequence in ``data`` and ``target``.

    Args:
        data: iterable of sliceable sequences (the inputs).
        target: iterable of sliceable sequences (the targets).
        n: step of the slice applied to every sequence.

    Returns:
        A ``(data, target)`` tuple of lists holding the strided sequences.
    """
    stride = slice(None, None, n)
    thinned_data = [sequence[stride] for sequence in data]
    thinned_target = [sequence[stride] for sequence in target]
    return thinned_data, thinned_target


class DialogData(torch.utils.data.Dataset):
    """Dataset of (input state, next state) tensor pairs.

    Args:
        state_vects: mapping whose values are indexable, holding the input
            state at position 0 and the next state at position 1.
        subsample_n: when truthy, every state is strided with ``subsample``
            using this step before the states are stacked.
    """

    def __init__(self, state_vects, subsample_n=None):
        # can add raw state here? idk
        inputs = [pair[0] for pair in state_vects.values()]
        outputs = [pair[1] for pair in state_vects.values()]

        assert len(inputs) == len(outputs)

        if subsample_n:
            inputs, outputs = subsample(inputs, outputs, subsample_n)

        # Stack the per-conversation tensors along a new leading dimension.
        self.data = torch.stack(inputs)
        self.targets = torch.stack(outputs)

    def __len__(self):
        """Return the number of stored pairs."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(input state, next state)`` pair at ``index``."""
        return self.data[index], self.targets[index]


class EncoderRNN(nn.Module):
    """Recurrent encoder: optional instance norm over features, then LSTM/GRU.

    Args:
        hidden_size: hidden size of the recurrent cell (width of each output
            step).
        num_layers: number of stacked recurrent layers.
        device: stored on the instance; this class does not use it otherwise.
        drop_prob: dropout passed to the recurrent cell; forced to zero when
            ``num_layers`` is 1.
        lstm: build an ``nn.LSTM`` when true, an ``nn.GRU`` otherwise.
        feature_norm: normalise inputs with ``nn.InstanceNorm1d`` when true,
            pass them through unchanged otherwise.
        input_size: number of features per time step.
    """

    def __init__(self, hidden_size, num_layers,
                 device='cpu', drop_prob=0, lstm=True, feature_norm=False,
                 input_size=34):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = device

        rnn_cls = nn.LSTM if lstm else nn.GRU
        # make dropout 0 if num_layers is 1
        layer_dropout = drop_prob * (num_layers != 1)
        self.memory_cell = rnn_cls(input_size,
                                   hidden_size,
                                   num_layers,
                                   batch_first=True,
                                   dropout=layer_dropout,
                                   bidirectional=False)

        self.norm = (nn.InstanceNorm1d(num_features=input_size)
                     if feature_norm else nn.Identity())

    def forward(self, x):
        """Encode ``x`` and return the recurrent cell's per-step outputs."""
        # The norm layer expects features on dim 1, so swap dims 1 and 2
        # around it and swap them back afterwards.
        normed = self.norm(x.transpose(1, 2)).transpose(1, 2)
        encoded, _state = self.memory_cell(normed)
        return encoded


class DecoderRNN(nn.Module):
    """Recurrent decoder mapping encoder outputs to ``output_size`` features.

    Args:
        hidden_size: feature size of each incoming step (the encoder's hidden
            size); used as the recurrent cell's input size.
        num_layers: number of stacked recurrent layers.
        device: stored on the instance; this class does not use it otherwise.
        drop_prob: dropout passed to the recurrent cell; forced to zero when
            ``num_layers`` is 1.
        lstm: build an ``nn.LSTM`` when true, an ``nn.GRU`` otherwise.
        feature_norm: accepted so the signature mirrors ``EncoderRNN``; it is
            not used by the decoder.
        output_size: width of each output step. ``None`` (the default) falls
            back to the module-level ``OUTPUT_SIZE``.
    """

    def __init__(self, hidden_size, num_layers,
                 device='cpu', drop_prob=0, lstm=True, feature_norm=False,
                 output_size=None):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.device = device
        # Resolve the default at call time so a changed OUTPUT_SIZE is
        # honoured and explicit sizes never touch the global.
        self.output_size = OUTPUT_SIZE if output_size is None else output_size

        if lstm:
            memory_cell = nn.LSTM
        else:
            memory_cell = nn.GRU

        self.memory_cell = memory_cell(hidden_size,
                                       self.output_size,
                                       num_layers,
                                       batch_first=True,
                                       # make dropout 0 if num_layers is 1
                                       dropout=drop_prob * (num_layers != 1),
                                       bidirectional=False)

    def forward(self, x):
        """Decode ``x`` and return the recurrent cell's per-step outputs."""
        out, _ = self.memory_cell(x)
        return out


class Seq2Seq(nn.Module):
    """Encoder followed by a decoder, applied one after the other.

    All constructor arguments are forwarded to ``EncoderRNN`` and
    ``DecoderRNN``; ``input_size`` is given to the encoder only.
    """
    def __init__(self,  hidden_size, num_layers,
                 device='cpu', drop_prob=0, lstm=True, feature_norm=False,
                 input_size=300):
        super().__init__()
        # Settings common to both halves of the network.
        shared = dict(hidden_size=hidden_size,
                      num_layers=num_layers,
                      device=device,
                      drop_prob=drop_prob,
                      lstm=lstm,
                      feature_norm=feature_norm)
        self.encoder = EncoderRNN(input_size=input_size, **shared)
        self.decoder = DecoderRNN(**shared)

    def forward(self, x):
        """Return the decoder's output for the encoded ``x``."""
        return self.decoder(self.encoder(x))
    
    
class Actor(nn.Module):
    """
    Policy built directly on the sequence-to-sequence network: takes a state
    and produces the reply as the mean of a Gaussian, together with a
    constant standard deviation of the same shape.

    """
    def __init__(self,  hidden_size, num_layers,
                 device='cuda', drop_prob=0, lstm=True, feature_norm=False,
                 input_size=300):
        super().__init__()
        self.seq2seq = Seq2Seq(
            hidden_size=hidden_size,
            num_layers=num_layers,
            device=device,
            drop_prob=drop_prob,
            lstm=lstm,
            feature_norm=feature_norm,
            input_size=input_size,
        )

    def forward(self,x):
        """Return ``(mu, std)`` for input ``x``."""
        mu = self.seq2seq(x)
        # The log-std is fixed at zero, so exp() yields ones and the scaled
        # result is the constant .001 everywhere.
        unit_std = torch.exp(torch.zeros_like(mu))
        return mu, .001*unit_std # Kinda guessed.

    
class Critic(nn.Module):
    """
    Combination of two encoders for the state and action embeddings to predict value.

    The state and the action are encoded by separate ``EncoderRNN`` modules,
    their per-step outputs are concatenated along the feature dimension,
    flattened per sample and mapped to a single number by a linear layer
    followed by ReLU.

    Args:
        hidden_size: hidden size of both encoders.
        num_layers: number of recurrent layers in both encoders.
        device: forwarded to the encoders.
        drop_prob: forwarded to the encoders.
        lstm: forwarded to the encoders (LSTM when true, GRU otherwise).
        feature_norm: forwarded to the encoders.
        input_size: number of features per time step of state and action.
        seq_len: number of time steps in every state/action sequence. With
            ``hidden_size`` it fixes the linear layer's input width
            (``2 * seq_len * hidden_size``, i.e. 120 for the defaults used
            with ``hidden_size=1``).
    """

    def __init__(self, hidden_size, num_layers,
                 device='cpu', drop_prob=0, lstm=True, feature_norm=False,
                 input_size=300, seq_len=60):

        super().__init__()

        self.state_encoder = EncoderRNN(
            hidden_size=hidden_size,
            num_layers=num_layers,
            device=device,
            drop_prob=drop_prob,
            lstm=lstm,
            feature_norm=feature_norm,
            input_size=input_size,
                            )
        self.action_encoder =  EncoderRNN(
            hidden_size=hidden_size,
            num_layers=num_layers,
            device=device,
            drop_prob=drop_prob,
            lstm=lstm,
            feature_norm=feature_norm,
            input_size=input_size,
                            )

        # Two encoders, each contributing seq_len * hidden_size features.
        self.MLP = nn.Linear(in_features=2 * seq_len * hidden_size,
                             out_features=1)

    def forward(self,state,action):
        """Return one value per sample, shape ``(batch, 1)``."""
        state = self.state_encoder(state)
        action = self.action_encoder(action)
        # Flatten per sample: a fixed reshape(-1, 120) would fold extra
        # features into the batch dimension whenever hidden_size != 1.
        state_action = torch.cat([state,action],dim=2).flatten(start_dim=1)
        # torch.relu avoids relying on an `F` alias that this file never imports.
        # NOTE(review): the ReLU clamps the predicted value to be non-negative
        # — confirm this is intended.
        state_action = torch.relu(self.MLP(state_action))
        return state_action
    
    
    
  




class Discriminator(nn.Module):
    """
    Combination of two encoders for the state and action embeddings that
    outputs a probability for a (state, action) pair.

    The state and the action are encoded by separate ``EncoderRNN`` modules,
    their per-step outputs are concatenated along the feature dimension,
    flattened per sample and mapped to a single number by a linear layer
    followed by a sigmoid.

    Args:
        hidden_size: hidden size of both encoders.
        num_layers: number of recurrent layers in both encoders.
        device: forwarded to the encoders.
        drop_prob: forwarded to the encoders.
        lstm: forwarded to the encoders (LSTM when true, GRU otherwise).
        feature_norm: forwarded to the encoders.
        input_size: number of features per time step of state and action.
        seq_len: number of time steps in every state/action sequence. With
            ``hidden_size`` it fixes the linear layer's input width
            (``2 * seq_len * hidden_size``, i.e. 120 for the defaults used
            with ``hidden_size=1``).
    """

    def __init__(self, hidden_size, num_layers,
                 device='cpu', drop_prob=0, lstm=True, feature_norm=False,
                 input_size=300, seq_len=60):

        super().__init__()

        self.state_encoder = EncoderRNN(
            hidden_size=hidden_size,
            num_layers=num_layers,
            device=device,
            drop_prob=drop_prob,
            lstm=lstm,
            feature_norm=feature_norm,
            input_size=input_size,
                            )
        self.action_encoder =  EncoderRNN(
            hidden_size=hidden_size,
            num_layers=num_layers,
            device=device,
            drop_prob=drop_prob,
            lstm=lstm,
            feature_norm=feature_norm,
            input_size=input_size,
                            )

        # Two encoders, each contributing seq_len * hidden_size features.
        self.MLP = nn.Linear(in_features=2 * seq_len * hidden_size,
                             out_features=1)

    def forward(self,state,action):
        """Return one probability per sample, shape ``(batch, 1)``."""
        state = self.state_encoder(state)
        action = self.action_encoder(action)
        # Flatten per sample: a fixed reshape(-1, 120) would fold extra
        # features into the batch dimension whenever hidden_size != 1.
        state_action = torch.cat([state,action],dim=2).flatten(start_dim=1)
        prob = torch.sigmoid(self.MLP(state_action))
        return prob

In [108]:
# Smoke test: score one (state, expert action) pair from the environment with
# a freshly initialised Discriminator on the GPU.
model = Discriminator(input_size = 300, hidden_size=1,device='cuda',num_layers=1)
model = model.to('cuda')
state, expert_action, raw_state, raw_expert_action = env.reset()
state = state.to('cuda')
expert_action = expert_action.to('cuda')
# reshape adds the leading batch dimension; the non in-place Tensor.resize
# used previously is deprecated.
prob = model(state.reshape(1, 60, 300), expert_action.reshape(1, 60, 300))

In [109]:
# Display the discriminator's output for the pair scored above.
prob

tensor([[0.5029]], device='cuda:0', grad_fn=<SigmoidBackward>)

In [98]:
# Shape check of the concatenate-and-flatten expression used in
# Critic.forward / Discriminator.forward.
# NOTE(review): `state` and `action` come from whatever the notebook held when
# this cell ran (execution count 98) — presumably encoder outputs; confirm
# before re-running.
torch.cat([state,action],dim=2).reshape(-1,120).shape

torch.Size([1, 120])

In [65]:
# NOTE(review): `Policy` is not defined anywhere in this notebook; it is
# presumably an earlier name of `Actor` (same constructor arguments, and the
# cells below unpack `mu, std` from its output) or comes from the
# `dialog_environment` star-import — confirm before re-running.
model = Policy(hidden_size=2,device='cuda',num_layers=2)

In [66]:
# Reset the environment; the four returned values are unpacked as the state,
# the expert action, and what are presumably their raw (text) counterparts.
state, expert_action, raw_state, raw_expert_action = env.reset()

In [77]:
# Move the model to the GPU.
model = model.to('cuda')

In [78]:
# Move the inputs to the GPU as well, matching the model's device.
state = state.to('cuda')
expert_action = expert_action.to('cuda')

In [69]:
# Run the policy on one state; reshape adds the leading batch dimension (the
# non in-place Tensor.resize used previously is deprecated).
mu,std = model(state.reshape(1, 60, 300))

In [70]:
# Display the policy's mean and standard deviation tensors.
mu, std

(tensor([[[-0.0004,  0.0147, -0.0232,  ...,  0.0033,  0.0190, -0.0076],
          [-0.0007,  0.0199, -0.0328,  ...,  0.0029,  0.0300, -0.0119],
          [-0.0006,  0.0213, -0.0370,  ...,  0.0017,  0.0356, -0.0143],
          ...,
          [ 0.0012,  0.0210, -0.0421,  ..., -0.0021,  0.0384, -0.0165],
          [ 0.0012,  0.0210, -0.0421,  ..., -0.0021,  0.0384, -0.0165],
          [ 0.0012,  0.0210, -0.0421,  ..., -0.0021,  0.0384, -0.0165]]],
        device='cuda:0', grad_fn=<CudnnRnnBackward>),
 tensor([[[1.0000e-07, 1.0000e-07, 1.0000e-07,  ..., 1.0000e-07,
           1.0000e-07, 1.0000e-07],
          [1.0000e-07, 1.0000e-07, 1.0000e-07,  ..., 1.0000e-07,
           1.0000e-07, 1.0000e-07],
          [1.0000e-07, 1.0000e-07, 1.0000e-07,  ..., 1.0000e-07,
           1.0000e-07, 1.0000e-07],
          ...,
          [1.0000e-07, 1.0000e-07, 1.0000e-07,  ..., 1.0000e-07,
           1.0000e-07, 1.0000e-07],
          [1.0000e-07, 1.0000e-07, 1.0000e-07,  ..., 1.0000e-07,
           1.

# Test : Get action :-)

In [71]:
def get_action(mu, std):
    """Sample an action from a normal distribution and return it as NumPy.

    Args:
        mu: tensor of per-element means.
        std: tensor of per-element standard deviations.

    Returns:
        ``numpy.ndarray`` holding one sample per element, drawn with
        ``torch.normal(mu, std)``.
    """
    action = torch.normal(mu, std)
    # detach() replaces the legacy `.data` access; cpu() lets samples drawn
    # on a CUDA device be converted too (it is a no-op for CPU tensors).
    return action.detach().cpu().numpy()


In [72]:
# Sample an action on the CPU from the policy's mean and standard deviation.
action = get_action(mu.cpu(),std.cpu())

In [73]:
# Inspect the first element of the sampled action.
action[0][0][0]

-0.00039523863

In [74]:
# Inspect the matching element of the mean for comparison with the sample.
mu[0][0][0]

tensor(-0.0004, device='cuda:0', grad_fn=<SelectBackward>)

In [None]:
if __name__ == '__main__':
    # End-to-end smoke test: load the preprocessed states, wrap them in a
    # dataset/loader and push one batch through an untrained Seq2Seq model.
    # NOTE(review): torch.load unpickles these files — only load trusted data.
    d = torch.load('./dat/preprocess/padded_vectorized_states.pt')
    raw = torch.load('./dat/preprocess/raw_states.pt')

    # Build the key list once; it was previously rebuilt twice per iteration.
    raw_keys = list(raw.keys())

    for index, vects in d.items():
        # each is 60 x 300
        input_state, next_state = vects[0], vects[1]
        # raw strings corresponding to embeddings
        raw_input_state = raw_keys[index]
        raw_next_state = raw[raw_input_state]
        # only look at the first few entries
        if index > 1:
            break

    dataset = DialogData(d)
    print(len(dataset))
    print(dataset[0][0].shape) # initial state at index 0
    print(dataset[0][1].shape) # next state at index 0

    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=5,
                                         shuffle=True,
                                         num_workers=0,
                                        )

    model = Seq2Seq(hidden_size=2, num_layers=2)

    for index, (data, target) in enumerate(loader):

        print(index, data.shape, target.shape)

        # run through model to test
        result = model(data.cpu()).detach()

        print(result.shape)

        # one batch is enough for the smoke test
        break