Hopefully the last setup needed to prepare the environment for the inverse RL procedure on conversational AI. In this version we use pre-trained word embeddings and combine them with the raw states to provide a clear flow of logic as part of the conversation.

In [1]:
"""
OpenAI Gym inspired environment for this NLP task. 

Upon resetting environment, returns the state and expert action in raw and embedding form. 

In our case conversations are only pairs. The approach scales to longer conversations, and as a
starting point for that we include a .step(action) function which simply returns done=True. This also
keeps our framework as similar as possible to previously successful approaches using GAIL.

"""


import torch
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
import numpy as np
from io import open
import itertools
import math
import matplotlib.pyplot as plt

class DialogEnvironment(object):
    """Gym-style environment that serves single-turn dialog pairs.

    Every call to :meth:`reset` samples one conversation and returns its
    state and expert action, both in embedded and in raw form. Episodes are
    one step long, so :meth:`step` always reports ``done=True``.
    """

    def __init__(self, mode='train'):
        """Load the preprocessed conversations from disk.

        Args:
            mode: Currently unused by this class; kept so existing callers
                that pass it keep working.
        """
        # NOTE(review): torch.load unpickles its input, so these paths must
        # only ever point at trusted, locally generated files.
        self.conversations = torch.load('../apps/dat/preprocess/padded_vectorized_states.pt')
        self.raw_conversations = torch.load('../apps/dat/preprocess/raw_states.pt')

        self.i = None  # index of the conversation chosen by the last reset()
        self.conversation = None  # the conversation chosen by the last reset()
        self.conversations_visited = []

    def clear(self):
        """Forget which conversations have been visited so far."""
        self.conversations_visited = []

    def current_state(self):
        """Return the index of the conversation chosen by the last reset()."""
        return self.i

    def reset(self):
        """Start a new trajectory, i.e. pick a new conversation at random.

        Sampling is done with replacement, so a conversation can be
        revisited. TODO: offer sampling without replacement if preferred.

        Returns:
            A 4-tuple ``(state, expert_action, raw_state, raw_expert_action)``.
            ``raw_state`` is a 1-tuple holding the raw key, as in earlier
            versions of this class.

        Raises:
            ValueError: If there are no conversations to sample from.
        """
        if len(self.conversations) == 0:
            # Without this guard the retry loop below would never terminate.
            raise ValueError('no conversations available to sample from')

        raw_keys = list(self.raw_conversations.keys())

        # Some indices have been removed, for various reasons. Keep drawing
        # until an index resolves in both the embedded and the raw data.
        while True:
            # The inclusive upper bound is kept from the original sampling
            # range; an index that does not resolve is simply redrawn.
            index = random.randint(0, len(self.conversations))
            try:
                conversation = self.conversations[index]
                raw_key = raw_keys[index]
            except LookupError:  # IndexError or KeyError: not a viable index
                continue
            break

        # Only record the index once the lookup has actually succeeded.
        self.i = index
        self.conversation = conversation
        self.conversations_visited.append(index)

        # Conversations are (state, expert_action) pairs.
        state = conversation[0]
        expert_action = conversation[1]

        # NOTE(review): the 1-tuple wrapper comes from a trailing comma in the
        # original code; kept so existing callers see the same return shape.
        raw_state = (raw_key,)
        raw_expert_action = self.raw_conversations[raw_key]

        return state, expert_action, raw_state, raw_expert_action

    def step(self, action):
        """Finish the episode; conversations are a single exchange.

        Args:
            action: Ignored.

        Returns:
            Always ``True`` (``done``).
        """
        done = True

        return done



In [2]:
# Build the environment; the constructor loads the preprocessed conversation files from disk.
env = DialogEnvironment()

In [3]:
# Sample one conversation: embedded state and expert action, plus their raw counterparts.
state, expert_action, raw_state, raw_expert_action = env.reset()

In [58]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class Actor(nn.Module):
    """Policy network made of a single recurrent encoder.

    Args:
        input_size: Number of features per input time step (default 300).
        hidden_size: Size of the encoder's hidden state (default 300).
    """

    def __init__(self, input_size=300, hidden_size=300):
        super().__init__()

        self.encoder = nn.RNN(input_size=input_size, hidden_size=hidden_size)

    def forward(self, x):
        """Encode ``x`` with the RNN.

        Returns:
            Whatever ``nn.RNN`` returns for ``x``, i.e. the
            ``(output, h_n)`` tuple, passed through unchanged.
        """
        return self.encoder(x)
    
class SimpleRNN(nn.Module):
    """Two parallel ReLU RNNs whose outputs are concatenated and projected.

    Each branch gets ``hidden_size // 2`` hidden units. The final linear
    layer is sized from the actual concatenated width, so an odd
    ``hidden_size`` works too (it is effectively rounded down to even).

    Args:
        input_size: Number of features per time step of each input.
        hidden_size: Total hidden width shared by the two branches; must be
            at least 2 so that each branch has at least one unit.
        output_size: Number of output features per time step.

    Raises:
        ValueError: If ``hidden_size`` is smaller than 2.
    """

    def __init__(self, input_size=300, hidden_size=32, output_size=1):
        # This just calls the base class constructor
        super().__init__()
        if hidden_size < 2:
            raise ValueError('hidden_size must be at least 2, got {}'.format(hidden_size))
        branch_size = hidden_size // 2
        # Neural network layers assigned as attributes of a Module subclass
        # have their parameters registered for training automatically.
        self.rnn1 = torch.nn.RNN(input_size, branch_size, nonlinearity='relu', batch_first=False)
        self.rnn2 = torch.nn.RNN(input_size, branch_size, nonlinearity='relu', batch_first=False)
        # Size the projection from what forward() really concatenates;
        # using hidden_size directly breaks when hidden_size is odd.
        self.linear = torch.nn.Linear(2 * branch_size, output_size)

    def forward(self, x1, x2):
        """Encode ``x1`` and ``x2`` separately and project the joined features.

        Both inputs are fed to RNNs built with ``batch_first=False``, i.e.
        they are read as ``(seq_len, batch, input_size)``.

        Returns:
            Tensor with the inputs' leading dimensions and ``output_size``
            features in the last dimension.
        """
        # The RNN also returns its hidden state but we don't use it.
        # While the RNN can also take a hidden state as input, the RNN
        # gets passed a hidden state initialized with zeros by default.
        h1 = self.rnn1(x1)[0]
        h2 = self.rnn2(x2)[0]
        # Join along the feature (last) dimension.
        h = torch.cat([h1, h2], dim=-1)
        return self.linear(h)
    


In [59]:
# Two-branch RNN with its default sizes (input 300, hidden 32, output 1).
simprnn = SimpleRNN()


In [61]:
# Tensor.resize (non in-place) is deprecated; reshape gives the same
# (1, 60, 300) layout without touching the underlying storage.
# NOTE(review): SimpleRNN uses batch_first=False, so this is read as
# seq_len=1, batch=60 - confirm that is the intended layout.
out = simprnn(state.reshape(1, 60, 300), expert_action.reshape(1, 60, 300))

out.shape

torch.Size([1, 60, 1])

In [57]:
%debug

> [0;32m/scratch/nsk367/anaconda3/envs/irl/lib/python3.8/site-packages/torch/nn/modules/rnn.py[0m(174)[0;36mcheck_input[0;34m()[0m
[0;32m    172 [0;31m        [0mexpected_input_dim[0m [0;34m=[0m [0;36m2[0m [0;32mif[0m [0mbatch_sizes[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32melse[0m [0;36m3[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    173 [0;31m        [0;32mif[0m [0minput[0m[0;34m.[0m[0mdim[0m[0;34m([0m[0;34m)[0m [0;34m!=[0m [0mexpected_input_dim[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 174 [0;31m            raise RuntimeError(
[0m[0;32m    175 [0;31m                'input must have {} dimensions, got {}'.format(
[0m[0;32m    176 [0;31m                    expected_input_dim, input.dim()))
[0m
ipdb> exit


In [7]:
        
class RNNModel(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder.

    Args:
        rnn_type: One of ``'LSTM'``, ``'GRU'``, ``'RNN_TANH'`` or ``'RNN_RELU'``.
        ntoken: Vocabulary size (rows of the embedding, width of the output).
        ninp: Embedding size fed into the recurrent module.
        nhid: Hidden size of the recurrent module.
        nlayers: Number of stacked recurrent layers.
        dropout: Dropout probability applied to the embeddings, between
            recurrent layers, and to the recurrent output.
        tie_weights: If true, share the decoder weight with the embedding
            (requires ``nhid == ninp``).

    Raises:
        ValueError: If ``rnn_type`` is not supported, or if ``tie_weights``
            is set while ``nhid != ninp``.
    """

    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, tie_weights=False):
        super(RNNModel, self).__init__()
        self.ntoken = ntoken
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        if rnn_type in ['LSTM', 'GRU']:
            self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
        else:
            try:
                nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
            except KeyError:
                raise ValueError( """An invalid option for `--model` was supplied,
                                 options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
            self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity, dropout=dropout)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers

    def init_weights(self):
        """Initialise embedding/decoder weights uniformly and zero the decoder bias."""
        initrange = 0.1
        nn.init.uniform_(self.encoder.weight, -initrange, initrange)
        # Zero the bias, not the weight: zeroing the weight here would be
        # overwritten by the uniform init on the very next line.
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def forward(self, input, hidden):
        """Embed ``input``, run the recurrent module, and decode to log-probabilities.

        Args:
            input: Token indices for the embedding layer.
            hidden: Initial hidden state, e.g. from :meth:`init_hidden`.

        Returns:
            ``(log_probs, hidden)`` where ``log_probs`` is flattened to
            ``(-1, ntoken)`` and normalised over the token dimension.
        """
        emb = self.drop(self.encoder(input))
        output, hidden = self.rnn(emb, hidden)
        output = self.drop(output)
        decoded = self.decoder(output)
        decoded = decoded.view(-1, self.ntoken)
        return F.log_softmax(decoded, dim=1), hidden

    def init_hidden(self, bsz):
        """Return a zero hidden state for batch size ``bsz``.

        The state is created from an existing parameter so it shares that
        parameter's dtype and device. LSTMs get an ``(h, c)`` tuple, all
        other types a single tensor, each of shape ``(nlayers, bsz, nhid)``.
        """
        weight = next(self.parameters())
        if self.rnn_type == 'LSTM':
            return (weight.new_zeros(self.nlayers, bsz, self.nhid),
                    weight.new_zeros(self.nlayers, bsz, self.nhid))
        else:
            return weight.new_zeros(self.nlayers, bsz, self.nhid)

tensor([[ 0.0532,  0.1359,  0.0235,  ..., -0.2295, -0.2234, -0.0766],
        [ 0.0985,  0.2500, -0.2702,  ..., -0.0626,  0.2442,  0.1778],
        [ 0.0014,  0.3565, -0.0555,  ..., -0.1124,  0.0783,  0.2240],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

In [21]:
# Standalone RNN layer for shape experiments: 300 input features, 300 hidden units.
test = nn.RNN(input_size = 300,hidden_size=300)

In [22]:
# nn.RNN defaults to batch_first=False, so this input is read as seq_len=1, batch=60, features=300.
# `out` is the (output, h_n) tuple that nn.RNN returns.
out = test(torch.randn(1,60,300))

In [39]:
# nn.RNN requires input_size and hidden_size; calling it with no arguments
# raises TypeError. These sizes reproduce the recorded output RNN(300, 300).
nn.RNN(input_size=300, hidden_size=300)

RNN(300, 300)