In [4]:
import torch, torch.autograd as autograd
import torch.nn as nn, torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable as avar
    
from SimpleTask import SimpleGridTask
from TransportTask import TransportTask
from NavTask import NavigationTask
from SeqData import SeqData
from Henaff_pt2 import ForwardModelFFANN
from LSTMFM import LSTMForwardModel


import os, sys, pickle, numpy as np, numpy.random as npr, random as r

In [44]:
def sample_gumbel(shape, eps=1e-20):
    """Draw Gumbel noise of the requested shape.

    Uniform samples ``U`` are transformed with ``-log(-log(U + eps) + eps)``;
    ``eps`` keeps both logarithms away from ``log(0)``.

    Args:
        shape: size of the noise tensor to generate.
        eps: small constant added inside each logarithm.

    Returns:
        The noise wrapped in an autograd ``Variable`` (created with
        ``torch.rand``, i.e. in the default tensor type).
    """
    uniform = torch.rand(shape) #.cuda()
    neg_log_uniform = -torch.log(uniform + eps)
    return -avar(torch.log(neg_log_uniform + eps))

def gumbel_softmax_sample(logits, temperature):
    """Return a relaxed (soft) categorical sample from ``logits``.

    Gumbel noise is added to the logits and the result is pushed through a
    softmax over the last dimension, scaled by ``temperature``.

    Args:
        logits: tensor of shape ``[*, n_class]``.
        temperature: softmax temperature; the sum is divided by it before
            the softmax is applied.

    Returns:
        Tensor with the same shape as ``logits`` whose last dimension sums to 1.
    """
    # sample_gumbel builds its noise with torch.rand(shape), i.e. in the
    # default tensor type. Cast it to the logits' tensor type so the addition
    # also works when the logits use another type (e.g. CUDA or double);
    # this is a no-op when the types already match.
    noise = sample_gumbel(logits.size()).type_as(logits)
    return F.softmax((logits + noise) / temperature, dim=-1)

def gumbel_softmax(logits, temperature):
    """
    Straight-through Gumbel-softmax sample.

    input: [*, n_class]
    return: [*, n_class] a one-hot vector

    The returned values are one-hot (arg-max of the soft sample), while the
    gradient is that of the soft sample: the hard/soft difference is detached
    before being added back.
    """
    soft = gumbel_softmax_sample(logits, temperature)
    full_shape = soft.size()
    n_class = full_shape[-1]
    winners = soft.max(dim=-1)[1]
    # Build the one-hot tensor on a flattened [N, n_class] view, then restore the shape.
    one_hot = torch.zeros_like(soft).view(-1, n_class)
    one_hot.scatter_(1, winners.view(-1, 1), 1)
    one_hot = one_hot.view(*full_shape)
    straight_through = (one_hot - soft).detach()
    return straight_through + soft

In [73]:
def greedy_valueF(state):
    """Score a state by the negative squared distance between two slice pairs.

    After squeezing, entries ``[0:15]`` are compared with ``[34:49]`` and
    entries ``[15:30]`` with ``[49:64]`` (presumably agent x/y versus goal x/y
    one-hot encodings; confirm against the environment's state layout).

    Returns:
        A scalar tensor: ``-(sum((a_x - g_x)^2) + sum((a_y - g_y)^2))``.
    """
    flat = state.squeeze()
    gap_x = flat[0:15] - flat[34:49]
    gap_y = flat[15:30] - flat[49:64]
    sq_x = gap_x.pow(2).sum()
    sq_y = gap_y.pow(2).sum()
    return -(sq_x + sq_y)

def greedy_cont_valueF(state):
    """Score a state by the negative squared distance between arg-max indices.

    The arg-max index of slice ``[0:15]`` is compared with that of ``[34:49]``
    and the arg-max index of ``[15:30]`` with that of ``[49:64]``. Because the
    value is built from indices, it carries no gradient with respect to
    ``state``.

    Returns:
        A scalar integer tensor: ``-((ix - gx)^2 + (iy - gy)^2)``.
    """
    flat = state.squeeze()

    def hottest(lo, hi):
        # Index of the largest entry inside flat[lo:hi].
        return flat[lo:hi].max(0)[1]

    off_x = hottest(0, 15) - hottest(34, 49)
    off_y = hottest(15, 30) - hottest(49, 64)
    return -(torch.sum(off_x * off_x) + torch.sum(off_y * off_y))

def greedy_CE(state):
    """Score a state by the negative cross-entropy between position and goal.

    Slices ``[0:15]`` and ``[15:30]`` of the squeezed state are passed to
    ``CrossEntropyLoss`` as unnormalised scores, with the arg-max indices of
    slices ``[34:49]`` and ``[49:64]`` as the target classes.

    Args:
        state: tensor that squeezes to a 1-D vector of at least 64 entries.

    Returns:
        A scalar tensor ``-(CE_x + CE_y)``; higher is better.
    """
    state = state.squeeze()
    _, gx = state[34:49].max(0)
    _, gy = state[49:64].max(0)

    # The scores get a batch dimension of one ...
    px = state[0:15].unsqueeze(dim=0)
    py = state[15:30].unsqueeze(dim=0)

    # ... so the targets must be 1-D with one element as well. max(0) on a
    # 1-D slice may return a 0-d index tensor, which does not match the
    # batched scores; view(1) normalises both the 0-d and 1-element cases.
    loss = torch.nn.CrossEntropyLoss()
    vx = loss(px, gx.view(1))
    vy = loss(py, gy.view(1))
    return -(vx + vy)
    

In [7]:
# Name of the saved forward-model weights loaded further down.
f_model_name = 'LSTM_FM_1_99'
# Task prefix used to pick the pickled data files.
s = 'navigation' # 'transport'
trainf = s + "-data-train-small.pickle"
validf = s + "-data-test-small.pickle"
print('Reading Data')
train = SeqData(trainf)
valid = SeqData(validf)

Reading Data
Reading navigation-data-train-small.pickle
	Built
Reading navigation-data-test-small.pickle
	Built


In [102]:
def generateTask(px,py,orien,gx,gy):
    """Build a NavigationTask with a fixed start pose and goal.

    Args:
        px, py: agent start coordinates.
        orien: index into ``NavigationTask.oriens`` selecting the start orientation.
        gx, gy: goal coordinates.

    Returns:
        The configured ``NavigationTask`` instance.
    """
    start_pose = [np.array([px, py]), NavigationTask.oriens[orien]]
    goal = np.array([gx, gy])
    return NavigationTask(agent_start_pos=start_pose, goal_pos=goal)

class SimulationPolicy(nn.Module):
    """Three-layer feed-forward policy mapping a state vector to actions.

    The input size is the length of ``env.getStateRep(oneHotOutput=True)`` and
    the output size is ``len(env.actions)``. ``forward`` returns action
    probabilities; ``sample`` returns a straight-through Gumbel-softmax
    one-hot action that stays differentiable with respect to the weights.
    """

    def __init__(self,  env, layerSizes=(100, 100)):
        """
        Args:
            env: environment exposing ``actions`` and ``getStateRep``.
            layerSizes: sizes of the two hidden layers (any indexable pair).
        """
        super(SimulationPolicy, self).__init__()
        self.actionSize = len(env.actions)
        self.stateSize = len(env.getStateRep(oneHotOutput=True))
        self.env = env
        print("State Size: " , self.stateSize)
        print("Action Size: ", self.actionSize)

        # Input space: [Batch, observations], output:[Batch, action_space]
        self.layer1 = nn.Linear(self.stateSize, layerSizes[0])
        self.layer2 = nn.Linear(layerSizes[0], layerSizes[1])
        self.layer3 = nn.Linear(layerSizes[1], self.actionSize)

    def _logits(self, state):
        """Run the shared network body and return unnormalised action scores."""
        hidden = F.relu( self.layer1(state) )
        hidden = F.relu( self.layer2(hidden) )
        return self.layer3(hidden)

    def sample(self,state,temperature=2):
        """Sample a one-hot action for each row of ``state``.

        The log-softmax of the network output is fed to ``gumbel_softmax``
        with the given ``temperature``.
        """
        log_probs = F.log_softmax(self._logits(state), dim=1)
        return gumbel_softmax(log_probs, temperature)

    def forward(self, state):
        """Return action probabilities (softmax over dim 1) for ``state``."""
        return F.softmax(self._logits(state), dim=1)

    def trainSad(self, forwardModel, numIters=3000, lr=0.0005, maxDepth=2, treeBreadth=1, logEvery=200):
        """Train the policy by back-propagating leaf values of imagined rollouts.

        Each iteration builds a ``Tree`` from the start state of ``self.env``
        using ``forwardModel`` and this policy, scores its leaves with
        ``greedy_valueF`` and takes one Adam step on the mean leaf loss.

        Side effect: every parameter of ``forwardModel`` gets
        ``requires_grad = False`` and stays that way after training.

        Args:
            forwardModel: model used by ``Tree`` to predict next states.
            numIters: number of optimisation steps.
            lr: Adam learning rate.
            maxDepth: rollout depth of each tree.
            treeBreadth: number of actions sampled per node.
            logEvery: print the loss every this many iterations (0 disables).
        """
        optimizer = optim.Adam(self.parameters(), lr = lr )

        # Only the policy is trained; keep the forward model fixed.
        for p in forwardModel.parameters(): p.requires_grad = False

        s0 = avar(torch.FloatTensor([self.env.getStateRep()]), requires_grad=False)
        for i in range(numIters):
            tree = Tree(s0,forwardModel,self,greedy_valueF, self.env,maxDepth,treeBreadth)
            loss = tree.getLossFromLeaves()
            # Clear gradients before the backward pass so nothing stale
            # (e.g. from a backward call made before training) leaks into the step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if logEvery and i % logEvery == 0: print('Loss',i,":",loss.data[0])
        
# POSSIBLE IDEA
# Don't score only the leaves; treat every node as a possible leaf (i.e. consider all sub-paths too)

class Node(object):
    """One vertex of the rollout tree.

    Attributes are plain references: ``parent`` (``None`` for the root),
    ``children`` (list filled through ``addChild``), the ``state`` reached at
    this node and the ``action`` that led to it.
    """

    def __init__(self, parent_node, state, action):
        self.parent, self.children = parent_node, []
        self.state, self.action = state, action

    def addChild(self, child):
        """Attach ``child`` as the last child of this node."""
        self.children += [child]
        
class Tree(object):
    """Look-ahead tree of imagined rollouts.

    Starting from ``initialState``, every node samples ``branchingFactor``
    actions from ``simPolicy`` and asks ``forwardModel`` for the resulting
    state, down to ``maxDepth`` levels. Childless non-root nodes are collected
    in ``self.leaves`` and scored with ``valueF``.
    """

    def __init__(self, initialState, forwardModel, simPolicy, valueF, env,maxDepth=5, branchingFactor=3):
        """
        Args:
            initialState: state tensor stored at the root.
            forwardModel: object providing ``reInitialize`` and ``forward``.
            simPolicy: object providing ``sample(state)``.
            valueF: callable mapping a state to a scalar value.
            env: environment, used only for ``printState`` in verbose mode.
            maxDepth: number of action levels below the root.
            branchingFactor: number of actions sampled per node.
        """
        self.simPolicy = simPolicy
        self.maxDepth, self.branchFactor = maxDepth, branchingFactor
        self.forwardModel = forwardModel
        self.valueF = valueF
        parent = Node(None,initialState,None)
        self.allStates = [initialState]
        self.allActions = []
        self.env = env
        # Generate Tree
        self.tree_head = self.grow(parent,0,self.branchFactor)
        # Get leaves: stack-based walk; the root itself is never added.
        q, self.leaves = [ parent ], []
        while q:
            currNode = q.pop()
            for child in currNode.children:
                if child.children: q.append( child )
                else: self.leaves.append( child )

    def getPathFromLeaf(self,leafNumber):
        """Return ``(states, actions)`` from the root down to leaf ``leafNumber``.

        ``states`` includes the root state; ``actions`` has one entry per
        non-root node on the path (the root carries no action).
        """
        path, actions = [], []
        currNode = self.leaves[leafNumber]
        # Walk upwards, then flip both lists into root-to-leaf order.
        while currNode is not None:
            path.append(currNode.state)
            if currNode.action is not None:
                actions.append(currNode.action)
            currNode = currNode.parent
        path.reverse()
        actions.reverse()
        return (path, actions)

    def grow(self,node,d,b,verbose=False):
        """Recursively expand ``node`` (at depth ``d``) with ``b`` children per level.

        Args:
            node: node to expand in place.
            d: depth of ``node``; expansion stops at ``self.maxDepth``.
            b: number of children to sample for each node.
            verbose: print states while growing; applies to the whole subtree.

        Returns:
            ``node`` itself, with its subtree attached.
        """
        if verbose:
            print('Grow depth: ',d)
            self.env.printState(node.state[0].data.numpy())
        if d == self.maxDepth : return node
        for i in range(b):
            # Sample the current action
            a_s =  [torch.squeeze(self.simPolicy.sample(node.state))]
            # NOTE(review): the state is handed over as a plain numpy array, so
            # no gradient can flow back through earlier states on this path,
            # only through the sampled action. Confirm this is intended.
            inital_state =  np.squeeze(node.state.data.numpy())
            # presumably resets the model's internal state for a batch of one; confirm
            self.forwardModel.reInitialize(1)
            current_state, _ = self.forwardModel.forward(inital_state,a_s, 1)
            # Restore the leading batch dimension before storing the state.
            current_state = current_state.unsqueeze(dim=0)

            self.allStates.append(current_state)
            self.allActions.append(a_s)
            if verbose:
                print("int_state at depth",d)
                self.env.printState(node.state[0].data.numpy())
                print("a_s at depth ",d," and breath",i)
                print("curr_state at depth",d)
                self.env.printState(current_state[0].data.numpy())
            # Build the next subtree; pass verbose down so deeper levels are
            # reported too (it used to be dropped after the first level).
            node.addChild( self.grow( Node(node, current_state, a_s), d+1, b, verbose) )
        return node

    def getBestPlan(self):
        """Return the ``(states, actions)`` path to the highest-valued leaf.

        Ties keep the earliest leaf in ``self.leaves``.

        Raises:
            ValueError: if the tree has no leaves (``maxDepth`` or the
                branching factor was 0).
        """
        if not self.leaves:
            raise ValueError("Tree has no leaves; maxDepth and branchingFactor must both be >= 1")
        bestInd, bestVal = 0, avar(torch.FloatTensor( [float('-inf')]))
        for i, leaf in enumerate(self.leaves):
            currVal = self.valueF(leaf.state)
            if currVal.data.numpy() > bestVal.data.numpy():
                bestInd = i
                bestVal = currVal
        return self.getPathFromLeaf( bestInd )

    def getLossFromLeaves(self):
        """Return the mean of ``-valueF(leaf.state)`` over all leaves.

        The result is a one-element tensor suitable for ``backward()``.

        Raises:
            ValueError: if the tree has no leaves; without this guard the
                division below would quietly produce a NaN loss.
        """
        if not self.leaves:
            raise ValueError("Tree has no leaves; maxDepth and branchingFactor must both be >= 1")
        totalLosses = avar(torch.FloatTensor([0.0]))
        for leaf in self.leaves:
            totalLosses = totalLosses + (-self.valueF( leaf.state ))
        return totalLosses/len(self.leaves)
        
    

In [103]:
# Fixed task: agent starts at (0, 0) with orientation index 0, goal at (0, 6).
exampleEnv = generateTask(0,0,0,0,6)
# Forward model sized from the training data's input/state lengths.
ForwardModel = LSTMForwardModel(train.lenOfInput,train.lenOfState)
# Restore the saved weights from the file named by f_model_name.
ForwardModel.load_state_dict( torch.load(f_model_name) )

In [104]:
# Build a fresh policy for exampleEnv and train it against the loaded forward
# model. trainSad also sets requires_grad=False on the forward model's parameters.
SimPolicy = SimulationPolicy(exampleEnv)
SimPolicy.trainSad(ForwardModel)

State Size:  64
Action Size:  10
Loss 0 : 1.873307228088379
Loss 200 : 0.8485720753669739
Loss 400 : 0.9242890477180481
Loss 600 : 0.6366820335388184
Loss 800 : 0.9035859107971191
Loss 1000 : 0.8245418071746826
Loss 1200 : 0.8485720753669739
Loss 1400 : 0.8538669347763062
Loss 1600 : 0.7285916805267334
Loss 1800 : 0.6852318048477173
Loss 2000 : 0.9242890477180481
Loss 2200 : 0.7951648235321045
Loss 2400 : 0.8245418071746826
Loss 2600 : 0.9191847443580627
Loss 2800 : 1.1712342500686646


In [127]:
# Start state of exampleEnv with a leading batch dimension of one.
s_0 = torch.unsqueeze(avar(torch.FloatTensor(exampleEnv.getStateRep())), dim =0)
print(s_0.shape)
# Roll the trained policy out: depth 2, one sampled action per node, leaves
# scored with the index-based value function.
tree = Tree(s_0,ForwardModel,SimPolicy,greedy_cont_valueF,exampleEnv,2,1)

torch.Size([1, 64])


In [128]:
# Show the (states, actions) path that leads to the best-scoring leaf of the tree.
print(tree.getBestPlan())

([Variable containing:

Columns 0 to 12 
    1     0     0     0     0     0     0     0     0     0     0     0     0

Columns 13 to 25 
    0     0     1     0     0     0     0     0     0     0     0     0     0

Columns 26 to 38 
    0     0     0     0     1     0     0     0     1     0     0     0     0

Columns 39 to 51 
    0     0     0     0     0     0     0     0     0     0     0     0     0

Columns 52 to 63 
    0     0     0     1     0     0     0     0     0     0     0     0
[torch.FloatTensor of size 1x64]
, Variable containing:

Columns 0 to 9 
 0.8910 -0.1370 -0.0768 -0.0846  0.0070  0.3068  0.0612  0.0359  0.0164 -0.0849

Columns 10 to 19 
 0.0601  0.0204 -0.0369 -0.0166 -0.0536  0.1463 -0.0293  0.0066  0.0666  0.1221

Columns 20 to 29 
 0.5031  0.0218  0.0328  0.0091 -0.1062  0.0459  0.0833 -0.0883 -0.0215  0.1998

Columns 30 to 39 
 0.9345  0.0313  0.0001 -0.0228  0.9535 -0.0164  0.0161 -0.0141  0.0019 -0.0191

Columns 40 to 49 
-0.0039 -0.0117  0.0229 -0.029

### 

In [None]:
# Switch to the feed-forward forward model. This rebinds f_model_name,
# exampleEnv and ForwardModel for every cell that follows.
# NOTE(review): Tree.grow calls forwardModel.reInitialize(1) and
# forwardModel.forward(state, actions, 1); confirm ForwardModelFFANN offers
# that interface before using it with Tree / trainSad below.
f_model_name = 'forward-ffann-noisy-wan-1.pt' # 6 gets 99% on 0.1% noise
exampleEnv = NavigationTask()
ForwardModel = ForwardModelFFANN(exampleEnv)
ForwardModel.load_state_dict( torch.load(f_model_name) )
#ForwardModel.printState(avar(torch.FloatTensor(testEnv.getStateRep()), requires_grad=False))
#SimPolicy = SimulationPolicy(exampleEnv)

In [None]:
# Re-create the policy for the current exampleEnv and train it against the
# currently bound ForwardModel (replaces any previously trained SimPolicy).
SimPolicy = SimulationPolicy(exampleEnv)
SimPolicy.trainSad(ForwardModel)


In [None]:
# Plan on a new task: start (0, 0), orientation index 0, goal (2, 3).
testEnv = generateTask(0,0,0,2,3)
print(testEnv.getStateRep())
s0 = avar(torch.FloatTensor([testEnv.getStateRep()]), requires_grad=False)
#greedy_valueF(s0)
# Tree takes the environment as its fifth positional argument. Without it the
# 4 was bound to env and the 5 to maxDepth (branching factor left at its
# default); pass testEnv so that depth=4 and branching factor=5 apply.
tree = Tree(s0,ForwardModel,SimPolicy,greedy_cont_valueF,testEnv,4,5)
nodes,actions = tree.getBestPlan()
actions

In [None]:
# Display the action sequence of the best plan again (set by tree.getBestPlan()).
actions

In [None]:
#Testing Simulation Policy
# Smoke test: an untrained policy on a default NavigationTask should return one
# sampled action for the batched start state.
env = NavigationTask() 
simpolicy = SimulationPolicy(env)
simpolicy.sample(avar(torch.FloatTensor([env.getStateRep()])))

In [None]:
#Loading and testing Forward Model
f_model_name = 'forward-ffann-noisy-wan-1.pt' # 6 gets 99% on 0.1% noise
exampleEnv = NavigationTask()
f = ForwardModelFFANN(exampleEnv)
f.load_state_dict( torch.load(f_model_name) )

# Hand-built model input of length 74 with six entries switched on.
# The offsets presumably follow the layout agent-x (15) | agent-y (15) |
# orientation (4) | goal-x (15) | goal-y (15) | action (10); confirm against
# NavigationTask.getStateRep.
start = np.zeros(74, dtype=np.float32)
start[[
    0+4,
    15+6,
    15+15+0,
    15+15+4+8,
    15+15+4+15+7,
    15+15+4+15+15+4,
]] = 1
f.test(start)
print('-----\n','Starting manualTest loop')
def softmax(x):
    """Softmax over all entries of ``x``.

    The maximum is subtracted before exponentiating so large inputs do not
    overflow.
    """
    peak = np.max(x)
    weights = np.exp(x - peak)
    return weights / np.sum(weights)
# Manual check: from random start/goal configurations, apply a few random
# actions both in the real environment and through the forward model `f`,
# then compare the model's predicted final state with the real one.
for i in range(5):
    width, height = 15, 15
    p_0 = np.array([npr.randint(0,width),npr.randint(0,height)])
    start_pos = [p_0, r.choice(NavigationTask.oriens)]
    goal_pos = np.array([ npr.randint(0,width), npr.randint(0,height) ])
    checkEnv = NavigationTask(
        width=width, height=height, agent_start_pos=start_pos, goal_pos=goal_pos,
        track_history=True, stochasticity=0.0, maxSteps=10)
    s_0 = checkEnv.getStateRep()
    numActions = 3
    currState = avar( torch.FloatTensor(s_0).unsqueeze(0) )
    print('Start State')
    f.printState( currState[0] )
    actionSet = []
    for j in range(numActions):
        # Random one-hot action, perturbed with Gaussian noise and renormalised.
        action = np.zeros( 10 )
        action[ npr.randint(0,10) ] = 1
        action += npr.randn( 10 )*0.1
        action = softmax( action )
        print('\tSoft Noisy Action ',j,'=',action)
        #### Apply Gumbel Softmax ####
        temperature = 0.01
        logProbAction = torch.log( avar(torch.FloatTensor(action)) ) 
        actiong = gumbel_softmax(logProbAction, temperature)
        ##############################
        print('\tGumbel Action ',j,'=',actiong.data.numpy())
        actionSet.append( actiong )
        # Step the real environment with the action that was actually sampled.
        # The Gumbel sample is a random draw from the soft action, so it often
        # differs from argmax(action); using argmax(action) here made the
        # environment and the model follow different action sequences.
        checkEnv.performAction( np.argmax(actiong.data.numpy()) )
        currState = f.forward( torch.cat([currState[0],actiong]).unsqueeze(0) )
        print("Intermediate State",j)
        f.printState( currState[0] )
    s_1 = checkEnv.getStateRep()
    for action in actionSet:
        f.printAction(action)
    print('Predicted')
    f.printState( currState[0] )
    print('Actual')
    s1 = avar( torch.FloatTensor( s_1 ).unsqueeze(0) )
    f.printState( s1[0] ) 
    # Sum of squared differences between predicted and real final state.
    print("Rough accuracy", torch.sum( (currState - s1).pow(2) ).data[0] )
    print('----\n')