In [1]:
import torch, torch.autograd as autograd
import torch.nn as nn, torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable as avar
    
from SimpleTask import SimpleGridTask
from TransportTask import TransportTask
from NavTask import NavigationTask
from SeqData import SeqData



import os, sys, pickle, numpy as np, numpy.random as npr, random as r

In [95]:
class GreedyValuePredictor(nn.Module):
    def __init__(self,  env, layerSizes=[100,100]):
        super(GreedyValuePredictor, self).__init__()
        self.stateSize = len(env.getStateRep(oneHotOutput=True))
        self.env = env
        self.rewardSize = 1
        print("State Size: " , self.stateSize)        
        # Input space: [Batch, observations], output:[Batch, Reward]
        self.layer1 = nn.Linear(self.stateSize, layerSizes[0])
        self.layer2 = nn.Linear(layerSizes[0], layerSizes[1])
        self.layer3 = nn.Linear(layerSizes[1], self.rewardSize)
        
    def forward(self,state):
        output = F.relu( self.layer1(state) )
        output = F.relu( self.layer2(output) ) # F.sigmoid
        output = self.layer3(output)
        #print(output.shape)
        m = nn.Sigmoid()
        output = m(output)
        return output
    
    def train(self, trainSet, validSet, nEpochs=1500, batch_size=200, validateEvery=200, vbs=500, printEvery=200):
        optimizer = optim.Adam(self.parameters(), lr = 0.0003)
        state_size = self.stateSize
        lossFunction = nn.BCELoss()
        
        train_x, train_y = trainSet
        train_x = avar( torch.FloatTensor(train_x), requires_grad=False)
        train_y = avar( torch.FloatTensor(train_y), requires_grad=False)
        valid_x, valid_y = validSet 
        valid_x = avar( torch.FloatTensor(valid_x), requires_grad=False)
        valid_y = avar( torch.FloatTensor(valid_y), requires_grad=False)
        ntrain, nvalid = len(train_x), len(valid_x)
        
        def getRandomMiniBatch(dsx,dsy,mbs,nmax):
            choices = torch.LongTensor( np.random.choice(nmax, size=mbs, replace=False) )
            return dsx[choices], dsy[choices]
        for epoch in range(nEpochs):
            if epoch % printEvery == 0: print('Epoch:',epoch, end='')
            loss = 0.0
            self.zero_grad() # Zero out gradients
            batch_x, batch_y = getRandomMiniBatch(train_x,train_y,batch_size,ntrain)
            prediction = self.forward(batch_x) #[-1,:]
            label = batch_y.unsqueeze(dim=1)
            #print(label.shape, prediction.shape)
            loss = lossFunction(prediction, label)
            loss.backward()
            optimizer.step()
            if epoch % printEvery == 0: print(" -> AvgLoss",str(loss.data[0]/ batch_size))
            if epoch % validateEvery == 0:
                batch_vx, batch_vy = getRandomMiniBatch(valid_x,valid_y,batch_size,nvalid)
                predv = self.forward(batch_vx) #[-1,:]
                vy = batch_vy.unsqueeze(dim=1)
                acc = self._accuracyBatch(vy,predv)
                print("VACC (noiseless) =",'%.4f' % acc,end=', ')
                print('/n')
                
    def _accuracyBatch(self,ylist,yhatlist):
        n, acc = ylist.data.shape[0], 0.0 
        for i in range(n):
            acc += self._accuracySingle(ylist[i], yhatlist[i])
        return acc / n

    # Accuracy averaged over subvecs
    def _accuracySingle(self,label,prediction):
        #print(label.data[0],prediction.data[0])
        if label.data[0] == 1.0:
            locAcc = 1.0 if prediction.data[0] > 0.5 else 0.0
        else:
            locAcc = 1.0 if prediction.data[0] < 0.5 else 0.0
        return locAcc 

In [74]:
ts = "navigation-data-state_to_reward-train.pickle"
vs = "navigation-data-state_to_reward-valid.pickle"
############
print('Reading Data')
with open(ts,'rb') as inFile:
    print('\tReading',ts); trainSet = pickle.load(inFile)
with open(vs,'rb') as inFile:
    print('\tReading',vs); validSet = pickle.load(inFile)

Reading Data
	Reading navigation-data-state_to_reward-train.pickle
	Reading navigation-data-state_to_reward-valid.pickle


In [96]:
env = NavigationTask()
greedyvp = GreedyValuePredictor(env)

State Size:  64


In [97]:
greedyvp.train( trainSet, validSet)

Epoch: 0 -> AvgLoss 0.003300641179084778
VACC (noiseless) = 1.0000, /n
Epoch: 200 -> AvgLoss 0.0016815374791622163
VACC (noiseless) = 0.9900, /n
Epoch: 400 -> AvgLoss 0.0015091486275196075
VACC (noiseless) = 0.9900, /n
Epoch: 600 -> AvgLoss 0.0005108853429555893
VACC (noiseless) = 1.0000, /n
Epoch: 800 -> AvgLoss 8.769051171839237e-05
VACC (noiseless) = 1.0000, /n
Epoch: 1000 -> AvgLoss 2.8275982476770877e-05
VACC (noiseless) = 1.0000, /n
Epoch: 1200 -> AvgLoss 1.6828174702823164e-05
VACC (noiseless) = 1.0000, /n
Epoch: 1400 -> AvgLoss 8.750113192945718e-06
VACC (noiseless) = 1.0000, /n


In [98]:
def generateTask(px,py,orien,gx,gy):
    direction = NavigationTask.oriens[orien]
    gs = np.array([gx, gy])
    env = NavigationTask(agent_start_pos=[np.array([px,py]), direction],goal_pos=gs)
    return env

In [112]:
env = generateTask(0,1,2,3,2)
state = avar( torch.FloatTensor(env.getStateRep()), requires_grad=False).view(1,-1)
print(state.shape)
greedyvp.forward(state).data.numpy()

torch.Size([1, 64])


array([[4.086873e-06]], dtype=float32)

In [None]:
torch.save(fm.state_dict(), "greedy_value_predictor")

In [23]:
train_x, train_y = trainSet
ntrain= len(train_x)
x, l = getRandomMiniBatch(train_x,train_y,200,ntrain)
print(l.shape)

(200,)


In [10]:
def getRandomMiniBatch(dsx,dsy,mbs,nmax):
    choices = torch.LongTensor( np.random.choice(nmax, size=mbs, replace=False) )
    return dsx[choices], dsy[choices]