In [1]:
import numpy as np
import h5py
import main
from models.savn import SAVN
from models.basemodel import BaseModel
from models.gcn import GCN
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from runners.train_util import get_params
from models.model_io import ModelOptions, ModelInput
from utils.net_util import gpuify
from utils.net_util import resnet_input_transform
import torch.nn.functional as F
import time
from runners.train_util import compute_learned_loss, SGD_step
#import matplotlib.pyplot as plt


In [2]:
# Checkpoint file for each supported model name (looked up via args.model).
MODEL_PATH_DICT = dict(
    SAVN='pretrained_models/savn_pretrained.dat',
    NON_ADAPTIVE_A3C='pretrained_models/nonadaptivea3c_pretrained.dat',
    GCN='pretrained_models/gcn_pretrained.dat',
)

# HDF5 file holding one embedding dataset per object name.
GLOVE_FILE = './data/thor_glove/glove_map300d.hdf5'

# Controller action names; the position in this list is the action index
# sampled from the policy.
ACTION_LIST = [
    'MoveAhead',
    'RotateLeft',
    'RotateRight',
    'LookUp',
    'LookDown',
    'Done',
]


In [3]:
class FakeArgs():
    """Minimal stand-in for the argument namespace the models and agent read.

    Parameters
    ----------
    model : str
        Model name; ``'SAVN'`` switches ``learned_loss`` on, any other value
        switches it off.
    glove_file : str
        Path to the GloVe HDF5 file, stored as ``self.glove_file``.
    inner_lr : float
        Learning rate stored as ``self.inner_lr``.
    """
    def __init__(self, model='SAVN',glove_file=GLOVE_FILE,inner_lr=0.0001):
        self.action_space = 6
        self.glove_dim = 300
        self.hidden_state_sz = 512
        self.dropout_rate = 0.25
        self.num_steps = 6 # initialized in main_eval.py
        self.gpu_id = -1
        # Only the SAVN model is run with the learned loss.
        self.learned_loss = model == 'SAVN'
        self.inner_lr = inner_lr
        self.model = model
        # Honour the caller's path; previously the module constant was always
        # used and the argument was silently ignored.
        self.glove_file = glove_file
        
        
class Agent():
    """Navigation agent pairing a policy model with a ResNet-18 feature extractor.

    Expected setup order (as done by ``init_agent``): construct the agent, call
    ``reset_hidden()`` and ``init_resnet18()``, then ``set_target()`` before the
    first call to ``action()``.
    """

    def __init__(self,args, model):
        """Store ``model`` and copy the settings this agent reads from ``args``.

        Reads ``args.gpu_id``, ``args.hidden_state_sz``, ``args.action_space``
        and ``args.learned_loss``.
        """
        self.gpu_id = args.gpu_id
        self.model = model
        self.hidden = None  # set by reset_hidden()
        self.last_action_probs = None  # set by reset_hidden()
        self.resnet18 = None  # set by init_resnet18()
        self.hidden_state_sz = args.hidden_state_sz
        self.action_space = args.action_space
        self.learned_loss = args.learned_loss
        self.learned_input = None  # grown by action() when learned_loss is on
        # Defined up front so a missing set_target() call is detected
        # explicitly instead of surfacing as an AttributeError mid-episode.
        self.target_glove_embedding = None

    def set_target(self,target_glove_embedding):
        """Remember the embedding of the object the agent should look for."""
        self.target_glove_embedding = target_glove_embedding

    def eval_at_state(self, model_options,frame):
        """Run the model on a single frame.

        Parameters
        ----------
        model_options
            Passed through unchanged to ``self.model.forward``.
        frame
            Raw frame handed to ``resnet_input_transform(frame, 224)``.

        Returns
        -------
        tuple
            ``(model_input, model_output)``.

        Raises
        ------
        RuntimeError
            If ``set_target()`` or ``init_resnet18()`` has not been called yet.
        """
        if self.target_glove_embedding is None:
            raise RuntimeError("No target set; call set_target() before acting.")
        if self.resnet18 is None:
            raise RuntimeError("Feature extractor missing; call init_resnet18() first.")

        model_input = ModelInput()
        # Original author's note: the frame is processed to shape
        # [1, 3, 224, 224] for input to resnet18.
        processed_frame = self.preprocess_frame(resnet_input_transform(frame, 224).unsqueeze(0))
        # The extractor is frozen, so no autograd bookkeeping is needed here.
        with torch.no_grad():
            resnet18_features = self.resnet18(processed_frame)

        model_input.state = resnet18_features
        model_input.hidden = self.hidden
        model_input.target_class_embedding = gpuify(torch.Tensor(self.target_glove_embedding),gpu_id=self.gpu_id)
        model_input.action_probs = self.last_action_probs

        return model_input, self.model.forward(model_input, model_options)

    def reset_hidden(self):
        """Zero the recurrent state pair and the previous action probabilities."""
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.hidden = (
                    torch.zeros(1, self.hidden_state_sz).cuda(),
                    torch.zeros(1, self.hidden_state_sz).cuda(),
                )
        else:
            self.hidden = (
                torch.zeros(1, self.hidden_state_sz),
                torch.zeros(1, self.hidden_state_sz),
            )
        self.last_action_probs = gpuify(
            torch.zeros((1, self.action_space)), self.gpu_id
        )

    def action(self, model_options, frame,training=False):
        """Sample one action for ``frame`` and update the agent's state.

        Puts the model in train or eval mode according to ``training``, runs
        ``eval_at_state``, stores the new hidden state and action
        probabilities, and, when ``learned_loss`` is enabled, appends
        ``[hidden[0], action_probs]`` as a new row of ``self.learned_input``.

        Returns
        -------
        tuple
            ``(out.value, prob, action)`` where ``prob`` is the softmax of the
            logits and ``action`` is one index sampled from ``prob``.
        """
        if training:
            self.model.train()
        else:
            self.model.eval()

        model_input, out = self.eval_at_state(model_options,frame)
        self.hidden = out.hidden
        prob = F.softmax(out.logit, dim=1)
        action = prob.multinomial(1).detach()
        self.last_action_probs = prob

        if self.learned_loss:
            res = torch.cat((self.hidden[0], self.last_action_probs), dim=1)
            if self.learned_input is None:
                self.learned_input = res
            else:
                self.learned_input = torch.cat((self.learned_input, res), dim=0)

        return out.value, prob, action

    def preprocess_frame(self, frame):
        """ Preprocess the current frame for input into the model. """
        state = torch.Tensor(frame)
        return gpuify(state, self.gpu_id)

    def init_resnet18(self):
        """Build the frozen, inference-mode ResNet-18 feature extractor.

        Loads torchvision's pretrained ResNet-18, drops its last two child
        modules (avgpool and fc in torchvision's ResNet), disables gradients,
        switches to eval mode and, when ``gpu_id >= 0``, moves the weights to
        that GPU.
        """
        resnet18 = models.resnet18(pretrained=True)
        modules = list(resnet18.children())[:-2]
        self.resnet18 = nn.Sequential(*modules)
        for p in self.resnet18.parameters():
            p.requires_grad = False
        # Inference only: a freshly built module is in training mode, where
        # BatchNorm normalises with the statistics of the current frame and
        # keeps updating its running averages.
        self.resnet18.eval()
        # Inputs go through gpuify(..., gpu_id), so the weights must live on
        # the same device (assumes gpuify moves tensors to that GPU - TODO confirm).
        if self.gpu_id >= 0:
            with torch.cuda.device(self.gpu_id):
                self.resnet18 = self.resnet18.cuda()

In [4]:
def load_glove_embedding(glove_file):
    """Read every top-level dataset of an HDF5 file into a dict.

    Parameters
    ----------
    glove_file : str
        Path of the HDF5 file to open read-only.

    Returns
    -------
    dict
        Maps each key of the file to the full contents of that dataset.
    """
    with h5py.File(glove_file, "r") as f:
        # ``Dataset.value`` was removed in h5py 3.0; indexing with ``[()]``
        # reads the whole dataset on both old and new h5py versions.
        return {key: f[key][()] for key in f.keys()}

In [5]:
def load_model(model_path,args):
    """Instantiate the model named by ``args.model`` and load its checkpoint.

    ``'NON_ADAPTIVE_A3C'`` selects ``BaseModel``, ``'GCN'`` selects ``GCN``,
    and any other value selects ``SAVN``.

    Returns
    -------
    tuple
        ``(model, model_options)`` where ``model_options.params`` holds the
        result of ``get_params(model, args.gpu_id)``.
    """
    model_classes = {'NON_ADAPTIVE_A3C': BaseModel, 'GCN': GCN}
    net = model_classes.get(args.model, SAVN)(args)

    def _keep_storage(storage, loc):
        # Hand each storage back unchanged, i.e. no remapping to another device.
        return storage

    checkpoint = torch.load(model_path, map_location=_keep_storage)
    net.load_state_dict(checkpoint)

    options = ModelOptions()
    options.params = get_params(net, args.gpu_id)
    return net, options
    
def init_agent(args, model):
    """Create an ``Agent`` for ``model`` and make it ready to act.

    The agent's recurrent state is reset and its ResNet-18 feature extractor
    is built before it is returned.
    """
    new_agent = Agent(args, model)
    new_agent.reset_hidden()
    new_agent.init_resnet18()
    return new_agent

    

In [42]:
def find_target(args,agent, controller, model_options, target, glove_embedding_dict, action_list,
                max_step=10, step_delay=1, gradient_limit=4):
    """Let the agent search for ``target`` in the scene held by ``controller``.

    Parameters
    ----------
    args
        Settings object; reads ``learned_loss``, ``num_steps``, ``gpu_id`` and
        ``inner_lr``.
    agent
        Provides ``set_target(embedding)`` and ``action(model_options, frame)``.
    controller
        Provides ``step(action=...)`` returning an event with a ``frame``.
    model_options
        Passed to the agent; its ``params`` are replaced after each
        learned-loss gradient step.
    target : str
        Key into ``glove_embedding_dict``.
    glove_embedding_dict : dict
        Maps target names to embeddings.
    action_list : list of str
        Action names indexed by the action index the agent samples. The
        episode ends when the sampled action is named ``'Done'``.
    max_step : int
        Maximum number of agent decisions.
    step_delay : float
        Seconds to pause after each executed action (0 disables the pause).
    gradient_limit : int
        Maximum number of learned-loss gradient steps per call.

    Returns
    -------
    The ``frame`` of the last event received from the controller.
    """
    agent.set_target(glove_embedding_dict[target])
    event = controller.step(action='Initialize')
    gradient_steps = 0
    for i in range(max_step):
        _, _, action = agent.action(model_options, event.frame)
        # Convert the sampled index to a plain int before indexing the list.
        action_name = action_list[int(action[0, 0])]
        print(i, action_name)
        # Stop on the action's name instead of a hard-coded index, so the
        # check follows whatever ordering ``action_list`` uses.
        if action_name == 'Done':
            print("Agent stopped after move ", i)
            break

        event = controller.step(action=action_name)

        # Use the gradient from the interaction loss: once every
        # ``args.num_steps`` executed actions, at most ``gradient_limit`` times.
        if (args.learned_loss
                and (i + 1) % args.num_steps == 0
                and gradient_steps < gradient_limit):
            learned_loss = compute_learned_loss(args, agent, args.gpu_id, model_options)
            inner_gradient = torch.autograd.grad(
                learned_loss["learned_loss"],
                [v for _, v in model_options.params.items()],
                create_graph=True,
                retain_graph=True,
                allow_unused=True,
            )
            print("gradient update")
            model_options.params = SGD_step(model_options.params, inner_gradient, args.inner_lr)
            gradient_steps += 1

        if step_delay > 0:
            time.sleep(step_delay)

    return event.frame
        
    

In [48]:
from ai2thor.controller import Controller

# Scene names by room type:
#   Kitchens:      FloorPlan1   - FloorPlan30
#   Living rooms:  FloorPlan201 - FloorPlan230
#   Bedrooms:      FloorPlan301 - FloorPlan330
#   Bathrooms:     FloorPlan401 - FloorPlan430

# Controller for the first kitchen scene, with a 0.25 grid size.
controller = Controller(
    scene='FloorPlan1',
    gridSize=0.25,
)

#

In [49]:
# Episode configuration: the object to search for and the model that drives
# the agent.
TARGET = 'Toaster'
args = FakeArgs(model='GCN', inner_lr=0.0001)

# Load the embeddings and the pretrained checkpoint, then build the agent.
glove_embedding_dict = load_glove_embedding(args.glove_file)
model, model_options = load_model(MODEL_PATH_DICT[args.model], args)
agent = init_agent(args, model)

# Run one episode of at most 50 decisions in the existing controller.
final_frame = find_target(
    args,
    agent,
    controller,
    model_options,
    target=TARGET,
    glove_embedding_dict=glove_embedding_dict,
    action_list=ACTION_LIST,
    max_step=50,
)





  d_inv_sqrt = np.power(rowsum, -0.5).flatten()


0 LookDown
1 RotateLeft
2 RotateLeft
3 RotateLeft
4 RotateLeft
5 RotateLeft
6 MoveAhead
7 MoveAhead
8 MoveAhead
9 RotateRight
10 Done
Agent stopped after move  10


In [47]:
# Stop the controller (presumably shuts down the simulator - confirm). Any
# later cell that uses `controller` needs the Controller(...) cell re-run first.
controller.stop()

In [None]:
# Success 1: Fridge, GarbageCan, Toaster
#         3: Microwave

In [41]:
# Scratch cell: send a single "LookUp" action to the controller by hand.
controller.step(action="LookUp")

<ai2thor.server.Event at 0x7febe8afbf50>

In [90]:
# Scratch cell: send a single "RotateRight" action to the controller by hand.
controller.step(action="RotateRight")

<ai2thor.server.Event at 0x7febc83f18d0>

In [88]:
# Names that have an embedding, i.e. the values accepted as `target` by find_target.
glove_embedding_dict.keys()

dict_keys(['AlarmClock', 'Apple', 'AppleSlice', 'Bathtub', 'Bed', 'Blinds', 'Book', 'Bowl', 'BowlDirty', 'BowlFilled', 'Box', 'Bread', 'BreadSliced', 'ButterKnife', 'Cabinet', 'Candle', 'CellPhone', 'Chair', 'Cloth', 'CoffeeMachine', 'Container', 'ContainerFull', 'CounterTop', 'CreditCard', 'Cup', 'Dirt', 'Egg', 'EggFried', 'EggShell', 'Fork', 'Fridge', 'GarbageCan', 'HousePlant', 'KeyChain', 'Knife', 'Lamp', 'Laptop', 'Lettuce', 'LettuceSliced', 'LightSwitch', 'Microwave', 'Mirror', 'Mug', 'MugFilled', 'Newspaper', 'Omelette', 'Painting', 'PaintingHanger', 'Pan', 'Pen', 'Pencil', 'Pillow', 'Plate', 'Plunger', 'Pot', 'Potato', 'PotatoSliced', 'RemoteControl', 'Sandwich', 'ScrubBrush', 'ShowerDoor', 'Sink', 'SoapBar', 'SoapBottle', 'Spoon', 'SportsEquipment', 'SprayBottle', 'Statue', 'StoveBurner', 'StoveKnob', 'TableTop', 'Television', 'TissueBox', 'Toaster', 'Toilet', 'ToiletPaper', 'Tomato', 'TomatoSliced', 'Towel', 'TowelHolder', 'VacuumCleaner', 'Watch', 'WateringCan'])