# GradCAM Visualization Demo with ResNet50


In [1]:
import time
import logging
import os
import glob
import sys
from sys import stdout
from collections import deque
import copy
from vis import vis_paths

import numpy as np
import cv2
import PIL
import numpy as np
import matplotlib.pyplot as plt
from mlagents.envs.environment import UnityEnvironment

import imageio

# set logger
# Raise the root logger to INFO so INFO-level records (e.g. the mlagents.envs
# start-up messages printed below this cell) are emitted.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Name of the environment build; substituted into the executable path below.
env_name = '1226_final_without_visual_goals'
# Machine-specific absolute path to the environment executable.
path = "C:/data/ml-agents-old/scripts/envs/{}/HBF-navigation-experiment.exe".format(env_name)
# Start the environment with worker id 0, a fixed seed of 1 and graphics enabled.
env = UnityEnvironment(file_name=path, worker_id=0, seed=1, no_graphics=False)

INFO:mlagents.envs:
'StationAcademy' started successfully!
Unity Academy name: StationAcademy
        Reset Parameters : {}


In [2]:
import tensorflow as tf
import numpy as np
import sys
import time
import scipy.misc
import scipy.signal
import pickle

import config
import network

class Agent():
    def __init__(self, name, trainer, global_episode, model_path, env, brain_name):
        """Create the agent's local network, its sync op and its environment handle.

        Args:
            name: Agent name; also used as the log sub-directory and as the
                variable-collection scope for the local network.
            trainer: Optimizer handed to the local ``network.Network``.
            global_episode: Shared episode counter variable.
            model_path: Directory used when saving checkpoints.
            env: Environment object exposing ``reset`` and ``step``.
            brain_name: Key used to select this agent's info from the
                environment's ``reset``/``step`` results.
        """
        # Plain bookkeeping attributes.
        self.name, self.trainer = name, trainer
        self.global_episode = global_episode
        self.model_path = model_path
        self.brain_name = brain_name
        self.env = env

        self.summary_writer = tf.summary.FileWriter('./log/' + name)
        # Local network (the global one lives under the 'global' scope).
        self.network = network.Network(name, trainer)

        trainable = tf.GraphKeys.TRAINABLE_VARIABLES
        global_vars = tf.get_collection(trainable, 'global')
        local_vars = tf.get_collection(trainable, name)
        # One assign op per (global, local) pair; running the list copies the
        # global weights into this agent's network.
        self.copy_network = [dst.assign(src) for src, dst in zip(global_vars, local_vars)]

        # Start from a freshly reset environment and keep this brain's info.
        self.env_info = env.reset(train_mode=True)[brain_name]
            
    # static function to save frame during training
    @staticmethod
    def make_gif(images, fname, duration=2, true_image=False, salience=False, salIMGS=None):
        """Write a sequence of frames to ``fname`` as an animated GIF.

        Args:
            images: Indexable sequence of frames (arrays).
            fname: Output file path.
            duration: Clip length in seconds; frames are spread evenly over it.
            true_image: If true, frames are only cast to ``uint8``; otherwise
                they are mapped with ``(x + 1) / 2 * 255`` before the cast.
            salience: If true, the clip built from ``salIMGS`` is written
                instead of the image clip.
            salIMGS: Indexable sequence of mask frames; required when
                ``salience`` is true.

        Raises:
            ZeroDivisionError: if ``duration`` is 0.
            IndexError: if the frame sequence being rendered is empty.
        """
        import moviepy.editor as mpy

        def frame_at(frames, t):
            # int(len / duration * t) equals len(frames) when t == duration,
            # which is one past the end; clamp to the last frame instead of
            # relying on a catch-all except.
            idx = min(int(len(frames) / duration * t), len(frames) - 1)
            return frames[idx]

        def make_frame(t):
            x = frame_at(images, t)
            if true_image:
                return x.astype(np.uint8)
            return ((x + 1) / 2 * 255).astype(np.uint8)

        def make_mask(t):
            return frame_at(salIMGS, t)

        fps = len(images) / duration
        clip = mpy.VideoClip(make_frame, duration=duration)
        if salience:
            mask = mpy.VideoClip(make_mask, ismask=True, duration=duration)
            # NOTE(review): the previous code also built `clip.set_mask(mask)` and
            # `clip.set_opacity(0)` but never used the results; only the mask
            # clip is written, exactly as before. Confirm whether an overlay of
            # mask on image was intended.
            mask = mask.set_opacity(0.1)
            mask.write_gif(fname, fps=fps, verbose=False)
        else:
            clip.write_gif(fname, fps=fps, verbose=False)

    # static function
    def resize_image(image):
        image = image.astype(np.float32) / 255.0
        return image
        #return scipy.misc.imresize(image, [84, 84])
        
    # !!!!!
    # Reduce a 2-D map to a flat array of per-window integer levels.
    def window_stack(self, a, stepsize=10, trim=2):
        """Average non-overlapping windows of ``a`` and quantise the means.

        ``trim`` rows/columns are cut from every border, the remainder is
        tiled with ``stepsize`` x ``stepsize`` windows in row-major order
        (partial windows at the far edges are ignored), and each window mean
        is multiplied by 7 and truncated to ``int``.  For means in [0, 1]
        this yields integers 0..7.

        Args:
            a: Array with at least two dimensions.
            stepsize: Window side length.
            trim: Border width removed before windowing.

        Returns:
            1-D integer array with one entry per window.
        """
        cropped = a[trim:a.shape[0] - trim, trim:a.shape[1] - trim]
        n_rows = int(cropped.shape[0] / stepsize)
        n_cols = int(cropped.shape[1] / stepsize)

        window_means = []
        for top in range(0, n_rows * stepsize, stepsize):
            for left in range(0, n_cols * stepsize, stepsize):
                window = cropped[top:top + stepsize, left:left + stepsize]
                window_means.append(np.mean(window))

        # Scale to the 0..7 range and truncate toward zero.
        levels = np.array(window_means) * 7
        return levels.astype(int)

    def discount(x, gamma):
        return scipy.signal.lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]        

    def train(self, train_buffer, sess, boot_value):
        """Apply one gradient update computed from a chunk of buffered transitions.

        Args:
            train_buffer: List of rows read by column as ``[image,
                prev_action, prev_reward, prev_velocity, action, reward,
                value, depth_pred, true_depth]``.  Column 7 is carried in the
                buffer but not used by the update.
            sess: Session used to run the update.
            boot_value: Value appended after the last reward to bootstrap the
                discounted return (callers pass 0.0 at episode end).

        Returns:
            ``(value_loss, policy_loss, entropy_loss, depth_loss, depth_loss2,
            gradient_norm, apply_gradient_result)`` as fetched from the
            network.

        The LSTM states fed here are ``self.train_lstm*_state_*``, which the
        caller must have set before calling.
        """
        # Rows mix images, scalars and vectors, so the buffer is ragged.  Ask
        # for an object array explicitly: implicit ragged conversion is
        # deprecated and raises ValueError on NumPy >= 1.24.
        train_buffer = np.array(train_buffer, dtype=object)
        # unroll from train_buffer
        input_image = np.array(train_buffer[:, 0].tolist())
        aux_action = np.array(train_buffer[:, 1].tolist())
        # 2:3 keeps the column axis, giving one single-element row per step.
        aux_reward = np.array(train_buffer[:, 2:3].tolist())
        aux_velocity = np.array(train_buffer[:, 3].tolist())
        action = train_buffer[:, 4]
        reward = train_buffer[:, 5]
        value = train_buffer[:, 6]
        true_depth = np.array(train_buffer[:, 8].tolist())

        # Discounted returns, bootstrapped with boot_value; the bootstrap entry
        # itself is dropped again after discounting.
        reward_plus = np.asarray(reward.tolist() + [boot_value])
        disc_reward = Agent.discount(reward_plus, config.GAMMA)[:-1]
        value_plus = np.asarray(value.tolist())
        #advantage = Agent.discount(reward + config.GAMMA*value_plus[1:] - value_plus[:-1], config.GAMMA)
        advantage = disc_reward - value_plus
        vl, pl, el, dl, dl2, gradn, _, d_tmp = sess.run([self.network.value_loss,
            self.network.policy_loss,
            self.network.entropy_loss,
            self.network.depth_loss,
            self.network.depth_loss2,
            self.network.gradient_norm,
            self.network.apply_gradient, self.network.depth_loss_], feed_dict={
                self.network.input_image: input_image,
                self.network.input_action: aux_action,
                self.network.input_reward: aux_reward,
                self.network.input_velocity: aux_velocity,
                self.network.true_value: disc_reward,
                self.network.advantage: advantage,
                self.network.action: action,
                self.network.true_depth: true_depth,
                self.network.lstm1_state_c_in: self.train_lstm1_state_c,
                self.network.lstm1_state_h_in: self.train_lstm1_state_h,
                self.network.lstm2_state_c_in: self.train_lstm2_state_c,
                self.network.lstm2_state_h_in: self.train_lstm2_state_h
            })
        sys.stdout.flush()
        # Callers unpack seven values, so the apply_gradient result stays in the tuple.
        return vl, pl, el, dl, dl2, gradn, _

    def get_action(self, action):
        """Split a flat action index into ``[action // 3, action % 3]``.

        The first element is used by the caller as the move component and the
        second as the rotate component of the environment action.
        """
        move_part, rotate_part = divmod(action, 3)
        return [move_part, rotate_part]

    def get_vel(self, prev_coord, coord, action_transformed):
        """Estimate a 6-element velocity vector from a position change and an action.

        Args:
            prev_coord: Previous position (array of 3 values).
            coord: Current position (array of 3 values).
            action_transformed: ``[move, rotate]`` pair as produced by
                ``get_action``; ``move`` must be 0 or 1 and ``rotate`` must be
                0, 1 or 2.

        Returns:
            ``numpy`` array of 6 values.  When ``move`` is 1 only element 4
            (-15, 0 or 15 depending on ``rotate``) can be non-zero.  When
            ``move`` is 0 the distance between the two positions is split
            with the factors 0.96 / 0.26 for ``rotate`` 0 and 2, or used
            unchanged for ``rotate`` 1, and element 1 is the change of the
            second coordinate.

        Raises:
            ValueError: if ``move`` or ``rotate`` is outside its known range.
                Previously an unknown ``rotate`` surfaced as an
                ``UnboundLocalError`` and an unknown ``move`` relied on
                ``assert``, which is stripped under ``python -O``.
        """
        move, rotate = action_transformed[0], action_transformed[1]
        if move not in (0, 1):
            raise ValueError("action_transformed's first element is unknown")
        if rotate not in (0, 1, 2):
            raise ValueError("action_transformed's second element is unknown")

        # Turning angle per step (15 degrees in Unity, per the original notes).
        yaw = (-15, 0, 15)[rotate]

        if move == 1:
            return np.array([0, 0, 0, 0, yaw, 0])

        distance = np.linalg.norm(coord - prev_coord)
        dy = coord[1] - prev_coord[1]
        if rotate == 1:
            return np.array([distance, dy, 0, 0, 0, 0])

        # 0.96 ~ cos(15 deg) and 0.26 ~ sin(15 deg): split the travelled
        # distance along and across the heading for a 15-degree turn.
        lateral = 0.26 * distance if rotate == 2 else -0.26 * distance
        return np.array([0.96 * distance, dy, lateral, 0, yaw, 0])

    def run(self, sess, trainer, saver, coordinator):
        """Play training episodes and update the networks until told to stop.

        Per episode: sync the local network from the global one, reset the
        environment, then step until the environment reports done or
        ``ep_step_max`` steps have elapsed.  Transitions are buffered and
        passed to ``train`` every ``config.GRADIENT_CHUNK`` steps
        (bootstrapped with the current value estimate) and once more at
        episode end (bootstrapped with 0.0).  Every ``config.SAVE_PERIOD``
        episodes a GIF of the episode, a checkpoint, loss/reward summaries and
        the visited positions are written.

        Fixes relative to the earlier version:
          * the environment is reset *before* the first observation is read,
            so step 0 no longer uses a frame from before the reset;
          * ``prev_coord`` is advanced every step, so ``get_vel`` sees the
            per-step displacement instead of the distance from the start.
            NOTE(review): checkpoints trained with the old behaviour saw a
            different velocity input;
          * the action index is sampled directly, so actions with identical
            probabilities are no longer always resolved to the first one;
          * the ``./positions`` directory is created before it is written to.

        Args:
            sess: TensorFlow session used for all graph evaluations.
            trainer: Unused here; kept for interface compatibility.
            saver: Saver used to write checkpoints under ``self.model_path``.
            coordinator: Object whose ``should_stop()`` ends the loop.
        """
        print('starting agent:', self.name)
        sys.stdout.flush()
        with sess.as_default(), sess.graph.as_default():
            while not coordinator.should_stop():
                # The increment op returns the updated counter, so `ep` is the
                # number this agent actually claimed.
                ep = sess.run(self.global_episode.assign_add(1))
                print('episode:', ep)
                sys.stdout.flush()

                ep_reward = 0
                ep_step = 0
                ep_start_time = time.time()

                sess.run(self.copy_network)
                self.train_buffer = []
                frame_buffer = []
                running = True

                # Reset first, then read the initial observation (mirrors the
                # legacy `self.game.reset(); rgb, prev_d = self.game.frame()`).
                self.env_info = self.env.reset(train_mode=True)[self.brain_name]
                prev_coord = self.env_info.vector_observations[0][-3:]
                print('initial position:({:.2f} {:.2f} {:.2f})'.format(
                    prev_coord[0], prev_coord[1], prev_coord[2]))

                # Visual observation 1 is used as the RGB input; observation 0
                # is reduced by window_stack and stored as the depth target.
                rgb = np.asarray(self.env_info.visual_observations[1][0])
                prev_d = self.window_stack(np.asarray(self.env_info.visual_observations[0][0]))

                frame_buffer.append(rgb * 255)
                # rgb = Agent.resize_image(rgb)
                prev_act_idx = 0
                prev_reward = 0
                prev_vel = np.array([0.0]*6)

                self.lstm1_state_c = self.network.lstm1_init_state_c
                self.lstm1_state_h = self.network.lstm1_init_state_h
                self.lstm2_state_c = self.network.lstm2_init_state_c
                self.lstm2_state_h = self.network.lstm2_init_state_h

                ep_step_max = 1000
                positions = []
                while True:
                    if not self.train_buffer:
                        # Remember the LSTM state at the start of each training
                        # chunk; train() feeds these as the initial state.
                        self.train_lstm1_state_h = self.lstm1_state_h
                        self.train_lstm1_state_c = self.lstm1_state_c
                        self.train_lstm2_state_h = self.lstm2_state_h
                        self.train_lstm2_state_c = self.lstm2_state_c
                    act_prob, pred_value, depth_pred, self.lstm1_state_c, self.lstm1_state_h, self.lstm2_state_c, self.lstm2_state_h = sess.run([self.network.policy,
                        self.network.value, self.network.depth_pred,
                        self.network.lstm1_state_c_out,
                        self.network.lstm1_state_h_out,
                        self.network.lstm2_state_c_out,
                        self.network.lstm2_state_h_out]
                        ,
                        feed_dict={self.network.input_image: [rgb],
                        self.network.input_action: [prev_act_idx],
                        self.network.input_reward: [[prev_reward]],
                        self.network.input_velocity: [prev_vel],
                        self.network.lstm1_state_c_in:self.lstm1_state_c,
                        self.network.lstm1_state_h_in:self.lstm1_state_h,
                        self.network.lstm2_state_c_in:self.lstm2_state_c,
                        self.network.lstm2_state_h_in:self.lstm2_state_h
                    })

                    self.action_visualize = act_prob
                    # Sample the index itself; sampling a probability value and
                    # searching for it picks the first match on ties.
                    action_idx = np.random.choice(len(act_prob[0]), p=act_prob[0])

                    action_transformed = self.get_action(action_idx)
                    self.env_info = self.env.step(action_transformed)[self.brain_name] # send the action to the environment
                    rgb_next = np.asarray(self.env_info.visual_observations[1][0])  # get the next state
                    d = self.window_stack(np.asarray(self.env_info.visual_observations[0][0]))

                    reward = self.env_info.rewards[0]                   # get the reward
                    done = self.env_info.local_done[0]                  # see if episode has finished
                    coord = self.env_info.vector_observations[0][-3:]

                    # only an estimate: velocity derived from the previous and
                    # current coordinates and the action vector
                    vel = self.get_vel(prev_coord, coord, action_transformed)

                    sys.stdout.write('\r episode:{}, step: {}, position:({:.2f} {:.2f} {:.2f}), score:{:.2f}, action:{}'.format(self.name, ep_step,
                        coord[0], coord[1],
                        coord[2], ep_reward, str(action_idx)))
                    sys.stdout.flush()

                    positions.append([coord[0], coord[1], coord[2]])

                    self.train_buffer.append([rgb, prev_act_idx, prev_reward, prev_vel, action_idx,
                                         reward, pred_value[0][0], depth_pred, prev_d])

                    ep_reward += reward
                    ep_step += 1

                    running = not ((ep_step >= ep_step_max) or done)

                    if running:
                        if ep%config.SAVE_PERIOD==0:
                            frame_buffer.append(rgb_next * 255)
                        # rgb_next = Agent.resize_image(rgb_next)
                        rgb = rgb_next

                    prev_act_idx = action_idx
                    prev_reward = reward
                    prev_vel = vel
                    prev_d = d
                    prev_coord = coord

                    if len(self.train_buffer)==config.GRADIENT_CHUNK and running:
                        # Bootstrap the unfinished episode with the value of
                        # the state just reached.
                        boot_value = sess.run(self.network.value, feed_dict={
                            self.network.input_image: [rgb],
                            self.network.input_action: [prev_act_idx],
                            self.network.input_reward: [[prev_reward]],
                            self.network.input_velocity: [prev_vel],
                            self.network.lstm1_state_c_in:self.lstm1_state_c,
                            self.network.lstm1_state_h_in:self.lstm1_state_h,
                            self.network.lstm2_state_c_in:self.lstm2_state_c,
                            self.network.lstm2_state_h_in:self.lstm2_state_h
                        })
                        vl, pl, el, dl, dl2, gradn, _ = self.train(self.train_buffer, sess, boot_value)
                        self.train_buffer = []
                        sess.run(self.copy_network)
                    if not running:
                        break
                if self.train_buffer:
                    # Flush the remaining transitions; the episode is over, so
                    # there is nothing to bootstrap from.
                    vl, pl, el, dl, dl2, gradn, _ = self.train(self.train_buffer, sess, 0.0)
                    self.test1 = [vl, pl, el, dl, dl2, gradn]

                ep_finish_time = time.time()
                print(self.name, 'elapse', str(int(ep_finish_time-ep_start_time)), 'seconds, reward:',ep_reward)
                sys.stdout.flush()

                if ep%config.SAVE_PERIOD==0:
                    imgs = np.array(frame_buffer)
                    Agent.make_gif(imgs, './frame/image'+str(ep)+'_'+str(ep_reward)+'.gif', duration=len(imgs)*0.066, true_image=True, salience=False)
                    print('frame saved')
                    sys.stdout.flush()

                    saver.save(sess, self.model_path+'/model'+str(ep)+'.cptk')
                    print('model saved')
                    sys.stdout.flush()

                    summary = tf.Summary()
                    summary.value.add(tag='Losses/Policy Loss', simple_value=float(pl))
                    summary.value.add(tag='Losses/Value Loss', simple_value=float(vl))
                    summary.value.add(tag='Losses/Entropy Loss', simple_value=float(el))
                    summary.value.add(tag='Losses/Depth Loss', simple_value=float(dl))
                    summary.value.add(tag='Losses/Depth Loss2', simple_value=float(dl2))
                    summary.value.add(tag='Losses/Gradient Norm', simple_value=float(gradn))
                    summary.value.add(tag='Performance/Reward', simple_value=float(ep_reward))
                    self.summary_writer.add_summary(summary, ep)
                    self.summary_writer.flush()

                    print('save positions')
                    sys.stdout.flush()

                    # Nothing else creates this directory; make sure it exists
                    # so a long episode is not lost to FileNotFoundError.
                    os.makedirs('./positions', exist_ok=True)
                    with open('./positions/'+str(ep), 'wb') as fp:
                        pickle.dump(positions, fp)
                        
                        

    def evaluate(self, sess, saver, coordinator):
        """Run episodes with the current policy without any training step.

        Per episode: sync the local network from the global one, reset the
        environment and step until the environment reports done or
        ``ep_step_max`` steps have elapsed.  No gradients are applied and
        nothing is written to disk.

        Fixes relative to the earlier version:
          * ``prev_act_idx``, ``prev_reward``, ``prev_vel`` and ``prev_coord``
            are now initialised per episode and advanced every step.  They
            were never defined in this method before, so it either raised
            ``NameError`` or silently picked up stale notebook globals;
          * the environment is reset *before* the first observation is read;
          * the action index is sampled directly, so actions with identical
            probabilities are no longer always resolved to the first one.

        NOTE(review): checkpoints trained by the earlier ``run`` saw a
        velocity input measured from the episode start rather than per step.

        Args:
            sess: TensorFlow session used for all graph evaluations.
            saver: Unused here; kept for interface compatibility.
            coordinator: Object whose ``should_stop()`` ends the loop.
        """
        print('evaluation:', self.name)
        sys.stdout.flush()
        with sess.as_default(), sess.graph.as_default():
            while not coordinator.should_stop():
                # The increment op returns the updated counter value.
                ep = sess.run(self.global_episode.assign_add(1))
                print('episode:', ep)
                sys.stdout.flush()

                ep_reward = 0
                ep_step = 0

                sess.run(self.copy_network)
                frame_buffer = []
                running = True

                # Reset first, then read the initial observation.
                self.env_info = self.env.reset(train_mode=True)[self.brain_name]
                prev_coord = self.env_info.vector_observations[0][-3:]
                print('initial position:({:.2f} {:.2f} {:.2f})'.format(
                    prev_coord[0], prev_coord[1], prev_coord[2]))

                rgb = np.asarray(self.env_info.visual_observations[1][0])

                frame_buffer.append(rgb * 255)

                # Previous-step inputs of the network, same start values as in run().
                prev_act_idx = 0
                prev_reward = 0
                prev_vel = np.array([0.0]*6)

                self.lstm1_state_c = self.network.lstm1_init_state_c
                self.lstm1_state_h = self.network.lstm1_init_state_h
                self.lstm2_state_c = self.network.lstm2_init_state_c
                self.lstm2_state_h = self.network.lstm2_init_state_h

                ep_step_max = 1000
                positions = []
                while True:

                    # Kept as attributes in case other notebook cells inspect them.
                    self.train_lstm1_state_h = self.lstm1_state_h
                    self.train_lstm1_state_c = self.lstm1_state_c
                    self.train_lstm2_state_h = self.lstm2_state_h
                    self.train_lstm2_state_c = self.lstm2_state_c
                    act_prob, pred_value, depth_pred, self.lstm1_state_c, self.lstm1_state_h, self.lstm2_state_c, self.lstm2_state_h = sess.run([self.network.policy,
                        self.network.value, self.network.depth_pred,
                        self.network.lstm1_state_c_out,
                        self.network.lstm1_state_h_out,
                        self.network.lstm2_state_c_out,
                        self.network.lstm2_state_h_out],
                        feed_dict={self.network.input_image: [rgb],
                        self.network.input_action: [prev_act_idx],
                        self.network.input_reward: [[prev_reward]],
                        self.network.input_velocity: [prev_vel],
                        self.network.lstm1_state_c_in:self.lstm1_state_c,
                        self.network.lstm1_state_h_in:self.lstm1_state_h,
                        self.network.lstm2_state_c_in:self.lstm2_state_c,
                        self.network.lstm2_state_h_in:self.lstm2_state_h})

                    self.action_visualize = act_prob
                    # Sample the index itself; sampling a probability value and
                    # searching for it picks the first match on ties.
                    action_idx = np.random.choice(len(act_prob[0]), p=act_prob[0])

                    action_transformed = self.get_action(action_idx)
                    self.env_info = self.env.step(action_transformed)[self.brain_name] # send the action to the environment
                    rgb_next = np.asarray(self.env_info.visual_observations[1][0])  # get the next state

                    reward = self.env_info.rewards[0]                   # get the reward
                    done = self.env_info.local_done[0]                  # see if episode has finished
                    coord = self.env_info.vector_observations[0][-3:]

                    vel = self.get_vel(prev_coord, coord, action_transformed)

                    sys.stdout.write('\r episode:{}, step: {}, position:({:.2f} {:.2f} {:.2f}), score:{:.2f}, action:{}'.format(self.name, ep_step,
                        coord[0], coord[1],
                        coord[2], ep_reward, str(action_idx)))
                    sys.stdout.flush()

                    positions.append([coord[0], coord[1], coord[2]])

                    ep_reward += reward
                    ep_step += 1

                    running = not ((ep_step >= ep_step_max) or done)

                    if running:
                        if ep%config.SAVE_PERIOD==0:
                            frame_buffer.append(rgb_next * 255)
                        # rgb_next = Agent.resize_image(rgb_next)
                        rgb = rgb_next

                    # Advance the previous-step inputs for the next forward pass.
                    prev_act_idx = action_idx
                    prev_reward = reward
                    prev_vel = vel
                    prev_coord = coord

                    if not running:
                        break
                        
#                 if ep%config.SAVE_PERIOD==0:
#                     imgs = np.array(frame_buffer)
#                     Agent.make_gif(imgs, './frame/image'+str(ep)+'_'+str(ep_reward)+'.gif', duration=len(imgs)*0.066, true_image=True, salience=False)
#                     print('frame saved')
#                     sys.stdout.flush()
                                    
#                 if ep%config.SAVE_PERIOD==0:
#                     print('save positions')
#                     sys.stdout.flush()
                    
#                     with open('./positions/'+str(ep), 'wb') as fp:
#                         pickle.dump(positions, fp)

In [3]:
# Advance the environment once without supplying an action.
# NOTE(review): this runs before the explicit reset below — confirm it is intended.
env.step()
# Select the first entry of the environment's external brain names.
default_brain = env.external_brain_names[0]
brain = env.brains[default_brain]
# Reset in training mode and keep the info object belonging to that brain.
env_info = env.reset(train_mode=True)[default_brain]


INFO:mlagents.envs:Connected new brain:
Unity brain name: hbf-agent
        Number of Visual Observations (per agent): 2
        Camera Resolutions: ['CameraResolution(84, 84, 1)', 'CameraResolution(84, 84, 3)']
        Vector Observation space size (per agent): 9
        Vector Action space type: discrete
        Vector Action space size (per agent): [2, 3]
        Vector Action descriptions: 


In [4]:
# ckpt.model_checkpoint_path
# from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
# print_tensors_in_checkpoint_file(file_name="./model/1226_final_without_visual_goals\\model5020.cptk", tensor_name='', all_tensors=False)

evaluation = True
# Directory holding the checkpoints of the run being analysed.
model_path = "./model/1226_final_without_visual_goals"
# Suffix used to build the agent's name ('thread_0_' + model_time) in later cells.
model_time = "1577341381.6629117"

# model_path = './model'
frame_path = './frame'
# exist_ok=True makes creation idempotent and avoids the check-then-create race.
os.makedirs(model_path, exist_ok=True)
os.makedirs(frame_path, exist_ok=True)

In [5]:
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import os
import threading
import sys
import time

import config
import network
# import agent
    
# Build the graph objects on the CPU device.
with tf.device('cpu:0'):
    # Non-trainable int32 counter; the agent increments it once per episode.
    global_episode = tf.Variable(0, trainable=False, dtype=tf.int32)
    # RMSProp optimizer; all hyper-parameters come from the config module.
    trainer = tf.train.RMSPropOptimizer(config.LEARNING_RATE, decay=config.DECAY, 
                                        momentum=config.MOMENTUM, epsilon=config.EPSILON)
    # The 'global' network must be created before the agent: Agent.__init__
    # collects the trainable variables under 'global' to build its copy ops.
    master_network = network.Network('global', trainer)
    agent = Agent('thread_0_'+model_time, trainer, global_episode, 
                           model_path, env, default_brain)
    # Created after both networks so that it covers the variables defined above.
    saver = tf.train.Saver()




The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

INFO:numexpr.utils:NumExpr defaulting to 8 threads.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Call initializer instance with the dtype 

In [6]:
# Restore the latest checkpoint found under model_path.  The session is closed
# when the with-block exits; `ckpt` stays available for later cells.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(model_path)
    # get_checkpoint_state returns None when no checkpoint state is found;
    # fail with a clear message instead of an AttributeError on None.
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise FileNotFoundError('no checkpoint found in ' + model_path)
    saver.restore(sess, ckpt.model_checkpoint_path)

INFO:tensorflow:Restoring parameters from ./model/1226_final_without_visual_goals\model5040.cptk


In [7]:
# Replace the vanilla ReLU gradient with a guided ReLU gradient to obtain guided backpropagation.
import tensorflow as tf

from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_nn_ops

@ops.RegisterGradient("GuidedRelu")
def _GuidedReluGrad(op, grad):
    """Gradient function registered under the name "GuidedRelu".

    Passes the ReLU gradient through only where the incoming gradient is
    positive and returns zeros elsewhere.

    Args:
        op: The forward op; ``op.outputs[0]`` is its output.
        grad: Incoming gradient with respect to that output.
    """
    # NOTE(review): the generated wrapper appears as `_relu_grad` in early
    # TensorFlow 1.x and as `relu_grad` in later releases — use whichever
    # this installation provides.
    relu_grad = getattr(gen_nn_ops, 'relu_grad', None) or gen_nn_ops._relu_grad
    return tf.where(0. < grad, relu_grad(grad, op.outputs[0]), tf.zeros_like(grad))

### main function

In [8]:
# List the global variables collected under the agent's scope ('thread_0_' + model_time).
print(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='thread_0_'+model_time))

[<tf.Variable 'thread_0_1577341381.6629117/Variable:0' shape=(8, 8, 3, 16) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/Variable_1:0' shape=(16,) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/Variable_2:0' shape=(4, 4, 16, 32) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/Variable_3:0' shape=(32,) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/Variable_4:0' shape=(3872, 256) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/Variable_5:0' shape=(256,) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/lstm1/rnn/basic_lstm_cell/kernel:0' shape=(321, 256) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/lstm1/rnn/basic_lstm_cell/bias:0' shape=(256,) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/lstm2/rnn/basic_lstm_cell/kernel:0' shape=(4204, 1024) dtype=float32_ref>, <tf.Variable 'thread_0_1577341381.6629117/lstm2/rnn/basic_lstm_cell/bias:0' shape=(1024,) dtype=float32_ref>

In [9]:
# with tf.variable_scope("thread_0_1577341381.6629117", reuse=True): # root variable scope
#     bar3 = tf.get_variable('global/Variable_17/RMSProp_1:0') # reuse (equivalent to the above)
# Pick the global variable whose op name matches exactly; the trailing [0]
# raises IndexError if no variable has that name.
bar2 = [var for var in tf.global_variables() if var.op.name=="thread_0_1577341381.6629117/global/Variable_17/RMSProp_1"][0]


In [10]:
# Bare expression: the notebook displays the variable as the cell output.
bar2

<tf.Variable 'thread_0_1577341381.6629117/global/Variable_17/RMSProp_1:0' shape=(512,) dtype=float32_ref>

In [22]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import tensorflow as tf
import numpy as np
slim = tf.contrib.slim
import gradcam_utils as utils
eval_graph = tf.Graph()

with tf.Session() as sess:
    # Initialise all variables, then overwrite them with the checkpointed values.
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, ckpt.model_checkpoint_path)

    # NOTE(review): `sess` was created without a graph argument, while every op
    # built inside the two `with` blocks below is added to `eval_graph`
    # (this also determines what tf.global_variables() returns here).
    # Confirm this split is intended.
    with eval_graph.as_default():
        with eval_graph.gradient_override_map({'Relu': 'GuidedRelu'}):
            images = tf.placeholder("float", [224, 224, 3])
            labels = tf.placeholder(tf.float32, [6])
            frame_buffer = []

            # Run the op that syncs the local network from the global one.
            sess.run(agent.copy_network)
            ep_reward = 0
            ep_step = 0

            running = True

            # NOTE(review): these observations are read before the env.reset()
            # call further down, so they come from the previous env_info --
            # confirm this ordering is intended.
            rgb = np.asarray(agent.env_info.visual_observations[1][0])

            agent.train_buffer = []

            prev_d = agent.window_stack(np.asarray(agent.env_info.visual_observations[0][0]))

            frame_buffer.append(rgb * 255)
            # rgb = Agent.resize_image(rgb)
            prev_act_idx = 0
            prev_reward = 0
            prev_vel = np.array([0.0]*6)
            prev_coord = agent.env_info.vector_observations[0][-3:]

            # Begin from the network's initial LSTM states.
            agent.lstm1_state_c = agent.network.lstm1_init_state_c
            agent.lstm1_state_h = agent.network.lstm1_init_state_h
            agent.lstm2_state_c = agent.network.lstm2_init_state_c
            agent.lstm2_state_h = agent.network.lstm2_init_state_h

            agent.env_info = agent.env.reset(train_mode=True)[agent.brain_name]
            # Report the last three vector-observation entries of the freshly
            # reset agent.env_info (the original read an unrelated bare
            # `env_info` name here).
            print('initial position:({:.2f} {:.2f} {:.2f})'.format(
                *agent.env_info.vector_observations[0][-3:]))

            ep_step_max = 1000
            positions = []

            # One forward pass through the agent network.
            fetches = [agent.network.policy,
                       agent.network.value,
                       agent.network.depth_pred,
                       agent.network.lstm1_state_c_out,
                       agent.network.lstm1_state_h_out,
                       agent.network.lstm2_state_c_out,
                       agent.network.lstm2_state_h_out]
            feed = {agent.network.input_image: [rgb],
                    agent.network.input_action: [prev_act_idx],
                    agent.network.input_reward: [[prev_reward]],
                    agent.network.input_velocity: [prev_vel],
                    agent.network.lstm1_state_c_in: agent.lstm1_state_c,
                    agent.network.lstm1_state_h_in: agent.lstm1_state_h,
                    agent.network.lstm2_state_c_in: agent.lstm2_state_c,
                    agent.network.lstm2_state_h_in: agent.lstm2_state_h}
            (act_prob, pred_value, depth_pred,
             lstm1_state_c, lstm1_state_h,
             lstm2_state_c, lstm2_state_h) = sess.run(fetches, feed_dict=feed)

            agent.action_visualize = act_prob
            # Sample the action index directly. Sampling a probability value and
            # searching for it afterwards always resolves ties between equal
            # probabilities to the first matching index.
            action_idx = np.random.choice(len(act_prob[0]), p=act_prob[0])
            action = act_prob[0][action_idx]

            # NOTE(review): `act_prob` is the value returned by sess.run, not a
            # graph tensor of the agent network, so the expressions below are
            # built from that fetched value -- confirm the gradients computed
            # from them are what is wanted.
            cost = (-1) * tf.reduce_sum(tf.multiply(labels, tf.log(act_prob)), axis=1)
            net = tf.log(act_prob)
            print("cost", cost)
            print("net", net)

            # The variable is looked up by op name. The tf.get_variable() call
            # that used to precede this passed a name ending in ':0' and no
            # shape, which cannot fetch an existing variable, and its result
            # was never used.
            # NOTE(review): the matched name is an RMSProp slot, and the result
            # is a (possibly empty) list -- confirm this is the intended target.
            target_conv_layer = [var for var in tf.global_variables() if var.op.name=="thread_0_1577341381.6629117/global/Variable_17/RMSProp_1"]

            y_c = tf.reduce_sum(tf.multiply(net, labels), axis=1)
            print('y_c:', y_c)
            target_conv_layer_grad = tf.gradients(y_c, target_conv_layer)
            print('target_conv_layer_grad:', target_conv_layer_grad)

            # Guided backpropagation back to the input layer.
            # NOTE(review): `rgb` is a NumPy array here, not the network's
            # input tensor -- confirm.
            gb_grad = tf.gradients(cost, rgb)[0]
            print('gb_grad:', gb_grad)
            

INFO:tensorflow:Restoring parameters from ./model/1226_final_without_visual_goals\model5040.cptk


In [23]:
# Commented-out filter condition, kept for reference:
# if var.op.name=="thread_0_1577341381.6629117/Variable_5:0"
# Bare expression: the notebook echoes the repr of `bar2` as the cell output.
bar2

<tf.Variable 'thread_0_1577341381.6629117/global/Variable_17/RMSProp_1:0' shape=(512,) dtype=float32_ref>

In [None]:
model_path = "nav_a3c/model/1226_final_without_visual_goals/"
# NOTE(review): `model` and `checkpoint_path` are not defined in the visible
# part of this notebook -- confirm they exist before running this cell.
model.load_weights(checkpoint_path)
# get_checkpoint_state() returns None when no checkpoint state is found, so
# fail with an explicit message instead of an AttributeError on None.
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None or not ckpt.model_checkpoint_path:
    raise FileNotFoundError("No checkpoint found under {!r}".format(model_path))
saver.restore(sess, ckpt.model_checkpoint_path)

In [None]:
# Print the trainable variables under the 'lstm1' scope, then those under the
# name of the current variable scope.
trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
print(tf.get_collection(trainable_key, scope='lstm1'))
current_scope_name = tf.get_variable_scope().name
print(tf.get_collection(trainable_key, current_scope_name))
# agent_arr[0].network.policy
# current_scope = tf.contrib.framework.get_name_scope()
# print(current_scope)
# tf.get_variable('thread_0_1577341381.6629117/Variable_5', [], dtype=tf.float32)

In [None]:

# NOTE(review): this cell combines an agent roll-out with the tail of an
# image-classification demo. The body has been re-indented under the session
# block so that the cell parses; the remaining inconsistencies are flagged
# inline and still need to be resolved.
with tf.Session(graph=eval_graph) as sess:
    # NOTE(review): `init` is only assigned further down in this cell --
    # confirm it is already defined when this line runs.
    sess.run(init)
    saver.restore(sess, latest_checkpoint)

    images = tf.placeholder("float", [224, 224, 3])
    labels = tf.placeholder(tf.float32, [6])
    frame_buffer = []

    # Run the op that syncs the local network from the global one.
    sess.run(agent.copy_network)
    ep_reward = 0
    ep_step = 0

    running = True

    # NOTE(review): these observations are read before the env.reset() call
    # further down, so they come from the previous env_info -- confirm.
    rgb = np.asarray(agent.env_info.visual_observations[1][0])

    agent.train_buffer = []

    prev_d = agent.window_stack(np.asarray(agent.env_info.visual_observations[0][0]))

    frame_buffer.append(rgb * 255)
    # rgb = Agent.resize_image(rgb)
    prev_act_idx = 0
    prev_reward = 0
    prev_vel = np.array([0.0]*6)
    prev_coord = agent.env_info.vector_observations[0][-3:]

    # Begin from the network's initial LSTM states.
    agent.lstm1_state_c = agent.network.lstm1_init_state_c
    agent.lstm1_state_h = agent.network.lstm1_init_state_h
    agent.lstm2_state_c = agent.network.lstm2_init_state_c
    agent.lstm2_state_h = agent.network.lstm2_init_state_h

    agent.env_info = agent.env.reset(train_mode=True)[agent.brain_name]
    # Report the last three vector-observation entries of the freshly reset
    # agent.env_info (the original read an unrelated bare `env_info` name).
    print('initial position:({:.2f} {:.2f} {:.2f})'.format(
        *agent.env_info.vector_observations[0][-3:]))

    ep_step_max = 1000
    positions = []
    while True:
        # One forward pass through the agent network.
        fetches = [agent.network.policy,
                   agent.network.value,
                   agent.network.depth_pred,
                   agent.network.lstm1_state_c_out,
                   agent.network.lstm1_state_h_out,
                   agent.network.lstm2_state_c_out,
                   agent.network.lstm2_state_h_out]
        feed = {agent.network.input_image: [rgb],
                agent.network.input_action: [prev_act_idx],
                agent.network.input_reward: [[prev_reward]],
                agent.network.input_velocity: [prev_vel],
                agent.network.lstm1_state_c_in: agent.lstm1_state_c,
                agent.network.lstm1_state_h_in: agent.lstm1_state_h,
                agent.network.lstm2_state_c_in: agent.lstm2_state_c,
                agent.network.lstm2_state_h_in: agent.lstm2_state_h}
        (act_prob, pred_value, depth_pred,
         lstm1_state_c, lstm1_state_h,
         lstm2_state_c, lstm2_state_h) = sess.run(fetches, feed_dict=feed)

        agent.action_visualize = act_prob
        # Sample the action index directly. Sampling a probability value and
        # searching for it afterwards always resolves ties between equal
        # probabilities to the first matching index.
        action_idx = np.random.choice(len(act_prob[0]), p=act_prob[0])
        action = act_prob[0][action_idx]

        # NOTE(review): `act_prob` is the value returned by sess.run, not a
        # graph tensor of the agent network -- confirm the gradients built
        # from it below are what is wanted.
        cost = (-1) * tf.reduce_sum(tf.multiply(labels, tf.log(act_prob)), axis=1)

        net = tf.log(act_prob)

        # NOTE(review): the matched name is an RMSProp slot, and the result is
        # a (possibly empty) list -- confirm this is the intended target.
        target_conv_layer = [var for var in tf.global_variables() if var.op.name=="thread_0_1577341381.6629117/global/Variable_5/RMSProp_1"]

        y_c = tf.reduce_sum(tf.multiply(net, labels), axis=1)
        print('y_c:', y_c)
        target_conv_layer_grad = tf.gradients(y_c, target_conv_layer)
        print('target_conv_layer_grad:', target_conv_layer_grad)

        # Guided backpropagation back to the input layer.
        # NOTE(review): `rgb` is a NumPy array here, not the network's input
        # tensor -- confirm.
        gb_grad = tf.gradients(cost, rgb)[0]
        print('gb_grad:', gb_grad)

        init = tf.global_variables_initializer()

        action_transformed = agent.get_action(action_idx)
        agent.env_info = agent.env.step(action_transformed)[agent.brain_name]  # send the action to the environment
        rgb_next = np.asarray(agent.env_info.visual_observations[1][0])  # get the next state
        d = agent.window_stack(np.asarray(agent.env_info.visual_observations[0][0]))

        reward = agent.env_info.rewards[0]      # get the reward
        done = agent.env_info.local_done[0]     # see if episode has finished
        coord = agent.env_info.vector_observations[0][-3:]

        vel = agent.get_vel(prev_coord, coord, action_transformed)

        sys.stdout.write('\r episode:{}, step: {}, position:({:.2f} {:.2f} {:.2f}), score:{:.2f}, action:{}'.format(
            agent.name, ep_step, coord[0], coord[1], coord[2], ep_reward, str(action_idx)))
        sys.stdout.flush()

        positions.append([coord[0], coord[1], coord[2]])

        ep_reward += reward
        ep_step += 1

        # Only a single step is executed.
        break

    # NOTE(review): `prob`, `batch_img`, `batch_label` and `batch_size` are not
    # defined in this cell, and `images` / `labels` were re-bound above to
    # placeholders of a different shape -- confirm before running.
    prob_np = sess.run(prob, feed_dict={images: batch_img})
    print('prob_np:', prob_np.shape)

    net_np, y_c_np, gb_grad_value, target_conv_layer_value, target_conv_layer_grad_value = sess.run(
        [net, y_c, gb_grad, target_conv_layer, target_conv_layer_grad],
        feed_dict={images: batch_img, labels: batch_label})

    for i in range(batch_size):
        utils.print_prob(prob_np[i], './synset.txt')
        utils.visualize(batch_img[i], target_conv_layer_value[i], target_conv_layer_grad_value[i], gb_grad_value[i])
    


In [24]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import tensorflow as tf
import numpy as np

from nets import resnet_v1
slim = tf.contrib.slim

import utils

# Assemble the three-image demo mini-batch and its one-hot label rows.

img1 = utils.load_image("./demo.png", normalize=False)
img2 = utils.load_image("./shihtzu_mypuppy.jpg", normalize=False)
img3 = utils.load_image("./tiger.jpg", normalize=False)

# Give every image a leading batch axis of length one.
batch1_img, batch2_img, batch3_img = [
    picture.reshape((1, 224, 224, 3)) for picture in (img1, img2, img3)
]

# One row of 1000 integer entries per image, with a single 1 at the class index.
batch1_label = np.zeros((1, 1000), dtype=int)
batch1_label[0, 242] = 1  # Boxer

batch2_label = np.zeros((1, 1000), dtype=int)
batch2_label[0, 155] = 1  # Shih-Tzu

batch3_label = np.zeros((1, 1000), dtype=int)
batch3_label[0, 292] = 1  # tiger

# Stack the per-image pieces along the batch axis.
batch_img = np.concatenate([batch1_img, batch2_img, batch3_img], axis=0)
batch_label = np.concatenate([batch1_label, batch2_label, batch3_label], axis=0)
batch_size = 3

# batch_img = np.concatenate((batch1_img), 0)
# batch_label = np.concatenate((batch1_label), 0)
# batch_size = 1
# batch_img = np.expand_dims(batch_img, 0)
# batch_label = batch_label.reshape(batch_size, -1)

# Build the evaluation graph: ResNet-50 forward pass, Grad-CAM and guided
# backpropagation gradients, and a saver limited to restorable variables.
eval_graph = tf.Graph()
with eval_graph.as_default():
    # Ops of type 'Relu' created in this context use the gradient function
    # registered under the name 'GuidedRelu'.
    with eval_graph.gradient_override_map({'Relu': 'GuidedRelu'}):
        images = tf.placeholder("float", [batch_size, 224, 224, 3])
        labels = tf.placeholder(tf.float32, [batch_size, 1000])

        preprocessed_images = utils.resnet_preprocess(images)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            with slim.arg_scope([slim.batch_norm], is_training=False):
                # is_training=False means batch-norm is not in training mode. Fixing batch norm layer.
                # net is logit for resnet_v1. See is_training messing up issue: https://github.com/tensorflow/tensorflow/issues/4887
                net, end_points = resnet_v1.resnet_v1_50(preprocessed_images, 1000)

        prob = end_points['predictions']  # after softmax
        print('prob:', prob)

        print('lables:', labels)
        cost = (-1) * tf.reduce_sum(tf.multiply(labels, tf.log(prob)), axis=1)
        print('cost:', cost)

        # Get last convolutional layer gradient for generating gradCAM visualization
        target_conv_layer = end_points['resnet_v1_50/block4/unit_2/bottleneck_v1']
        # target_conv_layer = end_points['resnet_v1_50/block3/unit_5/bottleneck_v1']

        # Gradient for partial linearization. We only care about the target visualization class.
        y_c = tf.reduce_sum(tf.multiply(net, labels), axis=1)
        print('y_c:', y_c)
        target_conv_layer_grad = tf.gradients(y_c, target_conv_layer)[0]
        print('target_conv_layer_grad:', target_conv_layer_grad)

        # Guided backpropagation back to the input layer
        gb_grad = tf.gradients(cost, images)[0]
        print('gb_grad:', gb_grad)

        init = tf.global_variables_initializer()

        # Load resnet v1 weights
        latest_checkpoint = "model/resnet_v1_50.ckpt"

        # Optimistic restore: only variables that are present in the
        # checkpoint with an identical shape are handed to the saver.
        reader = tf.train.NewCheckpointReader(latest_checkpoint)
        saved_shapes = reader.get_variable_to_shape_map()
        variables_to_restore = tf.global_variables()

        # (tensor name, checkpoint key) pairs for variables found in the
        # checkpoint; the key is the tensor name without its ':<index>' suffix.
        var_names = []
        for var in variables_to_restore:
            saved_name = var.name.split(':')[0]
            if saved_name not in saved_shapes:
                print("WARNING. Saved weight not exists in checkpoint. Init var:", var.name)
            else:
                var_names.append((var.name, saved_name))
        var_names.sort()

        restore_vars = []
        with tf.variable_scope('', reuse=True):
            for _, saved_var_name in var_names:
                try:
                    curr_var = tf.get_variable(saved_var_name)
                    var_shape = curr_var.get_shape().as_list()
                except ValueError:
                    print("Ignore due to ValueError on getting var:", saved_var_name)
                else:
                    if var_shape == saved_shapes[saved_var_name]:
                        restore_vars.append(curr_var)
                    else:
                        print("cannot restore var:", saved_var_name)
        saver = tf.train.Saver(restore_vars)
        
        
        
# Execute the evaluation graph and visualize every image of the mini-batch.

with tf.Session(graph=eval_graph) as sess:
    # Initialise all variables, then load the checkpointed weights over them.
    sess.run(init)
    saver.restore(sess, latest_checkpoint)

    # Class probabilities for the batch.
    prob_np = sess.run(prob, feed_dict={images: batch_img})
    print('prob_np:', prob_np.shape)

    # Fetch logits, class scores, both gradients and the target layer output
    # in a single run, using the labels of the classes to visualize.
    gradcam_fetches = [net, y_c, gb_grad, target_conv_layer, target_conv_layer_grad]
    gradcam_feed = {images: batch_img, labels: batch_label}
    (net_np, y_c_np, gb_grad_value,
     target_conv_layer_value,
     target_conv_layer_grad_value) = sess.run(gradcam_fetches, feed_dict=gradcam_feed)

    per_image = zip(range(batch_size), prob_np, batch_img, target_conv_layer_value,
                    target_conv_layer_grad_value, gb_grad_value)
    for i, image_prob, image, conv_out, conv_grad, guided_grad in per_image:
        utils.print_prob(image_prob, './synset.txt')
        utils.visualize(image, conv_out, conv_grad, guided_grad)
    

ModuleNotFoundError: No module named 'nets'