# CNN Agent

This notebook evaluates the CNN model trained in `Basic_CNN.ipynb`. We use our best 4-joint TD3 RL model as the agent and use images acquired from the simulation to retrieve the ground truth. 

In [None]:
import sys
sys.path.append('/tum_nrp/grpc/python/communication')
sys.path.append('./src')
sys.path.append('/tum_nrp/rlmodel/sb3')

import experiment_api_wrapper as eaw
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from stable_baselines3 import TD3

from stable_baselines3.her.her_replay_buffer import HerReplayBuffer
from stable_baselines3.her.goal_selection_strategy import GoalSelectionStrategy

from network import Model
from env_cnn import SimGoalEnv
from train_helpers import evaluate3

%load_ext autoreload
%autoreload 2
%load_ext tensorboard

In [None]:
# test the connection with the simulation

# Create the experiment wrapper and call its client's test() method;
# the result is kept as the server id.
exp = eaw.ExperimentWrapper()
server_id = exp.client.test()

# A falsy id is reported as a problem; execution continues either way.
status_message = (
    f"Simulation is available, id: {server_id}."
    if server_id
    else "There's something wrong with the sim!"
)
print(status_message)

In [None]:
# set the parameters
#
# Single configuration dictionary shared by the CNN, the environment, the RL
# agent and the evaluation helper. Key order is kept stable on purpose.

params = {
    "VERBOSE": 1,

    # Listed options: 'reduced', 'reduced2', 'reduced3', 'reduced3+',
    # 'reduced4', 'reduced4+', 'full'
    # NOTE(review): the value used here, 'full_constrained', is not in that list.
    "SETTING": 'full_constrained',

    # Lower / upper bounds of the observation space, ordered as
    # (ee-pos, cyl-pos, joints).
    "OBJ_SPACE_LOW": np.array([
        -0.92, -0.51, 0.58,
        -0.44, -0.48, 0,
        -np.pi/2, -np.pi/2, -0.001, -np.pi/2, -0.001, -np.pi,
    ]),
    "OBJ_SPACE_HIGH": np.array([
        0.92, 1.32, 2.07,
        0.48, 0.44, 1.12,
        np.pi/2, 0.001, np.pi, np.pi/2, np.pi, np.pi,
    ]),

    # Normalize the action and observation space: 1 -> yes, 0 -> no
    "SPACE_NORM": 1,

    # 'no' (no augmentation), 'fix', 'semi_random', 'semi_random_sides',
    # 'half_table', '3/4-table', '7/8-table', 'whole_table'
    "CYLINDER": 'whole_table',

    "BUFFER_SIZE": 2000,

    # Initial threshold
    "THRESHOLD": 0.20,
    # Threshold scheduling: 1 -> yes, 0 -> no
    "THRESHOLD_SCHEDULING": 1,
    "MIN_THRESHOLD": 0.02,

    # 'sparse', 'dense', 'extra_dense'
    "REWARD_TYPE": 'dense',

    # Number of random movements before learning starts
    "LEARNING_STARTS": 100,
    "TOGGLE_REWARD": 0,

    # Number of training steps (= number of episodes when MAX_EPISODE_LENGTH is 1)
    "STEPS": 10000,
    # None (no limit) or a value
    "MAX_EPISODE_LENGTH": 1,

    # Leave at 1 and ignore it
    "EXPLORATION": 1,
    # Use HER: 1 -> yes, 0 -> no
    "USE_HER": 1,
    "GLOBAL_STEPPER": 0,

    # Number of evaluation steps per investigated threshold (x4)
    "EVALUATION_STEPS": 500,
    # The list MUST always contain exactly 4 thresholds for evaluation
    "EVALS": [0.20, 0.15, 0.10, 0.05],

    "BATCH_SIZE": 100,
    "ACTION_NOISE": None,
    "LR": 1e-3,

    # --- TD3 specific ---
    "TARGET_POLICY_NOISE": 0.2,
    "TARGET_NOISE_CLIP": 0.5,

    # --- CNN specific ---
    # Number of hidden channels in the CNN layer
    "hidden_channel": 20,
    # Hidden size setting of the FC part
    # NOTE(review): originally described as "number of hidden layers in FC
    # layer"; presumably the number of hidden units - confirm in network.Model.
    "hidden_layer": 100,
}

In [None]:
# create an environment

# TensorBoard writer handed to the environment (and, further down, to evaluate3)
writer = SummaryWriter('logs/agent')

# Build the CNN and load its trained weights onto the CPU.
# It is kept under its own name (`cnn_model`) so that it is not silently
# replaced when `model` is bound to the RL agent below.
cnn_model = Model(params)
cnn_model.load_state_dict(torch.load("saved_models/cnn_model", map_location=torch.device('cpu')))
# The CNN is only used for inference in this notebook: switch any dropout /
# batch-norm layers to evaluation mode.
# NOTE(review): assumes `Model` is a torch.nn.Module (it exposes load_state_dict).
cnn_model.eval()
print("The CNN model is loaded.")

env = SimGoalEnv(exp, params, writer, cnn_model)

# model type
model_class = TD3

# HER goal selection strategy
goal_selection_strategy = 'future'

# if True the HER transitions will get sampled online
online_sampling = True

# time limit for the episodes (taken from the shared configuration instead of
# a second hard-coded copy, so both can never drift apart)
max_episode_length = params["MAX_EPISODE_LENGTH"]

# RL agent, freshly initialised with the configured hyper-parameters.
# Note: the "load best RL model" cell rebinds `model` to a pre-trained agent.
model = model_class(
    "MultiInputPolicy",
    env,
    learning_rate=params["LR"],
    buffer_size=params["BUFFER_SIZE"],
    learning_starts=params["LEARNING_STARTS"],
    batch_size=params["BATCH_SIZE"],
    action_noise=params["ACTION_NOISE"],
    replay_buffer_class=HerReplayBuffer,
    # Parameters for HER
    replay_buffer_kwargs=dict(
        n_sampled_goal=4,
        goal_selection_strategy=goal_selection_strategy,
        online_sampling=online_sampling,
        max_episode_length=max_episode_length),
    target_policy_noise=params["TARGET_POLICY_NOISE"],
    target_noise_clip=params["TARGET_NOISE_CLIP"],
    tensorboard_log="logs/agent_model",
    verbose=params["VERBOSE"]
)

In [None]:
# load best RL model

# File name (without directory) of the saved TD3 agent to restore
load_file_name = "TD3_0.2_dense_reduced4+_10000_20210729-123802"

# Load the saved agent from the sb3 saved_models folder and bind it to `env`;
# this replaces whatever `model` referred to before.
model = TD3.load(f"../../sb3/saved_models/{load_file_name}", env=env)

In [None]:
# evaluate

# Call the environment's set_eval hook with ev=True
# (presumably switches it to evaluation behaviour - see env_cnn.SimGoalEnv).
env.set_eval(ev=True)

# The largest entry of params["EVALS"] is passed as the maximum threshold.
largest_eval_threshold = max(params["EVALS"])
evaluate3(model, env, params, writer, max_threshold=largest_eval_threshold)