# Choose Model to evaluate

In [None]:
# Default when no selection below is active.
model_to_eval = None

# --- model selection: keep exactly one of the assignments active ---------

# TD3: best TD3 model (4 joints)
model_to_eval = "TD3"

# SAC: best SAC model with 4 joints / with 6 joints
# model_to_eval = "SAC_4joints"
# model_to_eval = "SAC_6joints"

# --- number of evaluation steps -------------------------------------------
eval_steps = 500

# Imports

In [None]:
# general imports
import sys                       
import numpy as np                

# gym for spaces and environment definitions
import gym
# env checker
from stable_baselines3.common import env_checker

# stable baselines3 -> SAC, TD3
from stable_baselines3 import SAC
from stable_baselines3 import TD3
from stable_baselines3.sac import MlpPolicy

# stable baselines3 -> HER
from stable_baselines3.her.her_replay_buffer import HerReplayBuffer
from stable_baselines3.her.goal_selection_strategy import GoalSelectionStrategy

# Tensorboard writer; it is stored in `params` and handed to the environments
# and the evaluation helper further down.
# NOTE(review): according to the torch SummaryWriter documentation, `comment`
# is only used as a suffix of the auto-generated log directory and has no
# effect once a log_dir is passed explicitly, as it is here -- confirm whether
# the run suffix should instead be part of the directory name.
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('tensorboard_log/',comment="-SAC_HER_buff20000")

# grpc communication
sys.path.insert(1, '/tum_nrp/grpc/python/communication')
import experiment_api_wrapper as eaw

# for auto-reloading external modules see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

from env import SimEnv, SimGoalEnv
from train_helpers import evaluate, evaluate2, evaluate3, train

# Params

In [None]:
# Default configuration. It has to exist at this point because a new
# environment is created from it in the following step.
params = {
    "VERBOSE": 1,

    # one of: 'reduced', 'reduced2', 'reduced3', 'reduced3+', 'reduced4', 'reduced4+'
    "SETTING": "reduced4+",

    # bounds of the observation space (ee-pos, cyl-pos, joints)
    "OBJ_SPACE_LOW": np.array([
        -1.5, -1, -0.55, -0.44, -0.48, 0,
        -np.pi/2, -np.pi/2, -np.pi, -np.pi, -np.pi, -np.pi,
    ]),
    "OBJ_SPACE_HIGH": np.array([
        1.5, 1.8, 2.2, 0.48, 0.44, 1.12,
        np.pi/2, np.pi/2, np.pi, np.pi, np.pi, np.pi,
    ]),

    # normalize the action and observation space: 1 -> yes, 0 -> no
    "SPACE_NORM": 0,

    # one of: 'no', 'fix', 'semi_random', 'semi_random_sides', 'half_table',
    #         '3/4-table', '7/8-table', 'whole_table'
    "CYLINDER": "no",

    "BUFFER_SIZE": 1600,

    # initial threshold
    "THRESHOLD": 0.2,
    # threshold scheduling: 1 -> yes, 0 -> no
    "THRESHOLD_SCHEDULING": 1,
    "MIN_THRESHOLD": 0.01,

    # one of: 'sparse', 'dense', 'extra_dense'
    "REWARD_TYPE": "sparse",

    # number of random movements before learning starts
    "LEARNING_STARTS": 1,
    "TOGGLE_REWARD": 0,

    # number of steps while training (= number of episodes when
    # MAX_EPISODE_LENGTH is 1)
    "STEPS": 1000,
    # None (no limit) or a value
    "MAX_EPISODE_LENGTH": 1,

    # leave this at 1 and ignore it
    "EXPLORATION": 1,

    "WRITER": writer,

    # hindsight experience replay: 1 -> yes, 0 -> no
    "USE_HER": 1,

    # 'auto' or a value between 0 and 1; 0.007 turned out to work well
    "ENTROPY_COEFFICIENT": 0.007,

    "GLOBAL_STEPPER": 0,

    # number of evaluation steps per investigated threshold (x4)
    "EVALUATION_STEPS": eval_steps,
    # the list MUST always contain exactly 4 thresholds for evaluation
    "EVALS": [0.10, 0.07, 0.05, 0.03],

    "BATCH_SIZE": 64,
    "ACTION_NOISE": None,
    "RANDOM_EXPLORATION": 0.0,
    "LR": 3e-4,
    "TB_LOGGER": None,
}

# Per-model settings that replace the defaults in `params`. Every model name
# accepted by this notebook has an entry here.
_MODEL_PARAM_OVERRIDES = {
    "TD3": {
        "OBJ_SPACE_LOW": np.array([-0.92, -0.51, 0.58, -0.44, -0.48, 0, -np.pi/2, -np.pi/2, -0.001, -np.pi/2, -0.001, -np.pi]),
        "OBJ_SPACE_HIGH": np.array([0.92, 1.32, 2.07, 0.48, 0.44, 1.12, np.pi/2, 0.001, np.pi, np.pi/2, np.pi, np.pi]),
        "SPACE_NORM": 1,
        "BUFFER_SIZE": 2000,
        "BATCH_SIZE": 100,
    },
    "SAC_4joints": {
        "USE_HER": 0,
        "SPACE_NORM": 1,
    },
    "SAC_6joints": {
        "SETTING": 'full_constrained',
    },
}

# Fail right here on an unknown name. Merely printing a hint would let the
# notebook continue with the default parameters and only break later, at the
# first use of the never-loaded model.
if model_to_eval not in _MODEL_PARAM_OVERRIDES:
    raise ValueError(
        "Choose a valid model name. Got "
        + repr(model_to_eval)
        + ", expected one of "
        + str(sorted(_MODEL_PARAM_OVERRIDES))
        + "."
    )

params.update(_MODEL_PARAM_OVERRIDES[model_to_eval])

# Create Environment

In [None]:
# open the connection to the experiment
exp = eaw.ExperimentWrapper()

# ask the simulation for its id; a falsy answer means it cannot be reached
server_id = exp.client.test()
if not server_id:
    print("Simulation NOT available")
else:
    print("Simulation is available, id: ", server_id)

# build a fresh environment: the goal-based variant when HER is enabled,
# the plain one otherwise
env = (SimGoalEnv if params["USE_HER"] == 1 else SimEnv)(exp, params, writer)

# optional sanity check of the environment
# env_checker.check_env(env)

# Load Model

In [None]:
# Load the saved model selected by `model_to_eval`.
#
# Each entry holds: (algorithm class, checkpoint name, pass env to load()).
# The models evaluated with HER (TD3, SAC_6joints) need the environment at
# load time; SAC_4joints is loaded without HER and without an environment.
_SAVED_MODELS = {
    "TD3": (TD3, "TD3_0.2_dense_reduced4+_10000_20210729-123802", True),
    "SAC_4joints": (SAC, "SAC_0.04_sparse_reduced4+_4000_0_20210713-113846", False),
    "SAC_6joints": (SAC, "SAC_0.03_sparse_full_constrained_18000_1_20210918-235627", True),
}

# An unknown name must stop the notebook here; otherwise `model` would stay
# undefined and the following cells would fail with an unrelated NameError.
if model_to_eval not in _SAVED_MODELS:
    raise ValueError(
        "Choose a valid model name. Got "
        + repr(model_to_eval)
        + ", expected one of "
        + str(sorted(_SAVED_MODELS))
        + "."
    )

_algorithm, _checkpoint, _load_with_env = _SAVED_MODELS[model_to_eval]
model_path = "./saved_models/" + _checkpoint

if _load_with_env:
    model = _algorithm.load(model_path, env)
else:
    model = _algorithm.load(model_path)

# Set Environment

In [None]:
# Attach the environment created above to the loaded model. The SAC_4joints
# model is loaded without an environment, so this is where it receives one;
# the other models were already given `env` at load time.
model.set_env(env)

# Evaluate Model

In [None]:
# Put the environment into evaluation mode. This disables plotting during
# evaluation because of some issues with inconsistent lengths.
env.set_eval(ev=True)


#####################
# normal evaluation #
#####################
# alternative evaluation routine, currently disabled:
#evaluate(model, env, params, writer)


###################
# fast evaluation # -> with color-encoded scatter plot!
###################
evaluate3(model, env, params, writer)