This notebook trains a robot arm to reach a randomly spawning cylinder based on coordinates extracted from image data. A custom PyTorch dataset class reads the image data from a .pi file, processes the images, and retrieves the cylinder coordinates from them.

Retrieving the cylinder coordinates
- A camera mounted above the table captures images of the whole table surface. 
- A pipeline then processes the images and extracts the location of the cylinders via subtraction, thresholding, and dilation.

The resulting dataset is then passed to and handled by a train and a test data loader.

# Imports

In [None]:
#%reload_ext autoreload

# general imports
import sys                       
import numpy as np                

# gym for spaces and environment definitions
import gym
# env checker
from stable_baselines3.common import env_checker

# stable baselines3 -> SAC
from stable_baselines3 import SAC
from stable_baselines3.sac import MlpPolicy

# stable baselines3 -> HER
from stable_baselines3.her.her_replay_buffer import HerReplayBuffer
from stable_baselines3.her.goal_selection_strategy import GoalSelectionStrategy

# Tensorboard
# Summary writer that logs into the relative folder 'tensorboard_log/'; it is later
# handed to the environments and stored in params["WRITER"].
# NOTE(review): per the torch.utils.tensorboard.SummaryWriter documentation, `comment`
# is only a suffix for the *default* log_dir and has no effect once a log_dir is passed
# explicitly, so "-SAC_HER_buff20000" is presumably ignored here — confirm and, if the
# tag is wanted, encode it in the log_dir instead.
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('tensorboard_log/',comment="-SAC_HER_buff20000")

# grpc communication
sys.path.insert(1, '/tum_nrp/grpc/python/communication')
import experiment_api_wrapper as eaw

# for auto-reloading external modules see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

from env import SimEnv, SimGoalEnv
from train_helpers import evaluate, train

# Image Data

In [None]:
# for path manipulation
import os

# tensor Operations
import torch

# custom dataset class for the extraction of the coordinates from the image data
os.chdir('/tum_nrp/rlmodel/image_processing/src/')
from custom_dataset import CMLRPiDataset
from transform_nonzeros import non_zeros
os.chdir('/tum_nrp/rlmodel/sb3')

# dataset and dataloaders
from torch.utils.data import Dataset, DataLoader

# transforms for the transformation of the custom dataset
try:
    from torchvision import transforms
except ModuleNotFoundError:
    !pip install torchvision
    from torchvision import transforms

In [None]:
# path to the folder of the dataset
DATASET_PATH = "/tum_nrp/rlmodel/image_processing/data/"
# path to the folder where models are saved
CHECKPOINT_PATH = "/tum_nrp/rlmodel/image_processing/saved_models/"

# number of workers for the dataloader
num_workers = 0
batch_size = 1 # has to be one!

# width and height of the table in the images captured by the top mounted camera
width = 120
height = 116

# image transformations
transform = transforms.Compose([non_zeros(float(width), float(height))])

# create the dataset
dataset = CMLRPiDataset(
    root_dir=DATASET_PATH,
    data_pi_name='training_data.pt',
    mask_pi_name='no_cylinder.pt',
    threshold=200,
    transform=transform,
)

# retrieve the number of samples in the dataset
dataset_len = len(dataset)

# split the dataset 70/30 into a training and a test set (there is no separate
# validation set); the test set takes the remainder so that both sizes always
# add up to dataset_len, which random_split requires
train_len = int(dataset_len * 0.7)
train_set, test_set = torch.utils.data.random_split(
    dataset, [train_len, dataset_len - train_len]
)

# definition of the corresponding data loaders
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    pin_memory=True,
    num_workers=num_workers,
)
test_loader = DataLoader(
    test_set,
    batch_size=batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=num_workers,
)

# sanity check

# retrieve the first batch of the training data; the built-in next() is used
# because the loader iterator's .next() method does not exist in recent
# PyTorch releases and would raise an AttributeError there
dataiter = iter(train_loader)
cyl_loc, labels = next(dataiter)

error_x = 0
error_y = 0

# calculate the average error of the detected x and y value for the first batch;
# only the magnitudes are compared, so a pure sign difference counts as no error
for i in range(batch_size):
    error_x += abs(abs(labels[i][0]) - abs(cyl_loc[i][0]))
    error_y += abs(abs(labels[i][1]) - abs(cyl_loc[i][1]))
print(error_x/batch_size, error_y/batch_size)

# Parameters

In [None]:
# Configuration dictionary for the experiment. It is passed to the environment
# constructors (SimEnv / SimGoalEnv) and to the train / evaluate helpers, and
# this notebook itself reads BUFFER_SIZE, LEARNING_STARTS, ENTROPY_COEFFICIENT,
# VERBOSE, USE_HER, CNN_policy and STEPS from it when building the model.
params = {
    "img_pi_data": 1,                  # NOTE(review): presumably 1 -> use the cylinder coordinates from the image data loaders below — confirm in env
    "train_loader": train_loader,
    "test_loader": test_loader,
    "VERBOSE": 1,
    "SETTING": 'reduced4',            # 'reduced', 'reduced2', 'reduced3', 'reduced3+', 'reduced4', 'reduced4+'
    "OBJ_SPACE_LOW": np.array([-1.8, -1, -0.55, -0.44, -0.48, 0, -np.pi/2, -np.pi/2, -np.pi, -np.pi, -np.pi, -np.pi]), # observation-space (ee-pos, cyl-pos, joints)
    "OBJ_SPACE_HIGH": np.array([1.5, 1.8, 2.2, 0.48, 0.44, 1.12, np.pi/2, np.pi/2, np.pi, np.pi, np.pi, np.pi]), # presumably the matching upper bounds (same layout)
    "CNN_policy": 0,          #  1 -> yes, 0 -> no  (this setting is used to train an agent by directly feeding the image, based on the CNN policy provided by stable baselines 3)
    "IMAGE_SHAPE": np.array([120,120,3]), # width, height and channel, only useful when CNN_policy = 1 !
    "SPACE_NORM": 1,                   #  1 -> yes, 0 -> no (normalize the action and observation space)
    "CYLINDER": 'no',         # 'no', 'fix', 'semi_random', 'semi_random_sides', 'half_table', '3/4-table', '7/8-table', 'whole_table'
    "BUFFER_SIZE": 2000,
    "THRESHOLD": 0.20,                  # initial threshold
    "THRESHOLD_SCHEDULING": 1,          # 1-> yes, 0-> no
    "MIN_THRESHOLD": 0.02,
    "REWARD_TYPE": 'sparse',           # 'sparse', 'dense', 'extra_dense'
    "LEARNING_STARTS": 100,#100,            # number of random movements before learning starts
    "TOGGLE_REWARD": 0,
    "STEPS": 5000,#1500,              # number of steps while training (=num_episodes when MAX_EPISODE_LENGTH is 1)
    "MAX_EPISODE_LENGTH": 1,           # 'None' (no limit) or value
    "EXPLORATION": 1,                  # just leave it at 1 and ignore it
    "WRITER": writer,
    "USE_HER": 1,                      # 1-> yes, 0-> no
    "ENTROPY_COEFFICIENT": 0.007,      # 'auto' or value between 0 and 1 // 0.007 turned out to work well
    "GLOBAL_STEPPER": 0, 
    "EVALUATION_STEPS": 100,#50,       # number of evaluation steps per investigated threshold (x4)
    "EVALS": [0.2, 0.15, 0.1, 0.05], # here, the list MUST always contain 4 thresholds for evaluation
    "BATCH_SIZE": 64,
    "ACTION_NOISE": None,
    "RANDOM_EXPLORATION": 0.0,
    "LR": 3e-4,
    "TB_LOGGER": None}

# Model

In [None]:
# open the connection to the experiment
exp = eaw.ExperimentWrapper()

# optional check whether the simulation can be reached (kept disabled)
#server_id = exp.client.test()
#if server_id:
#    print("Simulation is available, id: ", server_id)
#else:
#    print("Simulation NOT available")


# pick the environment class: SimGoalEnv when HER is enabled, SimEnv otherwise
_env_class = SimGoalEnv if params["USE_HER"] == 1 else SimEnv
env = _env_class(exp, params, writer)

# run the stable baselines3 environment checker with warnings enabled
env_checker.check_env(env, warn=True)

# reinforcement learning algorithm used for every variant below
model_class = SAC

########################
# Model initialization #
########################

# constructor arguments that all model variants have in common
_common_kwargs = dict(
    buffer_size=params["BUFFER_SIZE"],
    learning_starts=params["LEARNING_STARTS"],
    ent_coef=params["ENTROPY_COEFFICIENT"],
    verbose=params["VERBOSE"],
)

if params["USE_HER"] != 1:
    # plain SAC: only the policy type depends on the CNN setting
    _policy_name = "MultiInputPolicy" if params["CNN_policy"] == 1 else "MlpPolicy"
    model = model_class(_policy_name, env, **_common_kwargs)

    # start training (without train method)
    model.learn(params["STEPS"])

else:
    # SAC with a HER replay buffer

    # HER goal selection strategy
    goal_selection_strategy = 'future'

    # if True the HER transitions will get sampled online
    online_sampling = True

    # time limit for the episodes
    # NOTE(review): hard-coded and currently equal to params["MAX_EPISODE_LENGTH"];
    # the two values are not linked — confirm they are meant to stay in sync
    max_episode_length = 1

    # settings forwarded to the HER replay buffer
    _her_kwargs = dict(
        n_sampled_goal=4,
        goal_selection_strategy=goal_selection_strategy,
        online_sampling=online_sampling,
        max_episode_length=max_episode_length,
    )

    model = model_class(
        "MultiInputPolicy",
        env,
        replay_buffer_class=HerReplayBuffer,
        replay_buffer_kwargs=_her_kwargs,
        **_common_kwargs,
    )

    # start training via the train helper
    train(model, env, params)
    

In [None]:
##############
# save model #
##############

# store the trained model under a fixed file name (relative to the current working directory)
# NOTE(review): the name is hard-coded and not derived from params (e.g. "SETTING" / "STEPS"),
# so it has to be adjusted by hand when the configuration changes
model.save("reduced4_500_img_pi")


In [None]:
####################
# model evaluation #
####################

# switch the environment to evaluation mode; this disables plotting during
# evaluation because of some issues with inconsistent lengths
env.set_eval(ev=True)

# run the evaluate helper (imported from train_helpers) on the trained model
evaluate(model, env, params)
