In [1]:
import os
import yaml
import pickle
import numpy as np
import random
import time
import matplotlib.pyplot as plt
import matplotlib
import tensorflow as tf
import pickle
from statistics import mean
from plot import remove_repetitive_labels
from torch.utils.data import DataLoader, Dataset

from transformer_model import *
from process_data import Task_data
from outlier_detection import detect_outlier
from transformations import *

def get_position_difference_per_step(d1, d2):
    return np.linalg.norm(d1 - d2, axis = 1)


def swap_dict_level(data):
    temp = {}
    for obj, value in data.items():
        for demo, traj in value.items():
            if not demo in temp:
                temp[demo] = {}
            temp[demo][obj] = traj
    return temp

def create_tags(objs):
    one_hots = torch.eye(len(objs))
    tag_dict = {}
    for i, obj in enumerate(objs):
        tag_dict[obj] = one_hots[i]
    return tag_dict

def random_rotation(x, axis='x'):
    new_x = x.copy()
    degree, idx = random.randrange(0, 360), random.randrange(0, x.shape[0])
    rot = R.from_euler(axis, degree, degrees=True)
    H = np.zeros([4,4])
    H[:3,:3] = rot.as_matrix()
    rand_pt = x[idx,:3].copy()
#     rand_pt[1:] = rand_pt[1:]
#     print("pre:",new_x)
    new_x[:,:3]  = new_x[:,:3] - rand_pt
    new_x = lintrans(new_x, H)
    new_x[:,:3] = new_x[:,:3] + rand_pt
#     print("post:",new_x)
    return new_x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Cuda available: ", torch.cuda.is_available())
seed = 321
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

colors = ['red', 'blue', 'yellow', 'orange', 'green', 'purple','pink']
task_files = ["2022-10-06", "2022-10-27", "2022-12-01"]
data_dir = '../Process_data/postprocessed'
# task_dims = ['x', 'y', 'z']
task_dims = ['x', 'y', 'z', 'qx', 'qy', 'qz', 'qw']
n_dims = len(task_dims)
n_train = 30
n_tasks = len(task_files)


# find all objects first
all_objs = []
for file in task_files:
    task_config_dir = os.path.join(data_dir, file)
    with open(os.path.join(task_config_dir, 'task_config.yaml')) as file:
        config = yaml.load(file, Loader=yaml.FullLoader)
    objs = config["individuals"]
    all_objs = all_objs + objs
unique_objs = ['trajectory'] + sorted(list(set(all_objs)))
n_objs = len(unique_objs)

obj_tags = create_tags(unique_objs)
task_tags = create_tags(task_files)

train_objs_pos, train_traj_pos = [], []
valid_objs_pos, valid_traj_pos = [], []
test_objs_pos, test_traj_pos = [], []

# Load data
for task_file in task_files:
    task_config_dir = os.path.join(data_dir, task_file)
    with open(os.path.join(task_config_dir, 'task_config.yaml')) as file:
        config = yaml.load(file, Loader=yaml.FullLoader)
    project_dir = config["project_path"] # Modify this to your need
    base_dir = os.path.join(project_dir, config["postprocessed_dir"])
    triangulation = 'dlc3d'
    template_dir = os.path.join(project_dir, config["postprocessed_dir"],f'transformations/{triangulation}')
    individuals = config["individuals"] # The objects that we will place a reference frame on

    with open(os.path.join(base_dir, 'processed', triangulation, 'gripper_trajs_in_obj_aligned_filtered.pickle',), 'rb') as f1:
        gripper_trajs_in_obj_all_actions = pickle.load(f1)
    with open(os.path.join(base_dir, 'processed', triangulation, 'HTs_obj_in_ndi.pickle',), 'rb') as f2:
        HTs_obj_in_ndi_all_actions = pickle.load(f2)
    with open(os.path.join(base_dir, 'processed', triangulation, 'gripper_traj_in_grouped_objs_aligned_filtered.pickle',), 'rb') as f3:
        gripper_trajs_in_grouped_objs_all_actions = pickle.load(f3)
    with open(os.path.join(base_dir, 'processed', triangulation, 'HTs_grouped_objs_in_ndi.pickle',), 'rb') as f4:
        HTs_grouped_objs_in_ndi_all_actions = pickle.load(f4)

    ind = 0  # index of action to be tested
    # gripper_trajs_in_ndi = gripper_trajs_truncated[ind]
    gripper_traj_in_obj = gripper_trajs_in_obj_all_actions[ind]
    gripper_traj_in_grouped_obj = gripper_trajs_in_grouped_objs_all_actions[ind]
    gripper_traj_in_generalized_obj = gripper_traj_in_obj | gripper_traj_in_grouped_obj

    HTs_obj_in_ndi = HTs_obj_in_ndi_all_actions[ind]
    HTs_grouped_obj_in_ndi = HTs_grouped_objs_in_ndi_all_actions[ind]
    if not 'global' in HTs_obj_in_ndi.keys():
        HTs_obj_in_ndi = swap_dict_level(HTs_obj_in_ndi)
        HTs_grouped_obj_in_ndi = swap_dict_level(HTs_grouped_obj_in_ndi)
    HTs_generalized_obj_in_ndi = HTs_obj_in_ndi | HTs_grouped_obj_in_ndi

    outliers = []
    std_thres = 3

    for individual in individuals:
        n_std = std_thres
        outlier_individual = detect_outlier(gripper_traj_in_generalized_obj[individual], n=n_std)
        print(f'The outliers for individual {individual} are {outlier_individual}')
        outliers += outlier_individual
    outliers = list(set(outliers))
    bad_demos = outliers

    demos = sorted(list(HTs_generalized_obj_in_ndi['global'].keys()))
    demos = [demo for demo in demos if demo not in bad_demos]
    
    train_demos_pool = [demo for demo in random.sample(demos,30)]
    random.shuffle(train_demos_pool)
    test_valid_demos_pool = [demo for demo in demos if demo not in train_demos_pool]
    
    train_demos = train_demos_pool[:n_train]
    # validation and test demo split
    test_valid_demos_pool_updated = [demo for demo in test_valid_demos_pool if demo not in train_demos]
    split_size = int(len(test_valid_demos_pool_updated)/2)
    valid_demos = test_valid_demos_pool_updated[:split_size]
    test_demos = test_valid_demos_pool_updated[split_size:]
    
    print(f'The number of training pool for task {task_file} is: {len(train_demos_pool)}')
    print(f'The number of outliers is: {len(outliers)} with remaining {len(demos)}')
    print(f'Training/Test Size: {len(train_demos)}/{len(valid_demos)}/{len(test_demos)}')
    for demo in train_demos:
        traj = gripper_traj_in_obj['global'][demo][task_dims].to_numpy()
        obj_buffer = obj_tags['trajectory'].repeat([traj.shape[0], 1])
        task_buffer = task_tags[task_file].repeat([traj.shape[0], 1])
        
        new_traj_data = np.concatenate([traj, obj_buffer, task_buffer], axis=1)
        obj_pos_all = [] 
        for obj_ind in individuals:
            mat = HTs_generalized_obj_in_ndi[obj_ind][demo]
            if n_dims==3:
                obj_pos = np.concatenate([mat[:3,3], obj_tags[obj_ind], task_tags[task_file]])
            else:
                obj_pos = np.concatenate([mat[:3,3], R.from_matrix(mat[:3,:3]).as_quat(), obj_tags[obj_ind], task_tags[task_file]])
            obj_pos_all.append(obj_pos)

        train_objs_pos.append(np.stack(obj_pos_all))
        train_traj_pos.append(new_traj_data)
    
    for demo in valid_demos:
        traj = gripper_traj_in_obj['global'][demo][task_dims].to_numpy()
        obj_buffer = obj_tags['trajectory'].repeat([traj.shape[0], 1])
        task_buffer = task_tags[task_file].repeat([traj.shape[0], 1])
        new_traj_data = np.concatenate([traj, obj_buffer, task_buffer], axis=1)
        obj_pos_all = [] 
        for obj_ind in individuals:
            mat = HTs_generalized_obj_in_ndi[obj_ind][demo]
            if n_dims==3:
                obj_pos = np.concatenate([mat[:3,3], obj_tags[obj_ind], task_tags[task_file]])
            else:
                obj_pos = np.concatenate([mat[:3,3], R.from_matrix(mat[:3,:3]).as_quat(), obj_tags[obj_ind], task_tags[task_file]])
            obj_pos_all.append(obj_pos)

        valid_objs_pos.append(np.stack(obj_pos_all))
        valid_traj_pos.append(new_traj_data)
    
    for demo in test_demos:
        traj = gripper_traj_in_obj['global'][demo][task_dims].to_numpy()
        obj_buffer = obj_tags['trajectory'].repeat([traj.shape[0], 1])
        task_buffer = task_tags[task_file].repeat([traj.shape[0], 1])
        new_traj_data = np.concatenate([traj, obj_buffer, task_buffer], axis=1)
        obj_pos_all = [] 
        for obj_ind in individuals:
            mat = HTs_generalized_obj_in_ndi[obj_ind][demo]
            if n_dims==3:
                obj_pos = np.concatenate([mat[:3,3], obj_tags[obj_ind], task_tags[task_file]])
            else:
                obj_pos = np.concatenate([mat[:3,3], R.from_matrix(mat[:3,:3]).as_quat(), obj_tags[obj_ind], task_tags[task_file]])
            obj_pos_all.append(obj_pos)

        test_objs_pos.append(np.stack(obj_pos_all))
        test_traj_pos.append(new_traj_data)


2023-04-19 20:04:10.309244: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-19 20:04:10.392366: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-19 20:04:10.393722: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.

Cuda available:  True
The outliers for individual teabag1 are ['134734', '134772']
The outliers for individual cup are ['134732', '134733', '134734', '134726', '134742', '134735']
The number of training pool for task 2022-10-06 is: 30
The number of outliers is: 7 with remaining 44
Training/Test Size: 30/7/7
The outliers for individual pitcher are ['331733']
The outliers for individual cup are ['331733']
The number of training pool for task 2022-10-27 is: 30
The number of outliers is: 1 with remaining 46
Training/Test Size: 30/8/8
The outliers for individual net are ['882800', '882789', '882817', '882823', '882831']
The outliers for individual puck are []
The number of training pool for task 2022-12-01 is: 30
The number of outliers is: 5 with remaining 40
Training/Test Size: 30/5/

In [2]:
contiguous_traj = np.concatenate(train_traj_pos)

In [3]:
# matplotlib.rcParams.update({'font.size': 10})
# fig = plt.figure(figsize = (8, 6))
# ax = fig.add_subplot(1, 1, 1, projection='3d')
# ax.set_facecolor('white')
# ax.locator_params(nbins=3, axis='z')
# colors = ['red', 'blue', 'yellow', 'orange', 'green', 'purple','pink']

# ax.plot(contiguous_traj[:,2], contiguous_traj[:,1], -contiguous_traj[:,0], 'o')
    
# ax.set_xlabel('x (mm)')
# ax.set_ylabel('y (mm)')
# ax.set_zlabel('z (mm)')
# ax.set_box_aspect([ub - lb for lb, ub in (getattr(ax, f'get_{a}lim')() for a in 'xyz')])
# handles, labels = ax.get_legend_handles_labels()
# newHandles_temp, newLabels_temp = remove_repetitive_labels(handles, labels)
# newLabels, newHandles = [], []

# for handle, label in zip(newHandles_temp, newLabels_temp):
#     if label not in ['start', 'middle', 'end']:
#         newLabels.append(label)
#         newHandles.append(handle)
# plt.legend(newHandles, newLabels, loc = 'upper left',  prop={'size': 10})
# plt.show()

In [4]:
class TrajectoryDataset(Dataset):
    def __init__(self, obj_data, traj_data, transform_dims, transform=lambda a : a,
                 max_seq_len=128):
        self.traj_data = traj_data
        self.obj_data = obj_data
        self.transform = transform
        self.dims = transform_dims
        self.max_seq_len = max_seq_len

    def __getitem__(self, idx):
        traj_data = self.traj_data[idx].copy()
        obj_data = self.obj_data[idx].copy()
        x = np.concatenate([obj_data, traj_data])
        # Transformation process
        x[:,:self.dims] = self.transform(x[:,:self.dims])
        
        obj_data = torch.tensor(x[:obj_data.shape[0],:])
        traj_data = torch.tensor(x[obj_data.shape[0]:,:])
        if traj_data.shape[0] < self.max_seq_len:
            diff = self.max_seq_len - traj_data.shape[0]
            pad = torch.zeros([diff, traj_data.shape[1]])
            traj_data = torch.cat([traj_data, pad])
        traj_hidden = traj_data.clone()
        traj_hidden[:,:self.dims] = 0
        return obj_data, traj_data, traj_hidden

    def __len__(self):
        return len(self.obj_data)

def normalize_wrapper(average, std):
    return lambda x: normalize_3d(x, average, std)

def normalize_3d(entry, average, std):
    entry[:,:3] = (entry[:,:3] - average)/std
    return entry
    
def get_obj_tag(entry):
    tag_seq = entry[n_dims:n_dims+n_objs]
    return (tag_seq == 1).nonzero(as_tuple=True)[0]

train_mean = np.mean(contiguous_traj[:,:3], axis=0)
train_std = np.std(contiguous_traj[:,:3])/3

norm_func = normalize_wrapper(train_mean, train_std)
train_objs_pos = list(map(norm_func, train_objs_pos))
train_traj_pos = list(map(norm_func, train_traj_pos))
valid_objs_pos = list(map(norm_func, valid_objs_pos))
valid_traj_pos = list(map(norm_func, valid_traj_pos))
test_objs_pos = list(map(norm_func, test_objs_pos))
test_traj_pos = list(map(norm_func, test_traj_pos))

size_modifier = int(60/n_train)
# Create dataloaders
training_data = TrajectoryDataset(train_objs_pos*size_modifier, train_traj_pos*size_modifier,
                                  n_dims, transform=random_rotation)
valid_data = TrajectoryDataset(valid_objs_pos, valid_traj_pos, n_dims)
test_data = TrajectoryDataset(test_objs_pos, test_traj_pos, n_dims)
train_dataloader = DataLoader(training_data, batch_size=128, shuffle=True)
valid_dataloader = DataLoader(valid_data, batch_size=128, shuffle=False)
test_dataloader = DataLoader(test_data, batch_size=128, shuffle=False)

In [5]:
# %matplotlib notebook
# # shows random set of augmented trajectory
# matplotlib.rcParams.update({'font.size': 10})
# fig = plt.figure(figsize = (8, 6))
# ax = fig.add_subplot(1, 1, 1, projection='3d')
# ax.set_facecolor('white')
# ax.locator_params(nbins=3, axis='z')
# colors = ['red', 'blue', 'yellow', 'orange', 'green', 'purple','pink']
# for i_data, sample_data in enumerate(test_data):
#     obj_seq, traj_seq, _ = sample_data
#     traj_seq = traj_seq[traj_seq[:,n_dims]==1]
#     if i_data%10: continue
#     obj1_pos = obj_seq[1,]
#     obj0_pos = obj_seq[0,]
#     line = ax.plot(traj_seq[:,2], traj_seq[:,1], -traj_seq[:,0], '--', color=colors[i_data%len(colors)], 
#                    label = f'demo {i_data}')
#     ax.plot(obj1_pos[2], obj1_pos[1], -obj1_pos[0], 'o',
#             color=colors[i_data%len(colors)], label=f'{i_data}')
#     ax.plot(obj0_pos[2], obj0_pos[1], -obj0_pos[0], 'x',
#             color=colors[i_data%len(colors)], label=f'{i_data}')
# ax.set_xlabel('x (mm)')
# ax.set_ylabel('y (mm)')
# ax.set_zlabel('z (mm)')
# ax.set_box_aspect([ub - lb for lb, ub in (getattr(ax, f'get_{a}lim')() for a in 'xyz')])
# handles, labels = ax.get_legend_handles_labels()
# newHandles_temp, newLabels_temp = remove_repetitive_labels(handles, labels)
# newLabels, newHandles = [], []

# for handle, label in zip(newHandles_temp, newLabels_temp):
#     if label not in ['start', 'middle', 'end']:
#         newLabels.append(label)
#         newHandles.append(handle)
# plt.legend(newHandles, newLabels, loc = 'upper left',  prop={'size': 10})
# plt.show()

In [6]:
# # shows random set of augmented trajectory
# matplotlib.rcParams.update({'font.size': 10})
# fig = plt.figure(figsize = (8, 6))
# ax = fig.add_subplot(1, 1, 1, projection='3d')
# ax.set_facecolor('white')
# ax.locator_params(nbins=3, axis='z')
# colors = ['red', 'blue', 'yellow', 'orange', 'green', 'purple','pink']
# for i_data, sample_data in enumerate(training_data):
#     obj_seq, traj_seq, _ = sample_data
#     traj_seq = traj_seq[traj_seq[:,n_dims]==1]
#     if i_data%100: continue
#     obj1_pos = obj_seq[1,]
#     obj0_pos = obj_seq[0,]
#     line = ax.plot(traj_seq[:,2], traj_seq[:,1], -traj_seq[:,0], '--', color=colors[i_data%len(colors)], 
#                    label = f'demo {i_data}')
#     ax.plot(obj1_pos[2], obj1_pos[1], -obj1_pos[0], 'o',
#             color=colors[i_data%len(colors)], label=f'{i_data}')
#     ax.plot(obj0_pos[2], obj0_pos[1], -obj0_pos[0], 'x',
#             color=colors[i_data%len(colors)], label=f'{i_data}')
# ax.set_xlabel('x (mm)')
# ax.set_ylabel('y (mm)')
# ax.set_zlabel('z (mm)')
# ax.set_box_aspect([ub - lb for lb, ub in (getattr(ax, f'get_{a}lim')() for a in 'xyz')])
# handles, labels = ax.get_legend_handles_labels()
# newHandles_temp, newLabels_temp = remove_repetitive_labels(handles, labels)
# newLabels, newHandles = [], []

# for handle, label in zip(newHandles_temp, newLabels_temp):
#     if label not in ['start', 'middle', 'end']:
#         newLabels.append(label)
#         newHandles.append(handle)
# plt.legend(newHandles, newLabels, loc = 'upper left',  prop={'size': 10})
# plt.show()

In [7]:
def loss_func(pred, truth):
    losses = 0
    for i in range(truth.shape[0]):
        mask = truth[i,:,n_dims]==1
        losses += F.mse_loss(pred[i,mask,:], truth[i,mask,:])
    return losses/truth.shape[0]

def train_epoch(model, optimizer, t_dataloader):
    train_losses = []
    model.train()
    for sample_batched in t_dataloader:
        # input modification
        optimizer.zero_grad()
        obj_seq, traj_seq, traj_target = sample_batched
        obj_seq_input = obj_seq.to(device)
        traj_seq_gt = traj_seq.to(device)
        traj_target_input = traj_target.to(device)
        output_seq = model(obj_seq_input, traj_target_input)
        loss = F.mse_loss(output_seq, traj_seq_gt[:,:,:n_dims])
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())
    return train_losses
        
model_path = "./models"
train_log_dir = './logs'
checkpoint_file = os.path.join(model_path, f"checkpoint2_{n_dims}D_{n_train}dms.pt")
# model_file = os.path.join(model_path, f"saved_model_full_{n_dims}d_{n_train}demos.pt")


In [8]:
print_interval = 10
best_score = 10000
total_epochs = 50000
loss_hist = []
epoch = 0
# Load model or create new
# if os.path.exists(checkpoint_file):
#     saved_file = torch.load(checkpoint_file)
# #     best_score = saved_file['best_score']
#     loss_hist = saved_file['losses']
#     epoch = len(loss_hist)*print_interval
#     new_model = saved_file['model']
#     adam = saved_file['optimizer']
#     print("Loaded Saved Model:", checkpoint_file)
# else:
#     new_model = TFModelLite(task_dim=n_dims + n_objs + n_tasks, traj_dim=n_dims, 
#                             embed_dim=32, nhead=8, layers=3).to(device)
#     adam = optim.Adam(new_model.parameters(), lr=1e-3)
#     print("Create New Model...")

In [9]:
# loss_arr = np.array(loss_hist)
# fig = plt.figure(figsize = (8, 6))
# plt.plot(loss_arr[:2500,0], label='training')
# plt.plot(loss_arr[:2500,1], label='validation')
# plt.ylabel('loss')
# plt.xlabel('per 10 epoch')
# plt.legend()
# plt.show()

In [None]:
import optuna


def objective(trial):
    # Integer parameter
    num_layers = trial.suggest_int("num_layers", 1, 6)

    # Integer parameter (log)
    embed_size = trial.suggest_categorical("embed_size", [2**i for i in range(4,9)])

    # Integer parameter (discretized)
    num_heads = trial.suggest_categorical("num_heads", [2**i for i in range(0,5)])


    # Floating point parameter (log)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)

    # Floating point parameter (discretized)
    drop_path_rate = trial.suggest_float("dropout_rate", 0.0, 1.0, step=0.1)
    
    search_model = TFModelLite(task_dim=n_dims + n_objs + n_tasks, traj_dim=n_dims, 
                            embed_dim=embed_size, nhead=num_heads, layers=num_layers,
                              dropout=drop_path_rate).to(device)
    search_opt = optim.Adam(search_model.parameters(), lr=drop_path_rate)
    for i in range(1000):
        t_losses  = train_epoch(search_model, search_opt, train_dataloader)
  
    search_model.eval()
    v_losses = []
    for sample_batched in valid_dataloader:
        obj_seq, traj_seq, traj_target = sample_batched
        obj_seq_input = obj_seq.to(device)
        traj_seq_gt = traj_seq.to(device)
        traj_target_input = traj_target.to(device)
        output_seq = search_model(obj_seq_input, traj_target_input)
        loss = F.mse_loss(output_seq, traj_seq_gt[:,:,:n_dims])
        v_losses.append(loss.item())
    t_loss_mean, v_loss_mean = mean(t_losses), mean(v_losses)
    print(f"Epoch {epoch}, Train/Valid Loss: {round(t_loss_mean,4)}/{round(v_loss_mean,4)}")
    del search_model
    return v_loss_mean
    
study = optuna.create_study()
study.optimize(objective, n_trials=100)

study.best_params

[32m[I 2023-04-19 20:04:18,808][0m A new study created in memory with name: no-name-2604d1f5-f31d-4885-b331-979991e0d5c7[0m
[32m[I 2023-04-19 20:13:03,067][0m Trial 0 finished with value: 0.13931016186403627 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 2, 'learning_rate': 0.0008344110015038636, 'dropout_rate': 0.6000000000000001}. Best is trial 0 with value: 0.13931016186403627.[0m


Epoch 0, Train/Valid Loss: 0.1621/0.1393


[32m[I 2023-04-19 20:31:52,613][0m Trial 1 finished with value: 0.4337856760693801 and parameters: {'num_layers': 2, 'embed_size': 256, 'num_heads': 2, 'learning_rate': 7.111748948798166e-05, 'dropout_rate': 0.8}. Best is trial 0 with value: 0.13931016186403627.[0m


Epoch 0, Train/Valid Loss: 0.2553/0.4338


[32m[I 2023-04-19 21:22:55,978][0m Trial 2 finished with value: 0.13470240202944517 and parameters: {'num_layers': 6, 'embed_size': 256, 'num_heads': 4, 'learning_rate': 3.419743116598501e-05, 'dropout_rate': 0.4}. Best is trial 2 with value: 0.13470240202944517.[0m


Epoch 0, Train/Valid Loss: 0.3426/0.1347


[32m[I 2023-04-19 21:28:32,994][0m Trial 3 finished with value: 0.10990427604199905 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 1.4603617084838966e-05, 'dropout_rate': 0.1}. Best is trial 3 with value: 0.10990427604199905.[0m


Epoch 0, Train/Valid Loss: 0.1136/0.1099


[32m[I 2023-04-19 21:34:09,578][0m Trial 4 finished with value: 0.11617716170139747 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 4.2141211263364274e-05, 'dropout_rate': 0.30000000000000004}. Best is trial 3 with value: 0.10990427604199905.[0m


Epoch 0, Train/Valid Loss: 0.1472/0.1162


[32m[I 2023-04-19 21:42:43,649][0m Trial 5 finished with value: 0.11305385258101008 and parameters: {'num_layers': 4, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.0023086968919588725, 'dropout_rate': 0.5}. Best is trial 3 with value: 0.10990427604199905.[0m


Epoch 0, Train/Valid Loss: 0.1439/0.1131


[32m[I 2023-04-19 21:56:48,437][0m Trial 6 finished with value: 0.2672126004016175 and parameters: {'num_layers': 3, 'embed_size': 128, 'num_heads': 2, 'learning_rate': 0.0013745385019749367, 'dropout_rate': 0.6000000000000001}. Best is trial 3 with value: 0.10990427604199905.[0m


Epoch 0, Train/Valid Loss: 0.1958/0.2672


[32m[I 2023-04-19 22:07:36,551][0m Trial 7 finished with value: 0.45044133161480354 and parameters: {'num_layers': 1, 'embed_size': 256, 'num_heads': 2, 'learning_rate': 0.0003164055983359737, 'dropout_rate': 0.0}. Best is trial 3 with value: 0.10990427604199905.[0m


Epoch 0, Train/Valid Loss: 0.4314/0.4504


[32m[I 2023-04-19 22:19:43,893][0m Trial 8 finished with value: 0.11201041530365023 and parameters: {'num_layers': 6, 'embed_size': 16, 'num_heads': 2, 'learning_rate': 2.7402237045114592e-05, 'dropout_rate': 0.4}. Best is trial 3 with value: 0.10990427604199905.[0m


Epoch 0, Train/Valid Loss: 0.1479/0.112


[32m[I 2023-04-19 22:28:14,299][0m Trial 9 finished with value: 0.10971861988648891 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.001749903175706015, 'dropout_rate': 0.8}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1804/0.1097


[32m[I 2023-04-19 22:38:36,933][0m Trial 10 finished with value: 0.14548981190790253 and parameters: {'num_layers': 4, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.008925691530808242, 'dropout_rate': 1.0}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.2131/0.1455


[32m[I 2023-04-19 22:47:06,330][0m Trial 11 finished with value: 0.4237384890909454 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 1.11585847949996e-05, 'dropout_rate': 0.0}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.4888/0.4237


[32m[I 2023-04-19 22:54:49,250][0m Trial 12 finished with value: 0.11446869006047769 and parameters: {'num_layers': 2, 'embed_size': 64, 'num_heads': 8, 'learning_rate': 0.0001579032792179632, 'dropout_rate': 0.2}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1371/0.1145


[32m[I 2023-04-19 23:12:15,353][0m Trial 13 finished with value: 0.11928544603481853 and parameters: {'num_layers': 5, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 1.1378739484441627e-05, 'dropout_rate': 0.8}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1441/0.1193


[32m[I 2023-04-19 23:18:45,270][0m Trial 14 finished with value: 0.17040495162813274 and parameters: {'num_layers': 1, 'embed_size': 128, 'num_heads': 1, 'learning_rate': 0.00030984207497204935, 'dropout_rate': 1.0}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.2037/0.1704


[32m[I 2023-04-19 23:27:56,966][0m Trial 15 finished with value: 0.2111507769157629 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 4, 'learning_rate': 0.00013500511615935625, 'dropout_rate': 0.8}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.229/0.2112


[32m[I 2023-04-19 23:41:49,677][0m Trial 16 finished with value: 0.1144405054497664 and parameters: {'num_layers': 5, 'embed_size': 32, 'num_heads': 8, 'learning_rate': 0.004115476087067434, 'dropout_rate': 0.2}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.141/0.1144


[32m[I 2023-04-19 23:50:23,098][0m Trial 17 finished with value: 0.12055891485524996 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 0.0005738566919028195, 'dropout_rate': 0.7000000000000001}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1559/0.1206


[32m[I 2023-04-20 00:00:42,620][0m Trial 18 finished with value: 0.1116975330833974 and parameters: {'num_layers': 4, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0019690240814875457, 'dropout_rate': 0.1}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1384/0.1117


[32m[I 2023-04-20 00:04:53,832][0m Trial 19 finished with value: 0.12003800921834881 and parameters: {'num_layers': 1, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.0007869495806299928, 'dropout_rate': 0.9}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1457/0.12


[32m[I 2023-04-20 00:32:07,944][0m Trial 20 finished with value: 0.11655530658732326 and parameters: {'num_layers': 5, 'embed_size': 128, 'num_heads': 16, 'learning_rate': 9.59942812367851e-05, 'dropout_rate': 0.6000000000000001}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1542/0.1166


[32m[I 2023-04-20 00:42:30,814][0m Trial 21 finished with value: 0.11136623013321938 and parameters: {'num_layers': 4, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0025843456766644316, 'dropout_rate': 0.1}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1498/0.1114


[32m[I 2023-04-20 00:50:58,544][0m Trial 22 finished with value: 0.1109778398055522 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.004005691077137172, 'dropout_rate': 0.1}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1312/0.111


[32m[I 2023-04-20 00:59:25,788][0m Trial 23 finished with value: 0.11084479056964658 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.00387417885069775, 'dropout_rate': 0.2}. Best is trial 9 with value: 0.10971861988648891.[0m


Epoch 0, Train/Valid Loss: 0.1376/0.1108


[32m[I 2023-04-20 01:05:56,308][0m Trial 24 finished with value: 0.1094187590337022 and parameters: {'num_layers': 2, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.007880792589997262, 'dropout_rate': 0.30000000000000004}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1427/0.1094


[32m[I 2023-04-20 01:12:26,983][0m Trial 25 finished with value: 0.1176460273129138 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 4, 'learning_rate': 0.00846606102359514, 'dropout_rate': 0.30000000000000004}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1457/0.1176


[32m[I 2023-04-20 01:20:08,049][0m Trial 26 finished with value: 0.11142625733566948 and parameters: {'num_layers': 2, 'embed_size': 64, 'num_heads': 8, 'learning_rate': 0.009961455762611898, 'dropout_rate': 0.30000000000000004}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1486/0.1114


[32m[I 2023-04-20 01:25:58,773][0m Trial 27 finished with value: 0.14193958097568485 and parameters: {'num_layers': 1, 'embed_size': 64, 'num_heads': 16, 'learning_rate': 0.0013481186949240388, 'dropout_rate': 0.4}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1514/0.1419


[32m[I 2023-04-20 01:31:58,035][0m Trial 28 finished with value: 0.11219276425454458 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.004938948994412617, 'dropout_rate': 0.5}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1483/0.1122


[32m[I 2023-04-20 01:36:34,665][0m Trial 29 finished with value: 0.1773780185575182 and parameters: {'num_layers': 1, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0010693367916944255, 'dropout_rate': 0.6000000000000001}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1807/0.1774


[32m[I 2023-04-20 01:53:52,287][0m Trial 30 finished with value: 0.4752698676707514 and parameters: {'num_layers': 3, 'embed_size': 128, 'num_heads': 16, 'learning_rate': 0.0004646815559176733, 'dropout_rate': 0.0}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.416/0.4753


[32m[I 2023-04-20 02:02:21,587][0m Trial 31 finished with value: 0.11321444845234613 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.006265682760154372, 'dropout_rate': 0.2}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1352/0.1132


[32m[I 2023-04-20 02:10:51,063][0m Trial 32 finished with value: 0.11848092442169847 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.003211687242714922, 'dropout_rate': 0.2}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1476/0.1185


[32m[I 2023-04-20 02:29:46,510][0m Trial 33 finished with value: 0.12158660121620268 and parameters: {'num_layers': 2, 'embed_size': 256, 'num_heads': 1, 'learning_rate': 0.005636754224807002, 'dropout_rate': 0.1}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1669/0.1216


[32m[I 2023-04-20 02:39:05,663][0m Trial 34 finished with value: 0.11615551223090041 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 4, 'learning_rate': 0.0030679055227741006, 'dropout_rate': 0.30000000000000004}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1688/0.1162


[32m[I 2023-04-20 02:58:01,119][0m Trial 35 finished with value: 0.36824373559631607 and parameters: {'num_layers': 2, 'embed_size': 256, 'num_heads': 1, 'learning_rate': 0.0015340516763681375, 'dropout_rate': 0.4}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.2229/0.3682


[32m[I 2023-04-20 03:02:42,011][0m Trial 36 finished with value: 0.11028170894940661 and parameters: {'num_layers': 1, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0018668979083433024, 'dropout_rate': 0.5}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1578/0.1103


[32m[I 2023-04-20 03:07:03,173][0m Trial 37 finished with value: 0.13359464915632183 and parameters: {'num_layers': 1, 'embed_size': 16, 'num_heads': 2, 'learning_rate': 0.000969222677496624, 'dropout_rate': 0.5}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1302/0.1336


[32m[I 2023-04-20 03:13:04,873][0m Trial 38 finished with value: 0.126388345305206 and parameters: {'num_layers': 1, 'embed_size': 64, 'num_heads': 16, 'learning_rate': 0.001989178554812917, 'dropout_rate': 0.7000000000000001}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1504/0.1264


[32m[I 2023-04-20 03:18:09,938][0m Trial 39 finished with value: 0.12354080643578687 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 8, 'learning_rate': 0.0022715655879794235, 'dropout_rate': 0.7000000000000001}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1422/0.1235


[32m[I 2023-04-20 03:37:03,831][0m Trial 40 finished with value: 0.1517943303320095 and parameters: {'num_layers': 2, 'embed_size': 256, 'num_heads': 2, 'learning_rate': 0.0063139099689688135, 'dropout_rate': 0.9}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.6914/0.1518


[32m[I 2023-04-20 03:47:32,500][0m Trial 41 finished with value: 0.11742753607372716 and parameters: {'num_layers': 4, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0035803434266178436, 'dropout_rate': 0.30000000000000004}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1547/0.1174


[32m[I 2023-04-20 03:52:13,520][0m Trial 42 finished with value: 0.13009047785444094 and parameters: {'num_layers': 1, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0015882368521835807, 'dropout_rate': 0.4}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1634/0.1301


[32m[I 2023-04-20 03:58:48,472][0m Trial 43 finished with value: 0.12759052657100653 and parameters: {'num_layers': 2, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.002996041436642916, 'dropout_rate': 0.5}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.16/0.1276


[32m[I 2023-04-20 04:07:19,096][0m Trial 44 finished with value: 0.11692244373443253 and parameters: {'num_layers': 3, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 3.978365286372158e-05, 'dropout_rate': 0.2}. Best is trial 24 with value: 0.1094187590337022.[0m


Epoch 0, Train/Valid Loss: 0.1405/0.1169


[32m[I 2023-04-20 04:13:01,933][0m Trial 45 finished with value: 0.10868180113995528 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.007060943198960553, 'dropout_rate': 0.1}. Best is trial 45 with value: 0.10868180113995528.[0m


Epoch 0, Train/Valid Loss: 0.1469/0.1087


[32m[I 2023-04-20 04:17:35,634][0m Trial 46 finished with value: 0.3642611263273179 and parameters: {'num_layers': 1, 'embed_size': 16, 'num_heads': 4, 'learning_rate': 0.005229653738645501, 'dropout_rate': 0.0}. Best is trial 45 with value: 0.10868180113995528.[0m


Epoch 0, Train/Valid Loss: 0.368/0.3643


[32m[I 2023-04-20 04:23:44,574][0m Trial 47 finished with value: 0.11438849946395752 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 16, 'learning_rate': 0.008090579033206905, 'dropout_rate': 0.1}. Best is trial 45 with value: 0.10868180113995528.[0m


Epoch 0, Train/Valid Loss: 0.1746/0.1144


[32m[I 2023-04-20 04:29:28,308][0m Trial 48 finished with value: 0.11068478428064904 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.007102919499415657, 'dropout_rate': 0.4}. Best is trial 45 with value: 0.10868180113995528.[0m


Epoch 0, Train/Valid Loss: 0.1373/0.1107


[32m[I 2023-04-20 04:33:50,074][0m Trial 49 finished with value: 0.5524826129689304 and parameters: {'num_layers': 1, 'embed_size': 16, 'num_heads': 2, 'learning_rate': 0.009642851429880384, 'dropout_rate': 0.0}. Best is trial 45 with value: 0.10868180113995528.[0m


Epoch 0, Train/Valid Loss: 0.5238/0.5525


[32m[I 2023-04-20 04:38:18,366][0m Trial 50 finished with value: 0.10858995755387868 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 5.449764952082217e-05, 'dropout_rate': 0.9}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1524/0.1086


[32m[I 2023-04-20 04:42:44,934][0m Trial 51 finished with value: 0.11210912751284301 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 2.2440465387479034e-05, 'dropout_rate': 0.9}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1571/0.1121


[32m[I 2023-04-20 04:47:12,274][0m Trial 52 finished with value: 0.1123534305670987 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 1.7392782060711776e-05, 'dropout_rate': 0.8}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1583/0.1124


[32m[I 2023-04-20 04:51:38,673][0m Trial 53 finished with value: 0.16469822835794837 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 4.226241998393557e-05, 'dropout_rate': 1.0}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.2267/0.1647


[32m[I 2023-04-20 04:57:42,695][0m Trial 54 finished with value: 0.11425364966266191 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.004417967219890875, 'dropout_rate': 0.9}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1585/0.1143


[32m[I 2023-04-20 05:03:24,544][0m Trial 55 finished with value: 0.1158705764362364 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 0.00022403838714904893, 'dropout_rate': 0.8}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1548/0.1159


[32m[I 2023-04-20 05:10:18,023][0m Trial 56 finished with value: 0.11384886587399251 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 8, 'learning_rate': 6.910409249923243e-05, 'dropout_rate': 0.6000000000000001}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1575/0.1138


[32m[I 2023-04-20 05:16:51,026][0m Trial 57 finished with value: 0.1125818627469424 and parameters: {'num_layers': 1, 'embed_size': 128, 'num_heads': 1, 'learning_rate': 0.002473213518255137, 'dropout_rate': 0.1}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1391/0.1126


[32m[I 2023-04-20 05:22:56,847][0m Trial 58 finished with value: 0.11006240766778992 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.007594797606640733, 'dropout_rate': 0.7000000000000001}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1448/0.1101


[32m[I 2023-04-20 05:35:34,829][0m Trial 59 finished with value: 0.1206435299046346 and parameters: {'num_layers': 6, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.007004484118412864, 'dropout_rate': 0.9}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1592/0.1206


[32m[I 2023-04-20 05:47:09,467][0m Trial 60 finished with value: 0.11252234708410555 and parameters: {'num_layers': 3, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 0.004690699426447086, 'dropout_rate': 0.8}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1471/0.1125


[32m[I 2023-04-20 05:53:16,703][0m Trial 61 finished with value: 0.11437069382470177 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.008044350823356293, 'dropout_rate': 0.7000000000000001}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1388/0.1144


[32m[I 2023-04-20 05:59:22,935][0m Trial 62 finished with value: 0.11730197319179439 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.005430127014851284, 'dropout_rate': 1.0}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1527/0.1173


[32m[I 2023-04-20 06:05:56,494][0m Trial 63 finished with value: 0.34018444818458593 and parameters: {'num_layers': 1, 'embed_size': 128, 'num_heads': 1, 'learning_rate': 0.004064125399476699, 'dropout_rate': 0.7000000000000001}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1728/0.3402


[32m[I 2023-04-20 06:12:02,579][0m Trial 64 finished with value: 0.11554085468046668 and parameters: {'num_layers': 2, 'embed_size': 32, 'num_heads': 1, 'learning_rate': 0.0068185091832433, 'dropout_rate': 0.6000000000000001}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1393/0.1155


[32m[I 2023-04-20 06:19:09,743][0m Trial 65 finished with value: 0.4918457160839895 and parameters: {'num_layers': 2, 'embed_size': 64, 'num_heads': 4, 'learning_rate': 0.009725900248579332, 'dropout_rate': 0.8}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.264/0.4918


[32m[I 2023-04-20 06:29:58,690][0m Trial 66 finished with value: 0.11940125358833029 and parameters: {'num_layers': 1, 'embed_size': 256, 'num_heads': 1, 'learning_rate': 0.0007233722593442728, 'dropout_rate': 0.30000000000000004}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1428/0.1194


[32m[I 2023-04-20 06:37:08,231][0m Trial 67 finished with value: 0.11414305842032661 and parameters: {'num_layers': 3, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 1.2631828604328644e-05, 'dropout_rate': 0.9}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1519/0.1141


[32m[I 2023-04-20 06:49:52,287][0m Trial 68 finished with value: 0.11913336232400408 and parameters: {'num_layers': 4, 'embed_size': 64, 'num_heads': 8, 'learning_rate': 0.0032598478055964352, 'dropout_rate': 0.5}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1563/0.1191


[32m[I 2023-04-20 06:55:35,584][0m Trial 69 finished with value: 0.11799748357631855 and parameters: {'num_layers': 1, 'embed_size': 32, 'num_heads': 16, 'learning_rate': 0.0061325477018659225, 'dropout_rate': 0.2}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1313/0.118


[32m[I 2023-04-20 07:02:10,041][0m Trial 70 finished with value: 0.33717991011181486 and parameters: {'num_layers': 2, 'embed_size': 64, 'num_heads': 1, 'learning_rate': 0.0011595706945507886, 'dropout_rate': 0.0}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.4425/0.3372


[32m[I 2023-04-20 07:07:52,419][0m Trial 71 finished with value: 0.11286631782546133 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.007552880605588781, 'dropout_rate': 0.5}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1464/0.1129


[32m[I 2023-04-20 07:13:35,746][0m Trial 72 finished with value: 0.11166580990995058 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.005310169356447677, 'dropout_rate': 0.4}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1314/0.1117


[32m[I 2023-04-20 07:19:18,805][0m Trial 73 finished with value: 0.11297552349375348 and parameters: {'num_layers': 2, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.007960147583328464, 'dropout_rate': 0.4}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1446/0.113


[32m[I 2023-04-20 07:26:27,899][0m Trial 74 finished with value: 0.11288443163844133 and parameters: {'num_layers': 3, 'embed_size': 16, 'num_heads': 1, 'learning_rate': 0.0027004927063622054, 'dropout_rate': 0.4}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1419/0.1129


[32m[I 2023-04-20 07:30:48,107][0m Trial 75 finished with value: 0.11558264762697425 and parameters: {'num_layers': 1, 'embed_size': 16, 'num_heads': 2, 'learning_rate': 0.0034709391996154398, 'dropout_rate': 0.5}. Best is trial 50 with value: 0.10858995755387868.[0m


Epoch 0, Train/Valid Loss: 0.1667/0.1156


In [None]:
study_file = os.path.join(model_path, "saved_study.pkl")
with open(study_file, "wb") as fout:
    pickle.dump(study, fout)

In [None]:
while epoch < total_epochs:
    start = time.time()
    t_losses = train_epoch(new_model, adam, train_dataloader)
    if epoch%print_interval==0:
        new_model.eval()
        v_losses = []
        for sample_batched in valid_dataloader:
            obj_seq, traj_seq, traj_target = sample_batched
            obj_seq_input = obj_seq.to(device)
            traj_seq_gt = traj_seq.to(device)
            traj_target_input = traj_target.to(device)
            output_seq = new_model(obj_seq_input, traj_target_input)
            loss = F.mse_loss(output_seq, traj_seq_gt[:,:,:n_dims])
            v_losses.append(loss.item())
        t_loss_mean, v_loss_mean = mean(t_losses), mean(v_losses)
        print(f"Epoch {epoch}, Train/Valid Loss: {round(t_loss_mean,4)}/{round(v_loss_mean,4)}",
             f"Runtime: {round(time.time()-start, 2)}s")
        loss_hist.append([t_loss_mean, v_loss_mean])
        if best_score > v_loss_mean:
            best_score = v_loss_mean
            best_epoch = epoch
            best_file = os.path.join(model_path, f"best2_{n_dims}D_{n_train}dms.pt")
            best_checkpoint = { 
                'epoch': epoch,
                'best_epoch': best_epoch,
                'losses':loss_hist,
                'model': new_model,
                'optimizer': adam}
            torch.save(best_checkpoint, best_file)
    epoch += 1

In [None]:
# torch.save(new_model, model_file)
# with open(filepath, 'wb') as handle:
#     pickle.dump(loss_hist, handle)

In [None]:
def to_obj_index(onehot):
    for i in range(onehot.shape[0]):
        if onehot[i]==1:
            return i
    return 0

In [None]:
# checkpoint = { 
#     'epoch': epoch,
#     'best_epoch': best_epoch,
#     'losses':loss_hist,
#     'model': new_model,
#     'optimizer': adam}
# torch.save(checkpoint, checkpoint_file)

In [None]:
new_model.eval()
plot_dataloader = DataLoader(test_data, batch_size=1)
tfile = task_files[2]

In [None]:
# %matplotlib notebook
demo_input = next(iter(plot_dataloader))
obj_seq, traj_seq, traj_target = demo_input
# if to_obj_index(task_tags[tfile])!=to_obj_index(obj_seq[0,0,-n_tasks:]): 
#     print(obj_seq[0,0,-n_tasks:])
obj_seq_input = obj_seq.to(device)
traj_target_input = traj_target.to(device)

predicted_traj_tf = new_model(obj_seq_input, traj_target_input).cpu()
predicted_traj = predicted_traj_tf.detach().numpy()
mask = traj_seq[0,:,n_dims]==1
d = get_position_difference_per_step(predicted_traj[0,mask,:3], traj_seq.numpy()[0,mask,:3])*train_std
print(f"Transformer Model({n_dims}D) average distance: {np.mean(d)}mm\nStart distance {d[0]}mm, End distance: {d[-1]}mm")
matplotlib.rcParams.update({'font.size': 10})
fig = plt.figure(figsize = (10, 5))
ax2 = fig.add_subplot(1, 1, 1, projection='3d')
colors = ['red', 'blue', 'yellow', 'orange', 'green', 'purple','pink']
obj0_pos = obj_seq[0,0,]*train_std
obj1_pos = obj_seq[0,1,]*train_std
traj_seq.squeeze(0)
mask = traj_seq[0,:,n_dims]==1
scaled_traj_seq = traj_seq[0,mask]*train_std
scaled_predicted_traj = predicted_traj[0,mask]*train_std
line = ax2.plot(scaled_traj_seq[:,2], scaled_traj_seq[:,1], -scaled_traj_seq[:,0],
                 '--', color='red', label = f'ground truth')
ax2.plot(obj0_pos[2], obj0_pos[1], -obj0_pos[0], 'o',
        color='red', label=f'{unique_objs[to_obj_index(obj0_pos[n_dims:n_dims+n_objs])]}')
ax2.plot(obj1_pos[2], obj1_pos[1], -obj1_pos[0], 'x',
        color='red', label=f'{unique_objs[to_obj_index(obj1_pos[n_dims:n_dims+n_objs])]}')
line = ax2.plot(scaled_predicted_traj[:,2], scaled_predicted_traj[:,1], -scaled_predicted_traj[:,0], '--', color='blue', 
               label = f'predicted')

ax2.set_xlabel('x (mm)')
ax2.set_ylabel('y (mm)')
ax2.set_zlabel('z (mm)')
ax2.set_box_aspect([ub - lb for lb, ub in (getattr(ax2, f'get_{a}lim')() for a in 'xyz')])

plt.legend(loc = 'upper left',  prop={'size': 10})
plt.show()

In [None]:
# matplotlib.rcParams.update({'font.size': 10})
# fig = plt.figure(figsize = (9, 2*n_dims))
# axs = fig.subplots(n_dims, 1)

# for i in range(n_dims):
#     axs[i].plot(-traj_seq[0,mask,i], color='red',  label = f' ground truth')
#     axs[i].plot(-predicted_traj[mask,i], color='blue', label=f'predict')
#     axs[i].set_xlabel('time')
#     axs[i].set_ylabel(task_dims[i])
#     axs[i].set_title(f'{task_dims[i]}-axis vs Time')

# plt.show()

In [None]:
F.mse_loss(predicted_traj_tf, traj_seq[:,:,:n_dims])

In [None]:
distances = []
task_names = ["Pick&Place", "Pouring", "Pucking"]
for i, file in enumerate(task_files):
    for demo_input in plot_dataloader:
        obj_seq, traj_seq, traj_target = demo_input
        if to_obj_index(task_tags[file])!=to_obj_index(obj_seq[0,0,-n_tasks:]): continue
        obj_seq_input = obj_seq.to(device)
        traj_target_input = traj_target.to(device)
        predicted_traj_tf = new_model(obj_seq_input, traj_target_input).cpu()
        predicted_traj = predicted_traj_tf.detach().numpy()[0]
        mask = traj_seq[0,:,n_dims]==1
        d = get_position_difference_per_step(predicted_traj[mask,:3], traj_seq.numpy()[0,mask,:3])*train_std
        distances.append([d[0], d[-1], np.mean(d)])
    mean_ds = np.mean(np.array(distances), axis = 0)
    print(f"{task_names[i]}({n_dims}D) -- Average distance: {mean_ds[-1]}mm\nStart: {mean_ds[0]}mm, End: {mean_ds[1]}mm")

In [None]:
Pick&Place(7D) -- Average distance: 62.9868538975021mm
Start: 42.26923237081197mm, End: 79.4552935564277mm
Pouring(7D) -- Average distance: 58.23420816977872mm
Start: 37.770702922837636mm, End: 101.59201766144635mm
Pucking(7D) -- Average distance: 55.91966660978153mm
Start: 36.81040472164073mm, End: 84.82568633828863mm

In [None]:
Pick&Place(7D) -- Average distance: 36.98310315657745mm
Start: 21.2115110550846mm, End: 47.86264182557085mm
Pouring(7D) -- Average distance: 46.367497285512215mm
Start: 24.491665857492908mm, End: 87.40841715349492mm
Pucking(7D) -- Average distance: 47.31723368561045mm
Start: 27.708466152663124mm, End: 75.37036924421082mm

In [None]:
Pick&Place(7D) -- Average distance: 29.20051638363695mm
Start: 12.295744638832234mm, End: 34.171612610837954mm
Pouring(7D) -- Average distance: 39.874439044829494mm
Start: 16.50106556405996mm, End: 72.80145115882218mm
Pucking(7D) -- Average distance: 42.77711302522513mm
Start: 20.29008394561259mm, End: 64.28164246493601mm