In [1]:
import pickle
import itertools
import os
import random
import math
import time
import collections
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from collections import deque
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, concatenate, Conv2D, MaxPooling2D

from libs_cupy.utils import *
from libs_cupy.generate_boxes import  *
import cupy as cp

In [2]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
tf.get_logger().setLevel('INFO')
tf.keras.backend.floatx()

plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (20,10)

In [3]:
def load_bbox(r_boxes, env, agent):
    step = 0
    done = False
    
    used_boxes, pred_pos, actions = [],[],[]
    history, remains, comb, s_orders = [],[],[],[]
    next_history, next_comb, next_remains, next_s_orders = [],[],[],[]
    
    while not done:
        state = env.container.copy()
        state_h = env.update_h().copy()
        step += 1
        k = min(K, len(r_boxes))
        
        selected = cbn_select_boxes(r_boxes[:n_candidates], k)
        s_order = get_selected_order(selected, k)
        s_loc_c, pred_pos_c, used_boxes_c, next_state_c, num_loaded_box_c,next_cube_c = get_selected_location(s_order, pred_pos, used_boxes, state)
        loaded_mh_c = cp.array([get_loaded_mh(s_loc, env.length, env.breadth,
                                             env.height) for s_loc in s_loc_c])
        in_state, in_r_boxes = raw_to_input(state_h, s_order, r_boxes,
                                          num_max_remain, env.height)
        
        action_idx = agent.get_action(in_state, loaded_mh_c, in_r_boxes)
        print(len(loaded_mh_c), action_idx)
        
        history.append(in_state[action_idx])
        loaded_mh = get_loaded_mh(s_loc_c[action_idx], env.length,
                                 env.breadth, env.height)
        comb.append(loaded_mh)
        actions.append(action_idx)
        remains.append(in_r_boxes[action_idx])
        
        env.convert_state(next_cube_c[action_idx])
        
        next_state = env.container.copy()
        next_state_h = env.container_h.copy()
        next_history.append(next_state_h)
        
        num_loaded_box = num_loaded_box_c[action_idx]
        if num_loaded_box != 0:
            new_used_boxes = get_remain(used_boxes, used_boxes_c[action_idx])
            r_boxes = get_remain(new_used_boxes, r_boxes)
        else:
            r_boxes = get_remain(s_order[action_idx], r_boxes)
        
        used_boxes = used_boxes_c[action_idx]
        pred_pos = pred_pos_c[action_idx]
        
        if len(r_boxes) == 0:
            done = True
            next_remains.append(cp.zeros((num_max_remain, 3)))
            next_comb.append(cp.zeros((1, env.length, env.breadth, 2)))
            next_s_orders.append(cp.zeros((1,1,3)))
        else:
            next_remains.append(r_boxes)
            k = min(K, len(r_boxes))
            selected = cbn_select_boxes(r_boxes[:n_candidates], k)
            s_order = get_selected_order(selected, k)
            s_loc_c,_,_,_,_,_ = get_selected_location(s_order, pred_pos,
                                                     used_boxes, next_state)
            loaded_mh_c = cp.array([get_loaded_mh(s_loc, env.length,env.breadth,
                                                  env.height) for s_loc in s_loc_c])
            next_comb.append(loaded_mh_c)
            next_s_orders.append(s_order)
            
            if cp.sum(env.container_h != env.height) == 0:
                done = True
    
    tr_b = env.terminal_reward()
    return tr_b, step, history, used_boxes, remains, comb, pred_pos, actions, s_orders, next_history, next_comb, next_remains, next_s_orders, r_boxes

In [4]:
mi = 'U1'; t='PP'
with open('data/preprocessed_packing/'+mi + '_'+ t +'_r.pickle', 'rb') as handle: 
    boxes_multi = pickle.load(handle)
for i in range(len(boxes_multi)):
    boxes_multi[i] = boxes_multi[i][:,:3]//10

bboxes = cp.load('data/preprocessed_pcked/bboxes1.npy')
bboxes = bboxes[:, :, :, :, 0]
for i in range(len(boxes_multi)):
    if cp.sum(boxes_multi[i][:,0]>198) or cp.sum(boxes_multi[i][:,1]>114) or cp.sum(boxes_multi[i][:,2]>110):
        print('n', i)

In [5]:
len(boxes_multi[0]), boxes_multi[0]

(32,
 array([[56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [56, 53, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24],
        [47, 35, 24]], dtype=int64))

In [6]:
class ActorCritic_UseCNN(tf.keras.Model):
    def __init__(self, state_size, selected_size, remain_size, output_size):
        super(ActorCritic_UseCNN, self).__init__()
        
        self.case_cnn1 = Conv2D(filters=16, kernel_size=3, activation='relu',
                               padding='valid', input_shape=state_size)
        self.case_cnn2 = Conv2D(filters=16, kernel_size=3, activation='relu',
                               padding='valid')
        
        self.select_cnn1 = Conv2D(filters=16, kernel_size=3, activation='relu',
                                 padding='valid', input_shape=selected_size)
        self.select_cnn2 = Conv2D(filters=16, kernel_size=3, activation='relu',
                                 padding='valid')
        
        cs_size = state_size[:2] + (state_size[2] + selected_size[2], )
        self.cs_cnn1 = Conv2D(filters=16, kernel_size=3, activation='relu',
                             padding='valid', input_shape=cs_size)
        self.cs_cnn2 = Conv2D(filters=16, kernel_size=3, activation='relu',
                             padding='valid')
        
        self.remain_cnn1 = Conv1D(filters=32, kernel_size=2, activation='relu',
                                 padding='same', input_shape=remain_size)
        self.remain_cnn2 = Conv1D(filters=32, kernel_size=2, activation='relu',
                                 padding='same')
        
        self.d1 = Dense(512, activation='relu')
        self.d2 = Dense(256, activation='relu')
        self.d3 = Dense(128, activation='relu')
        self.out = Dense(output_size)
    
    def call(self, cb_list):
        c,s,r = cb_list[0], cb_list[1], cb_list[2]
        cs = tf.concat([c,s],-1)
        
        c = self.case_cnn1(c)
        c = MaxPooling2D(pool_size=(2,2))(c)
        c = self.case_cnn2(c)
        c = MaxPooling2D(pool_size=(2,2))(c)
        c = Flatten()(c)
        
        s = self.select_cnn1(s)
        s = MaxPooling2D(pool_size=(2,2))(s)
        s = self.select_cnn2(s)
        s = MaxPooling2D(pool_size=(2,2))(s)
        s = Flatten()(s)
        
        cs = self.cs_cnn1(cs)
        cs = MaxPooling2D(pool_size=(2,2))(cs)
        cs = self.cs_cnn2(cs)
        cs = MaxPooling2D(pool_size=(2,2))(cs)
        cs = Flatten()(cs)
        
        r = self.remain_cnn1(r)
        r = self.remain_cnn2(r)
        r = MaxPooling1D(pool_size=1)(r)
        r = Flatten()(r)
        
        x = concatenate([c,s,cs,r])
        x = self.d1(x)
        x = self.d2(x)
        x = self.d3(x)
        q = self.out(x)
        return q

In [7]:
class ActorCritic_ExceptCNN(tf.keras.Model):
    def __init__(self, state_size, selected_size, remain_size, output_size):
        super(ActorCritic_ExceptCNN, self).__init__()
        
        l1, b1, k1 = state_size
        self.state_size = (l1 * b1 * k1, )
        self.case_dnn1 = Dense(256, activation='relu', input_shape = self.state_size)
        self.case_dnn2 = Dense(256, activation='relu')
        
        l2, b2, k2 = selected_size
        self.selected_size = (l2 * b2 * k2, )
        self.select_dnn1 = Dense(256, activation='relu', 
                                 input_shape = self.state_size)
        self.select_dnn2 = Dense(256, activation='relu')
        
        self.cs_size = (l1*b1*k1 + l2*b2*k2, )
        self.cs_dnn1 = Dense(256, activation='relu', input_shape = self.cs_size)
        self.cs_dnn2 = Dense(256, activation='relu')
        
        r, k = remain_size
        self.remain_size = (r*k, )
        self.remain_dnn1 = Dense(256, activation='relu', 
                                 input_shape=self.remain_size)
        self.remain_dnn2 = Dense(256, activation='relu')
        
        self.fc1 = Dense(512, activation='relu')
        self.fc2 = Dense(256, activation='relu')
        self.fc3 = Dense(128, activation='relu')
        self.fc_out = Dense(output_size)
        
    def call(self, cb_list):
        c,s,r = cb_list[0], cb_list[1], cb_list[2]
        
        c = tf.reshape(c, [-1, self.state_size[0]])
        s = tf.reshape(s, [-1, self.selected_size[0]])
        r = tf.reshape(r, [-1, self.remain_size[0]])
        cs = tf.concat([c,s], -1)
        
        c = self.case_dnn1(c)
        c = self.case_dnn2(c)
        
        s = self.select_dnn1(s)
        s = self.select_dnn2(s)
        
        cs = self.cs_dnn1(cs)
        cs = self.cs_dnn2(cs)
        
        r = self.remain_dnn1(r)
        r = self.remain_dnn2(r)
        
        x = concatenate([c,s,cs,r])
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        q = self.fc_out(x)
        return q

In [8]:
class ActorCritic_OnlyDense(tf.keras.Model):
    def __init__(self, state_size, selected_size, remain_size, output_size):
        super(ActorCritic_OnlyDense, self).__init__()
        
        self.d1 = Dense(512, activation='relu')
        self.d2 = Dense(256, activation='relu')
        self.d3 = Dense(128, activation='relu')
        self.out = Dense(output_size)
        
    def call(self, cb_list):
        c,s,r = cb_list[0], cb_list[1], cb_list[2]
        cs = tf.concat([c,s], -1)
        
        c = Flatten()(c)
        s = Flatten()(s)
        cs = Flatten()(cs)
        r = Flatten()(r)
        
        x = concatenate([c,s,cs,r])
        x = self.d1(x)
        x = self.d2(x)
        x = self.d3(x)
        q = self.out(x)
        
        return q

In [9]:
class ActorCriticAgent:
    def __init__(self, L=20,B=20,H=20,n_remains=5,lr=1e-8,gamma=0.99,model_num=1):
        
        self.state_size = (L,B,1)
        self.selected_size = (L,B,2)
        self.remain_size = (n_remains, 3)
        self.output_size = 1
        
        self.gamma = gamma
        self.lr = lr
        
        if model_num == 0: #Use CNN
            self.actor = ActorCritic_UseCNN(self.state_size, self.selected_size,
                                           self.remain_size, self.output_size)
            self.critic = ActorCritic_UseCNN(self.state_size, self.selected_size,
                                            self.remain_size, self.output_size)
        elif model_num == 1: #Use DNN
            self.actor = ActorCritic_ExceptCNN(self.state_size, self.selected_size,
                                              self.remain_size, self.output_size)
            self.critic = ActorCritic_ExceptCNN(self.state_size, self.selected_size,
                                               self.remain_size, self.output_size)
        else: #Use Small DNN
            self.actor = ActorCritic_OnlyDense(self.state_size, self.selected_size,
                                              self.remain_size, self.output_size)
            self.critic = ActorCritic_OnlyDense(self.state_size, self.selected_size,
                                               self.remain_size, self.output_size)
        
        self.memory = deque(maxlen=500)
        
        self.actor_optimizer = Adam(self.lr)
        self.critic_optimizer = Adam(self.lr)
        self.avg_actor_loss, self.avg_critic_loss = 0, 0
        
    def get_action(self, state, s_locs, r_boxes):
        state = cp.asnumpy(state)
        s_locs = cp.asnumpy(s_locs)
        r_boxes = cp.asnumpy(r_boxes)
        actor = self.actor([state, s_locs, r_boxes])
        print(actor)
        argmax_idx = np.where(actor == tf.math.reduce_max(actor))
        action_idx = argmax_idx[0][0]
        return action_idx
    
    def append_sample(self, history, s_boxes, remains, action, reward, last, t_history, t_s_boxes, t_remains):
        self.memory.append(( history, s_boxes, remains, action, reward, last, t_history, t_s_boxes, t_remains))
        
    def train(self):
        batch = random.sample(self.memory, len(self.memory))
        
        history = cp.array([sample[0] for sample in batch])
        s_boxes = cp.array([sample[1] for sample in batch])
        remains = cp.array([sample[2] for sample in batch])
        action = cp.array([sample[3] for sample in batch])
        reward = cp.array([sample[4] for sample in batch])
        dones = cp.array([sample[5] for sample in batch])
        next_history = [sample[6] for sample in batch]
        next_s_boxes = [sample[7] for sample in batch]
        next_remains = [sample[8] for sample in batch]
        
        with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
            actor = self.actor([history, s_boxes, remains])
            value = self.critic([history, s_boxes, remains])
            
            print('actor')
            print(actor)
            print('value')
            print(value)
            
            targets = []
            action_idx = cp.stack([cp.arange(len(self.memory)),action],axis=1)
            print(action_idx)
            acts = tf.gather_nd(actor, action_idx, batch_dims=0, name=None)
            predicts = tf.gather_nd(value, action_idx, batch_dims=0, name=None)
            
            for i in range(len(self.memory)):
                next_value = self.critic([next_history[i],next_s_boxes[i],
                                         next_remains[i]])
                next_max_value = tf.math.reduce_max(next_value)
                targets.append([(1-0.75)*reward[i] + (1-dones[i])*0.75*next_max_value])
            
            targets = cp.array(targets)
            actor_loss = tf.reduce_mean(tf.square(targets - acts))
            critic_loss = tf.reduce_mean(tf.square(targets - predicts))
            
            self.avg_actor_loss += actor_loss.numpy()
            self.avg_critic_loss += critic_loss.numpy()
        
        actor_grads = actor_tape.gradient(actor_loss, self.actor.trainable_variables)
        critic_grads = critic_tape.gradient(critic_loss, self.critic.trainable_variables)
        self.actor_optimizer.apply_gradients(zip(actor_grads,
                                                self.actor.trainable_variables))
        self.critic_optimizer.apply_gradients(zip(critic_grads,
                                                 self.critic.trainable_variables))

In [10]:
num_episode = 1000
env1 = Bpp3DEnv(198, 114, 110, bboxes[0])
env2 = Bpp3DEnv(198, 114, 110, bboxes[1])
env3 = Bpp3DEnv(198, 114, 110, bboxes[2])
env4 = Bpp3DEnv(198, 114, 110, bboxes[3])
env_all = [env1, env2, env3, env4]

tr_l, h_fill, avg_loss_l,history_eps,used_boxes_eps  = [],[],[],[],[]

num_max_boxes = 100 #len(boxes_multi)
K = 3
n_candidates = 3 #4
num_max_remain = num_max_boxes #-K
agent = ActorCriticAgent(L=198, B=114, H=110, n_remains = num_max_remain,  
                 lr=1e-4, gamma = 0.99, model_num = 1)
print('num_max_boxes',num_max_boxes,'num_max_remain',num_max_remain)

num_max_boxes 100 num_max_remain 100


In [11]:
agent = ActorCriticAgent(L=198, B=114, H=110, n_remains = num_max_remain,  
                 lr=1e-4, gamma = 0.99, model_num = 2)
st=time.time()
boxes_all = np.array(boxes_multi)[0].copy()
boxes_all = cp.array(boxes_all)
r_boxes = boxes_all.copy()
used_boxes, predicts, h_results, h_idx = [], [], [], []

while len(r_boxes) > 0:
    print('==================', len(used_boxes), r_boxes.shape)
    bbox_results = []
    for i in range(4):
        env_t = time.time()
        (env_all[i]).reset()
        r = load_bbox(r_boxes[:num_max_remain], env_all[i], agent)
        print(i, r[0], len(r[3]), time.time()-env_t)
        bbox_results.append(r)
    idx = np.argmax([bbox_results[0][0],bbox_results[1][0],bbox_results[2][0],bbox_results[3][0]])    
    tr_b, sp, history, u_b, remains, comb, pred_pos, actions, s_orders, \
        t_history, t_combs, t_remains, t_s_orders, r_b = bbox_results[idx]
    if len(u_b)==0:
          break
    h_results.append(bbox_results)
    h_idx.append(idx)
    used_boxes = used_boxes + u_b
    r_boxes = get_remain(used_boxes, boxes_all)

h_path = [r[i] for r,i in zip(h_results,h_idx) ]
print(time.time() - st)



  after removing the cwd from sys.path.


tf.Tensor([[-0.7240225]], shape=(1, 1), dtype=float32)
1 0


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()