In [None]:
import os
import random
import math
import time
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from collections import deque
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, concatenate, Conv2D, MaxPooling2D

from libs.utils import *
from libs.generate_boxes import  *

In [None]:
# Logging configuration: request the most verbose native TensorFlow logs and
# INFO-level Python-side logs.
# NOTE(review): tensorflow is already imported in the first cell; this variable
# is presumably read when TensorFlow loads, so setting it here may have no
# effect -- confirm, and move it above the import if it is needed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
tf.get_logger().setLevel('INFO')
# NOTE(review): the return value is discarded and this is not the last
# expression of the cell, so nothing is displayed or changed by this call.
tf.keras.backend.floatx()

# Global matplotlib defaults used by every figure that does not override them.
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (20,10)

In [None]:
def vis_box(sizes,positions,fs=(10,5), title=''):
    """Draw the packed boxes from four camera angles and return the colours used.

    One colour per box is requested from `get_colors(len(positions))`. The boxes
    are then rendered in a 1x4 row of 3D subplots on the figure named 'SPLTV':
    the default 3D view followed by the views titled '(z)', '(x)' and '(y)'.

    Args:
        sizes: box sizes, forwarded unchanged to `plotCubeAt2`.
        positions: box positions, forwarded unchanged to `plotCubeAt2`; its
            length decides how many colours are generated.
        fs: accepted for backward compatibility but not used -- the figure
            size is fixed at (15, 5).
        title: title of the first panel; the other panels append their suffix.

    Returns:
        The colours returned by `get_colors`, so that a later call to a
        colour-taking plot function can reuse the same box-to-colour mapping.
    """
    colors = get_colors(len(positions))
    plt.figure('SPLTV', figsize=(15,5))

    # (subplot code, title suffix, (elev, azim) passed to view_init or None to
    # keep matplotlib's default camera).
    panels = (
        (141, '', None),
        (142, '(z)', (90, -90)),
        (143, '(x)', (0, 0)),
        (144, '(y)', (0, -90)),
    )
    for subplot_code, suffix, view in panels:
        ax = plt.subplot(subplot_code, projection='3d')
        plt.title(title + suffix)
        # A fresh collection is built per panel, exactly as the original
        # four hand-written sections did.
        pc = plotCubeAt2(positions,sizes,colors=colors, edgecolor="w")
        ax.add_collection3d(pc)
        ax.set_xlim([0,20])
        ax.set_ylim([0,20])
        ax.set_zlim([0,20])
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_zlabel('z')
        if view is not None:
            ax.view_init(*view)

    plt.show()

    return colors

In [None]:
def vis_box_colors(colors,sizes,positions,fs=(10,5), title=''):
    """Draw the packed boxes from four camera angles using caller-supplied colours.

    The boxes are rendered in a 1x4 row of 3D subplots on the figure named
    'SPLTV': the default 3D view followed by the views titled '(z)', '(x)'
    and '(y)'.

    Args:
        colors: colours forwarded unchanged to `plotCubeAt2`.
        sizes: box sizes, forwarded unchanged to `plotCubeAt2`.
        positions: box positions, forwarded unchanged to `plotCubeAt2`.
        fs: accepted for backward compatibility but not used -- the figure
            size is fixed at (15, 5).
        title: title of the first panel; the other panels append their suffix.

    Returns:
        None. The figure is shown with `plt.show()`.
    """
    plt.figure('SPLTV', figsize=(15,5))

    # (subplot code, title suffix, (elev, azim) passed to view_init or None to
    # keep matplotlib's default camera).
    panels = (
        (141, '', None),
        (142, '(z)', (90, -90)),
        (143, '(x)', (0, 0)),
        (144, '(y)', (0, -90)),
    )
    for subplot_code, suffix, view in panels:
        ax = plt.subplot(subplot_code, projection='3d')
        plt.title(title + suffix)
        # A fresh collection is built per panel, exactly as the original
        # four hand-written sections did.
        pc = plotCubeAt2(positions,sizes,colors=colors, edgecolor="w")
        ax.add_collection3d(pc)
        ax.set_xlim([0,20])
        ax.set_ylim([0,20])
        ax.set_zlim([0,20])
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_zlabel('z')
        if view is not None:
            ax.view_init(*view)

    plt.show()

In [16]:
class ActorCritic(tf.keras.Model):
    """Three-branch convolutional network producing `output_size` values per sample.

    Branches (named after the attributes they use):
      * case   -- two Conv2D layers, each followed by 2x2 max pooling, on input 0;
      * select -- same structure, separate weights, on input 1;
      * remain -- two Conv1D layers on input 2.
    The flattened branch outputs are concatenated and passed through two
    256-unit ReLU dense layers and a final linear dense layer.

    Args:
        state_size: shape hint for the case branch's first layer.
        selected_size: shape hint for the select branch's first layer.
        remain_size: shape hint for the remain branch's first layer.
        output_size: number of units of the final (linear) dense layer.

    NOTE(review): `input_shape` on layers of a subclassed model is only a hint;
    the layers are actually built from the first tensors passed to `call`.
    """
    def __init__(self, state_size, selected_size, remain_size, output_size):
        super(ActorCritic, self).__init__()
        # Fixed: the case branch previously received `selected_size`, leaving
        # the `state_size` argument unused.
        self.case_cnn1 = Conv2D(filters=16, kernel_size=3, activation='relu', padding="valid", input_shape = state_size)
        self.case_cnn2 = Conv2D(filters=16, kernel_size=3, activation='relu', padding="valid")

        self.select_cnn1 = Conv2D(filters=16, kernel_size=3, activation='relu', padding="valid", input_shape = selected_size)
        self.select_cnn2 = Conv2D(filters=16, kernel_size=3, activation='relu', padding="valid")

        self.remain_cnn1 = Conv1D(filters=32, kernel_size=2, activation='relu', padding="same", input_shape = remain_size )
        self.remain_cnn2 = Conv1D(filters=32, kernel_size=2, activation='relu', padding="same")

        # Weight-free layers are created once here instead of on every
        # forward pass inside `call`; being stateless they can be shared.
        self.pool2d = MaxPooling2D(pool_size=(2, 2))
        # pool_size=1 is kept from the original architecture.
        self.remain_pool = MaxPooling1D(pool_size=1)
        self.flatten = Flatten()

        self.d1 = Dense(256, activation='relu')
        self.d2 = Dense(256, activation='relu')
        self.out = Dense(output_size)

    def call(self, cb_list):
        """Run the forward pass.

        Args:
            cb_list: indexable holding the three branch inputs in the order
                (case, select, remain). Presumably batched height maps,
                candidate placement volumes and remaining-box lists --
                TODO confirm against the code that builds the inputs.

        Returns:
            Tensor with `output_size` values per sample (no activation).
        """
        c, s, r = cb_list[0], cb_list[1], cb_list[2]

        c = self.pool2d(self.case_cnn1(c))
        c = self.pool2d(self.case_cnn2(c))
        c = self.flatten(c)

        s = self.pool2d(self.select_cnn1(s))
        s = self.pool2d(self.select_cnn2(s))
        s = self.flatten(s)

        r = self.remain_cnn1(r)
        r = self.remain_cnn2(r)
        r = self.remain_pool(r)
        r = self.flatten(r)

        x = concatenate([c,s, r])
        x = self.d1(x)
        x = self.d2(x)
        q = self.out(x)
        return q

NameError: name 'tf' is not defined

In [None]:
class ActorCriticAgent:
    """Agent holding two `ActorCritic` networks ("actor" and "critic") and a replay memory.

    Both networks are built with the same sizes and `output_size = 1`, and both
    are regressed towards the same targets in `train`. `gamma` is stored but not
    read inside this class; the target weights used in `train` are the literal
    constants kept from the original formula.

    Args:
        L, B, H: container dimensions; they define `state_size = (L, B, 1)` and
            `selected_size = (L, B, H)`.
        n_remains: number of rows of the remaining-box input, `remain_size = (n_remains, 3)`.
        lr: learning rate of both Adam optimizers.
        gamma: discount factor, exposed as `self.gamma` for callers.
    """
    def __init__(self, L=20,B=20,H=20,n_remains=5,lr=1e-8,gamma=0.99):
        self.state_size = (L,B,1)
        self.selected_size = (L,B,H)
        self.remain_size = (n_remains, 3)
        self.output_size = 1

        self.lr = lr
        self.gamma = gamma

        self.actor = ActorCritic(self.state_size, self.selected_size,
                          self.remain_size, self.output_size)
        self.critic = ActorCritic(self.state_size, self.selected_size,
                           self.remain_size, self.output_size)

        self.actor_optimizer = Adam(learning_rate = self.lr)
        self.critic_optimizer = Adam(learning_rate = self.lr)

        self.memory = deque(maxlen=500)

        # Running sums of the losses; the caller resets them.
        self.avg_actor_loss = 0
        self.avg_critic_loss = 0

    def append_sample(self, history, s_boxes, remains, action, reward, last, t_history, t_s_boxes, t_remains):
        """Store one transition as a 9-tuple, in exactly the argument order."""
        self.memory.append((history, s_boxes, remains, action, reward, last, t_history, t_s_boxes, t_remains))

    def get_action(self, state, s_locs, r_boxes):
        """Return the row index of the highest actor output.

        The three arguments are passed to the actor as one batch (one row per
        candidate); ties resolve to the first maximum in row-major order.
        """
        q_values = np.asarray(self.actor([state, s_locs, r_boxes]))
        best_rows = np.where(q_values == q_values.max())[0]
        return best_rows[0]

    # The two loss hooks were declared without `self`, so calling them on an
    # instance raised TypeError. As static methods they are callable both on
    # the class (as before) and on instances. They remain unimplemented stubs.
    @staticmethod
    def get_actor_loss():
        pass

    @staticmethod
    def get_critic_loss():
        pass

    def train(self):
        """Run one gradient step on both networks using every stored transition.

        Target for sample i (constants kept from the original code):
            (1 - 0.875) * reward_i + (1 - done_i) * 0.75 * max(critic(next_i))
        Both networks are trained with the mean squared error between that
        target and their own prediction; the losses are added to
        `avg_actor_loss` / `avg_critic_loss`.

        Fixes relative to the previous implementation:
          * targets had shape (N, 1) and predictions shape (N,), so their
            difference broadcast to (N, N) and the loss averaged over all
            pairs; both are now 1-D of length N.
          * with `output_size == 1` the prediction of a sample is its single
            output; it was previously gathered at column `action`, which is
            out of range for any action > 0 (tf.gather_nd errors on CPU and
            yields 0 on GPU for out-of-range indices).
          * the per-sample target forward passes no longer run inside the
            gradient tapes (targets never carried gradients).
          * an empty memory is a no-op instead of feeding empty batches.
        """
        batch = random.sample(self.memory, len(self.memory))
        n_samples = len(batch)
        if n_samples == 0:
            return

        history = np.array([sample[0] for sample in batch])
        s_boxes = np.array([sample[1] for sample in batch])
        remains = np.array([sample[2] for sample in batch])
        action = np.array([sample[3] for sample in batch])
        reward = np.array([sample[4] for sample in batch], dtype=np.float32)
        dones = np.array([sample[5] for sample in batch], dtype=np.float32)
        # Next-state inputs stay as lists: each entry is evaluated separately.
        next_history = [sample[6] for sample in batch]
        next_s_boxes = [sample[7] for sample in batch]
        next_remains = [sample[8] for sample in batch]

        # NOTE(review): these weights are hard-coded and `self.gamma` is not
        # used here -- confirm that this is intentional.
        reward_weight = 1 - 0.875
        next_value_weight = 0.75

        targets = np.empty(n_samples, dtype=np.float32)
        for i in range(n_samples):
            next_value = self.critic([next_history[i],next_s_boxes[i],
                                     next_remains[i]])
            next_max_value = float(tf.math.reduce_max(next_value))
            targets[i] = (reward_weight*reward[i]
                          + (1-dones[i])*next_value_weight*next_max_value)

        with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
            actor = self.actor([history, s_boxes, remains])
            value = self.critic([history, s_boxes, remains])

            if self.output_size == 1:
                # One output per sample: flatten (N, 1) -> (N,).
                acts = tf.reshape(actor, [-1])
                predicts = tf.reshape(value, [-1])
            else:
                # Multi-output networks: pick the column of the taken action.
                action_idx = np.stack([np.arange(n_samples),action],axis=1)
                acts = tf.gather_nd(actor, action_idx)
                predicts = tf.gather_nd(value, action_idx)

            target_t = tf.cast(targets, acts.dtype)
            actor_loss = tf.reduce_mean(tf.square(target_t - acts))
            critic_loss = tf.reduce_mean(tf.square(target_t - predicts))

        self.avg_actor_loss += actor_loss.numpy()
        self.avg_critic_loss += critic_loss.numpy()

        actor_grads = actor_tape.gradient(actor_loss, self.actor.trainable_variables)
        critic_grads = critic_tape.gradient(critic_loss, self.critic.trainable_variables)
        self.actor_optimizer.apply_gradients(zip(actor_grads,
                                                self.actor.trainable_variables))
        self.critic_optimizer.apply_gradients(zip(critic_grads,
                                                 self.critic.trainable_variables))

In [None]:
class Bpp3DEnv():#(gym.Env):
    """Occupancy-grid environment for 3D bin packing.

    State:
        container   -- (length, breadth, height) array, 1 where a cell is filled.
        container_h -- (length, breadth) height map: for each column, one plus
                       the highest filled z index, or 0 if the column is empty.
    """
    #metadata = {'render.modes': ['human']}
    #
    def __init__(self,length=20, breadth=20, height=20):
        super(Bpp3DEnv, self).__init__()
        self.length=length
        self.breadth=breadth
        self.height=height
        self.container_h=np.zeros((self.length,self.breadth))
        self.container=np.zeros((self.length, self.breadth, self.height))

    def update_h(self):
        """Recompute `container_h` from `container` and return it.

        Replaces the previous DataFrame/groupby round trip with a direct NumPy
        reduction; the result is the same height map.
        """
        xs, ys, zs = np.nonzero(self.container == 1)
        self.reset_h()
        # For every filled cell raise its column to z + 1; columns without any
        # filled cell keep the 0 written by reset_h().
        np.maximum.at(self.container_h, (xs, ys), zs + 1)
        return self.container_h

    def convert_state(self, new_container):
        """Replace the occupancy grid and refresh the height map."""
        self.container = new_container
        self.update_h()

    def next_state(self, upleft,bxl,bxb,bxh):
        """Return the grid and height map after placing a box, without mutating the env.

        Args:
            upleft: indexable (x, y) of the box's corner with the smallest x and y.
            bxl, bxb, bxh: box extent along x, y and z.

        Returns:
            (next_container, next_container_h). The box rests on the highest
            point of the current height map under its footprint.

        NOTE(review): no bounds check is performed. Slices that run past the
        array are clipped by NumPy, so a box that is too tall is truncated in
        `next_container` while `next_container_h` still records its full top.
        """
        next_container_h = self.container_h.copy()
        loading_area_h = self.container_h[upleft[0]:upleft[0]+bxl, upleft[1]:upleft[1]+bxb]
        max_h = np.max(loading_area_h).astype('int')
        next_container_h[upleft[0]:upleft[0]+bxl,upleft[1]:upleft[1]+bxb] = bxh + max_h

        next_container = self.container.copy()
        next_container[upleft[0]:upleft[0]+bxl, upleft[1]:upleft[1]+bxb, max_h:bxh + max_h] = 1

        return next_container, next_container_h

    def step(self, upleft,bxl,bxb,bxh):
        """Place a box, commit the new grid, and return it.

        The height map is rebuilt from the committed grid by `convert_state`;
        the height map computed by `next_state` is not used.
        """
        n_s, n_h = self.next_state(upleft,bxl,bxb,bxh)
        self.convert_state(n_s)
        return n_s

    def reset(self):
        """Empty the container and zero the height map."""
        self.container = np.zeros((self.length,self.breadth, self.height))
        self.container_h = np.zeros((self.length,self.breadth))

    def reset_h(self):
        """Zero the height map only."""
        self.container_h = np.zeros((self.length,self.breadth))

    def terminal_reward(self):
        """Return the fill ratio: sum of the grid divided by the container volume."""
        return np.sum(self.container)/(self.length*self.breadth*self.height)

In [None]:
# Load the preprocessed U1 data (pickle) and its bounding boxes (CSV).
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
# NOTE(review): the two files are read from differently named directories
# ('Data/preprocessed_data' vs 'preprocessed_data_bbox') -- confirm both paths.
with open('Data/preprocessed_data/U1_PP_r.pickle', 'rb') as handle:
    data = pickle.load(handle)
bbox = pd.read_csv('preprocessed_data_bbox/U1_bbox.csv')

# Floor-divide every entry of `data` by 10, in place.
# Presumably a unit / grid-resolution conversion -- TODO confirm.
for i in range(len(data)): data[i] = data[i]//10
    
# Scale the bounding boxes by 0.93, divide by 10 and round up to integers.
# `bbox` changes from a DataFrame to an integer ndarray here.
# NOTE(review): the meaning of the 0.93 factor is not visible here -- document it.
bbox = np.ceil(( bbox.values * 0.93)/10).astype('int')
# Last expression of the cell: displays the resulting array.
bbox

In [None]:
# Number of training episodes.
max_episode = 1000
# Forwarded to generation_3dbox_random as N_mdd.
N_MDD = 5
# Maximum number of boxes selected per step (k = min(K, boxes left)).
K = 3
# Only the first N_candidates remaining boxes are offered to the selector.
N_candidates = 4

# Generate one instance up front only to size the agent's remaining-box input.
boxes, gt_pos = generation_3dbox_random(case_size=[[20,20,20]],min_s=1,
                                        N_mdd = N_MDD)
# Fixed: generation_3dbox_random returns one entry per requested case (the
# episode loop unpacks boxes[0]), so len(boxes) counted cases, not boxes, and
# the agent was built with n_remains = 1 - K.
num_max_boxes = len(boxes[0])
num_max_remain = num_max_boxes - K
env = Bpp3DEnv()
agent = ActorCriticAgent(L=20,B=20,H=20,n_remains=num_max_remain,
                        lr=0.001,gamma=0.99)

# Per-episode results collected across the whole run.
used_box_list, pred_pos_list = [], []
box_list, pos_list = [],[]
frac_l, avg_actor_loss, avg_critic_loss = [],[],[]
# Main training loop: one randomly generated packing instance per episode.
for episode in range(max_episode):
    # Draw a fresh instance; a single case is requested, so entry 0 is used.
    boxes, gt_pos = generation_3dbox_random(case_size=[[20,20,20]],min_s=1,
                                        N_mdd = N_MDD)
    boxes = boxes[0]
    gt_pos = gt_pos[0]

    box_list.append(boxes)
    pos_list.append(gt_pos)

    # Recomputed for this episode's box count; used for padding below.
    # NOTE(review): if the box count varies between episodes the network input
    # sizes presumably vary too -- confirm raw_to_input keeps them constant.
    num_max_boxes = len(boxes)
    num_max_remain = num_max_boxes - K
    st = time.time()
    env.reset()
    done = False
    step = 0
    # Debug output: sum of the container right after reset.
    print(np.sum(env.container))
    
    # Per-step records of the current episode (inputs of the chosen candidate)...
    history, used_boxes, remains, comb, pred_pos, actions, s_orders = [],[],[],[],[],[],[]
    # ...and the matching next-state records.
    next_history, next_comb, next_remains, next_s_orders = [],[],[],[]
    
    # Boxes not yet placed or discarded.
    r_boxes = np.array(np.array(boxes).copy())
    
    while not done:
        state = env.container.copy()
        k = min(K, len(r_boxes))
        step += 1
        
        # Build the candidate selections/orderings from the first N_candidates boxes.
        # (Semantics of these helpers are defined in libs -- not visible here.)
        selected = cbn_select_boxes(r_boxes[:N_candidates], k)
        s_order = get_selected_order(selected, k)
        
        state_h = env.update_h().copy()
        in_state, in_r_boxes = raw_to_input(state_h, s_order,
                                           r_boxes, num_max_remain)
        # Per-candidate results; every returned sequence is indexed by the
        # candidate index chosen below.
        s_loc_c, pred_pos_c, used_boxes_c, next_state_c, num_loaded_box_c, next_cube_c = get_selected_location(s_order, pred_pos, used_boxes, state)
        
        # Pick the candidate with the highest actor output.
        action_idx = agent.get_action(in_state, s_loc_c, in_r_boxes)
        num_loaded_box = num_loaded_box_c[action_idx]
        
        # Presumably: the chosen candidate managed to load at least one box -- TODO confirm.
        if num_loaded_box != 0:
            history.append(in_state[action_idx])
            remains.append(in_r_boxes[action_idx])
            s_orders.append(s_order[action_idx])
            comb.append(s_loc_c[action_idx])
            actions.append(action_idx)
            
            # Remove the boxes loaded in this step from the remaining set.
            new_used_boxes = get_remain(used_boxes, used_boxes_c[action_idx])
            r_boxes = get_remain(new_used_boxes, r_boxes)
            
            used_boxes = used_boxes_c[action_idx]
            pred_pos = pred_pos_c[action_idx]
            
            # Commit the chosen candidate's container; this also refreshes the height map.
            env.convert_state(next_cube_c[action_idx])
            
            next_state = env.container.copy()
            next_state_h = env.container_h.copy()
            
            next_history.append(next_state_h)      
            if len(r_boxes) == 0:
                # Terminal step: store zero placeholders for the next-state inputs.
                done = True
                next_remains.append(np.zeros((num_max_remain, 3)))
                next_comb.append(np.zeros((1,20,20,20)))
                next_s_orders.append(np.zeros((1,1,3)))
            else:
                # Non-terminal step: precompute the next state's candidates so
                # the critic can be evaluated on them during training.
                next_remains.append(r_boxes)
                k = min(K, len(r_boxes))
                selected = cbn_select_boxes(r_boxes[:N_candidates], k)
                s_order = get_selected_order(selected, k)
                s_loc_c,_,_,_,_,_ = get_selected_location(s_order, pred_pos,
                                                         used_boxes, next_state)
                next_comb.append(s_loc_c)
                next_s_orders.append(s_order)
        else:
            # Nothing was loaded: drop the chosen selection from the remaining
            # boxes; no transition is recorded for this step.
            r_boxes = get_remain(s_order[action_idx], r_boxes)
            if len(r_boxes) == 0:
                done = True
                
        if done:
            # Episode finished: the fill ratio is the only reward signal.
            terminal_reward = env.terminal_reward()
            frac_l.append(env.terminal_reward())
            
            is_last = False
            N = len(history)
            for i in range(N):
                if i == N-1:
                    is_last = True
                # Each recorded step gets the terminal reward discounted by its
                # distance from the last recorded step.
                reward = (agent.gamma ** (N-i-1)) * terminal_reward
                in_next_history, in_next_remains = raw_to_input(next_history[i],
                                                               next_s_orders[i],
                                                               next_remains[i],
                                                               num_max_remain)
                
                agent.append_sample(history[i], comb[i], remains[i], actions[i],
                                   reward, is_last, in_next_history,
                                   next_comb[i], in_next_remains)
            # One training call per episode over everything stored above.
            # NOTE(review): if no step loaded a box, N is 0 and train() is
            # called with an empty memory -- confirm this is handled.
            agent.train()
            # NOTE(review): train() runs once per episode, so the accumulated
            # loss is a single batch loss; dividing by `step` is not an average
            # over training calls -- confirm the intended metric.
            avg_actor_loss.append(agent.avg_actor_loss / float(step))
            avg_critic_loss.append(agent.avg_critic_loss / float(step))
            used_box_list.append(used_boxes)
            pred_pos_list.append(pred_pos)

    # Per-episode progress line.
    log = "=====episode: {:5d} | ".format(episode)
    log += "memory length: {:5d} | ".format(len(agent.memory))
    log += "env.terminal_reward(): {:.3f} | ".format(env.terminal_reward())
    log += "avg actor loss : {:6f} ".format(agent.avg_actor_loss / float(step))
    log += "avg critic loss : {:6f} ".format(agent.avg_critic_loss / float(step))
    log += "time: {:.3f}".format(time.time()-st)
    print(log)
    
    # Reset the loss accumulators and drop all transitions, so the memory only
    # ever holds the current episode.
    agent.avg_actor_loss = 0
    agent.avg_critic_loss = 0
    agent.memory.clear()