In [1]:
import copy
import math
import random
import numpy as np

In [2]:
# Load the stored layouts first, then their matching target layouts.
datas, targets = np.load('data/data.npy'), np.load('data/target.npy')
# Show both array shapes as the cell result.
(datas.shape, targets.shape)

((10, 20, 20), (10, 20, 20))

In [3]:
from collections import deque

# Bounded sample memory: once 1000 entries are held, appending drops the oldest.
memory = deque(maxlen=1000)


def append_sample(state, mask, target, reward):
    """Append one ``(state, mask, target, reward)`` tuple to ``memory``.

    The arguments are stored exactly as passed in; no copies are made.
    """
    sample = (state, mask, target, reward)
    memory.append(sample)

In [4]:
def get_furniture_list(data, max_id=16):
    """Extract every furniture piece present in a layout grid.

    Each cell of ``data`` holds a furniture id (cells that match no id in
    ``1..max_id`` are ignored). For every id that occurs, the cells carrying
    that id are gathered row by row; rows without the id are dropped and the
    remaining rows are stacked into one array.

    Args:
        data: 2-D grid of ids (any size; the grid's own shape is used).
        max_id: Largest id to look for. Ids ``1..max_id`` are scanned.

    Returns:
        A dict mapping each id found (in ascending order) to an array filled
        with that id, with one row per grid row the piece touches. Ids that
        never occur are absent from the dict.

    Note:
        The rows are stacked with ``np.array``, so a piece is expected to
        cover the same number of cells in every row it touches.
    """
    grid = np.asarray(data)
    furnitures = {}

    for furniture_id in range(1, max_id + 1):
        # Keep, for every grid row, only the cells that belong to this piece.
        piece_rows = []
        for grid_row in grid:
            cells = grid_row[grid_row == furniture_id]
            if cells.size:
                piece_rows.append(cells)

        # Pieces that do not occur in the layout get no entry at all.
        if piece_rows:
            furnitures[furniture_id] = np.array(piece_rows)

    return furnitures

In [5]:
def get_mask(state, f):
    """Compute where the top-left corner of piece ``f`` may be placed.

    Args:
        state: 2-D grid; a cell equal to 0 is free, any other value is
            occupied.
        f: 2-D array whose ``shape`` (height, width) is the footprint to place.

    Returns:
        A float array with the same shape as ``state``. Entry ``[y][x]`` is 1
        when the footprint anchored at ``(y, x)`` lies fully inside the grid
        and covers only free cells, and 0 otherwise.
    """
    grid = np.asarray(state)
    rows, cols = grid.shape
    height, width = f.shape[0], f.shape[1]

    free = grid == 0
    mask = np.zeros((rows, cols))

    # Anchors beyond these bounds would push the footprint off the grid, so
    # they stay 0 without being examined. The ranges are empty when the piece
    # is larger than the grid.
    for y in range(min(rows, rows - height + 1)):
        for x in range(min(cols, cols - width + 1)):
            # The anchor cell must be free, and so must every cell under the
            # footprint.
            if free[y, x] and free[y:y + height, x:x + width].all():
                mask[y, x] = 1

    return mask

In [6]:
def get_action_space(mask):
    """List every anchor position that ``mask`` marks with 1.

    Args:
        mask: 2-D array (any size) in which a value of 1 marks a legal anchor.

    Returns:
        A list of ``(row, col)`` tuples of plain ints in row-major order;
        empty when no entry equals 1.
    """
    # argwhere reports matching indices in row-major order; tolist() turns
    # them into plain Python ints so the tuples work directly with range().
    hits = np.argwhere(np.asarray(mask) == 1)
    return [(row, col) for row, col in hits.tolist()]

In [7]:
def batch_object(state, furniture, action):
    """Write a furniture piece into ``state`` in place.

    The value of the piece's top-left cell is written to every cell of the
    rectangle whose top-left corner is ``action`` (row, col) and whose size is
    ``furniture.shape``. Nothing is returned.
    """
    label = furniture[0][0]
    extent = furniture.shape
    top = action[0]
    left = action[1]

    for row_offset in range(extent[0]):
        for col_offset in range(extent[1]):
            state[top + row_offset][left + col_offset] = label

In [8]:
def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points."""
    delta_first = p1[0] - p2[0]
    delta_second = p1[1] - p2[1]
    return math.sqrt(delta_first ** 2 + delta_second ** 2)

In [9]:
def calculate_reward(d_pos_list, t_pos_list):
    """Return the negated sum of pairwise distances between two position lists.

    Positions are paired in order; pairing stops at the end of the shorter
    list.
    """
    total = 0

    for pair in zip(d_pos_list, t_pos_list):
        # Accumulate one distance at a time, in list order.
        total = total + distance(pair[0], pair[1])

    return -total

In [10]:
def _first_cell_equal_to(grid, key):
    """Return ``(row, col)`` of the first row-major cell equal to ``key``, or ``(-1, -1)``."""
    hits = np.argwhere(np.asarray(grid) == key)
    if len(hits) == 0:
        return (-1, -1)
    # argwhere lists matches in row-major order, so the first hit is the
    # top-most, then left-most, matching cell.
    return (int(hits[0][0]), int(hits[0][1]))


def get_pos_list(state, target, furniture):
    """Locate every furniture piece in the current state and in the target.

    Args:
        state: 2-D grid of furniture ids for the current placement.
        target: 2-D grid of furniture ids for the desired placement.
        furniture: Mapping whose keys are the furniture ids to look up.

    Returns:
        ``(d_pos_list, t_pos_list)``: two lists with one ``(row, col)`` tuple
        per furniture id, in the mapping's iteration order. Each tuple is the
        first cell (row-major) holding that id in ``state`` / ``target``, or
        ``(-1, -1)`` when the id does not occur in that grid.
    """
    d_pos_list = []
    t_pos_list = []

    for key, _ in furniture.items():
        d_pos_list.append(_first_cell_equal_to(state, key))
        t_pos_list.append(_first_cell_equal_to(target, key))

    return d_pos_list, t_pos_list

In [20]:
# Roll out one random placement episode per layout and record every step.
for i, (data, target) in enumerate(zip(datas, targets)):
    # Each episode starts from an empty grid of the layout's size.
    state = np.zeros(data.shape)
    furniture = get_furniture_list(data)
    print(i)
    for key, f in furniture.items():
        mask = get_mask(state, f)
        action_space = get_action_space(mask)
        if not action_space:
            # No legal anchor is left for this piece: fixed penalty, and the
            # grid is left unchanged.
            reward = -500
        else:
            # Place the piece at a uniformly random legal anchor.
            action = random.choice(action_space)
            batch_object(state, f, action)
            d_pos_list, t_pos_list = get_pos_list(state, target, furniture)
            reward = calculate_reward(d_pos_list, t_pos_list)
        print(reward)

        # `state` is mutated in place by later placements, so store a snapshot;
        # otherwise every sample of this episode would alias the same array
        # and end up showing the final grid.
        append_sample(state.copy(), mask, target, reward)
    print('')

0
-72.99230571198827
-66.60218131856713
-62.13127076498324
-50.385083063823316

1
-23.595995606264097
-40.97007627194694
-48.401020844495825
-53.72001256518899

2
-35.45584412271571
-35.66488620013075
-41.74764873042897
-31.425130167877285

3
-59.0547440633889
-56.32391424558824
-64.53077639811254
-52.79404427318775
-51.91132186353546

4
-53.84625306377854
-67.19686256163884
-76.11216174642648
-58.6479125498535
-44.61461617148059

5
-41.16257399332898
-48.60555405674399
-62.20702456547943
-42.952739986254535

6
-53.1061568884413
-64.4981918009339
-61.46489542256099
-61.18986401964274
-62.13447888150532
-60.44857587677435

7
-53.98986120397865
-56.18119891706954
-68.33691388755159
-69.89678981749474
-55.855159257152124

8
-41.55772648675293
-39.66864256860248
-37.43593958375732

9
-59.26536215611305
-65.85114859373996
-62.78889084544141
-57.74352982825415
-500
-59.45847430438574
-74.32454305170424



In [22]:
# Draw three distinct stored samples at random from the memory.
batch = random.sample(memory, k=3)

In [3]:
# Take the first layout and its target for inspection.
data, target = datas[0], targets[0]
# Show both shapes as the cell result.
(data.shape, target.shape)

((20, 20), (20, 20))

In [None]:
# Start from an empty 20x20 grid (every cell 0).
state = np.zeros(shape=(20, 20))