In [2]:
from __future__ import division

from copy import copy

import numpy as np
import gym
from gym import spaces
import random

# import state_embedding as se

import numpy as np
import scipy as sp

from scipy.linalg import block_diag

import tensorflow as tf

class CREnv(gym.Env):

    def __init__(self, board_path="./board.csv"):
        super(CREnv, self).__init__()

        self.all_directions = [(1,0), (1,1), (0,1), (-1,1), (-1,0), (-1,-1), (0,-1), (1,-1)]

        self.state_shape = (6,)

        self.board_path = board_path
        n_actions = 3
        self.action_space = spaces.Discrete(n_actions)
        self.observation_space = spaces.Box(low=0, high=100, shape=self.state_shape, dtype=np.float32)

    def reset(self):

        self.board = np.genfromtxt(self.board_path, delimiter=',')

        self.head_value = 20
        
        self.path_length = 0

        self.connection = False
        self.collide = False

        self.obstacles = []

        # parse the board and get pins of each net
        self.nets = {}
        for i in range(self.board.shape[0]):
            for j in range(self.board.shape[1]):
                if abs(self.board[i,j])>=2:
                    net_idx = abs(self.board[i,j])
                    if net_idx in self.nets:
                        self.nets[net_idx].append((i,j))
                    else:
                        self.nets[net_idx] = [(i,j)]
                elif self.board[i,j]==1:
                    self.obstacles.append((i,j))

        self.other_nets = copy(self.nets)
        # initialize the action node
        self.paths45 = dict()
        self.pairs_idx = int(min(self.nets.keys()))
        self.max_pair = int(max(self.nets.keys()))
        self.head = self.nets[self.pairs_idx][0]
        self.target = self.nets[self.pairs_idx][1:]
        self.board[self.head] = self.head_value
        # self.other_nets.pop(self.pairs_idx)

        self.pre_head = self.find_ini_prehead()
        self.last_node = self.head
        self.board[self.head] = self.head_value

        # self.net_model = self.train_embedding_network()

        # state = self.board_embedding()

        # return state

    def check_direction(self, direction):

        x = self.head[0] + direction[0]
        y = self.head[1] + direction[1]
        mid_node = np.array(self.head)+np.array(direction)/2
        mid_node = tuple(mid_node)
        if 0 <= x < self.board.shape[0] and 0 <= y < self.board.shape[1]:
            if not self.paths45.get(mid_node):
                if (x,y) in self.target:
                    return 2
                elif self.board[(x,y)] == 0:
                    return 1
        return 0

    def find_ini_prehead(self):

        possible_ds = []
        for d in self.all_directions:
            if self.check_direction(d)>0:
                tem_target = (self.head[0]+d[0], self.head[1]+d[1])
                possible_ds.append(d)

        if len(possible_ds)>0:
            best_d = random.choice(possible_ds)
        else:
            best_d = random.choice(self.all_directions)

        return (self.head[0]-best_d[0], self.head[1]-best_d[1])

    # def board_embedding(self):
        
    #     # embed current nets
    #     current_net_vector = current_net(self.head, self.pre_head, self.target)

    #     # embed other nets
    #     node_features, adj_mat = nets_to_graph(self.other_nets)
    #     # print(node_features)
    #     # print(adj_mat)

    #     return current_net_vector

    # def train_embedding_network(self):
        
    #     # train network to embed nets
    #     from preprocessing import preprocess_graph, construct_feed_dict, sparse_to_tuple, mask_test_edges
    #     from model import GCNModelAE, GCNModelVAE

    #     import time
    #     # Settings
    #     learning_rate = 0.01

    #     features, adj_mat = nets_to_graph(self.nets)
    #     print(features.shape)
    #     print(adj_mat.shape)

    #     adj_norm = preprocess_graph(adj_mat)
    #     num_nodes = adj_mat.shape[0]

    #     num_features = features.shape[-1]
    #     features_nonzero = features.nnz

    #     model = GCNModelVAE(num_features, num_nodes, features_nonzero)

    #     pos_weight = float(adj_mat.shape[0] * adj_mat.shape[0] - adj_mat.sum()) / adj_mat.sum()
    #     norm = adj_mat.shape[0] * adj_mat.shape[0] / float((adj_mat.shape[0] * adj_mat.shape[0] - adj_mat.sum()) * 2)

    #     features = convert_sparse_matrix_to_sparse_tensor(features)
    #     adj_norm = convert_sparse_matrix_to_sparse_tensor(adj_norm)
    #     adj_mat = convert_sparse_matrix_to_sparse_tensor(adj_mat)

    #     adj_label = tf.reshape(tf.sparse.to_dense(adj_mat, validate_indices=False), [-1])
    #     adj_label = tf.cast(adj_label, tf.float32)
    #     features = tf.cast(features, tf.float32)
    #     adj_norm = tf.cast(adj_norm, tf.float32)

    #     optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    #     epochs = 20

    #     val_roc_score = []

    #     # Iterate over epochs.
    #     for epoch in range(epochs):
    #         print("Start of epoch %d" % (epoch,))
    #         t = time.time()
    #         with tf.GradientTape() as tape:
    #             reconstructed = model([features, adj_norm])
    #             loss = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(labels=adj_label, 
    #                                                                                 logits=reconstructed, 
    #                                                                                 pos_weight=pos_weight))
            
    #         correct_prediction = tf.math.equal(
    #                             tf.cast(tf.math.greater_equal(tf.math.sigmoid(reconstructed), 0.5), tf.int32),
    #                                     tf.cast(adj_label, tf.int32))
    #         accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    #         print("The loss is: {}; The accuracy is: {}.".format(loss.numpy(), accuracy.numpy()))
            
    #         grads = tape.gradient(loss, model.trainable_weights)
    #         optimizer.apply_gradients(zip(grads, model.trainable_weights))
        
    #     model.summary()
        
    #     model.save_weights('embedding', save_format='tf')
    #     return model

2021-09-11 13:04:37.219355: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


In [2]:
import tensorflow as tf

print('TensorFlow:', tf.__version__)


def mlp(hidden_sizes= (32, 64), output_size= 1, activation= 'relu', activation_output= None, kernel_initalizer= 'glorot_uniform', name= 'MLP'):
    """
        MLP - Multilayer Perceptron
        ---------------------------

            Hidden Sizes = [32,32] Size of HIDDEN Layers 
            Output Size = (1) Size of OUTPUT Layer
            Activation = RELU
            Output Activation  = NONE
            Kernel Initializer = glorot uniform
            bias inintializer =  ZEROS

    """
    model = tf.keras.Sequential(name=name)
    layer_idx = 1
    
    # model.add(tf.keras.layers.Dense(units=32, activation=activation, name= name+"input", kernel_initializer= kernel_initalizer, bias_initializer= 'zeros', input_shape=input_shape))

    for h in hidden_sizes:
        model.add(tf.keras.layers.Dense(units=h, activation=activation, name= name+str(layer_idx), kernel_initializer= kernel_initalizer, bias_initializer= 'zeros'))
        layer_idx += 1
    
    model.add(tf.keras.layers.Dense(units= output_size, activation= activation_output, name= name + '_output'))

    return model

def info_embedding(info_dim=[6, 4, 4, 2], embed_dim=[6,8,8,4], name="actor"):
    """
        info_dim contains feature dimensions of current net (6), other net (4), paths (4) and obstacles (2)
        embed_dim contains embeded dimensions of current net, other net, paths and obstacles
    """
    current_nets = tf.keras.Input(shape=(info_dim[0],), name=name+"_current_nets")

    other_net_embed_dim = embed_dim[1]
    other_net_name = name+"_other_net"
    model_other_net = mlp(hidden_sizes=[], output_size=other_net_embed_dim, activation='relu', 
                activation_output=None, name=other_net_name, kernel_initalizer='glorot_uniform')

    model_other_net.build(input_shape=(None,None,info_dim[1]))
    model_other_net.summary()

    paths_embed_dim = embed_dim[2]
    paths_name = name+"_paths"
    model_paths = mlp(hidden_sizes=[], output_size=paths_embed_dim, activation='relu', 
                activation_output=None, name=paths_name, kernel_initalizer='glorot_uniform')

    model_paths.build(input_shape=(None,None,info_dim[2]))
    model_paths.summary()

    obstacles_embed_dim = embed_dim[3]
    obstacles_name = name+"_obstacles"
    model_obstacles = mlp(hidden_sizes=[], output_size=obstacles_embed_dim, activation='relu', 
                activation_output=None, name=obstacles_name, kernel_initalizer='glorot_uniform')

    model_obstacles.build(input_shape=(None,None,info_dim[3]))
    model_obstacles.summary()

    aggregate_outputs = tf.keras.layers.Concatenate()([current_nets,tf.math.reduce_sum(model_other_net.output, 1),
                                                      tf.math.reduce_sum(model_paths.output,1),
                                                      tf.math.reduce_sum(model_obstacles.output,1)])

    aggregate_inputs = [current_nets, model_other_net.input, model_paths.input, model_obstacles.input]

    return aggregate_inputs, aggregate_outputs


def simple_actor_critic(hidden_sizes=(32, 32), activation='relu', activation_output=None, 
                        kernel_initalizer='glorot_uniform', name='simple_actor_critic'):

    info_dim = [6,4,4,2]
    embed_dim = [6,8,8,4]

    actor_embed_input, actor_embed_output = info_embedding(info_dim=info_dim, embed_dim=embed_dim, name='actor')
    critic_embed_input, critic_embed_output = info_embedding(info_dim=info_dim, embed_dim=embed_dim, name='critic')

    actor = mlp(hidden_sizes=hidden_sizes, output_size=4, activation=activation, 
                 activation_output=activation_output, name="actor", kernel_initalizer=kernel_initalizer)
    
    critic = mlp(hidden_sizes= hidden_sizes, output_size= 1, activation= activation, 
                  activation_output= activation_output, name="actor", kernel_initalizer= kernel_initalizer)

    print('Model Summary: ' + name)

    actor.build(input_shape = (None, sum(embed_dim)))

    critic.build(input_shape = (None, sum(embed_dim)))

    actor_out = actor(actor_embed_output)
    critic_out = critic(critic_embed_output)

    _actor = tf.keras.Model(actor_embed_input, actor_out, name=name)
    _critic = tf.keras.Model(critic_embed_input, critic_out, name=name)

    _actor.summary()
    _critic.summary()

    def forward(inp= None):
        logits = _actor(inp['vec_obs'])
        values = _critic(inp['vec_obs'])
        return logits, values

    return {"forward": forward, "trainable_networks": [_actor, _critic]}

# model_1 = mlp(name='model-1')
# model_2 = mlp(name='model-2')
# model_3 = mlp(name='model-3')

# model_1.build(input_shape=(None,None,4))
# model_2.build(input_shape=(None,None,4))

# model_3.build(input_shape=(None,5))

# current_inputs = tf.keras.Input(shape=(3,))

# x = tf.keras.layers.Concatenate()([current_inputs,tf.math.reduce_sum(model_1.output, 1),tf.math.reduce_sum(model_2.output,1)])

# out = model_3(x)
# model_1_2 = tf.keras.Model([current_inputs, model_1.input, model_2.input], out, name='model-1+2')

# model_1_2.summary()



# info_dim = [6,8,8,4]

# embed_inputs, embed_outputs = info_embedding()

# model = mlp(name='agg_model')
# model.build(input_shape=(None,sum(info_dim)))

# out = model(embed_outputs)
# final_model = tf.keras.Model(embed_inputs, out, name='final_model')

# final_model.summary()

import numpy as np

models = simple_actor_critic()

input0 = [np.ones((6,)), np.ones((6,))]
input1 = [np.zeros((6,4)), np.zeros((7,4))]
input2 = [np.ones((2,4)), np.ones((3,4))]
input3 = [np.ones((129,2)), np.ones((192,2))]

vec = {"vec_obs":[input0, input1, input2, input3]}

# value = models.get_action_logp_value(vec)

value = models["forward"](vec)
print(value)


TensorFlow: 2.5.0
Model: "actor_other_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
actor_other_net_output (Dens (None, None, 8)           40        
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________
Model: "actor_paths"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
actor_paths_output (Dense)   (None, None, 8)           40        
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________
Model: "actor_obstacles"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
actor_obstacles_output (Dens (None, None, 4)           12        
Total params: 12
Trainable params: 12
Non-trainable params: 0

ValueError: Layer simple_actor_critic expects 4 input(s), but it received 8 input tensors. Inputs received: [<tf.Tensor: shape=(6,), dtype=float64, numpy=array([1., 1., 1., 1., 1., 1.])>, <tf.Tensor: shape=(6,), dtype=float64, numpy=array([1., 1., 1., 1., 1., 1.])>, <tf.Tensor: shape=(6, 4), dtype=float64, numpy=
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])>, <tf.Tensor: shape=(7, 4), dtype=float64, numpy=
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])>, <tf.Tensor: shape=(2, 4), dtype=float64, numpy=
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])>, <tf.Tensor: shape=(3, 4), dtype=float64, numpy=
array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])>, <tf.Tensor: shape=(129, 2), dtype=float64, numpy=
array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])>, <tf.Tensor: shape=(192, 2), dtype=float64, numpy=
array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])>]

In [8]:
# GNN-based embedding with edges input
import numpy as np
import itertools

def current_net(head, pre_head, targets):

    target_embed = np.zeros((2,))
    tem_head = np.array(head)
    sum_euclid_distance = 0
    for t in targets:
        tem_t = np.array(t)
        sum_euclid_distance += 1/np.linalg.norm(tem_t - tem_head)
    for t in targets:
        tem_t = np.array(t)
        ratio_dist = 1/np.linalg.norm(tem_t - tem_head)/sum_euclid_distance
        target_embed += tem_t*ratio_dist
    current_net_vector = np.array(list(head)+list(pre_head)+list(target_embed))

    return current_net_vector

def nets_to_edgeFeature(nets):

    min_idx = int(min(nets.keys()))
    max_idx = int(max(nets.keys()))

    features = []

    for i in range(min_idx+1, max_idx+1):
        net = nets[i]
        edges = list(itertools.combinations(net, 2))[0]
        edges = list(edges[0] + edges[1])
        print(edges)
        features.append(edges)
    
    return features

env = CREnv(board_path="./board.csv")
env.reset()

print(nets_to_edgeFeature(env.other_nets))

[7, 16, 25, 19]
[5, 12, 28, 18]
[12, 15, 20, 16]
[9, 9, 27, 11]
[10, 16, 20, 17]
[12, 14, 20, 13]
[[7, 16, 25, 19], [5, 12, 28, 18], [12, 15, 20, 16], [9, 9, 27, 11], [10, 16, 20, 17], [12, 14, 20, 13]]


In [1]:
# GCN-based embedding

from copy import copy

import numpy as np
import gym
from gym import spaces
import random

# import state_embedding as se

import numpy as np
import scipy as sp

from scipy.linalg import block_diag

import tensorflow as tf

def convert_sparse_matrix_to_sparse_tensor(X):
    coo = X.tocoo()
    indices = np.mat([coo.row, coo.col]).transpose()
    return tf.SparseTensor(indices, coo.data, coo.shape)

def current_net(head, pre_head, targets):

    target_embed = np.zeros((2,))
    tem_head = np.array(head)
    sum_euclid_distance = 0
    for t in targets:
        tem_t = np.array(t)
        sum_euclid_distance += 1/np.linalg.norm(tem_t - tem_head)
    for t in targets:
        tem_t = np.array(t)
        ratio_dist = 1/np.linalg.norm(tem_t - tem_head)/sum_euclid_distance
        target_embed += tem_t*ratio_dist
    current_net_vector = np.array(list(head)+list(pre_head)+list(target_embed))

    return current_net_vector

def nets_to_graph(nets):

    min_idx = int(min(nets.keys()))
    max_idx = int(max(nets.keys()))

    features = [list(xy) for xy in nets[min_idx]]
    num_nodes = len(nets[min_idx])
    adj_matrix = np.ones((num_nodes, num_nodes)).tolist()
    for i in range(min_idx+1, max_idx+1):
        features += [list(xy) for xy in nets[i]]
        num_nodes = len(nets[i])
        adj_matrix = block_diag(adj_matrix, np.ones((num_nodes, num_nodes)))

    features = sp.sparse.csr_matrix(features)
    adj_matrix = sp.sparse.csr_matrix(adj_matrix)
    return features, adj_matrix

env = CREnv(board_path="./board.csv")
embedding = env.reset()

2021-09-01 13:01:18.733305: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


(14, 2)
(14, 14)


2021-09-01 13:01:20.896994: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2021-09-01 13:01:20.987473: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-01 13:01:20.988309: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1733] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 Ti computeCapability: 7.5
coreClock: 1.545GHz coreCount: 68 deviceMemorySize: 10.76GiB deviceMemoryBandwidth: 573.69GiB/s
2021-09-01 13:01:20.988352: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2021-09-01 13:01:21.021263: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublas.so.11
2021-09-01 13:01:21.021507: I tensorflow/stream_executor

Start of epoch 0


2021-09-01 13:01:23.935426: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcublasLt.so.11


The loss is: 1.8289093971252441; The accuracy is: 0.4285714328289032.
Start of epoch 1
The loss is: 1.033353567123413; The accuracy is: 0.5408163070678711.
Start of epoch 2
The loss is: 1.18331778049469; The accuracy is: 0.43877550959587097.
Start of epoch 3
The loss is: 1.2470974922180176; The accuracy is: 0.37755101919174194.
Start of epoch 4
The loss is: 0.9296584725379944; The accuracy is: 0.3979591727256775.
Start of epoch 5
The loss is: 0.8681005239486694; The accuracy is: 0.3877550959587097.
Start of epoch 6
The loss is: 0.7542531490325928; The accuracy is: 0.4591836631298065.
Start of epoch 7
The loss is: 0.5684778690338135; The accuracy is: 0.5714285969734192.
Start of epoch 8
The loss is: 0.6365965604782104; The accuracy is: 0.40816327929496765.
Start of epoch 9
The loss is: 0.591610848903656; The accuracy is: 0.4183673560619354.
Start of epoch 10
The loss is: 0.6306016445159912; The accuracy is: 0.44897958636283875.
Start of epoch 11
The loss is: 0.61833655834198; The accura

In [1]:
from __future__ import division

from copy import copy
from copy import deepcopy
from scipy.spatial import distance
import itertools

import functools
import numpy as np
import gym
from gym import spaces
import os, random


class CREnv(gym.Env):
    """
        The env is for general routing problem on the generated circuits, 
        it has 3 candudate actions: straight, 90-degree clockwise and 90-degree counter-clockwise and allows 90-degree bend
    """

    def __init__(self, board_path="./board.csv"):
        super(CREnv, self).__init__()

        self.all_directions = [(1,0), (0,1), (-1,0), (0,-1)]

        self.state_shape = (6,)

        self.board_path = board_path
        n_actions = 3
        self.action_space = spaces.Discrete(n_actions)
        self.observation_space = spaces.Box(low=0, high=30, shape=self.state_shape, dtype=np.float32)

    def reset(self):

        self.board = np.genfromtxt(self.board_path, delimiter=',')
        
        self.path_length = 0

        self.connection = False
        self.collide = False

        self.obstacles = []

        # parse the board and get pins of each net
        self.nets = {}
        for i in range(self.board.shape[0]):
            for j in range(self.board.shape[1]):
                if abs(self.board[i,j])>=2:
                    net_idx = abs(self.board[i,j])
                    if net_idx in self.nets:
                        self.nets[net_idx].append((i,j))
                    else:
                        self.nets[net_idx] = [(i,j)]
                elif self.board[i,j]==1:
                    self.obstacles.append((i,j))

        # initialize the action node and paths (empty)
        self.paths45 = dict()
        self.pairs_idx = int(min(self.nets.keys()))
        self.max_pair = int(max(self.nets.keys()))

        self.current_net = copy(self.nets[self.pairs_idx])
        self.other_nets = {key: value for key, value in self.nets.items() if key > self.pairs_idx}
        self.head = self.current_net[0]
        self.current_net.remove(self.head)

        self.pre_head = self.find_ini_prehead()
        self.last_node = self.head
        
        self.current_path = [self.head]
        self.paths = {self.pairs_idx:[]}

        self.targets = self.find_targets()

        state = self.extract_circuit_info()

        return state

    def find_targets(self):

        if len(self.paths[self.pairs_idx])!=0:
            return functools.reduce(lambda a, b: a+b, self.paths[self.pairs_idx])
        
        return copy(self.current_net)


    def step(self, action):

        action_tmp = self.get_directions_from_action(action)
        self.connection = False
        self.collide = False
        self.pre_head = self.head

        self.path_length += 1

        # pre-determine new action node
        self.head = (self.head[0]+action_tmp[0], self.head[1]+action_tmp[1])
        # check/adjust new action node and set its value
        x = self.head[0]
        y = self.head[1]

        mid_node = (np.array(self.head)+np.array(self.pre_head))/2
        mid_node = tuple(mid_node)
        if 0 <= x < self.board.shape[0] and 0 <= y < self.board.shape[1]:
            if self.paths45.get(mid_node):
                self.collide = True
                self.goto_new_net(False)
            else:
                if self.head in self.target:
                    self.current_path.append(self.head)
                    if self.head in self.current_net:
                        self.current_net.remove(self.head)
                    self.goto_new_net(True)
                    self.paths45[mid_node] = True
                elif self.board[self.head]!=0:
                    self.collide = True
                    self.goto_new_net(False)
                else:
                    self.current_path.append(self.head)
                    self.board[self.pre_head] = 1
                    self.board[self.head] = 1
                    self.paths45[mid_node] = True
        else:
            self.collide = True
            self.goto_new_net(False, out_range=True)

        reward = self.getReward()

        state = self.extract_circuit_info()

        done = self.isTerminal()

        info = {}

        return state, reward, done, info

    def get_directions_from_action(self, act_idx):

        path_d = np.array(self.head)-np.array(self.pre_head)

        d_idx = (self.all_directions.index(tuple(path_d))+act_idx-1)%len(self.all_directions)

        return self.all_directions[d_idx]

    def find_ini_prehead(self):

        possible_ds = []
        for d in self.all_directions:
            if self.check_direction(d)>0:
                tem_target = (self.head[0]+d[0], self.head[1]+d[1])
                possible_ds.append(d)

        if len(possible_ds)>0:
            best_d = random.choice(possible_ds)
        else:
            best_d = random.choice(self.all_directions)

        return (self.head[0]-best_d[0], self.head[1]-best_d[1])

    def goto_new_net(self, connection_sign, out_range=False):

        self.paths[self.pairs_idx].append(self.current_path)

        self.board[self.pre_head] = 1
        self.connection = connection_sign
        self.last_node = self.head

        if not out_range:
            self.board[self.head] = 1

        if len(self.current_net)>0:
            self.head = self.current_net[0]
            self.pre_head = self.find_ini_prehead()
        elif self.pairs_idx<self.max_pair:
            self.pairs_idx += 1
            self.current_net = copy(self.nets[self.pairs_idx])
            self.other_nets = {key: value for key, value in self.nets.items() if key > self.pairs_idx}
            self.head = self.current_net[0]
            self.current_net.remove(self.head)

            self.pre_head = self.find_ini_prehead()
            self.last_node = self.head
            
            self.current_path = [self.head]
            self.paths = {self.pairs_idx:[]}

            self.targets = self.find_targets()

        self.board[self.head] = 1

    def isTerminal(self):

        if self.pairs_idx > self.max_pair:
            return True

        return False


    def getReward(self):

        if self.connection:
            return 20
        if self.collide:
            left_dist = 3*np.linalg.norm(np.array(self.last_node) - np.array(self.finish[self.pairs_idx-1]))
            # distance.cityblock(self.last_node, self.finish[self.pairs_idx-1])
            return -left_dist/10

        expand_length = np.linalg.norm(np.array(self.head) - np.array(self.pre_head)) 
        return -expand_length/10

    def check_direction(self, direction):

        x = self.head[0] + direction[0]
        y = self.head[1] + direction[1]
        mid_node = np.array(self.head)+np.array(direction)/2
        mid_node = tuple(mid_node)
        if 0 <= x < self.board.shape[0] and 0 <= y < self.board.shape[1]:
            if not self.paths45.get(mid_node):
                if (x,y) == self.finish[self.pairs_idx]:
                    return 2
                elif self.board[(x,y)] == 0:
                    return 1
        return 0

    def extract_circuit_info(self):

        pass


    # def current_net(self):

    #     target_embed = np.zeros((2,))
    #     tem_head = np.array(self.head)
    #     sum_euclid_distance = 0
    #     for t in targets:
    #         tem_t = np.array(t)
    #         sum_euclid_distance += 1/np.linalg.norm(tem_t - tem_head)
    #     for t in targets:
    #         tem_t = np.array(t)
    #         ratio_dist = 1/np.linalg.norm(tem_t - tem_head)/sum_euclid_distance
    #         target_embed += tem_t*ratio_dist
    #     current_net_vector = np.array(list(head)+list(pre_head)+list(target_embed))

    #     return current_net_vector