In [1]:
import os
import csv
import numpy as np
import tensorflow as tf
from random_adjacency_matrix import gen_graph

## Global variables

In [59]:
# Flag read by the sample-file regeneration guard further down.
FORCE_REGEN_SAMPLE = False

# Settings passed to gen_graph for the node and edge counts of each sample
# graph. Presumably (min, max) ranges - confirm against gen_graph.
SAMPLE_GRAPH_NODES = (10, 10)
SAMPLE_GRAPH_EDGES = (10, 30)

# Number of samples per dataset split.
SAMPLES_TRAIN = 100000
SAMPLES_VALIDATE = 200
SAMPLES_TEST = 500

# Batch size used by the training loop.
BATCH = 2

def wrap_dir(fp):
    """Map a sample file name to the path actually used on disk.

    Currently the identity: ``fp`` is returned unchanged. Presumably kept as
    a hook for prefixing a data directory later.
    """
    resolved = fp
    return resolved

# Paths of the three sample files (train / validate / test), each resolved
# through wrap_dir.
_SAMPLE_TRAIN_FILE, _SAMPLE_VALIDATE_FILE, _SAMPLE_TEST_FILE = (
    wrap_dir(name)
    for name in (
        'stupid_experiment_with_random_graphs_train.csv',
        'stupid_experiment_with_random_graphs_validate.csv',
        'stupid_experiment_with_random_graphs_test.csv',
    )
)

## Helpers

In [60]:
# Accept a scalar for either setting and expand it to a 2-tuple, so later code
# can always index into SAMPLE_GRAPH_NODES / SAMPLE_GRAPH_EDGES.
if not isinstance(SAMPLE_GRAPH_NODES, (tuple, list)):
    SAMPLE_GRAPH_NODES = (SAMPLE_GRAPH_NODES, SAMPLE_GRAPH_NODES)

# Fixed: this branch used to assign to SAMPLE_GRAPH_NODES, which overwrote the
# node setting with the edge count and left SAMPLE_GRAPH_EDGES a bare scalar.
if not isinstance(SAMPLE_GRAPH_EDGES, (tuple, list)):
    SAMPLE_GRAPH_EDGES = (SAMPLE_GRAPH_EDGES, SAMPLE_GRAPH_EDGES)

def parse_sample(sample):
    """Decode one sample row into its adjacency matrix and scalar features.

    Args:
        sample: Sequence whose first item is the node count (anything
            ``int()`` accepts) and whose second item is the row-major,
            comma-separated adjacency matrix, e.g. ``['2', '0,1,1,0']``.

    Returns:
        Tuple ``(adj_mat, f1, f2, f3)``:
            adj_mat: ``(nodes, nodes)`` array of dtype ``np.int8``.
            f1: the node count.
            f2: sum of all matrix entries (the edge count for a 0/1 matrix).
            f3: number of rows whose entries sum to exactly 1, i.e. nodes
                with out-degree 1.
        ``f1``, ``f2`` and ``f3`` are plain Python ints.

    Raises:
        ValueError: if a field is not an integer literal or the number of
            matrix entries is not ``nodes * nodes``.
    """
    nodes = int(sample[0])
    matdef = sample[1]
    entries = [int(v.strip()) for v in matdef.split(',')]
    adj_mat = np.asarray(entries, dtype=np.int8).reshape((nodes, nodes))

    # Accumulate in int64 rather than with the builtin sum(): summing np.int8
    # scalars one by one stays in int8 under NumPy 2 promotion rules and wraps
    # around once a count exceeds 127.
    out_degree = adj_mat.sum(axis=1, dtype=np.int64)

    # feature 1: total nodes
    f1 = nodes

    # feature 2: the number of edges
    f2 = int(out_degree.sum())

    # feature 3: the number of 1-out-degree nodes
    f3 = int((out_degree == 1).sum())

    return (adj_mat, f1, f2, f3)

In [71]:
def _write_sample_file(fp, data):
    """Write sample rows to ``fp`` as pipe-delimited CSV, replacing the file.

    Args:
        fp: Destination path; an existing file is truncated.
        data: Iterable of rows, each an iterable of fields. It is consumed
            once, so a generator works.
    """
    # newline='' is what the csv module asks for on files it writes: without
    # it, platforms that translate '\n' emit '\r\r\n' row endings, which read
    # back as an extra empty row after every sample.
    with open(fp, 'w', newline='') as f:
        csv.writer(f, delimiter='|').writerows(data)
            
def _read_sample_file(fp):
    """Lazily yield the rows of a pipe-delimited sample file.

    Args:
        fp: Path of the file to read.

    Yields:
        list[str]: the fields of each non-blank line, in file order.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires for file objects.
    with open(fp, 'r', newline='') as f:
        for row in csv.reader(f, delimiter='|'):
            # A blank line parses to []; skip it instead of handing callers a
            # row they cannot index (e.g. files written with doubled line
            # endings, or a trailing empty line).
            if row:
                yield row
            
def _regen_samples():
    """Generate fresh random graphs and overwrite the three sample files.

    Every row written is ``(node_count, flattened_adjacency)``, where the
    adjacency matrix returned by ``gen_graph`` is flattened and joined with
    commas. The files are produced in train, validate, test order.
    """
    def _sample_rows(count):
        # Lazily produce `count` rows so a split is never held in memory.
        for _ in range(count):
            nodes, adj_mat = gen_graph(SAMPLE_GRAPH_NODES, SAMPLE_GRAPH_EDGES, directed=True)
            flat = np.matrix.flatten(adj_mat)
            yield str(nodes), ','.join(map(str, flat))

    splits = (
        (_SAMPLE_TRAIN_FILE, SAMPLES_TRAIN),
        (_SAMPLE_VALIDATE_FILE, SAMPLES_VALIDATE),
        (_SAMPLE_TEST_FILE, SAMPLES_TEST),
    )
    for target, count in splits:
        _write_sample_file(target, _sample_rows(count))
            

# (Re)generate the sample files when any of them is missing or when
# regeneration is forced. The call had been commented out, which made this
# guard a no-op: FORCE_REGEN_SAMPLE was ignored, and a fresh kernel without
# sample files only failed later, when the training file was opened.
# Existing files are left alone unless FORCE_REGEN_SAMPLE is set.
if FORCE_REGEN_SAMPLE or not all(
    os.path.exists(path)
    for path in (_SAMPLE_TRAIN_FILE, _SAMPLE_VALIDATE_FILE, _SAMPLE_TEST_FILE)
):
    _regen_samples()
    

def sample_batch_from_file(fp, batch_size=100):
    """Yield mini-batches of parsed samples from a sample file.

    Args:
        fp: Path of a sample file readable by ``_read_sample_file``.
        batch_size: Maximum number of samples per batch; must be >= 1.

    Yields:
        Tuple ``(batch_adj_mat, batch_f, batch_y)`` of equal-length lists:
            batch_adj_mat: adjacency matrices with the diagonal set to 1.
            batch_f: all-ones arrays of shape ``(nodes, features)``, where
                ``features`` is the module-level setting defined alongside
                the model.
            batch_y: one ``[[f1, f2, f3]]`` entry per sample, taken from
                ``parse_sample``.
        The last batch may hold fewer than ``batch_size`` samples. An empty
        batch is never yielded: the generator simply ends when the file is
        exhausted.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 (raised on the first
            ``next()``, since this is a generator).
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got %r' % (batch_size,))

    batch_adj_mat, batch_f, batch_y = [], [], []
    for row in _read_sample_file(fp):
        adj_mat, *f = parse_sample(row)
        # parse_sample has already computed the targets, so the ones written
        # onto the diagonal here are not counted in them.
        np.fill_diagonal(adj_mat, 1)
        batch_adj_mat.append(adj_mat)
        batch_f.append(np.ones((f[0], features)))
        batch_y.append([list(f)])

        if len(batch_y) == batch_size:
            yield batch_adj_mat, batch_f, batch_y
            batch_adj_mat, batch_f, batch_y = [], [], []

    # Flush the final partial batch. Previously an empty batch was yielded
    # whenever the sample count was a multiple of batch_size.
    if batch_y:
        yield batch_adj_mat, batch_f, batch_y

## Model

In [79]:
def weight_variable(shape):
    """Return a ``tf.Variable`` of the given shape for use as a weight matrix.

    Initial values come from ``tf.truncated_normal`` with ``stddev=0.1``.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape for use as a bias term.

    Every element starts at the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

# W_2_z below is sized from max_nodes, so every graph fed to the model must
# have exactly max_nodes nodes (SAMPLE_GRAPH_NODES is (10, 10) in this
# notebook).
max_nodes = SAMPLE_GRAPH_NODES[1]
features = 3
# Width of both hidden layers; shared so the layer shapes cannot drift apart.
hidden_units = 128

# Input adjacency matrices (samples, height, width)
input_adj_mat = tf.placeholder(tf.float32, [None, None, None])
# Per-node input features (samples, nodes, features)
input_f = tf.placeholder(tf.float32, [None, None, features])

# Regression targets: one row of 3 values per sample
y_ = tf.placeholder(tf.float32, [None, 1, 3])

# First layer: multiply adjacency by node features, then apply a dense map
# to each sample.
A_x_h_1 = tf.matmul(input_adj_mat, input_f)
W_1 = weight_variable([features, hidden_units])
b_1 = bias_variable([hidden_units])
h_1 = tf.map_fn(lambda s: tf.matmul(s, W_1), A_x_h_1) + b_1

relu_1 = tf.nn.relu(h_1)

# Second layer: dense map per node, then W_2_z (1, max_nodes) collapses the
# node axis into a single row per sample.
W_2_z = weight_variable([1, max_nodes])
W_2 = weight_variable([hidden_units, hidden_units])
b_2 = bias_variable([hidden_units])
h_2 = tf.map_fn(lambda s: tf.matmul(W_2_z, tf.matmul(s, W_2)), relu_1) + b_2
relu_2 = tf.nn.relu(h_2)

# Readout layer. Fixed: W_3 was declared as [64, 3], which cannot be
# multiplied with the 128-wide relu_2; its input width now follows
# hidden_units.
W_3 = weight_variable([hidden_units, 3])
b_3 = bias_variable([3])
h_3 = tf.map_fn(lambda s: tf.matmul(s, W_3), relu_2) + b_3
relu_3 = h_3  # no activation is applied to the readout


# Train: minimise the summed squared error between targets and readout.
#cross_entropy = tf.reduce_mean(
#    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=relu_3))
c1 = y_ - relu_3
train_step = tf.train.AdamOptimizer(1e-4).minimize(tf.reduce_sum(tf.multiply(c1, c1)))
#train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
correct_prediction = tf.equal(tf.cast(relu_3, tf.int32), tf.cast(y_, tf.int32))
#accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE: despite its name, `accuracy` is the raw readout tensor, not a metric.
accuracy = relu_3

# launch the model
# Create exactly one session. Two were constructed back to back before, which
# left the first one open with no remaining reference to close it.
sess = tf.InteractiveSession()

tf.global_variables_initializer().run()

# Run SAMPLES_TRAIN // BATCH optimisation steps, drawing BATCH samples from
# the training file for each step.
batch = sample_batch_from_file(_SAMPLE_TRAIN_FILE, batch_size=BATCH)

training_steps = SAMPLES_TRAIN // BATCH
for _ in range(training_steps):
    batch_adj_mat, batch_f, batch_y = next(batch)
    feed = {
        input_adj_mat: batch_adj_mat,
        input_f: batch_f,
        y_: batch_y,
    }
    sess.run(train_step, feed_dict=feed)

In [80]:
# Evaluate the model on the first 10 samples of the test file and print each
# prediction next to its expected target row.
batch_validate = sample_batch_from_file(_SAMPLE_TEST_FILE, batch_size=10)

batch_adj_mat_val, batch_f_val, batch_y_val = next(batch_validate)
result = sess.run(
    accuracy,
    feed_dict={
        input_adj_mat: batch_adj_mat_val,
        input_f: batch_f_val,
        y_: batch_y_val,
    },
)

for expected, predicted in zip(batch_y_val, result):
    print("Expected: %s; Actual: %s" % (expected[0], predicted[0]))

Expected: [10, 19, 3]; Actual: [  9.99717999  18.99988747   2.59805298]
Expected: [10, 29, 1]; Actual: [  9.96837902  28.99187279   0.73597771]
Expected: [10, 20, 2]; Actual: [  9.98408985  19.99081039   2.40075922]
Expected: [10, 20, 2]; Actual: [  9.98507977  20.02750969   2.39527082]
Expected: [10, 12, 6]; Actual: [  9.98909569  11.96247768   3.87451768]
Expected: [10, 27, 1]; Actual: [  9.99398422  26.9720726    1.13679206]
Expected: [10, 19, 4]; Actual: [  9.99696636  18.97505188   2.6023283 ]
Expected: [10, 12, 4]; Actual: [  9.88932323  12.07290173   3.73122549]
Expected: [10, 22, 4]; Actual: [  9.95916748  21.99634171   2.0033958 ]
Expected: [10, 14, 4]; Actual: [ 10.06159687  13.92053127   3.60603976]
