In [2]:
import os
import csv
import numpy as np
import tensorflow as tf
from random_adjacency_matrix import gen_graph

## Global variables

In [3]:
# Flag read by the sample-regeneration guard further down.
FORCE_REGEN_SAMPLE = False

# Node and edge settings handed to gen_graph, one pair each
# (presumably a (min, max) range -- confirm against gen_graph).
SAMPLE_GRAPH_NODES = (10, 10)
SAMPLE_GRAPH_EDGES = (10, 30)

# Number of samples generated for each data split.
SAMPLES_TRAIN = 100000
SAMPLES_VALIDATE = 200
SAMPLES_TEST = 500

# Mini-batch size used by the training loop.
BATCH = 200

def wrap_dir(fp):
    """Return the given file path unchanged.

    Every sample-file path is routed through this hook, so a base directory
    could be applied here in one place; at present it is the identity.
    """
    return fp

# CSV files holding the train / validate / test samples; each path is passed
# through wrap_dir.
_SAMPLE_TRAIN_FILE = wrap_dir('stupid_experiment_with_random_graphs_train.csv')
_SAMPLE_VALIDATE_FILE = wrap_dir('stupid_experiment_with_random_graphs_validate.csv')
_SAMPLE_TEST_FILE = wrap_dir('stupid_experiment_with_random_graphs_test.csv')

## Helpers

In [4]:
# Normalise scalar settings into pairs so later code can always index them.
if not isinstance(SAMPLE_GRAPH_NODES, (tuple, list)):
    SAMPLE_GRAPH_NODES = (SAMPLE_GRAPH_NODES, SAMPLE_GRAPH_NODES)

# A scalar edge setting must be expanded into SAMPLE_GRAPH_EDGES itself;
# assigning it to SAMPLE_GRAPH_NODES would clobber the node setting and
# leave the edge setting a scalar.
if not isinstance(SAMPLE_GRAPH_EDGES, (tuple, list)):
    SAMPLE_GRAPH_EDGES = (SAMPLE_GRAPH_EDGES, SAMPLE_GRAPH_EDGES)

def parse_sample(sample):
    """Decode one stored sample into its adjacency matrix and three features.

    Args:
        sample: Sequence whose first item is the node count (anything
            ``int()`` accepts) and whose second item is the flattened
            adjacency matrix written as comma-separated integers.

    Returns:
        Tuple ``(adj_mat, f1, f2, f3)``: ``adj_mat`` is an int8 array of shape
        ``(nodes, nodes)``; ``f1`` is the node count; ``f2`` is the sum of all
        matrix entries (the number of edges); ``f3`` is the number of rows
        whose entries sum to exactly 1 (nodes with out-degree 1). The three
        features are plain Python ints.

    Raises:
        ValueError: If a field is not an integer, or the number of matrix
            values is not ``nodes * nodes``.
    """
    nodes = int(sample[0])
    matdef = sample[1]
    values = [int(v.strip()) for v in matdef.split(',')]
    adj_mat = np.asarray(values, dtype=np.int8).reshape((nodes, nodes))

    # feature 1: total nodes
    f1 = nodes

    # feature 2: the number of edges.
    # Accumulate in int64: the entries are int8, and adding them one by one
    # in their own dtype can wrap around once the total exceeds 127.
    f2 = int(adj_mat.sum(dtype=np.int64))

    # feature 3: the number of 1-out-degree nodes (rows summing to exactly 1)
    row_sums = adj_mat.sum(axis=1, dtype=np.int64)
    f3 = int(np.count_nonzero(row_sums == 1))

    return (adj_mat, f1, f2, f3)

In [12]:
def _write_sample_file(fp, data):
    with open(fp, 'w') as f:
        writer = csv.writer(f, delimiter='|')
        for row in data:
            writer.writerow(row)
            
def _read_sample_file(fp):
    with open(fp, 'r') as f:
        reader = csv.reader(f, delimiter='|')
        for row in reader:
            yield row
            
def _regen_samples():
    """Generate random directed graphs and rewrite the three sample files.

    Each row holds the node count followed by the flattened adjacency matrix
    joined with commas. The train, validate and test files are written in
    that order.
    """
    def _rows(count):
        # Produce rows lazily so a whole split is never held in memory.
        for _ in range(count):
            n_nodes, matrix = gen_graph(SAMPLE_GRAPH_NODES, SAMPLE_GRAPH_EDGES, directed=True)
            flat = np.matrix.flatten(matrix)
            yield str(n_nodes), ','.join(map(str, flat))

    splits = (
        (_SAMPLE_TRAIN_FILE, SAMPLES_TRAIN),
        (_SAMPLE_VALIDATE_FILE, SAMPLES_VALIDATE),
        (_SAMPLE_TEST_FILE, SAMPLES_TEST),
    )
    for path, count in splits:
        _write_sample_file(path, _rows(count))
            

# Guard for rebuilding the sample files when any of them is missing or when
# regeneration is forced. NOTE: the regeneration call is commented out, so
# this guard currently does nothing.
if FORCE_REGEN_SAMPLE or not all(
    os.path.exists(path)
    for path in (_SAMPLE_TRAIN_FILE, _SAMPLE_VALIDATE_FILE, _SAMPLE_TEST_FILE)
):
    #_regen_samples()
    pass
    

def sample_batch_from_file(fp, batch_size=100):
    """Yield mini-batches of parsed samples read from a sample file.

    Args:
        fp: Path of a ``|``-delimited sample file readable by
            ``_read_sample_file``.
        batch_size: Maximum number of samples per batch; must be at least 1.

    Yields:
        ``(batch_inputs, batch_targets)``: two lists of equal length. Each
        input is an array stacking the adjacency matrix with two identity
        matrices of the same size; each target is the list ``[f1, f2, f3]``
        produced by ``parse_sample``. The final batch may hold fewer than
        ``batch_size`` samples; an empty batch is never yielded.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got %r" % (batch_size,))

    reader = _read_sample_file(fp)
    try:
        exhausted = False
        while not exhausted:
            batch_adj_mat_with_f, batch_y = [], []
            for _ in range(batch_size):
                row = next(reader, None)
                if row is None:
                    exhausted = True
                    break
                adj_mat, *f = parse_sample(row)
                ident = np.identity(f[0])
                batch_adj_mat_with_f.append(np.asarray([adj_mat, ident, ident]))
                batch_y.append(f)
            # When the file ends exactly on a batch boundary (or is empty)
            # there is nothing left to emit; do not hand out an empty batch.
            if batch_adj_mat_with_f:
                yield batch_adj_mat_with_f, batch_y
    finally:
        # Release the file promptly, including when the consumer stops after
        # taking only some of the batches.
        reader.close()

## Model

In [70]:
def weight_variable(shape):
    """Create a variable of the given shape initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a variable of the given shape with every element initialised to 1.0."""
    return tf.Variable(tf.constant(1.0, shape=shape))

def conv2d(x, W):
    """Apply a 2-D convolution to a 4-D input with unit strides and 'SAME' padding.

    tf.nn.conv2d takes the input, the filter, the strides and the padding
    policy, where:
      * the four input dimensions are: samples, height, width, channels;
      * the four filter dimensions are: filter height, filter width,
        input channels, output channels;
      * the strides are a 1-D tensor of four elements, one step size per
        input dimension;
      * the padding policy is 'SAME' or 'VALID'.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool the input with a 2x2 window and a stride of 2, using 'SAME' padding.

    tf.nn.max_pool takes the input, the window size, the strides and the
    padding policy.
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# The placeholder shape is fixed, so every sample is assumed to have exactly
# max_nodes nodes (true while both entries of SAMPLE_GRAPH_NODES are equal).
max_nodes = SAMPLE_GRAPH_NODES[1]

# Number of regression targets per sample (the three values parse_sample
# returns after the adjacency matrix).
features = 3

# Each input sample stacks three square matrices: the adjacency matrix and
# two identity matrices (see sample_batch_from_file).
input_channels = 3
input_size = input_channels * max_nodes * max_nodes

# Input: (samples, channels, height, width), flattened to one vector per sample
input_adj_mat_with_f = tf.placeholder(tf.float32, [None, input_channels, max_nodes, max_nodes])
input_flat = tf.reshape(input_adj_mat_with_f, [-1, input_size])

# Regression targets: (samples, features)
y_ = tf.placeholder(tf.float32, [None, features])

# First layer
W_1 = weight_variable([input_size, 300])
b_1 = bias_variable([300])
h_1 = tf.nn.sigmoid(tf.matmul(input_flat, W_1) + b_1)

# Second layer
W_2 = weight_variable([300, 64])
b_2 = bias_variable([64])
h_2 = tf.nn.sigmoid(tf.matmul(h_1, W_2) + b_2)

# Third layer
W_3 = weight_variable([64, 64])
b_3 = bias_variable([64])
h_3 = tf.nn.sigmoid(tf.matmul(h_2, W_3) + b_3)

# Densely connected layer
W_d = weight_variable([64, 1024])
b_d = bias_variable([1024])
h_d = tf.nn.sigmoid(tf.matmul(h_3, W_d) + b_d)

# Readout layer: one linear output per regression target
W_r = weight_variable([1024, features])
b_r = bias_variable([features])

y_conv = tf.matmul(h_d, W_r) + b_r

In [71]:
# Train

# Regression objective: sum of squared differences between targets and
# network outputs, minimised with Adam at a learning rate of 1e-4.
# (Plain gradient descent at 1e-5 was an earlier alternative.)
c1 = y_ - y_conv
sum_squared_error = tf.reduce_sum(tf.multiply(c1, c1))
train_step = tf.train.AdamOptimizer(1e-4).minimize(sum_squared_error)

# An output element counts as correct when it rounds to the integer target;
# accuracy is the mean over all elements of the batch.
rounded_prediction = tf.cast(tf.round(y_conv), tf.int32)
integer_target = tf.cast(y_, tf.int32)
correct_prediction = tf.equal(rounded_prediction, integer_target)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# launch the model
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()



In [87]:

# One optimiser step per mini-batch, for SAMPLES_TRAIN // BATCH steps.
batch = sample_batch_from_file(_SAMPLE_TRAIN_FILE, batch_size=BATCH)
total_steps = SAMPLES_TRAIN // BATCH

for i in range(total_steps):
    try:
        batch_adj_mat_with_f, batch_y = next(batch)
    except StopIteration:
        # The file ran out early: reopen it and carry on from its start.
        batch = sample_batch_from_file(_SAMPLE_TRAIN_FILE, batch_size=BATCH)
        batch_adj_mat_with_f, batch_y = next(batch)

    if not batch_adj_mat_with_f:
        continue

    feed = {input_adj_mat_with_f: batch_adj_mat_with_f, y_: batch_y}
    if i % 10 == 0:
        # Report accuracy on this batch before training on it.
        acc = accuracy.eval(feed_dict=feed)
        print("Step %d: accuracy: %g" % (i, acc))
    sess.run(train_step, feed_dict=feed)

Step 0: accuracy: 0.756667
Step 10: accuracy: 0.78
Step 20: accuracy: 0.8
Step 30: accuracy: 0.786667
Step 40: accuracy: 0.763333
Step 50: accuracy: 0.773333
Step 60: accuracy: 0.78
Step 70: accuracy: 0.78
Step 80: accuracy: 0.77
Step 90: accuracy: 0.766667
Step 100: accuracy: 0.776667
Step 110: accuracy: 0.79
Step 120: accuracy: 0.77
Step 130: accuracy: 0.756667
Step 140: accuracy: 0.793333
Step 150: accuracy: 0.76
Step 160: accuracy: 0.763333
Step 170: accuracy: 0.76
Step 180: accuracy: 0.77
Step 190: accuracy: 0.79
Step 200: accuracy: 0.773333
Step 210: accuracy: 0.783333
Step 220: accuracy: 0.773333
Step 230: accuracy: 0.776667
Step 240: accuracy: 0.783333
Step 250: accuracy: 0.8
Step 260: accuracy: 0.746667
Step 270: accuracy: 0.763333
Step 280: accuracy: 0.77
Step 290: accuracy: 0.776667
Step 300: accuracy: 0.78
Step 310: accuracy: 0.773333
Step 320: accuracy: 0.77
Step 330: accuracy: 0.756667
Step 340: accuracy: 0.77
Step 350: accuracy: 0.76
Step 360: accuracy: 0.783333
Step 370

In [68]:
# Run the network on the first batch (up to 20 samples) of the test file and
# print each target next to the corresponding prediction.
batch_validate = sample_batch_from_file(_SAMPLE_TEST_FILE, batch_size=20)
batch_adj_mat_val_with_f, batch_y_val = next(batch_validate)

val_feed = {input_adj_mat_with_f: batch_adj_mat_val_with_f, y_: batch_y_val}
result = sess.run(y_conv, feed_dict=val_feed)

for expected, predicted in zip(batch_y_val, result):
    print("Expected: %s; Actual: %s" % (expected, predicted))

Expected: [10, 19, 3]; Actual: [ 10.01219463  19.00857735   2.73004675]
Expected: [10, 29, 1]; Actual: [  9.9787302   29.13644981   1.12909138]
Expected: [10, 20, 2]; Actual: [ 10.00915909  20.03309441   2.61464405]
Expected: [10, 20, 2]; Actual: [  9.94487762  20.13905525   2.47780275]
Expected: [10, 12, 6]; Actual: [  9.99090195  12.03159523   3.87247562]
Expected: [10, 27, 1]; Actual: [  9.95919895  27.08016205   1.37922049]
Expected: [10, 19, 4]; Actual: [  9.96992016  19.00794601   2.8632412 ]
Expected: [10, 12, 4]; Actual: [  9.96939182  11.9802084    3.79472828]
Expected: [10, 22, 4]; Actual: [ 10.03061581  22.06005859   2.22525501]
Expected: [10, 14, 4]; Actual: [  9.99098396  13.94580555   3.5934701 ]
Expected: [10, 17, 1]; Actual: [  9.94874382  17.01911354   3.09948969]
Expected: [10, 22, 3]; Actual: [ 10.00641441  22.14943504   2.21017385]
Expected: [10, 20, 3]; Actual: [  9.95440388  20.064394     2.45864677]
Expected: [10, 11, 3]; Actual: [  9.99105644  11.00770187   3.99