In [1]:
import os
import csv
import numpy as np
import tensorflow as tf
from random_adjacency_matrix import gen_graph

## Global variables

In [2]:
# Flag consulted by the sample-file existence check further down.
FORCE_REGEN_SAMPLE = False

# Node and edge settings handed to gen_graph.
# NOTE(review): presumably (min, max) ranges — confirm against gen_graph.
SAMPLE_GRAPH_NODES = (10, 10)
SAMPLE_GRAPH_EDGES = (10, 30)

# Number of samples in the train / validate / test splits.
SAMPLES_TRAIN = 100000
SAMPLES_VALIDATE = 200
SAMPLES_TEST = 500

# Mini-batch size used by the training loop.
BATCH = 50

def wrap_dir(fp):
    """Return the path to use for a sample file.

    Currently the argument is handed back unchanged; this is the single place
    to add a directory prefix if one is ever needed.
    """
    resolved = fp
    return resolved

# CSV files holding the train / validate / test samples.
_SAMPLE_TRAIN_FILE, _SAMPLE_VALIDATE_FILE, _SAMPLE_TEST_FILE = (
    wrap_dir('stupid_experiment_with_random_graphs_train.csv'),
    wrap_dir('stupid_experiment_with_random_graphs_validate.csv'),
    wrap_dir('stupid_experiment_with_random_graphs_test.csv'),
)

## Helpers

In [3]:
# Allow either setting to be written as a single number: a scalar n is
# expanded to the pair (n, n) so later code can always index the setting.
if not isinstance(SAMPLE_GRAPH_NODES, (tuple, list)):
    SAMPLE_GRAPH_NODES = (SAMPLE_GRAPH_NODES, SAMPLE_GRAPH_NODES)

# The expanded edge pair must be stored back into SAMPLE_GRAPH_EDGES; writing
# it to SAMPLE_GRAPH_NODES would overwrite the node setting with edge counts
# and leave the edge setting a bare scalar.
if not isinstance(SAMPLE_GRAPH_EDGES, (tuple, list)):
    SAMPLE_GRAPH_EDGES = (SAMPLE_GRAPH_EDGES, SAMPLE_GRAPH_EDGES)

def parse_sample(sample):
    """Decode one sample row into its adjacency matrix and target features.

    Args:
        sample: Sequence whose first item is the node count (an int or a
            numeric string) and whose second item is a comma-separated string
            holding the ``nodes * nodes`` adjacency-matrix entries.

    Returns:
        Tuple ``(adj_mat, f1, f2, f3)`` where ``adj_mat`` is an int8 array of
        shape ``(nodes, nodes)``, ``f1`` is the node count, ``f2`` is the sum
        of all matrix entries (the number of edges) and ``f3`` is the number
        of rows whose entries sum to exactly 1 (nodes with out-degree 1).
        The three features are plain Python ints.

    Raises:
        ValueError: If an entry is not an integer literal or the number of
            entries does not equal ``nodes * nodes``.
    """
    nodes = int(sample[0])
    entries = [int(v.strip()) for v in sample[1].split(',')]
    adj_mat = np.asarray(entries, dtype=np.int8).reshape(nodes, nodes)

    # feature 1: total nodes
    f1 = nodes

    # feature 2: the number of edges
    # ndarray.sum accumulates int8 input in the platform integer, so the count
    # cannot wrap around the way an element-by-element int8 addition can.
    f2 = int(adj_mat.sum())

    # feature 3: the number of 1-out-degree nodes
    f3 = int(np.count_nonzero(adj_mat.sum(axis=1) == 1))

    return (adj_mat, f1, f2, f3)

def variable_summaries(var):
    """Register TensorBoard summaries for a tensor.

    Adds scalar summaries for the mean, standard deviation, maximum and
    minimum of ``var`` plus a histogram of its values, all under a
    'summaries' name scope.
    """
    with tf.name_scope('summaries'):
        avg = tf.reduce_mean(var)
        tf.summary.scalar('mean', avg)
        with tf.name_scope('stddev'):
            squared_dev = tf.square(var - avg)
            spread = tf.sqrt(tf.reduce_mean(squared_dev))
        tf.summary.scalar('stddev', spread)
        for tag, reducer in (('max', tf.reduce_max), ('min', tf.reduce_min)):
            tf.summary.scalar(tag, reducer(var))
        tf.summary.histogram('histogram', var)
        
def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    """Build one fully connected layer together with its summary ops.

    The layer computes ``act(matmul(input_tensor, W) + b)`` where ``W`` has
    shape ``[input_dim, output_dim]`` and ``b`` has shape ``[output_dim]``.
    Everything is created inside a name scope called ``layer_name`` so the
    graph stays readable in TensorBoard.

    Args:
        input_tensor: Tensor fed into the layer.
        input_dim: Size of the input's last dimension.
        output_dim: Number of units in the layer.
        layer_name: Name scope for the layer's ops.
        act: Activation callable, invoked as ``act(tensor, name=...)``.

    Returns:
        The activation tensor of the layer.
    """
    # The name scope groups all ops of this layer in the graph view.
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            w = weight_variable([input_dim, output_dim])
            variable_summaries(w)
        with tf.name_scope('biases'):
            b = bias_variable([output_dim])
            variable_summaries(b)
        with tf.name_scope('Wx_plus_b'):
            pre_act = tf.matmul(input_tensor, w) + b
            tf.summary.histogram('pre_activations', pre_act)
        out = act(pre_act, name='activation')
        tf.summary.histogram('activations', out)
    return out

In [4]:
def _write_sample_file(fp, data):
    with open(fp, 'w') as f:
        writer = csv.writer(f, delimiter='|')
        for row in data:
            writer.writerow(row)
            
def _read_sample_file(fp):
    with open(fp, 'r') as f:
        reader = csv.reader(f, delimiter='|')
        for row in reader:
            yield row
            
def _regen_samples():
    """Generate random graphs and overwrite the three sample files.

    Each sample row has two fields: the node count as a string and the
    flattened adjacency matrix as a comma-separated string. The train,
    validate and test files receive SAMPLES_TRAIN, SAMPLES_VALIDATE and
    SAMPLES_TEST rows respectively, generated in that order.
    """
    def _gen(num):
        """Lazily produce ``num`` serialised sample rows."""
        for _ in range(num):
            nodes, adj_mat = gen_graph(SAMPLE_GRAPH_NODES, SAMPLE_GRAPH_EDGES, directed=True)
            # np.asarray(...).ravel() gives a 1-D sequence of entries whether
            # gen_graph returns an ndarray or an np.matrix; flattening an
            # np.matrix with its own method keeps it 2-D and would serialise
            # the whole matrix as a single field.
            flat = np.asarray(adj_mat).ravel()
            yield str(nodes), ','.join(str(v) for v in flat)

    for path, count in (
        (_SAMPLE_TRAIN_FILE, SAMPLES_TRAIN),
        (_SAMPLE_VALIDATE_FILE, SAMPLES_VALIDATE),
        (_SAMPLE_TEST_FILE, SAMPLES_TEST),
    ):
        _write_sample_file(path, _gen(count))
            

# Sample-file check: the condition holds when any of the three sample files is
# missing or when FORCE_REGEN_SAMPLE is set.
# NOTE(review): the regeneration call inside the branch is commented out, so
# the branch currently does nothing. FORCE_REGEN_SAMPLE therefore has no
# effect, and missing files only surface later when they are opened for
# reading. Confirm whether the call was disabled on purpose.
if (
    not os.path.exists(_SAMPLE_TRAIN_FILE) 
    or not os.path.exists(_SAMPLE_VALIDATE_FILE)
    or not os.path.exists(_SAMPLE_TEST_FILE)
    or FORCE_REGEN_SAMPLE
):
    #_regen_samples()
    pass
    

def sample_batch_from_file(fp, batch_size=100):
    """Yield mini-batches of parsed samples read from a sample file.

    Args:
        fp: Path of the '|'-delimited sample file.
        batch_size: Maximum number of samples per batch; must be at least 1.

    Yields:
        Tuples ``(batch_adj_mat_with_f, batch_y)``. ``batch_adj_mat_with_f``
        is a list of arrays of shape ``(1, nodes, nodes)`` (the adjacency
        matrix with a leading feature axis) and ``batch_y`` is the matching
        list of ``[f1, f2, f3]`` feature lists from ``parse_sample``. The last
        batch may be shorter than ``batch_size``; an empty batch is never
        yielded, so an empty file yields nothing.

    Raises:
        ValueError: On the first ``next()`` call if ``batch_size`` is below 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got %r" % (batch_size,))

    reader = _read_sample_file(fp)
    try:
        exhausted = False
        while not exhausted:
            batch_adj_mat_with_f, batch_y = [], []
            while len(batch_y) < batch_size:
                row = next(reader, None)
                if row is None:
                    exhausted = True
                    break
                adj_mat, *f = parse_sample(row)
                # Wrap the matrix in a length-1 leading axis (one feature plane).
                batch_adj_mat_with_f.append(np.asarray([adj_mat]))
                batch_y.append(f)
            # Skip the empty remainder that occurs when the number of samples
            # is an exact multiple of batch_size.
            if batch_y:
                yield batch_adj_mat_with_f, batch_y
    finally:
        # Closing the row generator releases the underlying file even when
        # the consumer stops iterating before the file is exhausted.
        reader.close()

## Model

In [5]:
def weight_variable(shape):
    """Return a tf.Variable of ``shape`` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1, name="weights"))

def bias_variable(shape):
    """Return a tf.Variable of ``shape`` with every element initialised to 1.0."""
    return tf.Variable(tf.constant(1.0, shape=shape, name="bias"))

def conv2d(x, W):
    """Apply a stride-1, 'SAME'-padded 2-D convolution of filter ``W`` to ``x``.

    tf.nn.conv2d performs a two-dimensional convolution on a four-dimensional
    input. Its arguments are the input, the filter, the strides and the
    padding policy, where:

    * the four input dimensions are: number of samples, height of each
      sample, width of each sample, number of channels;
    * the four filter dimensions are: filter height, filter width, number of
      input channels, number of output channels;
    * the strides are a one-dimensional, four-element tensor whose elements
      give the step along the corresponding input dimension;
    * the padding policy is 'SAME' or 'VALID'.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME', name="conv2d")

def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window and stride 2, using 'SAME' padding.

    tf.nn.max_pool takes the input, the window size, the strides and the
    padding policy. The window and strides here are 1 along the first and
    last dimensions and 2 along the two middle dimensions.
    """
    # The op name must not contain spaces: TensorFlow rejects names with
    # whitespace as invalid scope names, so underscores are used instead.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name="max_pool_2x2")

# Side length of the adjacency matrices fed to the network, taken from the
# second element of the node setting.
# NOTE(review): presumably the upper bound of the node-count range — confirm.
max_nodes = SAMPLE_GRAPH_NODES[1]
features = 1

# Number of inputs per sample after flattening. Derived from max_nodes so the
# network follows the node setting instead of hard-coding 10 x 10.
flat_dim = features * max_nodes * max_nodes

# Input adjacency matrices: (samples, features, height, width)
input_adj_mat_with_f = tf.placeholder(tf.float32, [None, features, max_nodes, max_nodes])
input_flat = tf.reshape(input_adj_mat_with_f, [-1, flat_dim])

# Regression targets: the three features returned by parse_sample.
y_ = tf.placeholder(tf.float32, [None, 3])

# First layer
l1 = nn_layer(input_flat, flat_dim, 300, "first", act=tf.nn.relu)

# Second layer
l2 = nn_layer(l1, 300, 64, "second", act=tf.nn.relu)

# Third layer
l3 = nn_layer(l2, 64, 64, "third", act=tf.nn.relu)

# Densely connected layer
ld = nn_layer(l3, 64, 1024, "dense", act=tf.nn.relu)

# Dropout; keep_prob is fed at run time (0.5 for training steps, 1 for evaluation).
with tf.name_scope('dropout') as scope:
    keep_prob = tf.placeholder(tf.float32)
    h_d_drop = tf.nn.dropout(ld, keep_prob, name="dropout")

# Readout layer: one output per target feature
lr = nn_layer(h_d_drop, 1024, 3, "readout", act=tf.nn.relu)

y_conv = lr

In [8]:
# Train

# Sum-of-squares loss between the targets and the network outputs.
c1 = y_ - y_conv
loss = tf.reduce_sum(tf.square(c1))
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

# Accuracy: fraction of output elements that, once rounded, equal the
# integer-cast target element.
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        rounded_output = tf.cast(tf.round(y_conv), tf.int32)
        integer_target = tf.cast(y_, tf.int32)
        correct_prediction = tf.equal(rounded_output, integer_target)
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar('accuracy', accuracy)

# Launch the model.
sess = tf.InteractiveSession()

# Summary op and the writers for the 'train' and 'test' log directories.
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('train', sess.graph)
test_writer = tf.summary.FileWriter('test')

sess.run(tf.global_variables_initializer())



In [10]:

batch = sample_batch_from_file(_SAMPLE_TRAIN_FILE, batch_size=BATCH)

# Ten times the number of batches that fit in the training set.
total_steps = SAMPLES_TRAIN // BATCH * 10

for i in range(total_steps):
    try:
        batch_adj_mat_with_f, batch_y = next(batch)
    except StopIteration:
        # Training file exhausted: reopen it and carry on with its first batch.
        batch = sample_batch_from_file(_SAMPLE_TRAIN_FILE, batch_size=BATCH)
        batch_adj_mat_with_f, batch_y = next(batch)
    if not len(batch_adj_mat_with_f):
        continue

    feeds = {input_adj_mat_with_f: batch_adj_mat_with_f, y_: batch_y}

    if i % 100 == 0:
        # Every 100th step: report accuracy on the current batch with
        # keep_prob 1 and log the summaries through test_writer.
        summary, acc = sess.run([merged, accuracy], feed_dict={**feeds, keep_prob: 1})
        test_writer.add_summary(summary, i)
        print("Step %d: accuracy: %g" % (i, acc))

    # One optimisation step with keep_prob 0.5.
    summary, _ = sess.run([merged, train_step], feed_dict={**feeds, keep_prob: 0.5})
    train_writer.add_summary(summary, i)

Step 0: accuracy: 0.0333333
Step 100: accuracy: 0.266667
Step 200: accuracy: 0.34
Step 300: accuracy: 0.506667
Step 400: accuracy: 0.573333
Step 500: accuracy: 0.52
Step 600: accuracy: 0.533333
Step 700: accuracy: 0.613333
Step 800: accuracy: 0.66
Step 900: accuracy: 0.62
Step 1000: accuracy: 0.653333
Step 1100: accuracy: 0.66
Step 1200: accuracy: 0.64
Step 1300: accuracy: 0.653333
Step 1400: accuracy: 0.686667
Step 1500: accuracy: 0.713333
Step 1600: accuracy: 0.713333
Step 1700: accuracy: 0.726667
Step 1800: accuracy: 0.706667
Step 1900: accuracy: 0.713333
Step 2100: accuracy: 0.726667
Step 2200: accuracy: 0.76
Step 2300: accuracy: 0.72
Step 2400: accuracy: 0.753333
Step 2500: accuracy: 0.713333
Step 2600: accuracy: 0.7
Step 2700: accuracy: 0.74
Step 2800: accuracy: 0.693333
Step 2900: accuracy: 0.72
Step 3000: accuracy: 0.753333
Step 3100: accuracy: 0.753333
Step 3200: accuracy: 0.713333
Step 3300: accuracy: 0.746667
Step 3400: accuracy: 0.713333
Step 3500: accuracy: 0.733333
Step 3

In [None]:
# Run the network on the first 20 samples of the test file and print each
# target next to the network output.
# NOTE(review): the variables are named "validate" but the file read is
# _SAMPLE_TEST_FILE — confirm which split is intended.
batch_validate = sample_batch_from_file(_SAMPLE_TEST_FILE, batch_size=20)
batch_adj_mat_val_with_f, batch_y_val = next(batch_validate)

eval_feed = {
    input_adj_mat_with_f: batch_adj_mat_val_with_f,
    y_: batch_y_val,
    keep_prob: 1,
}
result = sess.run(y_conv, feed_dict=eval_feed)

for expected, predicted in zip(batch_y_val, result):
    print("Expected: %s; Actual: %s" % (expected, predicted))