In [1]:
#coding=utf-8
import numpy as np
import tensorflow as tf
import sklearn.preprocessing as prep
import os

In [2]:
class Autoencoder(object):
    def __init__(self, n_input, n_hidden, transfer_function = tf.nn.softplus,
                 optimizer = tf.train.AdamOptimizer(), scale = 0.0):
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        self.scale = tf.placeholder(tf.float32)
        self.training_scale = scale
        network_weights = self._initialize_weights()
        self.weights = network_weights
        self.x = tf.placeholder(tf.float32,[None, self.n_input])
        self.hidden = self.transfer(tf.add(tf.matmul(self.x + scale * tf.random_normal((n_input,)),
                                                     self.weights['w1']), self.weights['b1']))
        self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']),self.weights['b2'])
        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def _initialize_weights(self):
        all_weights = dict()
        all_weights['w1'] = tf.Variable(xavier_init(self.n_input, self.n_hidden))
        all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden],dtype= tf.float32))
        all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype= tf.float32))
        all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype= tf.float32))
        return all_weights
    def partial_fit(self,X ):
        cost, opt = self.sess.run((self.cost, self.optimizer), feed_dict= {self.x: X,
                                                                           self.scale: self.training_scale})
        return cost
    def before_loss(self, X):
        cost = self.sess.run((self.cost), feed_dict={self.x: X,
                                                                          self.scale: self.training_scale})
        return cost
    def transform(self, X):
        return self.sess.run(self.hidden, feed_dict= {self.x : X, self.scale: self.training_scale})
    def generate(self, hidden = None):
        if hidden is None:
            #print(self.weights["b1"].shape)
            hidden = np.random.normal( size = self.weights["b1"])
            #hidden = np.random.normal(size=self.weights["b1"].shape)


        return self.sess.run(self.reconstruction, feed_dict= {self.hidden: hidden})
    ###这块的reconstruction是初始化定义的w* x + b

    def reconstruct(self, X):
        ##这块的重建是指整体运行一边复原过程
        return self.sess.run(self.reconstruction, feed_dict={self.x : X, self.scale: self.training_scale})
    def getWeights(self):
        return self.sess.run(self.weights['w1'])
    def getBias(self):
        return self.sess.run(self.weights['b1'])

In [3]:
file1=open("autorcode_emb.txt","w")
file2=open("gene.txt","w")

In [4]:
def standard_scale(X_train):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    return X_train

def get_random_block_from_data(data, batch_size):
    start_index = np.random.randint(0, len(data) - batch_size)
    return data[start_index: (start_index + batch_size)]

def xavier_init(fan_in, fan_out, constant = 1):
    low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
    high = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out), minval = low, maxval = high, dtype= tf.float32)

In [5]:
geneSet=set()
data=[]
for lines in open(r"/Users/vlietstraw/git/Post-GWAS/Node2vec/directed_weighed_complete_copy_for_autoencoder.emb","r"):
    line=lines.strip().split()
    geneSet.add(line[0])
    file2.write(line[0])
    file2.write("\n")
    list1=[]
    for l in line[1:]:
        list1.append(float(l))
    data.append(list1)
#print (geneSet)
#print (len(data))

In [6]:
training_epochs =1000
batch_size = 256
display_step = 1
input_n_size = [128, 256]
hidden_size = [350, 100]
sdne = []

for i in range(1):
    if i== 0:
        ae = Autoencoder(n_input = input_n_size[0], n_hidden = hidden_size[0], transfer_function = tf.nn.elu,
                             optimizer = tf.train.AdamOptimizer(learning_rate= 0.0001),
                             scale = 0)
        
        sdne.append(ae)
    else:
        ae = Autoencoder(n_input = input_n_size[1], n_hidden = hidden_size[1], transfer_function = tf.nn.sigmoid,
                             optimizer = tf.train.AdagradOptimizer(learning_rate= 0.01),
                             scale = 0)
        
        sdne.append(ae)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [7]:
W = []
b = []
Hidden_feature = []

for j in range(1):
    if j == 0:
        X_train = standard_scale(data)
    else:
        X_train_pre = X_train
        X_train = sdne[j-1].transform(X_train_pre)
        Hidden_feature.append(X_train)
    epoch=0
    for epoch in range(300):
        total_cost = 0.
        total_batch = int(X_train.shape[0] / batch_size)

        for k in range(total_batch):

            batch_xs = get_random_block_from_data(X_train, batch_size)

            cost = sdne[j].partial_fit(batch_xs)
            total_cost=total_cost+cost
        loss=total_cost/13460

        if epoch % display_step == 0:
            print("Epoch:", "%4d" % (epoch + 1), "每个样本上的误差:", "{:.9f}".format(loss))
            
    if j == 0:
        feat0 = sdne[0].transform(standard_scale(data))
        print (len(feat0))
        for feat in feat0:
            for f in feat:
                file1.write(str(f))
                file1.write("\t")
            file1.write("\n")
file1.close()

file2.close()

Epoch:    1 每个样本上的误差: 79.398108989
Epoch:    2 每个样本上的误差: 56.525825149
Epoch:    3 每个样本上的误差: 46.475921674
Epoch:    4 每个样本上的误差: 42.626154560
Epoch:    5 每个样本上的误差: 27.279527617
Epoch:    6 每个样本上的误差: 23.249182481
Epoch:    7 每个样本上的误差: 19.209487937
Epoch:    8 每个样本上的误差: 17.712815232
Epoch:    9 每个样本上的误差: 21.009118150
Epoch:   10 每个样本上的误差: 19.348768034
Epoch:   11 每个样本上的误差: 12.194838732
Epoch:   12 每个样本上的误差: 12.240765778
Epoch:   13 每个样本上的误差: 11.847535212
Epoch:   14 每个样本上的误差: 9.544716777
Epoch:   15 每个样本上的误差: 10.280545644
Epoch:   16 每个样本上的误差: 8.877107819
Epoch:   17 每个样本上的误差: 7.647968028
Epoch:   18 每个样本上的误差: 6.489051672
Epoch:   19 每个样本上的误差: 8.066670661
Epoch:   20 每个样本上的误差: 3.591215888
Epoch:   21 每个样本上的误差: 6.840368264
Epoch:   22 每个样本上的误差: 4.486811848
Epoch:   23 每个样本上的误差: 3.725745437
Epoch:   24 每个样本上的误差: 4.207976084
Epoch:   25 每个样本上的误差: 5.283438412
Epoch:   26 每个样本上的误差: 4.270611854
Epoch:   27 每个样本上的误差: 3.925184639
Epoch:   28 每个样本上的误差: 3.148459039
Epoch:   29 每个样本上的误差: 3.000770291
