In [None]:
# Tox21 데이터 집합
# 이 데이터 집합에는 1만 개 분자가 안드로겐 수용체와 상호작용하는지 실험한 결과가 들어있다.
# 새로운 분자가 주어졌을 때 안드로겐 수용체와 상호작용할지 여부를 예측한다

In [1]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [None]:
# DeepChem provides the Tox21 loader used below; the import must be live,
# otherwise `dc` is undefined when this cell runs on a fresh kernel.
import deepchem as dc

# Load the Tox21 splits. The first and last elements of the returned tuple are
# not needed here; they are bound to descriptive names rather than `_` so that
# IPython's "last output" variable is not shadowed.
tox21_tasks, (train, valid, test), tox21_transformers = dc.molnet.load_tox21()

# X: feature vectors, y: labels (1/0), w: per-example weights.
# w holds recommended per-datapoint weights that can be used to emphasise the
# positive examples. Up-weighting the rare class is a common technique for
# handling imbalanced datasets.
train_X, train_y, train_w = train.X, train.y, train.w
valid_X, valid_y, valid_w = valid.X, valid.y, valid.w
test_X, test_y, test_w = test.X, test.y, test.w

# Keep only the first column of the labels and weights and drop the rest
# (presumably the androgen-receptor task described in the notebook intro —
# TODO confirm against the dataset's task list).
train_y = train_y[:, 0]
valid_y = valid_y[:, 0]
test_y = test_y[:, 0]
train_w = train_w[:, 0]
valid_w = valid_w[:, 0]
test_w = test_w[:, 0]

# Hyperparameters of the fully connected network. `n_hidden` and
# `learning_rate` were previously used without being defined anywhere.
d = 1024              # dimensionality of each input feature vector
n_hidden = 50         # number of units in the hidden layer
learning_rate = .001  # step size handed to the Adam optimizer

# Fix the graph-level seed so the random weight initialisation and the dropout
# masks are repeatable from run to run.
tf.set_random_seed(456)

# Define the fully connected architecture.
# Placeholders use `None` for the batch dimension so mini-batches of any size
# can be fed.
with tf.name_scope("placeholders") :
    x = tf.placeholder(tf.float32, (None, d))
    y = tf.placeholder(tf.float32, (None,))
    # Dropout keep probability: fed at run time so that it can differ between
    # training and evaluation (feed 1.0 to disable dropout).
    keep_prob = tf.placeholder(tf.float32)

# Hidden layer
with tf.name_scope("hidden_layer") :
    W = tf.Variable(tf.random_normal((d, n_hidden)))  # weight matrix
    b = tf.Variable(tf.random_normal((n_hidden,)))
    # Activation function
    x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
    # Apply dropout to the hidden activations
    x_hidden = tf.nn.dropout(x_hidden, keep_prob)

# Output layer. Note that `W` and `b` are rebound here; the hidden-layer
# variables remain part of the graph but are no longer reachable by name.
with tf.name_scope("output") :
    W = tf.Variable(tf.random_normal((n_hidden, 1)))
    b = tf.Variable(tf.random_normal((1,)))
    y_logit = tf.matmul(x_hidden, W) + b
    # The sigmoid gives the probability of class 1
    y_one_prob = tf.sigmoid(y_logit)
    # Rounding P(y=1) yields the hard 0/1 prediction
    y_pred = tf.round(y_one_prob)

with tf.name_scope("loss") :
    # Compute the cross-entropy term for each datapoint; labels are expanded
    # from (batch,) to (batch, 1) to match the shape of the logits.
    y_expand = tf.expand_dims(y, 1)
    entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels = y_expand)
    # Sum the contributions of all datapoints in the batch
    l = tf.reduce_sum(entropy)

with tf.name_scope("optim") :
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)

with tf.name_scope("summaries") :
    # Scalar summary of the loss, merged so it can be fetched in one run call
    tf.summary.scalar("loss", l)
    merged = tf.summary.merge_all()

# Mini-batch training
#
# To implement mini-batching, each call to sess.run is fed only a
# batch_size-sized slice of the training data.

# Training configuration. These names were previously used without being
# defined before this cell, so a fresh kernel failed with NameError.
n_epochs = 10
batch_size = 100
# Value fed to the `keep_prob` placeholder, i.e. the probability of KEEPING a
# unit (despite the variable's name). At 0.5 the keep and drop rates coincide.
dropout_prob = 0.5
N = train_X.shape[0]  # number of training examples

# Writes the loss summaries (and the graph) for inspection in TensorBoard.
train_writer = tf.summary.FileWriter('/tmp/fcnet-tox21',
                                     tf.get_default_graph())

# The session is deliberately left open after training so that later cells can
# evaluate the trained model with the same `sess`; call sess.close() when done.
sess = tf.Session()
# Variables must be initialised before the first training step.
sess.run(tf.global_variables_initializer())

step = 0
for epoch in range(n_epochs) :
    # Walk the training set in consecutive slices; the final slice of an epoch
    # may be shorter than batch_size, which the (None, d) placeholder allows.
    for pos in range(0, N, batch_size) :
        batch_X = train_X[pos : pos+batch_size]
        batch_y = train_y[pos : pos+batch_size]
        feed_dict = {x : batch_X, y : batch_y, keep_prob : dropout_prob}
        _, summary, loss = sess.run([train_op, merged, l], feed_dict = feed_dict)
        print("epoch %d, step %d, loss : %f" % (epoch, step, loss))
        train_writer.add_summary(summary, step)

        step += 1

# Make sure all buffered summaries reach disk.
train_writer.flush()

In [None]:
# import numpy as np
# np.random.seed(456)
# import  tensorflow as tf
# tf.set_random_seed(456)
# import matplotlib.pyplot as plt
# import deepchem as dc
# from sklearn.metrics import accuracy_score

# _, (train, valid, test), _ = dc.molnet.load_tox21()
# train_X, train_y, train_w = train.X, train.y, train.w
# valid_X, valid_y, valid_w = valid.X, valid.y, valid.w
# test_X, test_y, test_w = test.X, test.y, test.w

# # Remove extra tasks
# train_y = train_y[:, 0]
# valid_y = valid_y[:, 0]
# test_y = test_y[:, 0]
# train_w = train_w[:, 0]
# valid_w = valid_w[:, 0]
# test_w = test_w[:, 0]


# # Generate tensorflow graph
# d = 1024
# n_hidden = 50
# learning_rate = .001
# n_epochs = 10
# batch_size = 100

# with tf.name_scope("placeholders"):
#   x = tf.placeholder(tf.float32, (None, d))
#   y = tf.placeholder(tf.float32, (None,))
# with tf.name_scope("hidden-layer"):
#   W = tf.Variable(tf.random_normal((d, n_hidden)))
#   b = tf.Variable(tf.random_normal((n_hidden,)))
#   x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
# with tf.name_scope("output"):
#   W = tf.Variable(tf.random_normal((n_hidden, 1)))
#   b = tf.Variable(tf.random_normal((1,)))
#   y_logit = tf.matmul(x_hidden, W) + b
#   # the sigmoid gives the class probability of 1
#   y_one_prob = tf.sigmoid(y_logit)
#   # Rounding P(y=1) will give the correct prediction.
#   y_pred = tf.round(y_one_prob)
# with tf.name_scope("loss"):
#   # Compute the cross-entropy term for each datapoint
#   y_expand = tf.expand_dims(y, 1)
#   entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
#   # Sum all contributions
#   l = tf.reduce_sum(entropy)

# with tf.name_scope("optim"):
#   train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)

# with tf.name_scope("summaries"):
#   tf.summary.scalar("loss", l)
#   merged = tf.summary.merge_all()

# train_writer = tf.summary.FileWriter('/tmp/fcnet-tox21',
#                                      tf.get_default_graph())
# N = train_X.shape[0]
# with tf.Session() as sess:
#   sess.run(tf.global_variables_initializer())
#   step = 0
#   for epoch in range(n_epochs):
#     pos = 0
#     while pos < N:
#       batch_X = train_X[pos:pos+batch_size]
#       batch_y = train_y[pos:pos+batch_size]
#       feed_dict = {x: batch_X, y: batch_y}
#       _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
#       print("epoch %d, step %d, loss: %f" % (epoch, step, loss))
#       train_writer.add_summary(summary, step)
    
#       step += 1
#       pos += batch_size

#   # Make Predictions
#   valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X})

# score = accuracy_score(valid_y, valid_y_pred)
# print("Unweighted Classification Accuracy: %f" % score)

# weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
# print("Weighted Classification Accuracy: %f" % weighted_score)

In [25]:
# Dry run of the mini-batch indexing: no data is sliced and no model is run;
# only the slice bounds and the epoch/step counters are printed.
n_epochs, batch_size, N = 10, 100, 947
step = 0

for epoch in range(n_epochs):
    pos = 0
    while pos < N:
        # Bounds that the real loop would use as train_X[pos:pos+batch_size]
        # and train_y[pos:pos+batch_size]. The upper bound of the last batch
        # can exceed N.
        print(pos, pos + batch_size)
        print("epoch %d, step %d" % (epoch, step))

        # Advance to the next batch and bump the global step counter.
        pos, step = pos + batch_size, step + 1

0 100
epoch 0, step 0
100 200
epoch 0, step 1
200 300
epoch 0, step 2
300 400
epoch 0, step 3
400 500
epoch 0, step 4
500 600
epoch 0, step 5
600 700
epoch 0, step 6
700 800
epoch 0, step 7
800 900
epoch 0, step 8
900 1000
epoch 0, step 9
0 100
epoch 1, step 10
100 200
epoch 1, step 11
200 300
epoch 1, step 12
300 400
epoch 1, step 13
400 500
epoch 1, step 14
500 600
epoch 1, step 15
600 700
epoch 1, step 16
700 800
epoch 1, step 17
800 900
epoch 1, step 18
900 1000
epoch 1, step 19
0 100
epoch 2, step 20
100 200
epoch 2, step 21
200 300
epoch 2, step 22
300 400
epoch 2, step 23
400 500
epoch 2, step 24
500 600
epoch 2, step 25
600 700
epoch 2, step 26
700 800
epoch 2, step 27
800 900
epoch 2, step 28
900 1000
epoch 2, step 29
0 100
epoch 3, step 30
100 200
epoch 3, step 31
200 300
epoch 3, step 32
300 400
epoch 3, step 33
400 500
epoch 3, step 34
500 600
epoch 3, step 35
600 700
epoch 3, step 36
700 800
epoch 3, step 37
800 900
epoch 3, step 38
900 1000
epoch 3, step 39
0 100
epoch 4,