In [13]:
import tensorflow as tf
import numpy as np 
import pandas as pd 
import os
from glob import glob
from PIL import Image
import matplotlib.image as mpimg
import math
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score, recall_score

In [2]:
import methods as md
# md.train_labels
# md.id_label_map
# md.get_id_from_file_path(file_path)

In [3]:
files = glob('./dataset/training/*.tif')

In [4]:
def get_imarrary_target(dataset):
    list_image = []
    labels = []
    for file in files:
        image = mpimg.imread(file)
        list_image.append(np.asarray(image))
        labels.append(md.id_label_map[md.get_id_from_file_path(file)])
    return np.asarray(list_image), np.asarray(labels)

In [5]:
files = files[:10000]

In [6]:
features, labels = get_imarrary_target(files)

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features,
                                                    labels,
                                                    test_size=0.2,
                                                    random_state=42)

In [8]:
X_train = X_train.astype(np.float32).reshape(-1, 96*96*3) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 96*96*3) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:10], X_train[10:]
y_valid, y_train = y_train[:10], y_train[10:]

In [9]:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
reset_graph()

In [78]:
n_inputs = 96*96*3
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 2

In [79]:
X = tf.placeholder(tf.float32, [None, n_inputs])
y = tf.placeholder(tf.int32, shape=(None), name="y")

In [80]:
with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1",
                              activation=tf.nn.relu)
    hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2",
                              activation=tf.nn.relu)
    logits = tf.layers.dense(hidden2, n_outputs, name="outputs")
    y_proba = tf.nn.softmax(logits)

In [81]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [82]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [83]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [84]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [85]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [86]:
batch_size = 1000
m, n = X_train.shape
n_epochs = math.floor( m / batch_size )

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.5933392 Validation accuracy: 0.6
1 Batch accuracy: 0.59947413 Validation accuracy: 0.6
2 Batch accuracy: 0.5924628 Validation accuracy: 0.6
3 Batch accuracy: 0.58194566 Validation accuracy: 0.6
4 Batch accuracy: 0.58457494 Validation accuracy: 0.6
5 Batch accuracy: 0.607362 Validation accuracy: 0.6
6 Batch accuracy: 0.62138474 Validation accuracy: 0.6


In [87]:
# https://github.com/ageron/handson-ml/blob/master/tensorflow_graph_in_jupyter.py
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = b"<stripped %d bytes>"%size
    return strip_def
def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script src="//cdnjs.cloudflare.com/ajax/libs/polymer/0.3.3/platform.js"></script>
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [88]:
show_graph(tf.get_default_graph())

In [89]:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_final.ckpt")
    Z = logits.eval(feed_dict={X: X_test})
    y_pred = np.argmax(Z, axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt


In [90]:
print(roc_auc_score(y_pred, y_test))
print(precision_score(y_pred, y_test), recall_score(y_pred, y_test))

0.6372414925046503
0.06265356265356266 0.6710526315789473


## FNN Using the Estimator API

In [10]:
feature_cols = [tf.feature_column.numeric_column("X", shape=[96*96*3])]
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                     feature_columns=feature_cols)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=40, batch_size=50, shuffle=True)
dnn_clf.train(input_fn=input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/r3/zwg30x2j4kqg8mwjvyy5cl5r0000gn/T/tmpkwk2w985', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x133b07630>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
T

INFO:tensorflow:global_step/sec: 37.8472
INFO:tensorflow:loss = 27.580725, step = 5401 (2.642 sec)
INFO:tensorflow:global_step/sec: 38.0916
INFO:tensorflow:loss = 25.754879, step = 5501 (2.625 sec)
INFO:tensorflow:global_step/sec: 35.0003
INFO:tensorflow:loss = 23.822107, step = 5601 (2.857 sec)
INFO:tensorflow:global_step/sec: 36.8954
INFO:tensorflow:loss = 29.543165, step = 5701 (2.710 sec)
INFO:tensorflow:global_step/sec: 34.4431
INFO:tensorflow:loss = 24.87896, step = 5801 (2.904 sec)
INFO:tensorflow:global_step/sec: 35.5398
INFO:tensorflow:loss = 29.692114, step = 5901 (2.814 sec)
INFO:tensorflow:global_step/sec: 37.07
INFO:tensorflow:loss = 25.246552, step = 6001 (2.697 sec)
INFO:tensorflow:global_step/sec: 37.8602
INFO:tensorflow:loss = 29.007973, step = 6101 (2.641 sec)
INFO:tensorflow:global_step/sec: 36.8412
INFO:tensorflow:loss = 26.11143, step = 6201 (2.714 sec)
INFO:tensorflow:global_step/sec: 36.0304
INFO:tensorflow:loss = 21.42731, step = 6301 (2.775 sec)
INFO:tensorflow

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x133b07400>

In [11]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test}, y=y_test, shuffle=False)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-04-26T00:08:10Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /var/folders/r3/zwg30x2j4kqg8mwjvyy5cl5r0000gn/T/tmpkwk2w985/model.ckpt-6392
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-04-26-00:08:10
INFO:tensorflow:Saving dict for global step 6392: accuracy = 0.623, average_loss = 0.6377178, global_step = 6392, loss = 79.71472
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 6392: /var/folders/r3/zwg30x2j4kqg8mwjvyy5cl5r0000gn/T/tmpkwk2w985/model.ckpt-6392


In [12]:
eval_results

{'accuracy': 0.623,
 'average_loss': 0.6377178,
 'loss': 79.71472,
 'global_step': 6392}

In [16]:
y_pred_iter = dnn_clf.predict(input_fn=test_input_fn)
y_pred = list(y_pred_iter)
y_pred[0]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/r3/zwg30x2j4kqg8mwjvyy5cl5r0000gn/T/tmpkwk2w985/model.ckpt-6392
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'logits': array([ 11.035367,  11.999402, -25.204435, -23.762527, -37.451687,
        -33.46306 , -30.299366, -27.520096, -34.821247, -34.993572],
       dtype=float32),
 'probabilities': array([2.7607107e-01, 7.2392899e-01, 5.0381867e-17, 2.1305303e-16,
        2.4174714e-22, 1.3049703e-20, 3.0872715e-19, 4.9727489e-18,
        3.3554196e-21, 2.8242749e-21], dtype=float32),
 'class_ids': array([1]),
 'classes': array([b'1'], dtype=object)}