In [112]:
import tensorflow as tf
import numpy as np
import os,shutil
import matplotlib.pyplot as plt
from tensorflow.python.estimator.model_fn import ModeKeys as Modes
from tensorflow.contrib.learn import Experiment
from tensorflow.contrib.learn.python.learn import learn_runner
from tensorflow.contrib.learn.python.learn.utils import (saved_model_export_utils)

# Emit INFO-level TensorFlow logs so training/evaluation progress is visible in the cell output.
tf.logging.set_verbosity(tf.logging.INFO)
# Model directory: checkpoints and the SavedModel export are written underneath this path.
OUTDIR='/tmp/trained_model'

def read_and_decode(filename_queue):
    """Read one CSV line from the queue and split it into features and label.

    Every line is parsed as 31 float columns. Column 0 is the time field and
    is skipped, columns 1-28 form the 28-element feature vector, and column 30
    is the label. Column 29 is parsed but not used.

    Args:
        filename_queue: queue of CSV file names consumed by ``tf.TextLineReader``.

    Returns:
        A ``(features, label)`` pair: a float32 tensor with static shape
        ``[28]`` and the label cast to int32.
    """
    line_reader = tf.TextLineReader()
    _, csv_line = line_reader.read(filename_queue)

    # One float default per CSV column.
    float_defaults = [[0.0] for _ in range(31)]
    fields = tf.decode_csv(csv_line, record_defaults=float_defaults)

    features = tf.convert_to_tensor(fields[1:29], dtype=tf.float32)
    features.set_shape([28])
    label = tf.cast(fields[30], tf.int32)

    return features, label

def input_fn(filename,batch_size=100):
    """Build the batched input pipeline for a single CSV file.

    Args:
        filename: path of the CSV file to read.
        batch_size: number of examples per batch.

    Returns:
        ``(features, labels)`` where ``features`` is ``{'inputs': batch}`` and
        ``labels`` is the matching batch of labels.
    """
    file_queue = tf.train.string_input_producer([filename])
    single_value, single_label = read_and_decode(file_queue)

    # The batching queue holds up to 1000 examples plus three full batches.
    queue_capacity = 1000 + 3 * batch_size
    batch_values, batch_labels = tf.train.batch(
        [single_value, single_label],
        batch_size=batch_size,
        capacity=queue_capacity)

    return {'inputs': batch_values}, batch_labels

def get_input_fn(filename,batch_size=100):
    """Return a zero-argument callable that builds the input pipeline.

    The returned function calls ``input_fn(filename, batch_size)`` each time it
    is invoked, so the pipeline is only constructed when the caller asks for it.
    """
    def _bound_input_fn():
        return input_fn(filename, batch_size)

    return _bound_input_fn

def serving_input_fn():
    """Describe the inputs of the exported (serving) model.

    Returns:
        A ``ServingInputReceiver`` whose single entry ``'inputs'`` is a float32
        placeholder of shape ``[None, 28]``.
    """
    feature_placeholders = {
        'inputs': tf.placeholder(tf.float32, shape=[None, 28]),
    }
    # The same dict serves as both the model features and the receiver tensors.
    return tf.estimator.export.ServingInputReceiver(
        features=feature_placeholders,
        receiver_tensors=feature_placeholders)

def autoencoder_model_fn(features,labels,mode):
    """Estimator model function for a dense autoencoder over 28 features.

    The network is 28 -> 20 -> 14 -> 10 -> 14 -> 20 -> 28: ReLU on the hidden
    layers and a sigmoid on the output layer. The loss is the sum of squared
    differences between the reconstruction and the input, summed over every
    element of the batch.

    Args:
        features: dict holding the input batch under the key ``'inputs'``.
        labels: accepted because the Estimator passes it, but not used; the
            autoencoder is trained against its own input.
        mode: one of ``Modes.TRAIN``, ``Modes.EVAL`` or ``Modes.PREDICT``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.

    Raises:
        ValueError: if ``mode`` is not one of the three supported modes
            (previously the function silently returned ``None``).
    """
    input_layer = features['inputs']

    # Hidden layers are created in the same order as before, so TensorFlow
    # assigns them the same automatic layer names.
    # NOTE(review): the sigmoid output presumably expects inputs scaled to [0, 1] - confirm.
    net = input_layer
    for units in (20, 14, 10, 14, 20):
        net = tf.layers.dense(inputs=net, units=units, activation=tf.nn.relu)
    output_layer = tf.layers.dense(inputs=net, units=28, activation=tf.nn.sigmoid)

    # Prediction mode: expose the reconstruction, no loss is needed.
    if mode == Modes.PREDICT:
        predictions = {
            'outputs': output_layer
        }
        export_outputs = {
            'outputs': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(
            mode, predictions=predictions, export_outputs=export_outputs)

    if mode not in (Modes.TRAIN, Modes.EVAL):
        raise ValueError('unsupported mode: {!r}'.format(mode))

    # Training and evaluation share the reconstruction loss.
    global_step = tf.contrib.framework.get_or_create_global_step()
    loss = tf.reduce_sum(tf.square(output_layer - input_layer))
    tf.summary.scalar('OptimizeLoss', loss)

    if mode == Modes.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

    # Evaluation reports the loss only; no extra metrics are defined.
    return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=None)

def build_estimator(model_dir):
    """Create the autoencoder Estimator that stores its state in ``model_dir``.

    Args:
        model_dir: directory passed to the Estimator as its model directory.

    Returns:
        A ``tf.estimator.Estimator`` built around ``autoencoder_model_fn``.
    """
    # Write a checkpoint every 180 seconds.
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=180)
    estimator = tf.estimator.Estimator(
        model_fn=autoencoder_model_fn,
        model_dir=model_dir,
        config=run_config)
    return estimator

def generate_experiment_fn(data_dir,
                          train_batch_size = 100,
                          eval_batch_size = 100,
                          train_steps = 20000,
                          eval_steps = 1,
                          **experiment_args):
    """Build the experiment factory expected by ``learn_runner.run``.

    Args:
        data_dir: directory containing ``creditcard_training.csv`` and
            ``creditcard_testing.csv``. Previously this argument was ignored
            and ``./data/`` was hard-coded.
        train_batch_size: batch size of the training input pipeline.
        eval_batch_size: batch size of the evaluation input pipeline.
        train_steps: number of training steps passed to the Experiment.
        eval_steps: number of evaluation steps passed to the Experiment.
        **experiment_args: extra keyword arguments forwarded to ``Experiment``.

    Returns:
        A function taking ``output_dir`` and returning the configured
        ``Experiment``.
    """
    # Resolve the data files from data_dir so the parameter is actually honoured.
    train_csv = os.path.join(data_dir, 'creditcard_training.csv')
    eval_csv = os.path.join(data_dir, 'creditcard_testing.csv')

    def _experiment_fn(output_dir):
        # Keep only the most recent SavedModel export.
        export_strategy = saved_model_export_utils.make_export_strategy(
            serving_input_fn,
            default_output_alternative_key=None,
            exports_to_keep=1)
        return Experiment(
            build_estimator(output_dir),
            train_input_fn=get_input_fn(train_csv, batch_size=train_batch_size),
            eval_input_fn=get_input_fn(eval_csv, batch_size=eval_batch_size),
            export_strategies=[export_strategy],
            train_steps=train_steps,
            eval_steps=eval_steps,
            **experiment_args
        )
    return _experiment_fn

# Remove any previous run so training always starts from an empty model directory.
shutil.rmtree(OUTDIR, ignore_errors=True)

# Train for 5000 steps, evaluate and export; the result is shown as the cell output.
experiment_fn = generate_experiment_fn(data_dir='./data/', train_steps=5000)
learn_runner.run(experiment_fn, OUTDIR)
    


INFO:tensorflow:Using config: {'_save_checkpoints_secs': 180, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11a8fb4d0>, '_model_dir': '/tmp/trained_model', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_tf_random_seed': None, '_save_summary_steps': 100, '_environment': 'local', '_num_worker_replicas': 0, '_task_id': 0, '_log_step_count_steps': 100, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_evaluation_master': '', '_master': ''}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/trained_model/model.ckpt.
INFO:tensorflow:Starting evaluation at 2017-09-27-01:20:34
INFO:tensorflow:Restoring parameters from /tmp/trained_model/model.ckpt-1
INFO:tensorflow:Eval

({'global_step': 5000, 'loss': 2.2240827},
 ['/tmp/trained_model/export/Servo/1506475329'])

In [117]:
# Start from an empty default graph so the SavedModel is loaded into a clean graph.
tf.reset_default_graph()

# Exports are stored in sub-directories named by a numeric timestamp. Pick the
# newest one explicitly: os.listdir() returns entries in arbitrary order, so
# taking element [0] is not guaranteed to be the latest (or even a finished) export.
export_root = os.path.join(OUTDIR, 'export', 'Servo')
export_timestamps = [name for name in os.listdir(export_root) if name.isdigit()]
if not export_timestamps:
    raise IOError('no exported model found under {}'.format(export_root))
timestamp = max(export_timestamps, key=int)
export_dir = os.path.join(export_root, timestamp)
print(export_dir)

# The session is intentionally left open: the evaluation cell further down reuses it.
sess = tf.Session()
meta_graph = tf.saved_model.loader.load(sess,[tf.saved_model.tag_constants.SERVING],export_dir)

# Look up the tensors of the default serving signature.
model_signature = meta_graph.signature_def['serving_default']
input_signature = model_signature.inputs
output_signature = model_signature.outputs

print(input_signature.keys())
print(output_signature.keys())

/tmp/trained_model/export/Servo/1506475329
INFO:tensorflow:Restoring parameters from /tmp/trained_model/export/Servo/1506475329/variables/variables
[u'inputs']
[u'outputs']


In [118]:
# Resolve the newest export directory. Export sub-directories are named by a
# numeric timestamp; os.listdir() order is arbitrary, so choose the largest
# timestamp instead of whatever happens to be listed first.
export_root = '{}/export/Servo'.format(OUTDIR)
export_timestamps = [name for name in os.listdir(export_root) if name.isdigit()]
if not export_timestamps:
    raise IOError('no exported model found under {}'.format(export_root))
export_dir = '{}/{}'.format(export_root, max(export_timestamps, key=int))

print(export_dir)

/tmp/trained_model/export/Servo/1506475329


In [119]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

# Load the validation split and show its first rows as the cell output.
validation_csv = './data/creditcard_validation.csv'
df = pd.read_csv(validation_csv)
df.head(n=5)


Unnamed: 0,0.71035696097,0.955860940362,0.771993885589,0.832990418352,0.183263308288,0.76995653067,0.253941251912,0.271196092353,0.781727130119,0.407857440369,...,0.562319625675,0.513710152425,0.661866895864,0.283897799791,0.585739377661,0.403853310412,0.418311149341,0.316228156476,0.00214276038918,0.0
0,0.79061,0.957872,0.774946,0.843108,0.222501,0.768015,0.25424,0.270633,0.785168,0.456347,...,0.558132,0.486216,0.666844,0.388596,0.546656,0.444663,0.420859,0.314892,0.00035,0.0
1,0.748808,0.955831,0.773235,0.830063,0.223238,0.766206,0.266071,0.266912,0.790518,0.47961,...,0.558114,0.479266,0.671487,0.375709,0.498235,0.409754,0.411367,0.309303,0.003227,0.0
2,0.793393,0.990986,0.773329,0.820798,0.407497,0.772168,0.269552,0.266598,0.786232,0.420546,...,0.558026,0.480873,0.669373,0.411872,0.572649,0.377856,0.415654,0.312042,0.000234,0.0
3,0.751615,0.964915,0.73753,0.787759,0.290917,0.76344,0.25721,0.275097,0.779274,0.482425,...,0.571422,0.505331,0.654013,0.452787,0.583428,0.322545,0.413128,0.315144,0.034816,0.0
4,0.820999,0.989622,0.758402,0.824128,0.219899,0.761775,0.259051,0.263278,0.785084,0.506376,...,0.556055,0.465094,0.670498,0.331263,0.542319,0.457035,0.415415,0.31215,0.003694,0.0


In [120]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec


def count_flagged(errors, labels, threshold):
    """Count flagged records for one threshold.

    Args:
        errors: 1-D array with one reconstruction error per record.
        labels: 1-D array of the same length; a value of 1 marks a fraud record.
        threshold: records with ``error > threshold`` are flagged.

    Returns:
        ``(valid, invalid, ratio)``: flagged fraud records, flagged non-fraud
        records, and ``valid / (valid + invalid)``. ``ratio`` is ``nan`` when
        nothing is flagged (this used to raise ZeroDivisionError).
    """
    flagged = errors > threshold
    valid = int(np.sum(flagged & (labels == 1)))
    invalid = int(np.sum(flagged)) - valid
    flagged_total = valid + invalid
    ratio = float(valid) / flagged_total if flagged_total else float('nan')
    return valid, invalid, ratio


df = pd.read_csv('./data/creditcard_validation.csv')
# NOTE(review): read_csv treats the first line of the file as the header, and the
# column names shown by df.head() look like data values - the first record may be
# getting dropped. Confirm whether header=None is needed.

# Positional slicing: columns 1-28 are the features, column 30 is the label
# (.ix and .as_matrix() are deprecated; .iloc / .values select the same data).
data = df.iloc[:, 1:29].values
label = df.iloc[:, 30:31].values

print(data[0])
print(label[0])
print(len(data))

# Run the loaded SavedModel on the whole validation set in one call.
feed_dict = {sess.graph.get_tensor_by_name(input_signature['inputs'].name): data}
output = sess.graph.get_tensor_by_name(output_signature['outputs'].name)
results = sess.run(output, feed_dict=feed_dict)

# Per-record error, computed once instead of once per threshold.
# NOTE(review): this is |sum of signed differences|, so positive and negative
# errors cancel out, whereas the training loss is a sum of squares. Left unchanged
# because the thresholds below were tuned against this metric - confirm intent.
errors = np.absolute(np.sum(data - results, axis=1))
labels_flat = label.ravel()

throughput = [1.1,1.0,0.9,0.8]
# Derive the fraud count from the labels instead of hard-coding it.
print("total fraud record is %d" % int(np.sum(labels_flat == 1)))
print("throughput valid invalid %")
for t in throughput:
    valid, invalid, ratio = count_flagged(errors, labels_flat, t)
    print("%s %s %s %s" % (t, valid, invalid, ratio))


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  


[ 0.95787167  0.77494618  0.84310826  0.22250124  0.76801458  0.25424049
  0.27063279  0.78516789  0.45634706  0.50413066  0.32777495  0.71994316
  0.37756528  0.66218672  0.26013676  0.4530275   0.7143201   0.64094174
  0.57695847  0.57954944  0.5581321   0.48621622  0.66684381  0.38859636
  0.54665569  0.44466304  0.4208591   0.31489154]
[ 0.]
57108
total fraud record is 246
throughput valid invalid %
1.1 115 2 0.982905982906
1.0 115 5 0.958333333333
0.9 120 14 0.89552238806
0.8 132 21 0.862745098039


# 레이어 14,7,4,7,14

loss 값은 3.5대
2000 step 씩 첫번째
total fraud record is 246
throughput valid invalid %
1.1 96 6 0.941176470588
1.0 109 16 0.872
0.9 116 47 0.711656441718
0.8 123 147 0.455555555556

2000 step 씩 두번째
throughput valid invalid %
1.1 105 5 0.954545454545
1.0 110 11 0.909090909091
0.9 121 34 0.78064516129
0.8 126 98 0.5625

2000 step 씩 세번째
throughput valid invalid %
1.1 95 2 0.979381443299
1.0 111 6 0.948717948718
0.9 115 12 0.905511811024
0.8 121 53 0.695402298851

5000 step
total fraud record is 246
throughput valid invalid %
1.1 128 17 0.88275862069
1.0 136 22 0.860759493671
0.9 141 33 0.810344827586
0.8 158 52 0.752380952381


# 레이어 20,10,7,10,20 (참고: 위 코드에 정의된 레이어는 20,14,10,14,20 이며, 아래 결과는 위 실행 출력과 동일함)

loss 2.2
total fraud record is 246
throughput valid invalid %
1.1 115 2 0.982905982906
1.0 115 5 0.958333333333
0.9 120 14 0.89552238806
0.8 132 21 0.862745098039