In [1]:
# Read CSVs
import load_data as ld

# To load train and test data
# No need to repeat loading data if just need to change training parameters
in_height = 64
in_width = 64
num_rows = 1000

inputs = ld.read_data('train.csv', in_height, in_width, nrows=num_rows)
labels = ld.read_label('train_label.csv', nrows=num_rows)
x_predict = ld.read_data('test.csv', in_height, in_width, nrows=num_rows)

./data/train.csv  - data shape =  (1000, 4096)
./data/train_label.csv  - label shape =  (1000,)
./data/test.csv  - data shape =  (1000, 4096)


In [2]:
# split inputs for training and testing
import numpy as np

train_ratio = 0.8

np.random.seed(0)
mask = np.random.rand(inputs.shape[0]) <= train_ratio

x_train = inputs[mask]
y_train = labels[mask]
x_test = inputs[~mask]
y_test = labels[~mask]

print("x_train.shape", x_train.shape)
print("y_train.shape", y_train.shape)
print("x_test.shape", x_test.shape)
print("y_test.shape", y_test.shape)

x_train.shape (797, 4096)
y_train.shape (797,)
x_test.shape (203, 4096)
y_test.shape (203,)


In [3]:
# Train the model
import tensorflow as tf
import csv
from model import model_fn

train_epoch = 5
batch_size = 128
num_steps = None
ckpt_steps = 10
max_ckpt = 50

rc = tf.estimator.RunConfig(model_dir = "./model", keep_checkpoint_max=max_ckpt, save_checkpoints_steps=ckpt_steps)
model = tf.estimator.Estimator(model_fn, config=rc)
# model = tf.estimator.Estimator(model_fn)

# Define the input function for training
input_fn = tf.estimator.inputs.numpy_input_fn(
    x= {'file': x_train}, y=y_train,
    batch_size=batch_size, num_epochs=train_epoch, shuffle=True)
# Train the Model
model.train(input_fn, steps=num_steps)

INFO:tensorflow:Using config: {'_model_dir': './model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 10, '_save_checkpoints_secs': None, '_session_config': None, '_keep_checkpoint_max': 50, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11201e668>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use the retry module or similar alternatives.
Current accuracy of model (<tf.Tensor 'accuracy/value:0' shape=() dtype=float32>, <tf.Tensor 'accuracy/update_op:0' shape=() dtype=float32>)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tenso

<tensorflow.python.estimator.estimator.Estimator at 0x11201e5c0>

In [4]:
# Evaluate the Model
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'file': x_test}, y=y_test,
    batch_size=batch_size, shuffle=False)

# Use the Estimator 'evaluate' method
e = model.evaluate(input_fn)
total_step = e['global_step']
print("global_step:", e['global_step'])
print('accuracy = ', e['accuracy'], "loss = ", e['loss'])

INFO:tensorflow:Calling model_fn.
Current accuracy of model (<tf.Tensor 'accuracy/value:0' shape=() dtype=float32>, <tf.Tensor 'accuracy/update_op:0' shape=() dtype=float32>)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-04-05-15:49:52
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-32
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-04-05-15:49:53
INFO:tensorflow:Saving dict for global step 32: accuracy = 0.8325123, global_step = 32, loss = 0.7701556
global_step: 32
accuracy =  0.8325123 loss =  0.7701556


In [5]:
# Evaluate checkpoints
print('total_step = ', total_step)
for i in range(1, total_step//ckpt_steps+1):
    ckpt_path = './model/model.ckpt-' + str(i * ckpt_steps + 1)
    print(ckpt_path)
    e = model.evaluate(input_fn, checkpoint_path=ckpt_path)
    print("Testing Accuracy:", e['accuracy'])
    

total_step =  32
./model/model.ckpt-11
INFO:tensorflow:Calling model_fn.
Current accuracy of model (<tf.Tensor 'accuracy/value:0' shape=() dtype=float32>, <tf.Tensor 'accuracy/update_op:0' shape=() dtype=float32>)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-04-05-15:50:00
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-11
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-04-05-15:50:01
INFO:tensorflow:Saving dict for global step 11: accuracy = 0.32019705, global_step = 11, loss = 9.9606285
Testing Accuracy: 0.32019705
./model/model.ckpt-21
INFO:tensorflow:Calling model_fn.
Current accuracy of model (<tf.Tensor 'accuracy/value:0' shape=() dtype=float32>, <tf.Tensor 'accuracy/update_op:0' shape=() dtype=float32>)
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-04-05-15:50:01
INFO:tensorflow

In [6]:
# Predict
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'file': x_predict},
    batch_size=batch_size, num_epochs=1, shuffle=False)

results = model.predict(input_fn=predict_input_fn)

i = 0
with open('result.csv', 'w') as csvfile:
    csv_writer = csv.writer(csvfile,)
    csv_writer.writerow(["sample_id", "malware"])
    for result in results:
        csv_writer.writerow([i, result[1]])
        i = i+1

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-32
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
