### Imports

In [20]:
import csv
import numpy as np
import mxnet as mx
import random
import sys
import pickle
import matplotlib as mpl
mpl.use('TkAgg')
from matplotlib import pyplot as plt
from mxnet import gluon, nd, autograd

### Create CSV Reader Object to process data

In [22]:
csvreader = csv.reader(open("/Users/root02/Downloads/creditcard.csv"))

### Initialize variables

In [23]:
input_entries = list()

### Read from CSV and clean up data

In [24]:
i = 0
for row in csvreader:
    if i == 0:
        i += 1
        continue
    for idx, i in enumerate(row):
        if idx < len(row)-1:
            row[idx] = float(i)
        else:
            row[idx] = int(i)
    input_entries.append(row[1:])

In [9]:
X = input_entries[1:]

In [10]:
random.shuffle(X)

### Shorten the size of the data set

In [11]:
X_train = list()
X_test = list()

for i in X:
    if i[-1] == 1:
        X_train.append(i)
        X.remove(i)
        
X_test = np.asarray(X_train[452:]+X[100000:101000])
X_train = np.asarray(X_train[:452]+X[20000:30000])

### Define structure of neural network

In [8]:
net = gluon.nn.Sequential()

with net.name_scope():
    net.add(gluon.nn.Dense(64))
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Activation(activation="relu"))
    net.add(gluon.nn.Dense(64))
    net.add(gluon.nn.BatchNorm())
    net.add(gluon.nn.Activation(activation="relu"))
    net.add(gluon.nn.Dense(1, activation="sigmoid"))

### Define Loss Function, Trainer, & Initialize NN Parameters

In [9]:
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
net.collect_params().initialize(mx.init.Normal(), ctx=mx.cpu())
trainer = gluon.Trainer(net.collect_params(), optimizer="adagrad")

In [12]:
batch_size = 64

### Define Epoch Function

In [14]:
def epoch():
    for idx, i in enumerate(X_train):
        X_batch = nd.array(X_train[idx:idx+batch_size, :-1])
        Y_batch = nd.array(X_train[idx:idx+batch_size, -1:])
        idx += batch_size
        with autograd.record():
            out = net(X_batch)
            l2 = loss(out, Y_batch)
        l2.backward()
        trainer.step(X_batch.shape[0])
        sys.stdout.write ("\r"+str(idx/len(X_train) * 100)[:10]+" percent done")
        sys.stdout.flush()

### Define Accuracy Evaluation

In [15]:
def eval_acc(data, threshold=0.5, batch_size=64, train_acc=False):
    acc = mx.metric.Accuracy()
    for idx, i in enumerate(data):
        X_batch = nd.array(data[idx:idx+batch_size, :-1])
        Y_batch = nd.array(data[idx:idx+batch_size, -1:])
        idx += batch_size
        out = net(X_batch)
        preds = out > threshold
        acc.update(preds=preds,labels=Y_batch)
    return acc.get()[-1]

### Training Loop

In [16]:
for i in range(10):
    epoch()
    print ("\nAccuracy", eval_acc(X_train))
    print("Epoch", i, ":", "/Training", return_confusion_matrix(), "/Test", return_confusion_matrix(X_test, threshold=0.5, true_val=40))

100.602755 percent done
Accuracy 0.959646849959
Epoch 0 : /Training (0, 0, 10000, 452) /Test (0, 0, 1000, 40)
100.602755 percent done
Accuracy 0.936135502135
Epoch 1 : /Training (8, 253, 9747, 444) /Test (0, 26, 974, 40)
100.602755 percent done
Accuracy 0.894030996593
Epoch 2 : /Training (197, 866, 9134, 255) /Test (18, 82, 918, 22)
100.602755 percent done
Accuracy 0.920857624394
Epoch 3 : /Training (149, 543, 9457, 303) /Test (14, 57, 943, 26)
100.602755 percent done
Accuracy 0.903730927019
Epoch 4 : /Training (216, 784, 9216, 236) /Test (22, 76, 924, 18)
100.602755 percent done
Accuracy 0.978694340483
Epoch 5 : /Training (218, 4, 9996, 234) /Test (18, 0, 1000, 22)
100.602755 percent done
Accuracy 0.964476572621
Epoch 6 : /Training (352, 278, 9722, 100) /Test (34, 29, 971, 6)
100.602755 percent done
Accuracy 0.990713917278
Epoch 7 : /Training (359, 9, 9991, 93) /Test (35, 0, 1000, 5)
100.602755 percent done
Accuracy 0.892350115158
Epoch 8 : /Training (378, 1053, 8947, 74) /Test (37, 9

### Define Confusion Matrix Generating Function

In [16]:
def return_confusion_matrix(dataset=X_train, threshold=0.5, true_val=452):
    tp, fn = 0, 0
    X_2 = nd.array(dataset[:true_val, :-1])
    Y_2 = nd.array(dataset[:true_val, -1:])
    out = net(X_2)
    preds = out > threshold
    tp = np.count_nonzero((preds == Y_2).asnumpy())
    fn = true_val - tp
    
    fp, tn = 0, 0
    X_3 = nd.array(dataset[true_val:, :-1])
    Y_3 = nd.array(dataset[true_val:, -1:])
    out = net(X_3)
    preds = out > threshold
    tn = np.count_nonzero((preds == Y_3).asnumpy())
    fp = X_3.shape[0] - tn
    return tp, fp, tn, fn

### Calculate TPR & FPR

In [17]:
def return_tpfr(tp, fp, tn, fn):
    return tp/(tp+fn), fp/(tn+fp)

### Model Performance Evaluation

In [18]:
a, b, c, d = return_confusion_matrix(X_train, threshold=0.5, true_val=452)

In [19]:
return_tpfr(a, b, c, d)

(0.8296460176991151, 0.0473)

### Save model and parameters

In [23]:
pickle.dump(net, open("/Users/root02/Desktop/model_ccfraud1.Sequential", "wb"))

In [22]:
net.save_params("/Users/root02/Desktop/general_classifier1.params")

### Construct ROC Curve

#### Choose a range of threshold values

In [134]:
threshold_values = list()

start = 1
while start >= 0:
    threshold_values.append(start)
    start -= 0.0001

In [135]:
X_train[5001]

array([  1.12834678e+00,   5.69223257e-01,   5.08629436e-01,
         2.46508449e+00,   3.25329455e-01,   4.04871267e-01,
        -7.99382737e-02,   9.65915899e-02,   3.33517105e-01,
         4.75470318e-01,   1.91523950e+00,  -2.28336558e+00,
         1.00608604e+00,   2.09541372e+00,  -6.56948480e-01,
         5.91927586e-01,   8.17482439e-02,   2.12893015e-01,
        -1.15054523e+00,  -2.34467328e-01,  -8.71505034e-02,
        -7.05947797e-02,  -6.65994216e-02,  -3.81225584e-01,
         4.62689187e-01,   3.16093003e-02,  -3.70014884e-02,
        -4.22349391e-03,   1.00300000e+01,   0.00000000e+00])

#### Compute TPR and FPR for each value

In [136]:
tpr, fpr = list(), list()

for values in threshold_values:
    t1, t2, t3, t4 = return_confusion_matrix(X_test, threshold=values, true_val=40)
    _tpr, _fpr = return_tpfr(t1, t2, t3, t4)
    tpr.append(_tpr)
    fpr.append(_fpr)

In [137]:
plt.plot(threshold_values, threshold_values, fpr, tpr)

[<matplotlib.lines.Line2D at 0x125c8f2e8>,
 <matplotlib.lines.Line2D at 0x125dc6f98>]

In [138]:
plt.savefig(open("/Users/root02/Desktop/comparison.png", "wb"))