# Imbalanced classification: credit card fraud detection

## Load data

In [None]:
import numpy as np

# The underlying dataset is published at
# https://www.kaggle.com/mlg-ulb/creditcardfraud/
# NOTE(review): this cell reads two NumPy files from the working directory;
# presumably they were exported from that dataset beforehand -- confirm.
features, targets = np.load('features.npy'), np.load('targets.npy')

In [None]:
# Inspect the dimensions of the loaded feature array (shown as the cell output).
features.shape

In [None]:
# Inspect the dimensions of the loaded target array (shown as the cell output).
targets.shape

## Prepare a validation set

In [None]:
# Hold out the last 20% of the samples for validation. The split is purely
# positional (no shuffling): the validation set is the tail of the arrays.
if len(features) != len(targets):
    raise ValueError(
        "features and targets must have the same number of samples, "
        "got {} and {}".format(len(features), len(targets))
    )

num_val_samples = int(len(features) * 0.2)

# Slice with an explicit boundary rather than negative offsets: when
# num_val_samples is 0 (fewer than 5 samples), `x[:-0]` is empty and `x[-0:]`
# is the whole array, which would leave the training set empty and put
# everything into validation.
split_index = len(features) - num_val_samples
train_features = features[:split_index]
train_targets = targets[:split_index]
val_features = features[split_index:]
val_targets = targets[split_index:]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))


## Analyze class imbalance in the targets

In [None]:
# Count the training samples per class label (first target column).
# `minlength=2` guarantees that both the class-0 and class-1 slots exist even
# when one of the classes does not occur in the training split.
counts = np.bincount(train_targets[:, 0], minlength=2)
num_negative, num_positive = int(counts[0]), int(counts[1])

# Inverse-frequency weights are undefined for an absent class, so fail with a
# clear message instead of an IndexError or a divide-by-zero weight.
if num_negative == 0 or num_positive == 0:
    raise ValueError(
        "Both classes must be present in the training data to compute class "
        "weights, got {} negative and {} positive samples".format(
            num_negative, num_positive
        )
    )

print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        num_positive, 100 * float(num_positive) / len(train_targets)
    )
)

# Weight each class by the inverse of its sample count so that the rare
# class contributes as much to the loss as the frequent one.
weight_for_0 = 1.0 / num_negative
weight_for_1 = 1.0 / num_positive

class_weight = {0: weight_for_0,
                1: weight_for_1}

## Normalize the data using training set statistics

In [None]:
# Standardize each feature column using statistics computed on the training
# set only; the same mean and scale are then applied to the validation set.
# The in-place operators update the existing arrays rather than creating
# new ones.
mean = np.mean(train_features, axis=0)
train_features -= mean
val_features -= mean
std = np.std(train_features, axis=0)
# A column that is constant over the training set has std == 0; dividing by
# it would turn the centered (all-zero) column into NaN/inf. Use a scale of 1
# for such columns so they are left as centered values.
std = np.where(std == 0, 1.0, std)
train_features /= std
val_features /= std

## Build a binary classification model

In [None]:
from tensorflow import keras

# Feed-forward binary classifier: two 256-unit ReLU hidden layers followed by
# a single sigmoid output unit. The input width is the size of the last axis
# of the training feature array.
num_features = train_features.shape[-1]

model = keras.Sequential()
model.add(
    keras.layers.Dense(256, activation="relu", input_shape=(num_features,))
)
model.add(keras.layers.Dense(256, activation="relu"))
model.add(keras.layers.Dense(1, activation="sigmoid"))

model.summary()

## Train the model with the `class_weight` argument

In [None]:
# Metrics reported during training and validation: precision and recall.
precision_metric = keras.metrics.Precision(name="precision")
recall_metric = keras.metrics.Recall(name="recall")
metrics = [precision_metric, recall_metric]

In [None]:
# Configure training: Adam with a learning rate of 1e-2, binary cross-entropy
# loss, and the metrics defined earlier in the notebook.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
    loss="binary_crossentropy",
    metrics=metrics,
)

In [None]:
# Train for 30 epochs in batches of 2048 samples, evaluating on the held-out
# validation arrays after each epoch. `class_weight` scales each sample's
# loss contribution according to its class label.
model.fit(
    x=train_features,
    y=train_targets,
    epochs=30,
    batch_size=2048,
    class_weight=class_weight,
    validation_data=(val_features, val_targets),
)