# IMPORT PACKAGES

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sun Aug  2 02:56:46 2020
@author: Ouedraogo Abdoul-Fatao
"""
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import keras
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import regularizers
import scipy.io as sio
from sklearn import preprocessing

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, auc, recall_score,f1_score, 
classification_report, precision_recall_curve,recall_score,
precision_recall_fscore_support, roc_curve, confusion_matrix, precision_score)

tf.compat.v1.disable_eager_execution()

# IMPORT DATASET

In [None]:
# Load the credit-card transactions CSV and preview its first ten rows.
csv_path = "../input/credit-card-detection/creditcard.csv"
data = pd.read_csv(csv_path)
data.head(10)

# DATA ANALYSIS

In [None]:
# Quick structural overview of the raw dataset.
print('Shape:\n', data.shape)   # Dimension of the data
print('Data type', data.dtypes.value_counts())
print(' columns:\n', data.columns)  # Column names
count_class = data['Class'].value_counts(sort=True)
print('Number of classes:\n', count_class)
# DataFrame.info() writes its report itself and returns None, so it must not
# be wrapped in print().
print('Information on the dataset:')
data.info()
print('Dataset description:\n', data.describe())
print('Check Missing values', data.isnull().sum())
# Report the number of duplicated rows rather than the full boolean Series.
print('Check duplicate values', data.duplicated().sum())

# VISUALIZATION

In [None]:
# Distribution analysis: share of fraudulent transactions and per-class counts.
# The ratio is scaled by 100 (not 50) so that it really is a percentage.
fraud_share = len(data[data['Class'] == 1]) / len(data) * 100
print("Distribution of fraudulent points: {:.2f}%".format(fraud_share))
# Name the column explicitly; a bare positional Series is not treated as the
# x variable by recent seaborn releases.
sns.countplot(x='Class', data=data)
plt.title('Class Distribution')
plt.xticks(range(2), ['Normal', 'Fraud'])
plt.show()

In [None]:
# Heat map of the pairwise correlations between all columns.
fig = plt.figure(figsize=(8, 6))
corrmat = data.corr()
sns.heatmap(corrmat, square=True, vmax=.8)
plt.show()

In [None]:
# Amount per transaction by class
# The per-class frames are built here from `data`: the notebook-wide
# `Fraud` / `Normal` frames are only created in the later data-processing cell,
# so relying on them at this point fails on a top-to-bottom run.
fraud_tx = data[data['Class'] == 1]
normal_tx = data[data['Class'] == 0]

f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 10))
f.suptitle('Amount per transaction by class')

bins = 10
ax1.hist(fraud_tx['Amount'], bins=bins)
ax1.set_title('Fraud')
ax2.hist(normal_tx['Amount'], bins=bins)
ax2.set_title('Normal')

ax1.grid()
ax2.grid()
plt.xlabel('Amount ($)')
plt.ylabel('Number of Transactions')
plt.xlim((0, 20000))
plt.yscale('log')
plt.show()

In [None]:
# Time of transaction vs Amount by class
# Built from `data` (not from the later `Fraud` / `Normal` frames): those are
# defined further down and are derived from a frame whose 'Time' column has
# been dropped, so they cannot be used for this plot.
fraud_tx = data[data['Class'] == 1]
normal_tx = data[data['Class'] == 0]

f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 10))
f.suptitle('Time of transaction vs Amount by class')

ax1.scatter(fraud_tx['Time'], fraud_tx['Amount'], marker='.')
ax1.set_title('Fraud')
ax1.grid()
ax2.scatter(normal_tx['Time'], normal_tx['Amount'], marker='.')
ax2.set_title('Normal')
ax2.grid()
plt.xlabel('Time (in Seconds)')
plt.ylabel('Amount')
plt.show()

# DATA PROCESSING

In [None]:
# Remove the 'Time' column, then separate the rows by class label
# (Class == 0 -> Normal, Class == 1 -> Fraud).
df = data.drop(columns=['Time'])
Normal = df[df['Class'] == 0]
Fraud = df[df['Class'] == 1]

In [None]:
# Hold out 20 % of the normal transactions; the remaining 80 % form the
# training set. The label column is kept aside / removed from the features.
X_train, X_test_normal = train_test_split(Normal, random_state=27, test_size=0.2)
Y_train = X_train['Class']
X_train = X_train.drop(columns=['Class'])
X_val = X_test_normal.drop(columns=['Class'])

In [None]:
# Test set = every fraud row followed by the held-out normal rows.
X_test = pd.concat([Fraud, X_test_normal], ignore_index=True)
Y_test = X_test['Class']
X_test = X_test.drop(columns=['Class'])

# Standardise the 'Amount' column; the scaler is fitted on the training rows only
# and then applied to the three feature frames.
scaler = StandardScaler().fit(X_train[['Amount']].values)
for frame in (X_train, X_test, X_val):
    frame['Amount'] = scaler.transform(frame[['Amount']].values)

# Convert the feature frames to NumPy arrays
X_train, X_test, X_val = X_train.values, X_test.values, X_val.values

# IMPLEMENTATION OF VARIATIONAL AUTOENCODER

In [None]:
# Network hyper-parameters
original_dim = X_train.shape[1]   # number of input features
input_shape = (original_dim, )
encoding_dim = 25                 # width of the first encoder layer
hidden_dim = 10                   # width of the second encoder layer
batch_size = 16
latent_dim = 10                   # size of the latent vector
epochs = 100

# Encoder: two ReLU layers followed by two linear heads that output the
# mean and the log-variance of the latent distribution.
inputs = Input(shape=(original_dim, ))
x = Dense(encoding_dim, activation='relu')(inputs)
x = Dense(hidden_dim, activation='relu')(x)
mean = Dense(latent_dim, name='mean')(x)          # g(x)
log_var = Dense(latent_dim, name='log_var')(x)    # h(x)

# Sampling function (reparameterisation: mean + sigma * epsilon)
def sampling(args):
    """Draw one latent vector per row from the given mean / log-variance tensors.

    args is a pair (mean, log_var). The noise has one row per row of `mean`
    and as many columns as the static second dimension of `mean`.
    """
    z_mean, z_log_var = args
    n_rows = K.shape(z_mean)[0]       # number of observations in the batch
    n_dims = K.int_shape(z_mean)[1]   # latent dimension
    noise = K.random_normal(shape=(n_rows, n_dims))
    sigma = K.exp(0.5 * z_log_var)
    return z_mean + sigma * noise

# Wrap the sampling function in a Keras Lambda layer so that the sampled
# latent vector z becomes part of the graph.
z = Lambda(sampling)([mean, log_var])

# Instantiate encoder model (maps an input to the latent mean)
encoder = Model(inputs, mean)
print('Encoder summary\n')
encoder.summary()

# Build the decoder model: P(x | z)
laten_inputs = Input(shape=(latent_dim,), name='z_sampling')
x_decoded = Dense(hidden_dim, activation='relu')(laten_inputs)
x_decoded = Dense(encoding_dim, activation='relu')(x_decoded)
x_decoded = Dense(original_dim, activation='linear')(x_decoded)

# instantiate decoder model
decoder = Model(laten_inputs, x_decoded, name='decoder')
print('Decoder summary\n')
decoder.summary()

# Instantiate the VAE model.
# The decoder is fed the *sampled* latent vector z. Decoding encoder(inputs)
# (the latent mean) would leave the sampling layer and the log-variance head
# disconnected from the reconstruction path.
# NOTE(review): vae.predict() therefore draws fresh noise on each call; use
# decoder.predict(encoder.predict(X)) if a deterministic reconstruction from
# the latent mean is wanted.
outputs = decoder(z)
vae = Model(inputs, outputs, name='vae_mlp')
print('Variational autoencoder summary\n')
vae.summary()

In [None]:
def recon_loss(x, x_decoded):
    """Return the squared reconstruction error averaged over axis 1."""
    squared_error = K.square(x_decoded - x)
    return K.mean(squared_error, axis=1)

# KL-divergence term
def kl_loss(x, x_decoded):
    """Return the KL term computed from the module-level `mean` / `log_var` tensors.

    Both arguments are ignored; they are only present so that the function has
    the (y_true, y_pred) signature of a Keras loss.
    """
    kl_terms = 1 + log_var - K.square(mean) - K.exp(log_var)
    return -0.5 * K.mean(kl_terms, axis=-1)
   
# vae_loss = recon_loss + kl_loss
def vae_loss(x, x_decoded):
    """Return reconstruction loss + KL loss for each sample of the minibatch."""
    reconstruction_term = recon_loss(x, x_decoded)
    kl_term = kl_loss(x, x_decoded)
    return reconstruction_term + kl_term

# Compile model
# NOTE(review): this Adam instance (learning rate 1e-7) is created, but
# compile() below receives the string 'adam', so it is not the optimizer used.
Learning_rate = 1e-7
optimizer = keras.optimizers.Adam(learning_rate=Learning_rate)
vae.compile(loss=vae_loss, optimizer='adam', metrics=['accuracy'])

# NOTE(review): the checkpoint callback is created but never passed to fit(),
# so no model file is written.
cp = ModelCheckpoint(
    filepath="/content/drive/My Drive/FraudDetection/vae.h5",
    verbose=0,
    save_best_only=True,
)

# Train the model to reproduce its own input; X_val is the validation data.
fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    validation_data=(X_val, X_val),
)
history = vae.fit(X_train, X_train, **fit_options)

In [None]:
# Training and validation loss per epoch
curves = history.history
plt.plot(curves['loss'], 'b')
plt.plot(curves['val_loss'], 'r')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.grid()
plt.legend(['Train_loss', 'Val_loss'], loc='upper right')
plt.show()

# Training and validation 'accuracy' metric per epoch
plt.title('Accuracy')
for metric_key, curve_label in (('accuracy', 'Train_accuracy'),
                                ('val_accuracy', 'Val_accuracy')):
    plt.plot(curves[metric_key], label=curve_label)
plt.legend()
plt.show()

In [None]:
# Reconstruct the test set with the trained VAE
predictions = vae.predict(X_test)

In [None]:
# Mean squared reconstruction error of every test row
squared_diff = np.square(predictions - X_test)
mse = squared_diff.mean(axis=1)

# Show the reconstruction errors of a small sub-sample of each class
sample_size = 30
normal_rows = np.where(Y_test == 0)
fraud_rows = np.where(Y_test == 1)

print(f'Mean Squared Error reconstruction losses for {sample_size} normal transactions:\n')
print(mse[normal_rows][:sample_size])

print(f'\nMean Squared Error reconstruction losses for {sample_size} fraudulent transactions:\n')
print(mse[fraud_rows][:sample_size])

# Error of each row next to its true class
error_df = pd.DataFrame({'reconstruction_error': mse, 'true_class': Y_test})

In [None]:
# Density of the reconstruction error: normal rows in blue, fraud rows in red
normal_rows = np.where(Y_test == 0)
fraud_rows = np.where(Y_test == 1)
mseNormal = mse[normal_rows]
mseFraud = mse[fraud_rows]
kde_style = dict(shade=True, bw=0.1)
sns.kdeplot(mseNormal, color='b', **kde_style)
sns.kdeplot(mseFraud, color='r', **kde_style)

**CALCULATE RESIDUALS OF X_TRAIN AND X_TEST**

In [None]:
# Residuals (input minus VAE reconstruction) of the training set
reconstructed_train = vae.predict(X_train)
r_xtrain = X_train - reconstructed_train
print("Residuals of Xtrain\n", r_xtrain)
print('\n')
# Residuals of the test set
reconstructed_test = vae.predict(X_test)
r_xtest = X_test - reconstructed_test
print("Residuals of Xtest\n", r_xtest)

# IMPLEMENTATION OF SVDD MODEL

In [None]:
!pip install cvxopt

In [None]:
# -*- coding: utf-8 -*-

import numpy as np
from cvxopt import matrix, solvers
import sklearn.metrics.pairwise as smp
import time

class SVDD():
    """Support Vector Data Description model solved as a QP with cvxopt.

    Reference: Tax, David MJ, and Robert PW Duin. "Support vector data
    description." Machine learning 54.1 (2004): 45-66.

    Labels use the convention 1 = positive sample, -1 = negative sample.
    """

    def __init__(self, parameters):
        """Store the configuration dictionary.

        parameters keys read by this class:
            "positive penalty": upper bound of the multipliers of positive samples
            "negative penalty": upper bound of the multipliers of negative samples
                                (only read when both label values are present)
            "kernel"          : dict with a "type" entry plus kernel options,
                                see getMatrix
            "option"          : dict; "display" == 'on' enables the printed reports
        """
        self.parameters = parameters

    def train(self, data, label):
        """Fit the hypersphere on `data` and store the result in self.model.

        INPUT
        data        Training data (n*d), n samples with d features
        label       Training labels, 1 (positive) / -1 (negative); any shape
                    holding n values is accepted and reshaped to (n, 1)

        OUTPUT
        None. The fitted quantities (multipliers, support vectors, radius,
        kernel matrix, timing, training accuracy, ...) are stored in the
        dictionary self.model.
        """
        start_time = time.time()

        # The code below indexes label[:, 0], so force a column layout.
        label = np.array(label, dtype='int').reshape(-1, 1)
        if np.abs(np.sum(label)) == data.shape[0]:
            self.labeltype = 'single'
        else:
            self.labeltype = 'hybrid'

        # index of positive and negative samples
        pIndex = label[:, 0] == 1
        nIndex = label[:, 0] == -1

        # multipliers above this value mark support vectors
        threshold = 1e-7

        # compute the kernel matrix
        K = self.getMatrix(data, data)

        # solve the Lagrange dual problem
        alf, obj, iteration = self.quadprog(K, label)

        # find the index of support vectors
        sv_index = np.where(alf > threshold)[0][:]

        # support vectors and their multipliers
        sv_value = data[sv_index, :]
        sv_alf = alf[sv_index]

        # compute the center in the initial feature space
        center = np.dot(alf.T, data)

        # Radius, eq(15) of the reference: distance from a support vector to
        # the center. The first support vector found is used.
        used = 0
        term_1 = K[sv_index[used], sv_index[used]]
        term_2 = -2*np.dot(K[sv_index[used], :], alf)
        term_3 = np.dot(np.dot(alf.T, K), alf)
        radius = np.sqrt(term_1+term_2+term_3)

        end_time = time.time()
        timecost = end_time - start_time

        # fraction of positive and negative samples
        pData = np.sum(pIndex)/data.shape[0]
        nData = np.sum(nIndex)/data.shape[0]

        # number and ratio of support vectors
        nSVs = sv_index.shape[0]
        rSVs = nSVs/data.shape[0]

        # store the results
        self.model = {"data"      : data        ,
                      "sv_alf"    : sv_alf      ,
                      "radius"    : radius      ,
                      "sv_value"  : sv_value    ,
                      "sv_index"  : sv_index    ,
                      "nSVs"      : nSVs        ,
                      "center"    : center      ,
                      "term_3"    : term_3      ,
                      "alf"       : alf         ,
                      "K"         : K           ,
                      "pIndex"    : pIndex      ,
                      "nIndex"    : nIndex      ,
                      "obj"       : obj         ,
                      "iteration" : iteration   ,
                      "timecost"  : timecost    ,
                      "pData"     : pData       ,
                      "nData"     : nData       ,
                      "rSVs"      : rSVs        ,
                      }

        # compute the training accuracy with the test report silenced
        display_ = self.parameters["option"]["display"]
        self.parameters["option"]["display"] = 'off'
        _, accuracy = self.test(data, label)
        self.parameters["option"]["display"] = display_
        self.model["accuracy"] = accuracy[0]

        # display training results
        if self.parameters["option"]["display"] == 'on':
            print('\n')
            print('*** SVDD model training finished ***\n')
            print('iter             = %d'       % self.model["iteration"])
            print('time cost        = %.4f s'   % self.model["timecost"])
            print('obj              = %.4f'     % self.model["obj"])
            print('pData            = %.4f %%'  % (100*self.model["pData"]))
            print('nData            = %.4f %%'  % (100*self.model["nData"]))
            print('nSVs             = %d'       % self.model["nSVs"])
            print('radio of nSVs    = %.4f %%'  % (100*self.model["rSVs"]))
            print('accuracy         = %.4f %%'  % (100*self.model["accuracy"]))
            print('\n')

    def test(self, data, label):
        """Score `data` with the trained model and compare with `label`.

        INPUT
        data        Test data (n*d)
        label       True labels, 1 (positive) / -1 (negative); a column, a flat
                    sequence, or a single value applied to every sample

        OUTPUT
        distance    1-D array with the kernel-space distance of each sample to
                    the center of the hypersphere
        scores      list [accuracy, recall, f1, precision]

        A sample whose distance exceeds the stored radius is predicted -1,
        every other sample is predicted 1.
        """
        start_time = time.time()
        n = data.shape[0]

        # kernel between the test samples and the training samples
        K = self.getMatrix(data, self.model["data"])

        # the 1st term: k(x, x) of every test sample
        term_1 = np.diagonal(np.asarray(self.getMatrix(data, data)))

        # the 2nd term: one value per test sample. Only the diagonal of the
        # tiled n*n matrix was ever read, so the tiling is skipped.
        term_2 = np.ravel(-2*np.dot(K, self.model["alf"]))

        # the 3rd term: constant computed during training
        term_3 = np.ravel(self.model["term_3"])[0]

        # distance
        distance = np.sqrt(term_1+term_2+term_3)

        # predicted label
        radius = float(np.ravel(self.model["radius"])[0])
        predictedlabel = np.where(distance > radius, -1, 1)

        # flatten the true labels; a single value is applied to all samples
        true_label = np.asarray(label).reshape(-1)
        if true_label.size == 1 and n > 1:
            true_label = np.repeat(true_label, n)

        # scikit-learn metrics take (y_true, y_pred) in that order; swapping
        # them exchanges recall and precision.
        accuracy = accuracy_score(true_label, predictedlabel)
        recall = recall_score(true_label, predictedlabel)
        precision = precision_score(true_label, predictedlabel)
        f1 = f1_score(true_label, predictedlabel)

        end_time = time.time()
        timecost = end_time - start_time
        if self.parameters["option"]["display"] == 'on':
            # display test results
            print('\n')
            print('*** SVDD model test finished ***\n')
            print('time cost        = %.4f s'   % timecost)
            print('accuracy         = %.4f %%'  % (100*accuracy))
            print('recall         = %.4f %%'  % (100*recall))
            print('precision         = %.4f %%'  % (100*precision))
            print('f1         = %.4f %%'  % (100*f1))
            print('\n')

        return distance, [accuracy, recall, f1, precision]

    def quadprog(self, K, label):
        """Solve the Lagrange dual problem with cvxopt.

        INPUT
        K         Kernel matrix (n*n)
        label     Training labels (n values, 1 / -1)

        OUTPUT
        alf        Lagrange multipliers, array of shape (n, 1)
        obj        'dual objective' value reported by the solver
        iteration  'iterations' value reported by the solver

        The problem is handed to cvxopt in its standard form
            minimize    (1/2)*x'*P*x + q'*x
            subject to  G*x <= h
                        A*x = b
        Requires self.labeltype, which train() sets before calling this method.
        """
        solvers.options['show_progress'] = False

        # Plain ndarrays are used throughout (np.mat is no longer available in
        # NumPy 2.0).
        label = np.asarray(label, dtype=float).reshape(-1, 1)
        K = np.dot(label, label.T) * np.asarray(K, dtype=float)

        # P
        n = K.shape[0]
        P = K+K.T

        # q
        q = -label * np.diagonal(K).reshape(-1, 1)

        # G: -x <= 0 stacked on x <= upper bound
        G = np.vstack((-np.eye(n), np.eye(n)))

        # h: lower bounds (zeros) stacked on the per-sample upper bounds
        h1 = np.zeros((n, 1))
        h2 = np.ones((n, 1))
        if self.labeltype == 'single':
            h2[label == 1] = self.parameters["positive penalty"]

        if self.labeltype == 'hybrid':
            h2[label == 1] = self.parameters["positive penalty"]
            h2[label == -1] = self.parameters["negative penalty"]

        h = np.vstack((h1, h2))

        # A, b: the multipliers sum to one
        A = np.ones((1, n))
        b = 1.

        sol = solvers.qp(matrix(P), matrix(q), matrix(G), matrix(h),
                         matrix(A), matrix(b))
        alf = np.array(sol['x'])
        obj = np.array(sol['dual objective'])
        iteration = np.array(sol['iterations'])

        return alf, obj, iteration

    def getMatrix(self, X, Y):
        """Compute the kernel matrix between X (n*d) and Y (m*d).

        The kernel is selected by self.parameters["kernel"]["type"]:

        linear :  k(x,y) = x'*y+c
        ploy   :  k(x,y) = (x'*y+c)^d
        gauss  :  k(x,y) = exp(-s*||x-y||^2)
        tanh   :  k(x,y) = tanh(g*x'*y+c)
        lapl   :  k(x,y) = exp(-s*||x-y||)

        Optional entries of the kernel dict and their fallbacks:
            gauss / lapl : "width"  s (2)
            linear       : "offset" c (0)
            ploy         : "degree" d (2), "offset" c (0)
            tanh         : "gamma"  g (0.01), "offset" c (1)

        Example: ker = {"type": 'gauss', "width": s}. Note that the polynomial
        kernel is registered under the key 'ploy'. An unknown type raises
        KeyError.
        """
        ker = self.parameters["kernel"]

        def gaussFunc():
            return smp.rbf_kernel(X, Y, gamma=ker.get("width", 2))

        def linearFunc():
            return smp.linear_kernel(X, Y)+ker.get("offset", 0)

        def ployFunc():
            return smp.polynomial_kernel(X, Y, degree=ker.get("degree", 2),
                                         gamma=None, coef0=ker.get("offset", 0))

        def laplFunc():
            return smp.laplacian_kernel(X, Y, gamma=ker.get("width", 2))

        def tanhFunc():
            return smp.sigmoid_kernel(X, Y, gamma=ker.get("gamma", 0.01),
                                      coef0=ker.get("offset", 1))

        switcher = {
                        "gauss"   : gaussFunc  ,
                        "linear"  : linearFunc ,
                        "ploy"    : ployFunc   ,
                        "lapl"    : laplFunc   ,
                        "tanh"    : tanhFunc   ,
                     }

        return switcher[ker["type"]]()

In [None]:
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from mpl_toolkits.mplot3d import Axes3D
import time

class Visualization():
    """Plot helpers for a trained SVDD model.

    All methods are static: they can be called on the class
    (Visualization.testResult(...)) or on an instance.
    """

    @staticmethod
    def testResult(svdd, distance):
        """Plot the distance of every sample together with the model radius.

        INPUT
        svdd             trained SVDD object (svdd.model["radius"] is read)
        distance         distances returned by svdd.test
        """
        plt.rcParams['font.size'] = 15
        n = distance.shape[0]

        fig = plt.figure(figsize = (10, 6))
        ax = fig.add_subplot(1, 1, 1)
        # horizontal line at the radius, one point per sample
        radius = np.ones((n, 1))*svdd.model["radius"]
        ax.plot(radius,
                color ='r',
                linestyle = '-',
                marker = 'None',
                linewidth = 2,
                markeredgecolor ='k',
                markerfacecolor = 'w',
                markersize = 6)

        ax.plot(distance,
                color = 'k',
                linestyle = ':',
                marker='o',
                linewidth=1,
                markeredgecolor = 'k',
                markerfacecolor = 'C4',
                markersize = 6)

        ax.set_xlabel('Samples')
        ax.set_ylabel('Distance')

        ax.legend(["Radius","Distance"],
                  ncol = 1, loc = 0,
                  edgecolor = 'black',
                  markerscale = 2, fancybox = True)

        plt.show()

    @staticmethod
    def testROC(label, distance):
        """Plot the ROC curve obtained by thresholding `distance`.

        INPUT
        label            test labels (1 / -1); both values must be present
        distance         distances returned by svdd.test

        Raises SyntaxError when all labels have the same sign.
        """
        if np.abs(np.sum(label)) == label.shape[0]:
            raise SyntaxError('Both positive and negative labels must be entered for plotting ROC curve.')

        # number of positive and negative samples
        plt.rcParams['font.size'] = 15
        n_p = np.sum(label == 1)
        n_n = np.sum(label == -1)

        # walk through the samples by increasing distance and accumulate counts
        index = np.argsort(distance)
        label_sort = label[index]
        FP = np.cumsum(label_sort == -1)
        TP = np.cumsum(label_sort == 1)
        FPR = FP/n_n
        TPR = TP/n_p

        roc_auc = auc(FPR.T, TPR.T)

        fig = plt.figure(figsize = (6, 6))
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(FPR.T, TPR.T,
                color ='C3',
                linestyle = '-',
                marker = 'None',
                linewidth = 5,
                markeredgecolor ='k',
                markerfacecolor = 'w',
                markersize = 6)

        ax.set_xlabel('False positive rate (FPR)')
        ax.set_ylabel('True positive rate (TPR)')
        ax.set_title('Area under the curve (AUC) = %.4f' %roc_auc)

        plt.grid()
        plt.show()

    @staticmethod
    def boundary(svdd, data, label, r=0.3, nn=2):
        """Plot the decision boundary of a model trained on 2-D data.

        INPUT
        svdd             trained SVDD object
        data             training data (must have exactly two columns)
        label            training labels (not read by this method)
        r                expansion ratio of the plotted range (0<r<1)
        nn               the grid has round(n_samples / nn) points per axis

        Raises SyntaxError when `data` is not two-dimensional.
        """
        dim = data.shape[1]
        if dim!=2:
            raise SyntaxError('Visualization of decision boundary only supports for 2D data')

        # compute the range of the grid: data range widened by r on both sides
        numGrids = np.rint(data.shape[0]/nn).astype(int)  # number of grids
        x_range = np.zeros(shape=(numGrids, 2))
        for i in range(2):
            _tmp_ = (np.max(data[:, i])-np.min(data[:, i]))*r
            xlim_1 = np.min(data[:, i])-_tmp_
            xlim_2 = np.max(data[:, i])+_tmp_
            x_range[:, i] = np.linspace(xlim_1, xlim_2, numGrids)

        # grid
        xv, yv = np.meshgrid(x_range[:, 0], x_range[:, 1])

        num1, num2 = xv.shape
        distance = np.zeros(shape=(num1, num2))

        # calculate the grid scores
        print("Calculating the grid (%04d*%04d) scores...\n" %(num1, num2))

        display_ = svdd.parameters["option"]["display"]
        svdd.parameters["option"]["display"] = 'off'
        start_time = time.time()
        # svdd.test also computes label-based metrics, so it needs an
        # array-like label of matching length; the value itself is irrelevant
        # here because only the distance is kept.
        dummy_label = np.array([[1]])
        for i in range(num1):
            for j in range(num2):
                point = np.array([[xv[i, j], yv[i, j]]])
                grid_distance, _ = svdd.test(point, dummy_label)
                distance[i, j] = np.ravel(grid_distance)[0]
        end_time = time.time()
        print('Grid scores completed. Time cost %.4f s\n' % (end_time-start_time))
        svdd.parameters["option"]["display"] = display_

        # plot the boundary
        fig = plt.figure(figsize = (15, 15))
        ax2 = fig.add_subplot(1, 1, 1)

        if svdd.labeltype == 'single':

            ax2.scatter(data[:,0], data[:,1],
                        color='yellow',marker='o',
                        edgecolor='black',alpha=0.5, zorder = 2)
            ax2.scatter(data[svdd.model["sv_index"],0], data[svdd.model["sv_index"],1],
                    facecolor='C2',marker='o',s = 144,linewidths = 2,
                    edgecolor='black', zorder = 2)

            ax2.contour(xv, yv, distance,[svdd.model["radius"]],
                              colors='C3', linewidths=5, zorder = 1)

            ax2.legend(["Training data", "Support vectors"],
                      ncol = 1, loc = 0,
                      edgecolor = 'black',markerscale = 1.2, fancybox = True)

        else:
            ax2.scatter(data[svdd.model["pIndex"],0], data[svdd.model["pIndex"],1],
                    facecolor='C0',marker='o', s = 100,linewidths = 2,
                    edgecolor='black', zorder = 2)
            ax2.scatter(data[svdd.model["nIndex"],0], data[svdd.model["nIndex"],1],
                    facecolor='C4',marker='s',s = 100,linewidths = 2,
                    edgecolor='black', zorder = 2)

            ax2.scatter(data[svdd.model["sv_index"],0], data[svdd.model["sv_index"],1],
                    facecolor='C2',marker='o',s = 144,linewidths = 2,
                    edgecolor='black', zorder = 2)

            ax2.contour(xv, yv, distance,[svdd.model["radius"]],
                              colors='C3', linewidths=5, zorder = 1)

            ax2.legend(["Training data (+)","Training data (-)", "Support vectors"],
                      ncol = 1, loc = 0,
                      edgecolor = 'black',markerscale = 1.2, fancybox = True)

        plt.show()
        

# APPLICATION OF SVDD

In [None]:
# Map the class labels to the SVDD convention: 0 (normal) -> 1, 1 (fraud) -> -1.
# The mapping is vectorised and leaves Y_test / Y_train untouched, so the cell
# can be re-run (mutating Y_test in place would turn every label into -1 on a
# second run) and earlier cells that filter on Y_test == 0 / 1 keep working.
Y_test1 = Y_test.values
y_test = np.where(Y_test1 == 0, 1, -1).reshape(-1, 1)

# Every training label is mapped, so y_train always has one row per training
# sample. The training set built above only holds class 0, which gives a
# column of ones.
Y_train1 = Y_train.values
y_train = np.where(Y_train1 == 0, 1, -1).reshape(-1, 1)

In [None]:
# Kernel dictionaries understood by SVDD.getMatrix:
#   ker = {"type": 'gauss', "width": s}
#   ker = {"type": 'linear', "offset": c}
#   ker = {"type": 'ploy', "degree": d, "offset": c}
#   ker = {"type": 'tanh', "gamma": g, "offset": c}
#   ker = {"type": 'lapl', "width": s}

# Ratio of fraud rows to normal rows
nu = Fraud.shape[0] / Normal.shape[0]

# set SVDD parameters
kernel_settings = {"type": 'gauss', "width": 1/24}
display_settings = {"display": 'on'}
parameters = {
    "positive penalty": 0.9,
    "negative penalty": [],
    "kernel": kernel_settings,
    "option": display_settings,
}

In [None]:
# Train one SVDD model per consecutive chunk of 8000 training residuals
chunk_size = 8000
n_models = 20
models = []
start, end = 0, chunk_size
for _ in range(n_models):
    svdd = SVDD(parameters)
    svdd.train(r_xtrain[start:end], y_train[start:end])
    models.append(svdd)
    # move the window to the next chunk
    start, end = end, end + chunk_size

In [None]:
# Evaluate every trained SVDD model on the same slice of the test residuals.
# Each model is scored once: SVDD.test has no random component, so scoring the
# identical slice twice and averaging only doubled the run time.
results = []
st = 0
sd = 3000
for model in models:
    distance, acc = model.test(r_xtest[st:sd], y_test[st:sd])
    results.append(np.array(acc))
# NOTE: `df` is also the name of the Time-less feature frame created in the
# data-processing cell; it is re-bound here as in the original notebook.
df = pd.DataFrame(results, columns=["accuracy", "recall", "f1", "precision"])
df.to_csv("results.csv", index=False)

In [None]:
# Plot the distances from the last SVDD test call against the radius of `svdd`
Visualization.testResult(svdd, distance)

In [None]:
# ROC curve for the tested slice of labels and its distances
Visualization.testROC(y_test[st:sd], distance)