In [1]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


df = pd.read_csv('KRfin01.csv')
df2 = pd.read_csv('KRfin02.csv')
df = df.append(df2, ignore_index=True)

#lets create train, validation, test sets
p=0.9
kiri=int(p*df.shape[0])
train_X,val_X,test_X=np.array(df.iloc[:kiri,1:-1]),np.array(df.iloc[kiri::2,1:-1]),np.array(df.iloc[kiri+1::2,1:-1])
train_Y,val_Y,test_Y=np.array(df.iloc[:kiri,-1]),np.array(df.iloc[kiri::2,-1]),np.array(df.iloc[kiri+1::2,-1])

In [2]:
from collections import Counter
train_Y_count=Counter(train_Y)
val_Y_count=Counter(val_Y)
test_Y_count=Counter(test_Y)

#Print Statistics
print("In train set: {} negative and {} positive".format(train_Y_count[0],train_Y_count[1]))
print("In val set: {} negative and {} positive".format(val_Y_count[0],val_Y_count[1]))
print("In test set: {} negative and {} positive".format(test_Y_count[0],test_Y_count[1]))
print("Train Set, Positive/Negative: {} %".format(train_Y_count[1]/train_Y_count[0]*100))
print("Val Set, Positive/Negative: {} %".format(val_Y_count[1]/val_Y_count[0]*100))
print("Test Set, Positive/Negative: {} %".format(test_Y_count[1]/test_Y_count[0]*100))

In train set: 8871 negative and 9075 positive
In val set: 486 negative and 512 positive
In test set: 506 negative and 491 positive
Train Set, Positive/Negative: 102.29962800135273 %
Val Set, Positive/Negative: 105.34979423868313 %
Test Set, Positive/Negative: 97.03557312252964 %


In [3]:
train_data=TensorDataset(torch.from_numpy(train_X),torch.from_numpy(train_Y))
val_data=TensorDataset(torch.from_numpy(val_X),torch.from_numpy(val_Y))
test_data=TensorDataset(torch.from_numpy(test_X),torch.from_numpy(test_Y))

In [4]:
#find out the number positive and negative examples in the training set
num_classes=[train_Y_count[0],train_Y_count[1]]
#find the respective weights (mind that weights need to be a torch tensor)
weights=1./torch.tensor(num_classes,dtype=float)
#create a torch tensor associating the train_Y and the weights. Sample Weights is a torch tensor
sample_weights=weights[train_Y]
sampler=torch.utils.data.WeightedRandomSampler(weights=sample_weights,num_samples=len(sample_weights),replacement=True)

In [5]:
batch_size=50001
train_loader=DataLoader(train_data,sampler=sampler,batch_size=batch_size)
val_loader=DataLoader(val_data,shuffle=True,batch_size=batch_size)
test_loader=DataLoader(test_data,shuffle=False,batch_size=batch_size)

In [6]:
train_iter=iter(val_loader)
inputs,outputs=train_iter.next()
print(inputs)

tensor([[0.4839, 0.3238, 0.4038,  ..., 0.4410, 0.5104, 0.4741],
        [0.5382, 0.5243, 0.5725,  ..., 0.5093, 0.4797, 0.5255],
        [0.5251, 0.4062, 0.5464,  ..., 0.4696, 0.4733, 0.4854],
        ...,
        [0.4477, 0.4528, 0.5268,  ..., 0.5611, 0.4911, 0.4891],
        [0.4943, 0.3442, 0.4233,  ..., 0.5317, 0.4204, 0.4989],
        [0.5329, 0.7861, 0.4878,  ..., 0.4516, 0.4303, 0.4404]],
       dtype=torch.float64)


In [7]:
print(inputs.size())

torch.Size([998, 23])


In [8]:
#loader wrapping
loaders={'train':train_loader, 'valid':val_loader}

In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.fc1=nn.Linear(23,128)
        self.fc2=nn.Linear(128,15,bias=False)
        self.norm2=nn.BatchNorm1d(15)
        self.fc3=nn.Linear(15,10,bias=False)
        self.norm3=nn.BatchNorm1d(10)
        self.fc4=nn.Linear(10,5,bias=False)
        self.norm4=nn.BatchNorm1d(5)
        self.fc5=nn.Linear(5,1)
         
        
    def forward(self,x):
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        x=F.relu(self.fc3(x))
        x=F.relu(self.norm4(self.fc4(x)))
        x=torch.sigmoid(self.fc5(x))        
        return x

In [10]:
model = Net()
criterion=nn.BCEWithLogitsLoss()

In [11]:
optimizer=torch.optim.SGD(model.parameters(),lr=0.001,momentum=0.9)
scheduler=torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,mode="min",factor=0.75,patience=50,min_lr=1e-5) 

In [12]:
def train(n_epochs,loaders,model,optimizer,criterion,scheduler):
    #Trains the neural network passed in "model", for a number of
    #epochs equal to n_epochs, on the loaders contained in the 
    #dictionary "loaders", using the optimizer in "optimizer", 
    #with the loss function defined in "criterion" and, if necessary
    #decreases the learning rate in accordance to the rules contained 
    #in "scheduler"
    valid_loss_min=np.Inf
    list_train_loss=[]
    list_valid_loss=[]
    for epoch in range(1,n_epochs+1):
        train_loss=0.0
        valid_loss=0.0
        model.train()
        for batch_idx,(data,target) in enumerate(loaders["train"]):
            optimizer.zero_grad()
            output=model(data.float())
            loss=criterion(output.squeeze(),target.float())
            loss.backward()
            optimizer.step()
            train_loss=train_loss+1/(batch_idx+1)*(loss.item()-train_loss)
        list_train_loss.append(train_loss)
        print("At {} epoch, Training Loss: {} ".format(epoch,train_loss))
        model.eval()
        for batch_idx,(data,target) in enumerate(loaders["valid"]):
            output=model(data.float())
            loss=criterion(output.squeeze(),target.float())
            valid_loss=valid_loss+1/(batch_idx+1)*(loss.item()-valid_loss)
        scheduler.step(valid_loss)
        list_valid_loss.append(valid_loss)
        print("At {} epoch, Validation Loss: {} ".format(epoch,valid_loss))
        #Save the model
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(),'ccFraud.pt')
            print("Minimum validation loss detected, saving model......................................................................................")
            valid_loss_min=valid_loss
    
    return model, list_train_loss, list_valid_loss

In [None]:
n_epoch=10000
model,train_loss,valid_loss=train(n_epoch,loaders,model,optimizer,criterion,scheduler)

At 1 epoch, Training Loss: 0.7171865701675415 
At 1 epoch, Validation Loss: 0.7095901966094971 
Minimum validation loss detected, saving model......................................................................................
At 2 epoch, Training Loss: 0.7122880816459656 
At 2 epoch, Validation Loss: 0.7095887660980225 
Minimum validation loss detected, saving model......................................................................................
At 3 epoch, Training Loss: 0.7154885530471802 
At 3 epoch, Validation Loss: 0.7095871567726135 
Minimum validation loss detected, saving model......................................................................................
At 4 epoch, Training Loss: 0.7176935076713562 
At 4 epoch, Validation Loss: 0.7095851302146912 
Minimum validation loss detected, saving model......................................................................................
At 5 epoch, Training Loss: 0.7149812579154968 
At 5 epoch, Validation Loss: 0.709582

At 37 epoch, Training Loss: 0.7142153978347778 
At 37 epoch, Validation Loss: 0.7094066143035889 
Minimum validation loss detected, saving model......................................................................................
At 38 epoch, Training Loss: 0.7173911333084106 
At 38 epoch, Validation Loss: 0.7093995213508606 
Minimum validation loss detected, saving model......................................................................................
At 39 epoch, Training Loss: 0.7146093249320984 
At 39 epoch, Validation Loss: 0.7093924880027771 
Minimum validation loss detected, saving model......................................................................................
At 40 epoch, Training Loss: 0.715353786945343 
At 40 epoch, Validation Loss: 0.709385335445404 
Minimum validation loss detected, saving model......................................................................................
At 41 epoch, Training Loss: 0.7140320539474487 
At 41 epoch, Validation Loss: 

At 73 epoch, Training Loss: 0.7157356142997742 
At 73 epoch, Validation Loss: 0.7090849876403809 
Minimum validation loss detected, saving model......................................................................................
At 74 epoch, Training Loss: 0.7141815423965454 
At 74 epoch, Validation Loss: 0.7090716361999512 
Minimum validation loss detected, saving model......................................................................................
At 75 epoch, Training Loss: 0.716707170009613 
At 75 epoch, Validation Loss: 0.7090595960617065 
Minimum validation loss detected, saving model......................................................................................
At 76 epoch, Training Loss: 0.7134615182876587 
At 76 epoch, Validation Loss: 0.7090463042259216 
Minimum validation loss detected, saving model......................................................................................
At 77 epoch, Training Loss: 0.7155309915542603 
At 77 epoch, Validation Loss:

At 109 epoch, Training Loss: 0.7172510623931885 
At 109 epoch, Validation Loss: 0.7085526585578918 
Minimum validation loss detected, saving model......................................................................................
At 110 epoch, Training Loss: 0.7143585085868835 
At 110 epoch, Validation Loss: 0.7085373997688293 
Minimum validation loss detected, saving model......................................................................................
At 111 epoch, Training Loss: 0.7129939198493958 
At 111 epoch, Validation Loss: 0.7085208296775818 
Minimum validation loss detected, saving model......................................................................................
At 112 epoch, Training Loss: 0.7144388556480408 
At 112 epoch, Validation Loss: 0.7085041999816895 
Minimum validation loss detected, saving model......................................................................................
At 113 epoch, Training Loss: 0.7114328145980835 
At 113 epoch, Valid

At 145 epoch, Training Loss: 0.7107449173927307 
At 145 epoch, Validation Loss: 0.7082164883613586 
Minimum validation loss detected, saving model......................................................................................
At 146 epoch, Training Loss: 0.7133185863494873 
At 146 epoch, Validation Loss: 0.7082035541534424 
Minimum validation loss detected, saving model......................................................................................
At 147 epoch, Training Loss: 0.7149261236190796 
At 147 epoch, Validation Loss: 0.7081935405731201 
Minimum validation loss detected, saving model......................................................................................
At 148 epoch, Training Loss: 0.7153943777084351 
At 148 epoch, Validation Loss: 0.7081865668296814 
Minimum validation loss detected, saving model......................................................................................
At 149 epoch, Training Loss: 0.7133080363273621 
At 149 epoch, Valid

At 181 epoch, Training Loss: 0.712857186794281 
At 181 epoch, Validation Loss: 0.7078624367713928 
Minimum validation loss detected, saving model......................................................................................
At 182 epoch, Training Loss: 0.717097282409668 
At 182 epoch, Validation Loss: 0.70786052942276 
Minimum validation loss detected, saving model......................................................................................
At 183 epoch, Training Loss: 0.7116624712944031 
At 183 epoch, Validation Loss: 0.7078559994697571 
Minimum validation loss detected, saving model......................................................................................
At 184 epoch, Training Loss: 0.7123064994812012 
At 184 epoch, Validation Loss: 0.7078531980514526 
Minimum validation loss detected, saving model......................................................................................
At 185 epoch, Training Loss: 0.7165338397026062 
At 185 epoch, Validatio

At 217 epoch, Training Loss: 0.715168297290802 
At 217 epoch, Validation Loss: 0.7076366543769836 
Minimum validation loss detected, saving model......................................................................................
At 218 epoch, Training Loss: 0.7102392911911011 
At 218 epoch, Validation Loss: 0.7076255679130554 
Minimum validation loss detected, saving model......................................................................................
At 219 epoch, Training Loss: 0.7137807607650757 
At 219 epoch, Validation Loss: 0.7076114416122437 
Minimum validation loss detected, saving model......................................................................................
At 220 epoch, Training Loss: 0.7130511403083801 
At 220 epoch, Validation Loss: 0.7076036334037781 
Minimum validation loss detected, saving model......................................................................................
At 221 epoch, Training Loss: 0.7125883102416992 
At 221 epoch, Valida

At 253 epoch, Training Loss: 0.716376543045044 
At 253 epoch, Validation Loss: 0.7072991728782654 
Minimum validation loss detected, saving model......................................................................................
At 254 epoch, Training Loss: 0.7158887982368469 
At 254 epoch, Validation Loss: 0.7072909474372864 
Minimum validation loss detected, saving model......................................................................................
At 255 epoch, Training Loss: 0.7152948379516602 
At 255 epoch, Validation Loss: 0.7072847485542297 
Minimum validation loss detected, saving model......................................................................................
At 256 epoch, Training Loss: 0.713757336139679 
At 256 epoch, Validation Loss: 0.7072696685791016 
Minimum validation loss detected, saving model......................................................................................
At 257 epoch, Training Loss: 0.7124376893043518 
At 257 epoch, Validat

At 289 epoch, Training Loss: 0.7124423980712891 
At 289 epoch, Validation Loss: 0.7070654034614563 
Minimum validation loss detected, saving model......................................................................................
At 290 epoch, Training Loss: 0.7123122811317444 
At 290 epoch, Validation Loss: 0.707054853439331 
Minimum validation loss detected, saving model......................................................................................
At 291 epoch, Training Loss: 0.7149368524551392 
At 291 epoch, Validation Loss: 0.7070401906967163 
Minimum validation loss detected, saving model......................................................................................
At 292 epoch, Training Loss: 0.7130239605903625 
At 292 epoch, Validation Loss: 0.7070278525352478 
Minimum validation loss detected, saving model......................................................................................
At 293 epoch, Training Loss: 0.7117188572883606 
At 293 epoch, Valida

At 325 epoch, Training Loss: 0.7112387418746948 
At 325 epoch, Validation Loss: 0.7067970633506775 
Minimum validation loss detected, saving model......................................................................................
At 326 epoch, Training Loss: 0.7102723717689514 
At 326 epoch, Validation Loss: 0.7067936658859253 
Minimum validation loss detected, saving model......................................................................................
At 327 epoch, Training Loss: 0.7105622887611389 
At 327 epoch, Validation Loss: 0.7067946195602417 
At 328 epoch, Training Loss: 0.7137243151664734 
At 328 epoch, Validation Loss: 0.7067933678627014 
Minimum validation loss detected, saving model......................................................................................
At 329 epoch, Training Loss: 0.7113313674926758 
At 329 epoch, Validation Loss: 0.7067890763282776 
Minimum validation loss detected, saving model.......................................................

At 361 epoch, Training Loss: 0.7127507328987122 
At 361 epoch, Validation Loss: 0.7065408229827881 
Minimum validation loss detected, saving model......................................................................................
At 362 epoch, Training Loss: 0.7154801487922668 
At 362 epoch, Validation Loss: 0.7065266370773315 
Minimum validation loss detected, saving model......................................................................................
At 363 epoch, Training Loss: 0.708999752998352 
At 363 epoch, Validation Loss: 0.7065178751945496 
Minimum validation loss detected, saving model......................................................................................
At 364 epoch, Training Loss: 0.7125486731529236 
At 364 epoch, Validation Loss: 0.7065029740333557 
Minimum validation loss detected, saving model......................................................................................
At 365 epoch, Training Loss: 0.7127106785774231 
At 365 epoch, Valida

At 397 epoch, Training Loss: 0.7122743129730225 
At 397 epoch, Validation Loss: 0.7062755823135376 
Minimum validation loss detected, saving model......................................................................................
At 398 epoch, Training Loss: 0.7116274237632751 
At 398 epoch, Validation Loss: 0.7062711715698242 
Minimum validation loss detected, saving model......................................................................................
At 399 epoch, Training Loss: 0.7097826600074768 
At 399 epoch, Validation Loss: 0.70626300573349 
Minimum validation loss detected, saving model......................................................................................
At 400 epoch, Training Loss: 0.7145788073539734 
At 400 epoch, Validation Loss: 0.7062562704086304 
Minimum validation loss detected, saving model......................................................................................
At 401 epoch, Training Loss: 0.7113223075866699 
At 401 epoch, Validat

At 436 epoch, Training Loss: 0.7091901898384094 
At 436 epoch, Validation Loss: 0.706101655960083 
Minimum validation loss detected, saving model......................................................................................
At 437 epoch, Training Loss: 0.7124619483947754 
At 437 epoch, Validation Loss: 0.7060952186584473 
Minimum validation loss detected, saving model......................................................................................
At 438 epoch, Training Loss: 0.7126631736755371 
At 438 epoch, Validation Loss: 0.706087052822113 
Minimum validation loss detected, saving model......................................................................................
At 439 epoch, Training Loss: 0.7119537591934204 
At 439 epoch, Validation Loss: 0.7060813903808594 
Minimum validation loss detected, saving model......................................................................................
At 440 epoch, Training Loss: 0.7098053693771362 
At 440 epoch, Validat

At 472 epoch, Training Loss: 0.7111103534698486 
At 472 epoch, Validation Loss: 0.705920934677124 
At 473 epoch, Training Loss: 0.7105152010917664 
At 473 epoch, Validation Loss: 0.7059203386306763 
At 474 epoch, Training Loss: 0.7100476026535034 
At 474 epoch, Validation Loss: 0.7059148550033569 
Minimum validation loss detected, saving model......................................................................................
At 475 epoch, Training Loss: 0.7122187614440918 
At 475 epoch, Validation Loss: 0.7059147953987122 
Minimum validation loss detected, saving model......................................................................................
At 476 epoch, Training Loss: 0.7124790549278259 
At 476 epoch, Validation Loss: 0.7059120535850525 
Minimum validation loss detected, saving model......................................................................................
At 477 epoch, Training Loss: 0.7114972472190857 
At 477 epoch, Validation Loss: 0.705912172794342 
At 

At 510 epoch, Training Loss: 0.7091147899627686 
At 510 epoch, Validation Loss: 0.7057155966758728 
Minimum validation loss detected, saving model......................................................................................
At 511 epoch, Training Loss: 0.7125344276428223 
At 511 epoch, Validation Loss: 0.7057141661643982 
Minimum validation loss detected, saving model......................................................................................
At 512 epoch, Training Loss: 0.7098562717437744 
At 512 epoch, Validation Loss: 0.7057153582572937 
At 513 epoch, Training Loss: 0.7095509767532349 
At 513 epoch, Validation Loss: 0.7057157158851624 
At 514 epoch, Training Loss: 0.7093765139579773 
At 514 epoch, Validation Loss: 0.7057105302810669 
Minimum validation loss detected, saving model......................................................................................
At 515 epoch, Training Loss: 0.7119280099868774 
At 515 epoch, Validation Loss: 0.7057062387466431 
M

In [None]:
torch.load("ccFraud.pt")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

x_epochs=np.linspace(1,n_epoch,n_epoch).astype(dtype=int)
plt.plot(x_epochs,train_loss)
plt.plot(x_epochs,valid_loss)
plt.xlabel("N Epochs")
plt.ylabel("Loss")
plt.grid()
plt.legend(["Training","Validation"])
plt.show()

In [None]:
k=np.linspace(0,1,10)
TPR=[]
FPR=[]
for n in k:
    outputs=[]
    targets=[]
    for data,target in test_loader:
        output=model(data.float())
        output=torch.sigmoid(output.cpu())
        output=list(np.where(output<n,0,1)[:])
        target=list(target.cpu().numpy())
        outputs+=output
        targets+=target

    outputs_of_model=np.stack(outputs,axis=0).squeeze().astype(dtype=int)
    targets_of_model=np.array(targets).astype(dtype=int)
    
    #detect True Negatives, False Positives, False Negatives, True Positives
    TN=np.array(targets_of_model[outputs_of_model==0]==0).sum()
    FP=np.array(targets_of_model[outputs_of_model==1]==0).sum()
    FN=np.array(targets_of_model[outputs_of_model==0]==1).sum()
    TP=np.array(targets_of_model[outputs_of_model==1]==1).sum()

    TPR.append(TP/(TP+FN))
    FPR.append(FP/(FP+TN))
    
#Plot True Positive Rate and False Positive Rate
plt.plot(FPR,TPR)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")

AUC=0.0;
for i in range(len(TPR)-1):
    AUC+=np.abs((TPR[i]+TPR[i+1])*(FPR[i+1]-FPR[i])/2)
  
  
print("Estimated AUC: {}".format(AUC))

In [None]:
outputs=[]
targets=[]
for data,target in test_loader:
    output=model(data.float())
    output=torch.sigmoid(output.cpu())
    output=list(np.where(output<0.5,0,1)[:])
    target=list(target.cpu().numpy())
    outputs+=output
    targets+=target

outputs_of_model=np.stack(outputs,axis=0).squeeze().astype(dtype=int)
targets_of_model=np.array(targets).astype(dtype=int)


TN=np.array(targets_of_model[outputs_of_model==0]==0).sum()
FP=np.array(targets_of_model[outputs_of_model==1]==0).sum()
FN=np.array(targets_of_model[outputs_of_model==0]==1).sum()
TP=np.array(targets_of_model[outputs_of_model==1]==1).sum()


print("True Positives: {}".format(TP))
print("True Negatives: {}".format(TN))
print("False Positives: {}".format(FP))
print("False Negatives: {}".format(FN))


precision=TP/(TP+FP)
recall=TP/(TP+FN)
print("Model Precision: {}".format(precision))
print("Model Recall: {}".format(recall))

F1=2*precision*recall/(precision+recall)

print("F1-score: {}".format(F1))

In [None]:
(TP+TN)/(TP+FP+TN+FN)