# Part 06 RESULTS

# 0.0. Imports

In [1]:
# Data manipulation
import pandas as pd
import numpy as np

# Graphs
import matplotlib.pyplot as plt
import seaborn as sns

# Save files
import pickle

# Warning
import warnings
warnings.filterwarnings( 'ignore' )

## 0.1. Helper Functions

In [8]:
# Checking model performance
def Check(df):
   if df['Predictions']== df['is_fraud']:
      return "True"
   else:
      return "False" 

# 11.0. Business Performance

In [2]:
# Load dataset
feature_dtypes = {'step': 'float32',
                  'type': 'uint8',
                  'amount': 'float32',
                  'oldbalance_orig': 'float32',
                  'newbalance_orig': 'float32',
                  'oldbalance_dest': 'float32',
                  'newbalance_dest': 'float32',
                  'is_fraud': 'uint8',
                  'is_flagged_fraud': 'uint8',
                  'error_orig' : 'float32',
                  'error_dest' : 'float32',
                  'dest_type' : 'uint8',
                  'days' : 'float32'}


df4 = pd.read_csv('dataset\df3.csv', dtype=feature_dtypes)

In [28]:
# Load dataset 2
# Load dataset
feature_dtypes = {'step': 'int16',
                  'type': 'category',
                  'amount': 'float32',
                  'name_orig': 'object',
                  'oldbalance_orig': 'float32',
                  'newbalance_orig': 'float32',
                  'name_dest': 'object',
                  'oldbalance_dest': 'float32',
                  'newbalance_dest': 'float32',
                  'is_fraud': 'uint8',
                  'is_flagged_fraud': 'uint8',
                  'error_orig' : 'float32',
                  'error_dest' : 'float32',
                  'dest_type' : 'category',
                  'days' : 'int8'}

df5 = pd.read_csv('dataset\df2.csv', dtype=feature_dtypes)

In [4]:
# Machine Learning featuers
boruta_select = ['step',
 'type',
 'amount',
 'oldbalance_orig',
 'newbalance_orig',
 'oldbalance_dest',
 'newbalance_dest',
 'is_flagged_fraud',
 'error_orig',
 'error_dest',
 'dest_type',
 'days']

In [3]:
# Load Machine Learning Models
rfc_tunned = pickle.load(open('models/rfc_tunned.pkl', 'rb'))

## 11.1. What is the model's *precision* and *accuracy*?

### 11.1.1. Performance

In [6]:
df_results = pd.read_csv('dataset/results.csv')
df_results

Unnamed: 0,NAME,ACCURACY,PRECISION,RECALL,F1,ROC
0,Baseline,0.499483,0.00125,0.49595,0.002493,0.497719
1,Logistic Regression,0.960864,0.02749,0.87352,0.053303,0.917247
2,Random Forest,0.999994,0.998751,0.996262,0.997505,0.99813
3,XGBoost Classifier,0.999939,0.95746,0.995639,0.976176,0.99779
4,Random Forest+,0.999991,0.996883,0.996262,0.996572,0.998129


## 11.2. How reliable is the model in classifying transactions as *legitimate* or *fraudulent*?

### 11.2.1. Real Performance

In [7]:
df_r_results = pd.read_csv('dataset/real_results.csv')
df_r_results

Unnamed: 0,real_accuracy,std_accuracy,real_precision,std_precision,real_recall,std_recall,real_f1,std_f1,real_roc,std_roc
Logistic Regression,0.9164,0.0045,0.9447,0.0277,0.8851,0.038,0.9136,0.0076,0.9164,0.0045
Random Forest Classifier,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
XGBoost Classifier,0.9985,0.0002,0.9999,0.0,0.9971,0.0003,0.9985,0.0002,0.9985,0.0002
Random Forest Classifier+,0.9987,0.0003,1.0,0.0,0.9974,0.0007,0.9987,0.0004,0.9987,0.0003


## 11.3.  What is the expected billing by the company if we classify 100% of data transactions with the model?

In [37]:
# Predict column
df4['predict'] = rfc_tunned.predict(df4[boruta_select])
df5['predict'] = df4.predict

# The company receive 25% of each transaction value truly detected as fraud
correct_predict = df5[(df5.is_fraud == 1) & (df5.predict == 1)]
correct_predict_amount = correct_predict[['amount', 'is_fraud', 'predict']].groupby(['is_fraud', 'predict']).sum().reset_index()
correct_predict_amount['to_receive'] = correct_predict_amount.amount * 0.25
amount_TP = correct_predict_amount.to_receive.sum()
print(f'The company will receive ${amount_TP:,.2f} dua to transaction truly detected as fraud')
print('---' * 30)

# The company will receive 5% of the value of each transaction detected as fraud, but the transaction is legitimate
not_fraud = df5[(df5.is_fraud == 0) & (df5.predict == 1)]
not_fraud_amount = not_fraud[['amount', 'is_fraud', 'predict']].groupby(['is_fraud', 'predict']).sum().reset_index()
not_fraud_amount['to_receive'] = not_fraud_amount.amount * 0.05
amount_FP = not_fraud_amount.to_receive.sum()
print(f'The company will receive ${amount_FP:,.2f} due to transaction detected as fraud, but actually legitimate')

The company will receive $3,011,586,048.00 dua to transaction truly detected as fraud
------------------------------------------------------------------------------------------
The company will receive $421,231.81 due to transaction detected as fraud, but actually legitimate


In [39]:
expected_bill = amount_TP + amount_FP
print(f'The expected billing by the company if we classify 100% of data transactions is: ${expected_bill:,.2f}')

The expected billing by the company if we classify 100% of data transactions is: $3,012,007,168.00


## 11.4. What is the loss expected by company in case of model failure?

In [40]:
# The company will return 100% of the value to the customer, for each transaction detected as legitimate, however a transaction is a fraud
wrong_predict = df5[(df5.is_fraud == 1) & (df5.predict == 0)]
wrong_predict_amount = wrong_predict[['amount', 'is_fraud', 'predict']].groupby(['is_fraud', 'predict']).sum().reset_index()
wrong_predict_amount['to_receive'] = wrong_predict_amount.amount * 1
amount_TF = wrong_predict_amount.to_receive.sum()
print(f'The company will give back ${amount_TF:,.2f} due to transactions detected as legitimate, but actually fraud')

The company will give back $10,071,736.00 due to transactions detected as legitimate, but actually fraud


## 11.5. What is the profit expected by the Blocker Fraud Company when using model?

In [41]:
profit = expected_bill - amount_TF
print(f'The profit of Blocker Fraud Company is: ${profit:,.2f}')

The profit of Blocker Fraud Company is: $3,001,935,360.00


# 12.0. Merging Notebooks

## 12.1.  View .ipynb file contents

In [1]:
import json
import os

# note -- we can look at a .ipynb file as a plain text file right here in Jupyter notebook
with open ('part1_blocker_fraud.ipynb', mode = 'r', encoding = 'utf-8') as f:
    a = json.load (f)

print(type(a))

print(a.keys())

print(a)

AAACQsgi+AQBARou3xJNqZ/fZGTwAAAAASFEE3wAAIKPFms2k2jmyOY0CAAAAgFTFFRsAAMhoycz4tmfZZDgodQIAAAAAqYrgGwAAZLRkZnw7/JQ5AQAAAIBURvANAAAyWqw5iRnflDkBAAAAgJTGVRsAAMhosSRKnThymPENAAAAAKmM4BsAAGS0WCPBNwAAAACkG4JvAACQsayopXggiRrfBN8AAAAAkNIIvgEAQMaKNsWTaufI4RQKAAAAAFIZV20AACBjJVPmRGLGNwAAAACkOoJvAACQsaJJBN82t02G02DwAAAAACCFEXwDAICMFWuIdbgNZU4AAAAAIPVx5QYAADJWtK7jM76deQ4GDgAAAABSHME3AADIWJH6js/4dhZQ3xsAAAAAUh3BNwAAyEix5risqNXhds4CZnwDAAAAQKoj+AYAABkpWh9Pqp2L4BsAAAAAUh7BNwAAyEjRuo6XOTHshhx+Sp0AAAAAQKoj+AYAABkpcjjJ+t4GYwcAAAAAqY7gGwAAZKTIoWiH21DfGwAAAAC6B4JvAACQccyopWhDx2t8u/IJvgEAAACgOyD4BgAAGSeaRJkTSXIWEnwDAAAAQHdA8A0AADJOJNngu4AbWwIAAABAd0DwDQAAMk74YMfre9uchhzZBN8AAAAA0B0QfAMAgIwTPsCNLQEAAAAgnRF8AwCAjBIPmIo1d/zGlu4yJ4MHAAAAAN0EwTcAAMgoyZQ5kQi+AQAAAKA7IfgGAAAZJZkyJxLBNwAAAAB0JwTfAAAgo4SrOx58O/x22b2cNgEAAABAd8EVHAAAyBhmxFL4UMeDb2Z7AwAAAED3QvANAAAyRnh/RLI63o7gGwAAAAC6F4JvAACQMUJV1PcGAAAAgExA8A0AADJGqDLS8ZMllyFXgYPBAwAAAIBuhOAbAABkhHjAVKQ21uF27lKnZDB+AAAAANCdEHwDAICMENwbSaodZU4AAAAAoPsh+AYAABkhuCecVDu

## 12.2. Generating List of Notebooks to Merge

In [2]:
# generate list of files to be merged
notebooks_to_merge = [file for file in os.listdir(os.getcwd()) if file.endswith('.ipynb')]

# list should be in the order in which you want the notebooks to be merged
# my notebooks are labeled 1.ipynb, 2.ipynb... in the order I want
notebooks_to_merge.sort()

print(notebooks_to_merge)

['part1_blocker_fraud.ipynb', 'part2_blocker_fraud.ipynb', 'part3_blocker_fraud.ipynb', 'part4_blocker_fraud.ipynb', 'part5_blocker_fraud.ipynb', 'part6_blocker_fraud.ipynb', 'v003_blocker_fraud_co.ipynb']


## 12.3. Generate Combined .ipynb File

In [3]:
def combine_ipynb_files(list_of_notebooks, combined_file_name):
    
    '''
    parameters:
    `list_of_notebooks` is an ordered list of your .ipynb files to be merged
    `combined_file_name` is the name of your combined .ipynb file which will be generated by this function
    
    returns: the filepath of the new file
    '''
    
    with open (notebooks_to_merge[0], mode = 'r', encoding = 'utf-8') as f:
        a = json.load (f)
    
    for notebook in notebooks_to_merge[1:]:
        with open (notebook, mode = 'r', encoding = 'utf-8') as f:
            b = json.load(f)
            a['cells'].extend (b['cells']) 
                # extend here, not append, so that each dictionary in b['cells']
                # is added to new dictionary in a['cells']

    with open(combined_file_name, mode='w', encoding='utf-8') as f:
        json.dump(a, f)
    
    print('Generated file: "{}".'.format(combined_file_name))
    
    return (os.path.realpath(combined_file_name))

combine_ipynb_files(notebooks_to_merge, "ver001_blocker_fraud.ipynb")

Generated file: "ver001_blocker_fraud.ipynb".


'd:\\MEUS_PROJETOS_DS\\Blocker Fraud Company\\notebooks\\ver001_blocker_fraud.ipynb'