# Model Evaluation

## Autoencoder Model Evaluation on Validation Set

Import Evaluation Metrics

In [None]:
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score

In [None]:
# Load models
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Evaluation only: put the freshly built network in inference mode so that
# dropout / batch-norm layers (if the architecture has any) behave
# deterministically.
model_autoencoder_p2_v1 = Autoencoder_p2_v1()
model_autoencoder_p2_v1.eval()

# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU can still be loaded on a CPU-only machine.
model_autoencoder_p2_v1.load_state_dict(
    torch.load('model_name.pth', map_location=device)
)

In [None]:
# Bare expression: in a notebook this shows the model's repr as the cell output.
model_autoencoder_p2_v1

In [None]:
# Model under evaluation for the validation-set cells.
model_name = model_autoencoder_p2_v1

# Validation dataloader: batches of BATCH_SIZE served in the stored order
# (no shuffling).
val_loader = DataLoader(
    dataset=x_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

### Derive losses on the validation dataset and on the true ATO cases

In [None]:
# Run the selected model on three inputs and keep the second returned value
# (the losses); the first returned value is discarded.
# The validation data goes through `predict_batch` via its dataloader; the
# fraud sets are passed to `predict` directly.
_, val_loss = predict_batch(model_name, val_loader)
_, ato_loss = predict(model_name, x_test_ato)
# NOTE(review): `all_loss` is not read by the evaluation cells visible below —
# confirm whether it is still needed.
_, all_loss = predict(model_name, x_test_all)

Transform losses to numpy arrays.

In [None]:
# Convert both loss collections to numpy arrays in one step
# (validation losses first, ATO losses second).
normal_losses, ato_losses = map(np.array, (val_loss, ato_loss))

Create labels for normal and ato cases

In [None]:
# One integer label per loss value: 0 for the normal (validation) samples,
# 1 for the ATO samples.
normal_label = np.zeros(normal_losses.shape, dtype=int)
# Size the labels from the ndarray `ato_losses`; the raw `ato_loss` returned by
# `predict` is not guaranteed to expose `.shape`.
ato_label = np.ones(ato_losses.shape, dtype=int)

Create the true labels and the prediction scores for the metrics calculation

In [None]:
# Stack normal samples first and ATO samples second, so labels and scores
# stay aligned index by index.
y_true = np.concatenate([normal_label, ato_label], axis=0)
y_score = np.concatenate([normal_losses, ato_losses], axis=0)

### Derive false positive rate, true positive rate, thresholds and auc score

In [None]:
import sklearn.metrics

# ROC operating points over every candidate threshold on the loss score.
fpr, tpr, thresholds = roc_curve(y_true, y_score)

# Area under the ROC curve. The result is stored in `auc` because the plotting
# cell reads that name, but that rebinding hides the imported `auc` function.
# Calling through the module keeps this cell re-runnable: `auc(fpr, tpr)` would
# raise TypeError on the second run, once `auc` is a float.
auc = sklearn.metrics.auc(fpr, tpr)

Plot the ROC curve

In [None]:
# Legend entry carrying the AUC rounded to three decimals.
roc_label = 'model_name (area = {:.3f})'.format(auc)

plt.figure(1)
# Dashed black diagonal from (0, 0) to (1, 1) as a reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr, label=roc_label)

plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='best')
plt.show()

### Derive Recall, Precision, thresholds and F1-Score

In [None]:
# Precision / recall pairs for every candidate threshold on the loss score.
precision, recall, thresholds_2 = precision_recall_curve(y_true, y_score)

# F1 = 2 * P * R / (P + R). The ratio is undefined where P + R == 0 (for
# example when the highest-scored samples are all negatives); report 0 there
# instead of producing NaN and a RuntimeWarning.
pr_sum = precision + recall
f1_score = np.divide(
    2 * np.multiply(precision, recall),
    pr_sum,
    out=np.zeros_like(pr_sum),
    where=pr_sum > 0,
)

Plot F1-Scores against Thresholds.

In [None]:
# NOTE(review): the thresholds_p2_* / f1_p2_* names are not defined in the
# visible cells (the cell above produces `thresholds_2` and `f1_score`);
# presumably they were saved per model in earlier runs — confirm.
plt.figure(2)
# Fix the x-range before plotting so that only thresholds in [0, 10] are shown.
plt.xlim(0, 10)
for model_thresholds, model_f1, model_tag in (
    (thresholds_p2_v1, f1_p2_v1, 'model_p2_v1'),
    (thresholds_p2_v2, f1_p2_v2, 'model_p2_v2'),
):
    # The last F1 entry is dropped; presumably the F1 array is one element
    # longer than its threshold array — confirm.
    plt.plot(model_thresholds, model_f1[:-1], label=model_tag)

plt.title('F1-Score')
plt.xlabel('Thresholds')
plt.ylabel('F1-Score')
plt.legend(loc='best')
plt.show()

## Model Performance on Test Dataset

Read the fraud test datasets (all fraud types, and ATO fraud only)

In [None]:
# Both fraud extracts live under the same S3 prefix.
bucket_name = 'fraud-user-profile-sandbox/ATO_Features_V2'

# All fraud types: load the CSV, then replace missing values with 0.
data_key = 'all_fraud_data.csv'
data_location = 's3://{}/{}'.format(bucket_name, data_key)
df_all_frauds = pd.read_csv(data_location).fillna(0)

# ATO fraud only: load the CSV, drop exact duplicate rows, then replace
# missing values with 0.
data_key = 'ato_fraud_data.csv'
data_location = 's3://{}/{}'.format(bucket_name, data_key)
df_ato_frauds = pd.read_csv(data_location).drop_duplicates().fillna(0)

List the test dataset files for each month

In [None]:
# One gzipped CSV per month, February through May 2021, all under the same
# S3 prefix; only the month tag differs between the paths.
_MONTHLY_DATASET_PATTERN = (
    's3://fraud-user-profile-sandbox/ATO_Features_V2/dataset_{}.csv.gz'
)
monthly_file_list = [
    _MONTHLY_DATASET_PATTERN.format(month_tag)
    for month_tag in ('FEB2021', 'MAR2021', 'APR2021', 'MAY2021')
]

### Preprocess each month's dataset and calculate the losses for normal customer behavior in each month

In [None]:
def _drop_known_frauds(sessions, frauds):
    """Return the rows of `sessions` that have no match in `frauds`.

    Rows are matched on CUSTOMER_ID and GA_SESSIONS_DATE. A left merge with
    ``indicator=True`` tags every row with its origin; only the rows tagged
    'left_only' (present in `sessions`, absent from `frauds`) are kept, and the
    helper `_merge` column is removed again.
    """
    join_keys = ['CUSTOMER_ID', 'GA_SESSIONS_DATE']
    tagged = pd.merge(left=sessions,
                      right=frauds[join_keys],
                      on=join_keys,
                      how='left',
                      indicator=True)
    return tagged[tagged['_merge'] == 'left_only'].drop(columns='_merge')


monthly_normal_losses = []
# NOTE(review): this cell switches to the v2 model, while the validation cells
# above evaluate model_autoencoder_p2_v1 — confirm this is intended.
model_name = model_autoencoder_p2_v2
for monthly_path in monthly_file_list:
    # Read the monthly data and fill nulls with 0
    df_monthly = wr.s3.read_csv(path=monthly_path)
    df_monthly.fillna(0, inplace=True)

    # Remove every known fraud session (all fraud types, then ATO) so that
    # only normal behavior is left in the monthly data
    df_monthly = _drop_known_frauds(df_monthly, df_all_frauds)
    df_monthly = _drop_known_frauds(df_monthly, df_ato_frauds)

    # Drop the identifier columns before scaling
    df_monthly = df_monthly.drop(columns=['CUSTOMER_ID', 'GA_SESSIONS_DATE'])

    # Standardize the monthly data with the already-fitted scaler
    monthly_scaled = scaler_p2.transform(df_monthly)

    # Transform the ndarray into a float32 torch tensor.
    # The original `df_monthly.to(device)` call was removed: Tensor.to() is not
    # in-place and its result was discarded, so the data never left the CPU.
    # It is kept on the CPU here to preserve the behavior that actually ran.
    # NOTE(review): if GPU inference is wanted, move each batch inside
    # `predict_batch`, together with the model.
    monthly_tensor = torch.from_numpy(monthly_scaled).float()

    # Create the dataloader
    monthly_loader = DataLoader(dataset=monthly_tensor, shuffle=False, batch_size=200000)
    # Get the prediction losses for this month
    _, normal_loss = predict_batch(model_name, monthly_loader)
    monthly_normal_losses.append(normal_loss)

### Calculate losses on ATO fraud behaviors for each month.

In [None]:
monthly_ato_losses = []


def _ato_cases_between(start, end):
    """Return the ATO fraud rows with start <= GA_SESSIONS_DATE < end.

    The upper bound is exclusive (the first day of the following month). For
    plain 'YYYY-MM-DD' values this selects the same rows as an inclusive
    last-day bound, and it also keeps last-day rows whose value carries a time
    component, which `<= 'YYYY-MM-<last day>'` would drop.
    """
    session_dates = df_ato_frauds['GA_SESSIONS_DATE']
    return df_ato_frauds.loc[(session_dates >= start) & (session_dates < end)]


# Filter ATO cases in each month
ato_feb = _ato_cases_between('2021-02-01', '2021-03-01')
ato_mar = _ato_cases_between('2021-03-01', '2021-04-01')
ato_apr = _ato_cases_between('2021-04-01', '2021-05-01')
ato_may = _ato_cases_between('2021-05-01', '2021-06-01')

In [None]:
monthly_ato_losses = []
ato_monthly_files = [ato_feb, ato_mar, ato_apr, ato_may]

for ato_month in ato_monthly_files:
    # Drop the identifier columns, then standardize with the fitted scaler
    ato_features = ato_month.drop(columns=['CUSTOMER_ID', 'GA_SESSIONS_DATE'])
    ato_scaled = scaler_p2.transform(ato_features)

    # The original `file.to(device)` call was removed: Tensor.to() is not
    # in-place and its result was discarded, so the data never left the CPU.
    ato_tensor = torch.from_numpy(ato_scaled).float()

    # Get the prediction losses for this month. A dedicated name is used so the
    # validation-stage `ato_loss` is not overwritten when cells are re-run.
    _, month_ato_loss = predict(model_name, ato_tensor)
    monthly_ato_losses.append(month_ato_loss)

### Set the threshold for the decision boundary

In [None]:
# Decision boundary on the loss: the counting cells below treat a loss
# >= threshold as a positive (flagged) case.
# NOTE(review): 4.3 is hard-coded; presumably read off the F1-vs-threshold
# plot — confirm.
threshold = 4.3

### Calculate the number of false positives for each month

In [None]:
def _count_flagged(losses, cutoff):
    """Return how many entries of `losses` are greater than or equal to `cutoff`."""
    return sum(1 for score in losses if score >= cutoff)


# Normal sessions whose loss reaches the threshold are false positives.
# monthly_normal_losses follows the order of monthly_file_list (FEB, MAR, APR, MAY).
fp_feb = _count_flagged(monthly_normal_losses[0], threshold)
fp_mar = _count_flagged(monthly_normal_losses[1], threshold)
fp_apr = _count_flagged(monthly_normal_losses[2], threshold)
# May is scored by the loss loop as well, so it is counted here too.
fp_may = _count_flagged(monthly_normal_losses[3], threshold)

### Calculate the number of true positives for each month

In [None]:
def _count_flagged(losses, cutoff):
    """Return how many entries of `losses` are greater than or equal to `cutoff`."""
    return sum(1 for score in losses if score >= cutoff)


# ATO sessions whose loss reaches the threshold are true positives.
# monthly_ato_losses follows the order FEB, MAR, APR, MAY.
tp_feb = _count_flagged(monthly_ato_losses[0], threshold)
tp_mar = _count_flagged(monthly_ato_losses[1], threshold)
tp_apr = _count_flagged(monthly_ato_losses[2], threshold)
# May is scored by the loss loop as well, so it is counted here too.
tp_may = _count_flagged(monthly_ato_losses[3], threshold)

# Evaluation of SVMs on validation sets

Calculate the predictions

In [None]:
# Score the validation set, the ATO fraud set and the all-fraud set with the
# same fitted SVM, in that order.
SVM_rbf_pred_val, SVM_rbf_pred_test_ato, SVM_rbf_pred_test_all = (
    SVM_rbf_v1.predict(samples)
    for samples in (x_val, x_test_ato, x_test_all)
)

Calculate the false positives and true positives

In [None]:
# Count the -1 predictions in each prediction array.
# On the validation set these are reported as errors; on the two fraud sets
# they are reported as detected positives.
n_error_val = np.count_nonzero(SVM_rbf_pred_val == -1)
n_pos_ato = np.count_nonzero(SVM_rbf_pred_test_ato == -1)
n_pos_all = np.count_nonzero(SVM_rbf_pred_test_all == -1)

In [None]:
# Share of validation samples predicted as -1.
val_false_positive_rate = n_error_val / len(x_val)
print('False Positive Rate:', val_false_positive_rate)

# Share of ATO fraud samples predicted as -1.
ato_recall = n_pos_ato / len(x_test_ato)
print('ATO Recall:', ato_recall)

# Share of all-fraud samples predicted as -1.
all_recall = n_pos_all / len(x_test_all)
print('All Recall:', all_recall)

# Evaluation of Isolation Forests on validation sets

In [None]:
# Predictions of the fitted estimator `clf` on the ATO fraud set (first) and
# on the validation set (second).
y_pred_outliers, y_pred_val = (
    clf.predict(x_test_ato),
    clf.predict(x_val),
)

Calculate the true positives and false positives

In [None]:
# A prediction of -1 marks a sample flagged as an outlier.
# NOTE(review): assumes `clf` is the Isolation Forest named in the section
# heading and follows the scikit-learn -1/+1 convention — confirm.

# Fraud (ATO) samples flagged as outliers are true positives.
tp = y_pred_outliers[y_pred_outliers == -1].size
# Validation samples flagged as outliers are false positives. The mask must
# test the predictions themselves: the previous `y_val == 1` mask used a name
# that is not defined in the visible cells and did not look at what was
# predicted. This now matches the SVM evaluation above.
fp = y_pred_val[y_pred_val == -1].size