# Kubertemporal behavior

This chapter demonstrates how a 1D model can be learned over DBFP records.

The main goal of this notebook is to train a 1D model per application and per user. `Kuberspatiotemporal` is used to model the time at which an authentication was requested.

In [None]:
%load_ext nb_black
import os, sys
import pandas as pd
import numpy as np


from ipywidgets import interact, interactive, Layout, IntSlider

In [None]:
from kuberspatiotemporal import CompoundModel, Feature, SpatialModel, KuberModel
from kuberspatiotemporal.tools import make_ellipses

from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.preprocessing import FunctionTransformer

In [None]:
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
from sklearn.metrics import confusion_matrix



## 1. Import data

In [None]:
# Location of the DBFP MFA export. Set the DBFP_MFA_CSV environment variable to
# run the notebook on another machine; the default keeps the original path.
DBFP_MFA_CSV = os.environ.get(
    "DBFP_MFA_CSV",
    "/Users/adrianai/Desktop/aiml/aiml-dbfp-anomaly/utils/user_dbfp_mfa.csv",
)
data = pd.read_csv(DBFP_MFA_CSV)

In [None]:
# Preview the first five records of the loaded dataset.
data.head(5)

## 2. Initial analysis

### 2.1 Number of authentications per application per user

For learning behaviors (see respective chapter), it is interesting to sort by applications and see how many datapoints per user and per app (to count combinations, see [here](https://stackoverflow.com/a/25259333)) are in the dataset. 

In [None]:
step = 10  # number of rows shown per page of the interactive table

_COMBINATION_KEYS = ["user_id", "organization_id", "oauth_application_id"]


def _count_requests_with_status(status):
    """Count records per (user, organization, application) for one MFA status.

    Returns a frame with the three key columns plus a count column named after
    ``status``, sorted by that count in descending order.
    """
    matching = data[data["mfa_status"] == status]
    counts = matching.groupby(_COMBINATION_KEYS, as_index=False)["created_at"].count()
    counts = counts.rename(columns={"created_at": status})
    return counts.sort_values(by=status, ascending=False)


df_per_app_per_user_approved = _count_requests_with_status("approved")
df_per_app_per_user_rejected = _count_requests_with_status("rejected")

# pd.merge defaults to an inner join, so only combinations that have both
# approved and rejected requests are kept.
df_per_app_per_user = pd.merge(
    df_per_app_per_user_approved,
    df_per_app_per_user_rejected,
    on=_COMBINATION_KEYS,
).sort_values(by=["approved", "rejected"], ascending=False)


def show_dbfp_app_user_counts(i):
    """Display page ``i`` (zero-based) of the per-combination counts, ``step`` rows long."""
    first_row = i * step
    page = df_per_app_per_user.iloc[first_row : first_row + step]
    display(page)


# Index of the last page: pages are `step` rows long and the final one may be
# partial. Clamp at 0 so an empty table still yields a valid slider range.
last_page = max(0, (df_per_app_per_user.shape[0] - 1) // step)

show_app_user_counts = interactive(
    show_dbfp_app_user_counts,
    i=IntSlider(
        min=0,
        max=last_page,
        layout=Layout(width="30%", height="100px"),
    ),
)
show_app_user_counts

## 3. Train Models

In [None]:
# Keep only (user, organization, application) combinations with more than 30
# approved and more than 30 rejected requests.
df_per_app_per_user[
    (df_per_app_per_user["approved"] > 30) & (df_per_app_per_user["rejected"] > 30)
]

In [None]:
# Combinations with more than 30 approved and more than 30 rejected requests.
has_enough_of_both = (df_per_app_per_user["approved"] > 30) & (
    df_per_app_per_user["rejected"] > 30
)
candidate_keys = df_per_app_per_user[has_enough_of_both][
    ["user_id", "organization_id", "oauth_application_id"]
]

# Take the fifth entry (index 4) among the first ten candidates.
first_ten = candidate_keys.head(10).values.tolist()
user, org, app = first_ten[4]

In [None]:
# Show the selected user, organization and application identifiers.
user, org, app

### 3.1 Split test and train samples
In section 5 we will calculate the model's performance metrics; for that, we must split the data into training and test sets beforehand.

In [None]:
# Rows belonging to the selected user / application / organization.
is_selected_combination = (
    (data["user_id"] == user)
    & (data["oauth_application_id"] == app)
    & (data["organization_id"] == org)
)

# Split those rows by MFA outcome.
data_approved = data[(data["mfa_status"] == "approved") & is_selected_combination]
data_rejected = data[(data["mfa_status"] == "rejected") & is_selected_combination]

In [None]:
# Seed the split so that Restart & Run All reproduces the same train/test
# partition (and therefore the same metrics further down).
SPLIT_SEED = 42
TRAIN_FRACTION = 0.7
split_rng = np.random.RandomState(SPLIT_SEED)

# True -> training sample, False -> test sample (roughly 70% / 30%).
msk_approved = split_rng.rand(len(data_approved)) < TRAIN_FRACTION
msk_rejected = split_rng.rand(len(data_rejected)) < TRAIN_FRACTION

In [None]:
# Rows flagged True by the mask go to training, the rest to testing.
train_data_approved = data_approved.loc[msk_approved]
test_data_approved = data_approved.loc[~msk_approved]

# Number of approved samples in each split.
len(train_data_approved), len(test_data_approved)

In [None]:
# Fraction of approved samples that ended up in the train and test splits.
n_train_approved = train_data_approved.shape[0]
n_test_approved = test_data_approved.shape[0]
n_total_approved = n_train_approved + n_test_approved

n_train_approved / n_total_approved, n_test_approved / n_total_approved

In [None]:
# Rows flagged True by the mask go to training, the rest to testing.
train_data_rejected = data_rejected.loc[msk_rejected]
test_data_rejected = data_rejected.loc[~msk_rejected]

# Number of rejected samples in each split.
len(train_data_rejected), len(test_data_rejected)

In [None]:
# Fraction of rejected samples that ended up in the train and test splits.
n_train_rejected = train_data_rejected.shape[0]
n_test_rejected = test_data_rejected.shape[0]
n_total_rejected = n_train_rejected + n_test_rejected

n_train_rejected / n_total_rejected, n_test_rejected / n_total_rejected

### 3.2 Create behavior object

#### 3.2.1. Modelling approved data

In [None]:
# Settings for the 1D model fitted on approved requests.
approved_model_params = dict(
    n_dim=1,
    min_eigval=1e-9,
    n_iterations=200,
    scaling_parameter=1.1,
    nonparametric=True,
    online_learning=False,
    loa=True,
    # NOTE(review): presumably the [0, 24] hour-of-day range produced by the
    # pipeline's transformer -- confirm against the SpatialModel docs.
    limits=np.array([[0], [24]]),
)
kt_approved = SpatialModel(**approved_model_params)

In [None]:
def _approved_hour_of_day(timestamps):
    """Convert timestamps to fractional hours of the day as an (n, 1) array.

    Each value is parsed with ``pd.Timestamp`` and mapped to
    ``hour + minute / 60 + second / 3600``.
    """
    parsed = [pd.Timestamp(ts) for ts in timestamps]
    hours = [p.hour + p.minute / 60 + p.second / 3600 for p in parsed]
    return np.array(hours).reshape(-1, 1)


# Only the "updated_at" column, converted to hour of day, is fed to the model.
# NOTE(review): the counts in section 2 use "created_at" -- confirm that
# "updated_at" is the intended timestamp here.
pipeline_approved = make_pipeline(
    make_column_transformer(
        (FunctionTransformer(_approved_hour_of_day), "updated_at"),
    ),
    kt_approved,
)

In [None]:
# Fit the approved-requests pipeline on the approved training split.
pipeline_approved.fit(train_data_approved)

#### 3.2.2. Modelling rejected data

In [None]:
# Settings for the 1D model fitted on rejected requests (same values as the
# approved model).
rejected_model_params = dict(
    n_dim=1,
    min_eigval=1e-9,
    n_iterations=200,
    scaling_parameter=1.1,
    nonparametric=True,
    online_learning=False,
    loa=True,
    # NOTE(review): presumably the [0, 24] hour-of-day range produced by the
    # pipeline's transformer -- confirm against the SpatialModel docs.
    limits=np.array([[0], [24]]),
)
kt_rejected = SpatialModel(**rejected_model_params)

In [None]:
def _rejected_hour_of_day(timestamps):
    """Convert timestamps to fractional hours of the day as an (n, 1) array.

    Each value is parsed with ``pd.Timestamp`` and mapped to
    ``hour + minute / 60 + second / 3600``.
    """
    parsed = [pd.Timestamp(ts) for ts in timestamps]
    hours = [p.hour + p.minute / 60 + p.second / 3600 for p in parsed]
    return np.array(hours).reshape(-1, 1)


# Only the "updated_at" column, converted to hour of day, is fed to the model.
pipeline_rejected = make_pipeline(
    make_column_transformer(
        (FunctionTransformer(_rejected_hour_of_day), "updated_at"),
    ),
    kt_rejected,
)

In [None]:
# Fit the rejected-requests pipeline on the rejected training split.
pipeline_rejected.fit(train_data_rejected)

## 4. Score samples
In a real scenario, whenever we receive a new record we will want to give it a score. This score is calculated as described below.

Kuberspatiotemporal offers 3 alternatives for scoring: 2 of them return a continuous score, like a probability, and the other returns a binary score. In this case we will use the simplest approach to compute the probabilistic score because we are only working with continuous data. This scoring approach computes the probability that a given point belongs to the `SpatialModel` - it can be used after activating `SpatialModel.loa`. We can also compute the binary score, which tells us whether the record was drawn from the model distribution (1) or not (0).

### 4.1 loa_score

In [None]:
# sample record
record = train_data_approved.iloc[0:1]
record

In [None]:
# Activate `loa` on the fitted model so that `score` returns the
# probabilistic score described in this section.
pipeline_approved["spatialmodel"].loa = True

In [None]:
# Score the sample record with the approved-requests pipeline.
pipeline_approved.score(record)

### 4.2 anomaly_score

In [None]:
approved_model = pipeline_approved["spatialmodel"]

# Switch off `loa` before scoring against a threshold.
approved_model.loa = False

# Derive the score threshold from the transformed approved training data.
approved_train_features = pipeline_approved["columntransformer"].transform(
    train_data_approved
)
approved_model.score_threshold = approved_model.get_score_threshold(
    approved_train_features,
    lower_quantile=0,
    upper_quantile=0.8,
)

In [None]:
# Score the sample record again, now with `loa` disabled and a score threshold set.
pipeline_approved.score(record)

## 5. Compute model metrics

As we collect data and create models, we will want to validate their performance, i.e., to assess how well they fit the data we have and how robust their predictions are. Since our data can be classified as approved or rejected requests, we will use this label to compute performance metrics.

### 5.1 Score samples

Let's consider that 0 stands for Rejected transactions and 1 stands for Approved Transactions.

In this approach, I'm creating 2 models: 1 to learn the approved requests and another to learn the rejected ones.
Here we aim to get not a binary score, but a value between 0 and 1.
- **For the approved test set**: we score each sample using both models; the final predicted label corresponds to the model with the maximum score. Example: for a given approved request in the test set, the approved model returns 0.7 while the rejected model returns 0.1, so we can conclude that this sample is a true approved.
- **For the rejected test set**: we do the same. For a given rejected sample we get 0.8 from the approved model and 0.3 from the rejected which allows us to conclude that this sample is a false approved.

Based on this approach we can compute a confusion matrix.

In [None]:
def calculate_score(record, pipeline):
    """Return ``pipeline.score(record)`` with the model's ``loa`` flag enabled.

    Side effect: sets ``loa = True`` on the pipeline's "spatialmodel" step.
    """
    model = pipeline["spatialmodel"]
    model.loa = True
    score = pipeline.score(record)
    return score

In [None]:
# Score every approved test sample with both models. Column 0 holds the
# rejected-model score and column 1 the approved-model score, so the argmax is
# directly the predicted label (0 = rejected, 1 = approved). This is the same
# layout used for the rejected test set.
approved_test_scores = np.array(
    [
        [
            calculate_score(test_data_approved.iloc[[idx]], pipeline_rejected),
            calculate_score(test_data_approved.iloc[[idx]], pipeline_approved),
        ]
        for idx in range(len(test_data_approved))
    ]
)
pred_test_data_approved = np.argmax(approved_test_scores, axis=1)




In [None]:
# One-row frames for each rejected test sample.
rejected_test_rows = (
    test_data_rejected.iloc[[pos]] for pos in range(len(test_data_rejected))
)

# Column 0: rejected-model score, column 1: approved-model score, so the index
# of the larger score is the predicted label (0 = rejected, 1 = approved).
rejected_test_scores = np.array(
    [
        [
            calculate_score(row, pipeline_rejected),
            calculate_score(row, pipeline_approved),
        ]
        for row in rejected_test_rows
    ]
)
pred_test_data_rejected = np.argmax(rejected_test_scores, axis=1)

In [None]:
# Ground truth: 0 for every rejected test sample, then 1 for every approved one.
n_rejected_test = len(pred_test_data_rejected)
n_approved_test = len(pred_test_data_approved)
y_true = np.concatenate([np.zeros(n_rejected_test), np.ones(n_approved_test)])

In [None]:
# Stack predictions in the same order as y_true: rejected test samples first,
# then approved ones.
y_pred = np.concatenate((pred_test_data_rejected, pred_test_data_approved))

In [None]:
# Pin the label order so the matrix is always 2x2 (0 = rejected, 1 = approved),
# even when only one class appears in y_true / y_pred; otherwise the 4-way
# unpacking below fails.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

In [None]:
# True negatives, false positives, false negatives, true positives.
(tn, fp, fn, tp)

In [None]:
# Denominators shared by the metrics below.
n_scored = tn + fp + fn + tp
n_predicted_approved = tp + fp
n_actual_approved = tp + fn
n_actual_rejected = tn + fp

acc = (tp + tn) / n_scored
precision = tp / n_predicted_approved
recall = tp / n_actual_approved
specificity = tn / n_actual_rejected
# Harmonic mean of precision and recall.
f1_score = 2 * precision * recall / (precision + recall)

In [None]:
# Metric name -> value, in display order.
metric_by_name = {
    "Accuracy": acc,
    "Precision": precision,
    "Recall": recall,
    "Specificity": specificity,
    "F1 Score": f1_score,
}

# One-column table of percentages rounded to three decimals.
metric_column = np.array(list(metric_by_name.values()))[:, np.newaxis]
metrics = pd.DataFrame(
    np.round(metric_column * 100, 3),
    index=list(metric_by_name.keys()),
    columns=["values"],
)

In [None]:
# Show the metrics table (values are percentages).
metrics

### 5.2 Different scoring approach
With Kubertemporal we can compute a binary score to predict whether a sample was drawn from a given distribution.

In this approach, I'm only creating 1 model to learn the approved requests. In the scoring step, I will use this model to score both approved and rejected test samples:
- **For the approved test set**: true approved samples should get score 1, while false rejected samples will get score 0.
- **For the rejected test set**: true rejected samples should get score 0 (the model assumes they were not drawn from the approved distribution), while false approved samples will get score 1 (the model says those rejected requests were drawn from the approved distribution).

In [None]:
def calculate_binary_score(
    record, pipeline, train_data, lower_quantile=0, upper_quantile=0.8
):
    """Score ``record`` with ``loa`` disabled and a threshold derived from ``train_data``.

    Parameters
    ----------
    record
        Data passed unchanged to ``pipeline.score``.
    pipeline
        Fitted pipeline exposing the "columntransformer" and "spatialmodel" steps.
    train_data
        Data transformed by the "columntransformer" step and handed to the
        model's ``get_score_threshold``.
    lower_quantile, upper_quantile
        Forwarded to ``get_score_threshold``; the defaults (0 and 0.8) are the
        values previously hard-coded here.

    Returns
    -------
    Whatever ``pipeline.score(record)`` returns.

    Notes
    -----
    Mutates the model: sets ``loa = False`` and overwrites ``score_threshold``.
    The threshold is recomputed on every call.
    """
    model = pipeline["spatialmodel"]
    model.loa = False
    train_features = pipeline["columntransformer"].transform(train_data)
    model.score_threshold = model.get_score_threshold(
        train_features,
        lower_quantile=lower_quantile,
        upper_quantile=upper_quantile,
    )
    return pipeline.score(record)

In [None]:
# Binary score of every approved test sample under the approved model; each
# score is wrapped in its own single-element row.
approved_binary_rows = []
for pos in range(len(test_data_approved)):
    sample = test_data_approved.iloc[[pos]]
    approved_binary_rows.append(
        [calculate_binary_score(sample, pipeline_approved, train_data_approved)]
    )
pred_test_data_approved_1 = np.array(approved_binary_rows)

In [None]:
# Binary score of every rejected test sample under the approved model; each
# score is wrapped in its own single-element row.
rejected_binary_rows = []
for pos in range(len(test_data_rejected)):
    sample = test_data_rejected.iloc[[pos]]
    rejected_binary_rows.append(
        [calculate_binary_score(sample, pipeline_approved, train_data_approved)]
    )
pred_test_data_rejected_1 = np.array(rejected_binary_rows)

In [None]:
# Stack the binary predictions in the same order as y_true: rejected test
# samples first, then approved ones.
y_pred = np.concatenate((pred_test_data_rejected_1, pred_test_data_approved_1))

In [None]:
# Pin the label order so the matrix is always 2x2 (0 = rejected, 1 = approved),
# even when only one class appears in y_true / y_pred; otherwise the 4-way
# unpacking below fails.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

In [None]:
# True negatives, false positives, false negatives, true positives.
(tn, fp, fn, tp)

In [None]:
# Denominators shared by the metrics below.
n_scored = tn + fp + fn + tp
n_predicted_approved = tp + fp
n_actual_approved = tp + fn
n_actual_rejected = tn + fp

acc = (tp + tn) / n_scored
precision = tp / n_predicted_approved
recall = tp / n_actual_approved
specificity = tn / n_actual_rejected
# Harmonic mean of precision and recall.
f1_score = 2 * precision * recall / (precision + recall)

In [None]:
# Metric name -> value, in display order.
metric_by_name = {
    "Accuracy": acc,
    "Precision": precision,
    "Recall": recall,
    "Specificity": specificity,
    "F1 Score": f1_score,
}

# One-column table of percentages rounded to three decimals.
metric_column = np.array(list(metric_by_name.values()))[:, np.newaxis]
metrics = pd.DataFrame(
    np.round(metric_column * 100, 3),
    index=list(metric_by_name.keys()),
    columns=["values"],
)

In [None]:
# Show the metrics table for the binary-score approach (values are percentages).
metrics