In [1]:
from catboost import CatBoostClassifier
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, plot_roc_curve, make_scorer, f1_score, roc_auc_score, det_curve
from sklearn import preprocessing
from scipy import stats
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate, LeaveOneGroupOut, PredefinedSplit, GridSearchCV
import matplotlib.pyplot as plt
import os
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import CategoricalNB
import json
from datetime import datetime as dt

%matplotlib inline

In [2]:
def update_dict(d, u):
    import collections.abc
    for k, v in u.items():
        if isinstance(v, collections.abc.Mapping):
            d[k] = update(d.get(k, {}), v)
        else:
            d[k] = v
    return d


def update_file_with_results(file_path, results_dict):
    with open(file_path, 'r') as f:
        res = json.load(f)
    
    res = update_dict(res, results_dict)
    
    with open(file_path, 'w') as f:
        json.dump(res, f, sort_keys=True, indent=2)
        
        
def get_dict_with_results(json_path):
    with open(json_path, 'r') as f:
        res = json.load(f)
    return res 

In [3]:
def eer(fpr, fnr, thresholds):
    idx = np.nanargmin(np.absolute((fnr - fpr)))
    eer_threshold = thresholds[idx]
    eer1 = fpr[idx]
    
    return eer1, eer_threshold


def auc_roc(fpr, tpr):
    return metrics.auc(fpr, tpr)


def confusion_matrix_thr(y_true, proba, threshold):
    predict = proba
    predict[predict > threshold] = 1
    predict[predict <= threshold] = 0
    
    matr = metrics.confusion_matrix(y_true, predict, labels=[0, 1])
    
    tp = matr[0, 0]
    fp = matr[1, 0]
    fn = matr[0, 1]
    tn = matr[1, 1]
    
    return tn, fp, fn, tp


def calc_metrics(y_test, proba, thresholds):
    FPR = np.array([])
    TPR = np.array([])
    FNR = np.array([])
    F_score = np.array([])
    ANGA = np.array([])
    ANIA = np.array([])

    for thr in thresholds:
        tn, fp, fn, tp = confusion_matrix_thr(y_test, proba.copy(), thr)
        
        fpr = fp / (tn + fp)
        tpr = tp / (tp + fn)
        fnr = fn / (tp + fn)
        
        FPR = np.append(FPR, 1 if np.isnan(fpr) else fpr)
        TPR = np.append(TPR, 1 if np.isnan(tpr) else tpr)
        FNR = np.append(FNR, 1 if np.isnan(fnr) else fnr)
        F_score = np.append(F_score, tp / (tp + 0.5 * (fn + fp)))
    
    EER, EER_thr = eer(fpr=FPR, fnr=FNR, thresholds=thresholds)
    AUC_ROC = auc_roc(fpr=FPR, tpr=TPR)
    
    return {'FAR': FPR, 
            'FRR': FNR, 
            'F': F_score, 
            'EER': EER, 
            'EER_thr': EER_thr, 
            'AUC-ROC': AUC_ROC}


def get_broadcasts_custom_results(y_test, proba, time, window_type, window):
    df = pd.DataFrame(np.array([y_test, proba, time]).T, columns=['test', 'proba', 'time'])
    df['time'] = df['time'].apply(lambda x: dt.strptime(x, '%Y-%m-%d %H:%M:%S.%f'))
    df['events'] = 1
    
    df.index = pd.DatetimeIndex(df.time)
    df = df.sort_index()
    
    FOO = 'min'
    
    agg_dict = {'events': 'sum', 'proba': FOO, 'test': lambda x: 1 if (x == 1).all() else 0}
    
    if window_type == 'sampling':
        df = df.groupby(pd.Grouper(freq = window)).agg(agg_dict)
    elif window_type == 'rolling':
        df = df.rolling(window, min_periods = 1, center = False).agg(agg_dict)
        
    df = df.fillna(0.51)
    df.proba = df.proba.apply(lambda x: float(x))
    
    return df.test.to_numpy(), df.proba.to_numpy(), df.events.to_numpy()


def calc_broadcasts_metrics(y_test, proba, time, thresholds, window_type, window):
    FPR = np.array([])
    TPR = np.array([])
    FNR = np.array([])
    F_score = np.array([])
    ANGA = np.array([])
    ANIA = np.array([])
    
    y_test, proba, _ = get_broadcasts_custom_results(y_test, proba, time, window_type, window)

    for thr in thresholds:
        tn, fp, fn, tp = confusion_matrix_thr(y_test, proba.copy(), thr)
        
        fpr = fp / (tn + fp)
        tpr = tp / (tp + fn)
        fnr = fn / (tp + fn)
        
        FPR = np.append(FPR, 1 if np.isnan(fpr) else fpr)
        TPR = np.append(TPR, 1 if np.isnan(tpr) else tpr)
        FNR = np.append(FNR, 1 if np.isnan(fnr) else fnr)
        F_score = np.append(F_score, tp / (tp + 0.5 * (fn + fp)))
    
    EER, EER_thr = eer(fpr=FPR, fnr=FNR, thresholds=thresholds)
    AUC_ROC = auc_roc(fpr=FPR, tpr=TPR)
    
    return {'FAR': FPR, 
            'FRR': FNR, 
            'F': F_score, 
            'EER': EER, 
            'EER_thr': EER_thr, 
            'AUC-ROC': AUC_ROC}


def iterate_over_cv_results(results):
    for df_type, inner in results.items():
        if df_type == 'stub':
            continue

        for window_type, inner1 in inner.items():
            for window_size, inner2 in inner1.items():
                for model, inner3 in inner2.items():
                    for valid_user, inner4 in inner3['cross_validation']['valid_user'].items():
                        yield {'df_type': df_type, 
                               'window_type': window_type, 
                               'window_size': window_size, 
                               'model': model, 
                               'valid_user': valid_user, 
                               'accuracy': np.array(inner4['accuracy'])}

                        
def iterate_over_final_results(results):
    for df_type, inner in results.items():
        if df_type == 'stub':
            continue

        for window_type, inner1 in inner.items():
            for window_size, inner2 in inner1.items():
                for model, inner3 in inner2.items():
                    for valid_user, inner4 in inner3['final_validation']['valid_user'].items():
                        for intruder, inner5 in inner4['extracted_user'].items(): 
                            yield {'df_type': df_type, 
                                   'window_type': window_type, 
                                   'window_size': window_size, 
                                   'model': model, 
                                   'valid_user': valid_user, 
                                   'intruder': intruder,
                                   'test': np.array(inner5['test']), 
                                   'proba': np.array(inner5['proba'])[:, 1], 
                                   'time': [] if 'time' not in inner5.keys() else np.array(inner5['time'])}
            

def avg_accuracy(results):
    metrics = {}
    for res in iterate_over_cv_results(results):
        key = (res['df_type'], res['window_type'], res['window_size'], res['model'])
        if key not in metrics.keys():
            metrics[key] = {'accuracy': []}
        
        metrics[key]['accuracy'].append(res['accuracy'])
        
    for k, v in metrics.items():
        metrics[k] = ({'accuracy': np.array(v['accuracy']).mean()})    
        
    return metrics
          
    
def avg_common_metrics(results, thresholds):
    metrics = {}
    for res in iterate_over_final_results(results):
        key = (res['df_type'], res['window_type'], res['window_size'], res['model'])
        if key not in metrics.keys():
            metrics[key] = {'EER': [], 'AUC-ROC': []}
        
        metrics_dict = calc_metrics(res['test'], res['proba'], thresholds)
        
        metrics[key]['EER'].append(metrics_dict['EER'])
        metrics[key]['AUC-ROC'].append(metrics_dict['AUC-ROC'])
        
    for k, v in metrics.items():
        metrics[k] = ({'EER': np.array(v['EER']).mean(), 
                       'AUC-ROC': np.array(v['AUC-ROC']).mean()})
    
    return metrics


def avg_common_broadcasts_metrics(results, window_types, windows, thresholds):
    metrics = {}
    for wnd_type in window_types:
        for wnd in windows:
            print(wnd_type, wnd)
            for res in iterate_over_final_results(results):
                key = (res['df_type'], wnd_type, wnd, res['model'])
                if key not in metrics.keys():
                    metrics[key] = {'EER': [], 'AUC-ROC': []}

                metrics_dict = calc_broadcasts_metrics(res['test'], res['proba'], res['time'], thresholds, wnd_type, wnd)

                metrics[key]['EER'].append(metrics_dict['EER'])
                metrics[key]['AUC-ROC'].append(metrics_dict['AUC-ROC'])
        
            for k, v in metrics.items():
                metrics[k] = ({'EER': np.array(v['EER']).mean(), 
                               'AUC-ROC': np.array(v['AUC-ROC']).mean()})
    
    return metrics

In [4]:
def generate_common_accuracy_tables(results, df_type, window_type, window_sizes):
    word_document = Document()
    document_name = '_'.join(['df_type', 'window_type'])
    
    table = word_document.add_table(rows=10, cols=6) # we add rows iteratively
    table.style = 'TableGrid'
    
    NameIdx = 0
    WndIdx = 1
    CatBoostIdx = 2
    RandomForestIdx = 3
    SVCIdx = 4
    LogRegIdx = 5
    
    def get_col_idx(model_tag):
        if model_tag == 'CatBoost':
            return CatBoostIdx
        if model_tag == 'RandomForest':
            return RandomForestIdx
        if model_tag == 'SVC':
            return SVCIdx
        if model_tag == 'LogReg':
            return LogRegIdx
    
    s5 = 1
    s10 = 2
    s30 = 3
    s60 = 4
    s90 = 5
    s120 = 6
    s240 = 7
    s600 = 8
    
    def get_row_idx(wnd):
        if wnd == '5s':
            return s5
        if wnd == '10s':
            return s10
        if wnd == '30s':
            return s30
        if wnd == '60s':
            return s60
        if wnd == '90s':
            return s90
        if wnd == '120s':
            return s120
        if wnd == '240s':
            return s240
        if wnd == '600s':
            return s600
    
    table = add_columns_names(table, ['Метрика', 'Размер окна, с', 'CatBoostClassifier', 'RandomForest', 'SVM-SVC', 'LogisticRegression'])
    table = add_rows_names(table, ['Метрика', 'Accuracy'])
    table = add_rows_names(table, ['Размер окна, с'] + 
                           [str(x).replace('s', '') for x in window_sizes] + ['Лучший результат'], col_index=WndIdx)
    
    best_res = {}
    for k, v in results.items():
        if k[0] == df_type and k[1] == window_type:
            accuracy = results[k]['accuracy']
            
            if k[3] not in best_res.keys():
                best_res[k[3]] = ('0s', 0)
            
            if accuracy > best_res[k[3]][1]:
                best_res[k[3]][0] = k[2]
                best_res[k[3]][1] = accuracy
            
            table.rows[get_row_idx(k[2])].cells[get_col_idx(k[3])].text = str(accuracy)
            
    for k, v in best_res.items():
        table.rows[get_row_idx(v[0])].cells[get_col_idx(k)].text = str(v[1])
            
    word_document.add_page_break()
    word_document.save(document_name + '.docx')
    
    
def generate_common_metrics_tables(results, df_type, window_type, window_sizes):
   
    NameIdx = 0
    WndIdx = 1
    CatBoostIdx = 2
    RandomForestIdx = 3
    SVCIdx = 4
    LogRegIdx = 5
    
    def get_col_idx(model_tag):
        if model_tag == 'CatBoost':
            return CatBoostIdx
        if model_tag == 'RandomForest':
            return RandomForestIdx
        if model_tag == 'SVC':
            return SVCIdx
        if model_tag == 'LogReg':
            return LogRegIdx
    
    s5 = 1
    s10 = 2
    s30 = 3
    s60 = 4
    s90 = 5
    s120 = 6
    s240 = 7
    s600 = 8
    
    def get_row_idx(wnd):
        if wnd == '5s':
            return s5
        if wnd == '10s':
            return s10
        if wnd == '30s':
            return s30
        if wnd == '60s':
            return s60
        if wnd == '90s':
            return s90
        if wnd == '120s':
            return s120
        if wnd == '240s':
            return s240
        if wnd == '600s':
            return s600
        
    for metr in ['AUC-ROC', 'EER']:
        
        word_document = Document()
        document_name = '_'.join(['df_type', 'window_type', metr])

        table = word_document.add_table(rows=10, cols=6) # we add rows iteratively
        table.style = 'TableGrid'
        
        table = add_columns_names(table, ['Метрика', 'Размер окна, с', 'CatBoostClassifier', 'RandomForest', 'SVM-SVC', 'LogisticRegression'])
        table = add_rows_names(table, ['Метрика', metr])
        table = add_rows_names(table, ['Размер окна, с'] + 
                               [str(x).replace('s', '') for x in window_sizes] + ['Лучший результат'], col_index=WndIdx)

        best_res = {}
        for k, v in results.items():
            if k[0] == df_type and k[1] == window_type:
                accuracy = results[k][metr]

                if k[3] not in best_res.keys():
                    best_res[k[3]] = ('0s', 0)

                if accuracy > best_res[k[3]][1]:
                    best_res[k[3]][0] = k[2]
                    best_res[k[3]][1] = accuracy

                table.rows[get_row_idx(k[2])].cells[get_col_idx(k[3])].text = str(accuracy)

        for k, v in best_res.items():
            table.rows[get_row_idx(v[0])].cells[get_col_idx(k)].text = str(v[1])

        word_document.add_page_break()
        word_document.save(document_name + '.docx')

In [5]:
RESULTS_PATH = ".\\_results"
RESULTS_FILE = 'bt_results.json'

THRESHOLDS = np.arange(0.0, 1.01, 0.05)

results = get_dict_with_results(os.path.join(RESULTS_PATH, RESULTS_FILE))

In [6]:
avg_accuracy(results)

{('bt', 'rolling', '60s', 'CatBoost'): {'accuracy': 0.8964166017861801},
 ('bt', 'rolling', '60s', 'LogReg'): {'accuracy': 0.9210366240796924},
 ('bt', 'rolling', '60s', 'RandomForest'): {'accuracy': 0.8741074958134654},
 ('bt', 'sampling', '60s', 'CatBoost'): {'accuracy': 0.9025587289930687},
 ('bt', 'sampling', '60s', 'LogReg'): {'accuracy': 0.9292526772441232},
 ('bt', 'sampling', '60s', 'RandomForest'): {'accuracy': 0.8830167486596852}}

In [7]:
avg_common_metrics(results, THRESHOLDS)

{('bt', 'rolling', '60s', 'CatBoost'): {'EER': 0.2415056009300803,
  'AUC-ROC': 0.8627148197291062},
 ('bt', 'rolling', '60s', 'LogReg'): {'EER': 0.4282943184161751,
  'AUC-ROC': 0.7188691617094093},
 ('bt', 'rolling', '60s', 'RandomForest'): {'EER': 0.12824692365706278,
  'AUC-ROC': 0.9162911549937582},
 ('bt', 'sampling', '60s', 'CatBoost'): {'EER': 0.1995506944816863,
  'AUC-ROC': 0.8191199189993269},
 ('bt', 'sampling', '60s', 'LogReg'): {'EER': 0.19195174384218738,
  'AUC-ROC': 0.7170741268418472},
 ('bt', 'sampling', '60s', 'RandomForest'): {'EER': 0.22613470664120547,
  'AUC-ROC': 0.8154799261677675}}

In [8]:
WINDOW_TYPES = ['rolling', 'sampling']
WINDOWS = ['5s', '10s', '30s', '60s', '90s', '120s', '240s', '600s']

In [9]:
avg_common_broadcasts_metrics(results, ['sampling', 'rolling'], ['60s'], THRESHOLDS)

sampling 60s


ValueError: Shape of passed values is (3, 1), indices imply (3, 3)