In [1]:
import collections.abc
import json
import os

import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

In [2]:
def concat_dataframes(path, df_type):
    """Load every per-user CSV whose file name contains ``df_type`` into one frame.

    ``path`` is scanned for sub-directories named ``<prefix>_<id>``. For each
    matching file the ``timestamp`` column is dropped and the integer ``<id>``
    taken from the directory name is stored in a new ``user`` column.

    Parameters
    ----------
    path : str
        Directory that holds one sub-directory per user.
    df_type : str
        Substring a file name must contain to be loaded.

    Returns
    -------
    pandas.DataFrame
        All matching files concatenated, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no file below ``path`` matches ``df_type``.
    """
    dfs_list = []

    # Sorted listings make the row order independent of the file system.
    for user in sorted(os.listdir(path)):
        user_dir = os.path.join(path, user)

        # Stray files next to the user folders are not users; skip them
        # instead of failing in os.listdir().
        if not os.path.isdir(user_dir):
            continue

        for file in sorted(os.listdir(user_dir)):
            if df_type not in file:
                continue

            df = pd.read_csv(os.path.join(user_dir, file))
            df = df.drop(["timestamp"], axis=1)
            # The user id is the second '_'-separated token of the folder name.
            df["user"] = int(user.split('_')[1])
            dfs_list.append(df)

    if not dfs_list:
        # pd.concat([]) raises the same exception type, but without context.
        raise ValueError(
            "No files containing {!r} found under {!r}".format(df_type, path)
        )

    return pd.concat(dfs_list, ignore_index=True)


def split_users_into_two_classes(df, valid_user_label, intruder_label):
    """Relabel the ``user`` column as a binary target, in place.

    Rows whose ``user`` equals ``valid_user_label`` become ``1``; every other
    row becomes ``intruder_label``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``user`` column. It is modified in place, so pass a copy
        if the original labels are still needed.
    valid_user_label
        Value of ``user`` that identifies the valid user.
    intruder_label
        Value written for all other users.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    # Decide class membership once, before any label is overwritten. Re-reading
    # the column after the first assignment would misclassify every row when
    # intruder_label happens to equal valid_user_label.
    is_valid_user = df["user"] == valid_user_label

    df.loc[~is_valid_user, "user"] = intruder_label
    df.loc[is_valid_user, "user"] = 1
    return df


def create_file_for_results(data_type):
    """Make sure the JSON results file for ``data_type`` exists and return its path.

    The file lives in a ``_results`` folder inside the current working
    directory and is named ``<data_type>_results.json``. A missing file is
    initialised with ``{"stub": null}``; an existing file is left untouched.

    Parameters
    ----------
    data_type : str
        Prefix of the results file name.

    Returns
    -------
    str
        Path of the results file, relative to the current working directory.
    """
    # Build the path with os.path.join: a literal '.\\_results' only names a
    # sub-folder on Windows and becomes an oddly named directory elsewhere.
    res_folder = os.path.join(os.curdir, '_results')
    os.makedirs(res_folder, exist_ok=True)

    file = os.path.join(res_folder, data_type + '_results.json')
    if not os.path.exists(file):
        with open(file, 'w') as f:
            json.dump({'stub': None}, f)

    return file


def update_file_with_results(file_path, results_dict):
    """Deep-merge ``results_dict`` into the JSON document stored at ``file_path``.

    Nested mappings are merged key by key; any other value replaces whatever
    is stored under the same key. The file is rewritten with sorted keys and
    an indent of 2.

    Parameters
    ----------
    file_path : str
        Path of an existing JSON file whose top level is an object.
    results_dict : Mapping
        Possibly nested results to merge into the file.

    Raises
    ------
    TypeError
        If the merged document is not JSON serialisable. The file on disk is
        left unchanged in that case.
    """
    def update(d, u):
        for k, v in u.items():
            if isinstance(v, collections.abc.Mapping):
                current = d.get(k)
                # A stored scalar or null (e.g. the initial 'stub': None) cannot
                # be merged into; start from an empty dict instead of crashing.
                if not isinstance(current, collections.abc.Mapping):
                    current = {}
                d[k] = update(current, v)
            else:
                d[k] = v
        return d

    with open(file_path, 'r') as f:
        res = json.load(f)

    res = update(res, results_dict)

    # Serialise before opening the file for writing: if encoding fails, the
    # previously accumulated results are not truncated.
    serialized = json.dumps(res, sort_keys=True, indent=2)

    with open(file_path, 'w') as f:
        f.write(serialized)
        
        
def get_dict_with_results(json_path):
    """Read the JSON file at ``json_path`` and return its parsed content."""
    with open(json_path, 'r') as results_file:
        return json.load(results_file)


def get_broadcasts_list(file):
    """Return the lines of the text file ``file`` with newline characters removed.

    Empty lines are kept as empty strings.
    """
    names = []
    with open(file, 'r') as source:
        for raw_line in source:
            names.append(raw_line.replace('\n', ''))
    return names


def fit_label_encoder(broadcasts_list):
    """Return a ``LabelEncoder`` fitted on the values in ``broadcasts_list``."""
    # LabelEncoder.fit returns the fitted encoder itself.
    return LabelEncoder().fit(broadcasts_list)


def get_broadcasts_dataframe(data_path, window_type, window_size):
    """Load the broadcasts data for one window setting and prepare the results file.

    Returns a ``(dataframe, results_file_path)`` tuple: the concatenated
    'broadcasts' files found under ``data_path/window_type/window_size`` and
    the path of the 'broadcasts' JSON results file.
    """
    features_dir = os.path.join(data_path, window_type, window_size)
    broadcasts_df = concat_dataframes(features_dir, 'broadcasts')
    results_path = create_file_for_results('broadcasts')
    return broadcasts_df, results_path

In [3]:
def get_train_split(df, valid_user, intruder, random_state=None):
    """Build a class-balanced, shuffled training set for one valid user.

    Rows of ``intruder`` are removed, the remaining rows are relabelled
    (``1`` for ``valid_user``, ``0`` for everyone else) and the larger class
    is randomly undersampled to the size of the smaller one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``user`` column; it is not modified. The ``user`` column
        must be the last column, because the label is taken from the last
        column of the array.
    valid_user
        Value of ``user`` that becomes the positive class.
    intruder
        Value of ``user`` that is excluded from training.
    random_state : int, optional
        Seed for the undersampling and the shuffle. ``None`` (default) keeps
        the previous behaviour of drawing from NumPy's global random state.

    Returns
    -------
    X : numpy.ndarray
        All columns except the last one.
    y : numpy.ndarray
        The last column (the binary label).
    """
    intruder_label = 0

    # The intruder's rows are dropped before relabelling, so they never end up
    # in the training set.
    train_df = df[df.user != intruder]
    train_df = split_users_into_two_classes(train_df.copy(), valid_user, intruder_label)

    valid_rows = train_df[train_df.user == 1]
    intruder_rows = train_df[train_df.user == intruder_label]

    # Undersample whichever class is larger; with equal sizes nothing is dropped.
    if len(valid_rows) < len(intruder_rows):
        majority, surplus = intruder_rows, len(intruder_rows) - len(valid_rows)
    else:
        majority, surplus = valid_rows, len(valid_rows) - len(intruder_rows)
    train_df = train_df.drop(majority.sample(surplus, random_state=random_state).index)

    dataset = train_df.to_numpy().copy()
    # np.random (the module) and RandomState both expose shuffle().
    shuffler = np.random if random_state is None else np.random.RandomState(random_state)
    shuffler.shuffle(dataset)

    X = dataset[:, :-1]
    y = dataset[:, -1]

    return X, y


def get_test_split(df):
    """Split a test frame into features, integer labels and timestamps.

    The ``events_count`` column is dropped and missing values are replaced by
    0. Of the remaining columns, the first is returned as timestamps, the last
    as labels and everything in between as the feature matrix.

    Returns
    -------
    tuple
        ``(X_test, y_test, timestamps)`` where ``X_test`` and ``timestamps``
        are NumPy arrays and ``y_test`` is a list of Python ints.
    """
    cleaned = df.drop(columns='events_count').fillna(0)
    values = cleaned.to_numpy().copy()

    timestamps, X_test, labels = values[:, 0], values[:, 1:-1], values[:, -1]

    return X_test, list(map(int, labels)), timestamps


def get_test_df(
    data_path,
    events_wnd,
    window_type,
    window_size,
    valid_user,
    intruder,
    data_type,
    index_1,
    index_2
):
    """Read one test CSV and return it as a DataFrame.

    The file is looked up at
    ``data_path/events_wnd/window_type/window_size/user_<valid_user>/``
    ``<data_type>_<index_1>_<index_2>_<intruder>.data.csv``.
    """
    user_dir = "user_" + str(valid_user)
    name_parts = (data_type, index_1, index_2, intruder)
    file_name = "_".join(map(str, name_parts)) + ".data.csv"

    csv_path = os.path.join(
        data_path, events_wnd, window_type, window_size, user_dir, file_name
    )
    return pd.read_csv(csv_path)


def test_model(
    results_file,
    model,
    X_test,
    y_test,
    model_tag,
    df_type,
    window_type,
    window_size,
    valid_user,
    intruder,
    index_1,
    index_2,
    timestamps
):
    """Score ``model`` on one test sample, store the raw outputs and print accuracy.

    The labels, predictions, class probabilities and timestamps are merged
    into the JSON file ``results_file`` under the nested keys
    ``df_type / "fixed" / "fixed" / model_tag / "valid_user" / <valid_user> /
    "intruder" / <intruder> / "valid_sample_index" / <index_1> /
    "intruder_sample_index" / <index_2>``.

    NOTE(review): ``window_type`` and ``window_size`` are accepted but never
    used; the two window levels of the key path are hard-coded to "fixed".
    Confirm whether readers of the results file rely on that before changing it.
    """
    predict = model.predict(X_test)
    proba = model.predict_proba(X_test)

    rule = "--------------------------------------------------------------------------------"

    print("TYPE: ", df_type)
    print('Model = ', model_tag, ', valid user = ', valid_user, ', intruder = ', intruder, ", ", index_1, '_', index_2)
    print(rule)

    # Innermost record; it is wrapped level by level into the nested layout.
    results = {
        "test": y_test,
        "predict": predict.tolist(),
        "proba": proba.tolist(),
        "timestamps": timestamps.tolist()
    }
    key_path = (
        df_type,
        "fixed",
        "fixed",
        model_tag,
        "valid_user",
        str(valid_user),
        "intruder",
        str(intruder),
        "valid_sample_index",
        str(index_1),
        "intruder_sample_index",
        str(index_2),
    )
    for key in reversed(key_path):
        results = {key: results}

    update_file_with_results(results_file, results)
    print('Accuracy score = ', accuracy_score(y_test, predict))
    print(rule)
    
    
def simulate(
    users_count,
    samples_count,
    model,
    model_tag,
    data_path,
    test_data_path,
    df_type,
    events_wnd,
    window_type,
    window_size,
    le
):
    """Train and evaluate ``model`` for every ordered (valid user, intruder) pair.

    For each pair with ``valid_user != intruder`` (both ranging over
    ``1..users_count``) the model is fitted on the training split for that
    pair and then tested on every combination of ``samples_count`` valid-user
    samples and ``samples_count`` intruder samples. Each test is reported and
    stored by ``test_model``.

    Parameters
    ----------
    users_count : int
        Number of users; ids run from 1 to ``users_count``.
    samples_count : int
        Number of sample indices per user; indices run from 0.
    model
        Estimator with ``fit``, ``predict`` and ``predict_proba``. It is
        refitted for every pair.
    model_tag : str
        Name of the model used in the printed log and in the results file.
    data_path : str
        Root folder of the training data.
    test_data_path : str
        Root folder of the test samples.
    df_type : str
        Data type; used as the test file prefix and as the top-level results key.
    events_wnd, window_type, window_size : str
        Path components that select the data set.
    le
        Fitted label encoder applied to the ``action`` column.
    """
    if users_count < 2:
        # No (valid user, intruder) pair exists, so nothing is loaded or tested.
        return

    # The training frame and its encoding do not depend on the user pair, so
    # read the CSV files once instead of once per pair. get_train_split works
    # on a filtered copy and leaves this frame untouched.
    df, results_file = get_broadcasts_dataframe(data_path, window_type, window_size)
    df['action'] = le.transform(df.action)

    for valid_user in range(1, users_count + 1):
        for intruder in range(1, users_count + 1):
            if valid_user == intruder:
                continue

            X_train, y_train = get_train_split(df, valid_user, intruder)

            model.fit(X_train, y_train)

            for valid_user_sample in range(samples_count):
                for intruder_sample in range(samples_count):

                    test_df = get_test_df(
                        test_data_path,
                        events_wnd,
                        window_type,
                        window_size,
                        valid_user,
                        intruder,
                        df_type,
                        index_1=valid_user_sample,
                        index_2=intruder_sample,
                    )

                    test_df['action'] = le.transform(test_df.action)
                    X_test, y_test, timestamps = get_test_split(test_df)

                    test_model(
                        results_file,
                        model,
                        X_test,
                        y_test,
                        model_tag,
                        df_type,
                        window_type,
                        window_size,
                        valid_user,
                        intruder,
                        index_1=valid_user_sample,
                        index_2=intruder_sample,
                        timestamps=timestamps
                    )

### Simulation settings
***

In [4]:
# Paths are built with os.path.join so the notebook is not tied to the
# Windows path separator; on Windows the resulting strings are unchanged.

# Root of the feature files used for training (passed to simulate as data_path).
DATA_PATH = os.path.join('..', '..', 'scripts', '_features_all')
# Root of the per-sample test files (passed to simulate as test_data_path).
TEST_DATA_PATH = os.path.join('..', '..', 'scripts', '_features_events_full')

DATA_TYPE = "broadcasts"
# Text file with one broadcast name per line; used to fit the label encoder.
BROADCASTS_LIST_FILE = os.path.join('..', 'learning', 'broadcasts.list')

EVENTS_WND = '15min'
WINDOW_TYPE = "rolling"
WINDOW_SIZE = "60s"

# simulate() iterates sample indices 0..SAMPLES_COUNT-1 and user ids 1..USERS_COUNT.
SAMPLES_COUNT = 10
USERS_COUNT = 8

broadcasts_list = get_broadcasts_list(BROADCASTS_LIST_FILE)
le = fit_label_encoder(broadcasts_list)

In [5]:
model_tag = "NaiveBayes"

# min_categories is set to the number of classes known to the label encoder.
# Presumably this keeps categories absent from a training split representable
# at prediction time — TODO confirm.
model_params = dict(min_categories=len(le.classes_))
model = CategoricalNB(**model_params)

In [6]:
# Values swept by the run loop. They are derived from the settings cell so the
# two places cannot drift apart; append further values here to widen the sweep.
DATA_TYPES = [DATA_TYPE]
WINDOW_SIZES = [WINDOW_SIZE]
WINDOW_TYPES = [WINDOW_TYPE]

***

In [None]:
# Run the simulation once for every (data type, window type, window size) combination.
for data_type in DATA_TYPES:
    for wnd_type in WINDOW_TYPES:
        for wnd_size in WINDOW_SIZES:
            simulate(
                users_count=USERS_COUNT,
                samples_count=SAMPLES_COUNT,
                model=model,
                model_tag=model_tag,
                data_path=DATA_PATH,
                test_data_path=TEST_DATA_PATH,
                df_type=data_type,
                events_wnd=EVENTS_WND,
                window_type=wnd_type,
                window_size=wnd_size,
                le=le,
            )

here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  5 ,  4 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.72
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  5 ,  4 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.7184466019417476
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  5 ,  4 _ 7
--------------------------------------------------------------------------------
Accuracy score =  0.8134328358208955


Accuracy score =  0.5682539682539682
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  5 ,  7 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.54
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  5 ,  7 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.75
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  5 ,  7 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.7535211267605634
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  

Accuracy score =  0.7732426303854876
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  0 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.7846820809248555
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  0 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.9480322906155398
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  0 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.84399375975039
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , vali

Accuracy score =  0.7474489795918368
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  3 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.7698289269051322
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  3 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.9472322814278323
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  3 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.8327702702702703
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , va

Accuracy score =  0.8467741935483871
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  6 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.8298555377207063
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  6 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.9686356508102457
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  6 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.9003496503496503
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , va

Accuracy score =  0.9224806201550387
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  9 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.8644400785854617
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  9 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.9872151195108394
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  6 ,  9 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.9563318777292577
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , va

Accuracy score =  0.6296296296296297
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  2 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.37777777777777777
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  2 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.44285714285714284
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  2 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.627906976744186
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , v

Accuracy score =  0.6932773109243697
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  5 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.725
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  5 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.6615384615384615
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  5 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.8148148148148148
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3

Accuracy score =  0.7492354740061162
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  8 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.8449612403100775
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  8 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.7987012987012987
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  7 ,  8 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.8588235294117647
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , va

Accuracy score =  0.6206896551724138
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  1 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.38144329896907214
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  1 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.4222222222222222
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  1 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.5838509316770186
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , v

Accuracy score =  0.6341463414634146
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  4 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.38095238095238093
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  4 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.425
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  4 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.5897435897435898
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  

Accuracy score =  0.6694214876033058
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  7 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.44298245614035087
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  7 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.5822784810126582
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  3 , intruder =  8 ,  7 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.6205128205128205
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , v

Accuracy score =  0.3654545454545455
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  0 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.42248062015503873
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  0 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.6013071895424836
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  0 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.6461538461538462
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , v

Accuracy score =  0.35036496350364965
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  3 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.390625
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  3 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.5496688741721855
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  3 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.5859375
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , in

Accuracy score =  0.306
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  6 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.2932692307692308
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  6 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.42718446601941745
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  6 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.45
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =

Accuracy score =  0.39690721649484534
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  9 _ 4
--------------------------------------------------------------------------------
Accuracy score =  0.4793103448275862
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  9 _ 5
--------------------------------------------------------------------------------
Accuracy score =  0.6594594594594595
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , valid user =  4 , intruder =  1 ,  9 _ 6
--------------------------------------------------------------------------------
Accuracy score =  0.7037037037037037
--------------------------------------------------------------------------------
TYPE:  broadcasts
Model =  NaiveBayes , v