In [1]:
from catboost import CatBoostClassifier
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, plot_roc_curve, make_scorer, f1_score, roc_auc_score
from sklearn import preprocessing
from scipy import stats
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate, LeaveOneGroupOut, PredefinedSplit, GridSearchCV
import matplotlib.pyplot as plt
import os
import json

%matplotlib inline

In [2]:
def concat_dataframes(path, df_type):
    """Load every per-user CSV whose file name contains ``df_type`` and stack them.

    ``path`` is expected to contain one sub-directory per user named
    ``<prefix>_<id>``; the integer after the first underscore becomes the
    value of the added ``user`` column.

    Args:
        path: Directory holding the per-user sub-directories.
        df_type: Substring a file name must contain to be loaded. For every
            type except ``'broadcasts'`` the ``timestamp`` column is dropped.

    Returns:
        A single DataFrame with a fresh 0..n-1 index and a trailing ``user``
        column.

    Raises:
        ValueError: If no file under ``path`` matches ``df_type``.
    """
    frames = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the result reproducible across machines and runs.
    for user in sorted(os.listdir(path)):
        user_dir = os.path.join(path, user)
        for file in sorted(os.listdir(user_dir)):
            if df_type not in file:
                continue

            df = pd.read_csv(os.path.join(user_dir, file))

            if df_type != 'broadcasts':
                df = df.drop(["timestamp"], axis=1)

            df["user"] = int(user.split('_')[1])
            frames.append(df)

    if not frames:
        raise ValueError(
            "No files containing %r were found under %r" % (df_type, path)
        )

    return pd.concat(frames, ignore_index=True)


def drop_bad_rows(df, z = 3):
    """Remove rows that are z-score outliers in any feature column.

    A row is dropped when, for at least one column other than ``user``, its
    value lies more than ``z`` standard deviations from the mean computed
    either over that row's user only or over the whole frame.

    Args:
        df: Feature frame with a ``user`` column.
        z: Absolute z-score above which a value counts as an outlier.

    Returns:
        A new DataFrame without the outlier rows.
    """
    feature_cols = [name for name in df.columns if name != "user"]
    user_ids = df.user.unique()
    outlier_labels = set()

    for name in feature_cols:
        # Outliers relative to each user's own distribution.
        for uid in user_ids:
            user_values = df.loc[df.user == uid, name]
            is_outlier = np.abs(stats.zscore(user_values)) > z
            outlier_labels.update(user_values.index[is_outlier])

        # Outliers relative to the distribution over all users.
        column = df[name]
        outlier_labels.update(column.index[np.abs(stats.zscore(column)) > z])

    return df.drop(list(outlier_labels), axis=0)


def drop_bad_cols(df, z = 3, allowed_proportion = 0.1):
    """Drop feature columns that are nearly constant or outlier-heavy.

    A column other than ``user`` is dropped when any of these holds:

    * fewer than ``allowed_proportion`` of all rows differ from the column mean;
    * for some user, fewer than ``allowed_proportion`` of that user's rows
      differ from the user's mean of the column;
    * for some user, fewer than ``1 - allowed_proportion`` of that user's
      rows have an absolute z-score below ``z``.

    Args:
        df: Feature frame with a ``user`` column.
        z: Absolute z-score threshold used for the per-user outlier check.
        allowed_proportion: Fraction used by the three checks above.

    Returns:
        ``(filtered_df, dropped_columns)``. ``dropped_columns`` is a list in
        the original column order, so the result is reproducible (the list
        used to come from iterating a ``set`` and had arbitrary order).
    """
    bad_cols = []
    # The row selection for each user does not depend on the column: build once.
    user_masks = [df.user == user for user in df.user.unique()]
    total_rows = df.shape[0]

    for col in df.columns:
        if col == "user":
            continue

        values = df[col]
        is_bad = (values != values.mean()).sum() < allowed_proportion * total_rows

        if not is_bad:
            for mask in user_masks:
                user_values = values[mask]
                user_rows = user_values.shape[0]

                varying = (user_values != user_values.mean()).sum()
                if varying < allowed_proportion * user_rows:
                    is_bad = True
                    break

                inliers = np.sum(np.abs(stats.zscore(user_values)) < z)
                if inliers < (1 - allowed_proportion) * user_rows:
                    is_bad = True
                    break

        if is_bad:
            bad_cols.append(col)

    return df.drop(bad_cols, axis=1), bad_cols
    
    
def extract_delayed_user(df, user_label):
    """Split ``df`` into the rows of one user and the rows of everyone else.

    Returns:
        ``(rows_of_user_label, remaining_rows)``, selected on the ``user`` column.
    """
    is_target = df["user"] == user_label
    return df[is_target], df[~is_target]


def split_users_into_two_classes(df, valid_user_label):
    """Turn the ``user`` column into a binary target, in place.

    Rows whose ``user`` equals ``valid_user_label`` become 1, all others 0.

    The membership mask is computed once, before anything is overwritten.
    Writing the zeros first and then testing ``user == valid_user_label``
    again would mark every row as 1 when ``valid_user_label`` is 0.

    Args:
        df: Frame with a ``user`` column. It is modified in place; pass a
            copy if the original must be kept.
        valid_user_label: Label of the user that forms the positive class.

    Returns:
        The same ``df`` object, for call chaining.
    """
    is_valid = df["user"] == valid_user_label
    df.loc[~is_valid, "user"] = 0
    df.loc[is_valid, "user"] = 1
    return df


def get_cv_split(X, y, group_labels, valid_user_label):
    """Build a ``test_fold`` array for ``PredefinedSplit`` with one fold per impostor.

    Every group other than ``valid_user_label`` gets its own fold index
    (0, 1, ... in sorted group order, the order ``LeaveOneGroupOut`` uses).
    Samples of the valid user get -1, which ``PredefinedSplit`` treats as
    "never in a test set".

    Groups are matched with a boolean mask over all samples. The previous
    slice ``group_labels[first:last]`` skipped the last sample, was empty for
    single-sample groups (so they were wrongly excluded from testing) and
    only worked when each group occupied contiguous rows.

    Args:
        X: Unused; kept so existing callers keep working.
        y: Unused; kept so existing callers keep working.
        group_labels: Array-like with the group (original user id) per sample.
        valid_user_label: Group that must never be held out.

    Returns:
        Float array with one fold index per sample.

    Raises:
        ValueError: If fewer than two distinct groups are present, as
            ``LeaveOneGroupOut`` did in the previous implementation.
    """
    group_labels = np.asarray(group_labels)
    unique_groups = np.unique(group_labels)
    if unique_groups.shape[0] < 2:
        raise ValueError(
            "At least 2 distinct groups are required, got %d" % unique_groups.shape[0]
        )

    test_fold = np.zeros(group_labels.shape[0])
    next_fold = 0
    for group in unique_groups:
        in_group = group_labels == group
        if group == valid_user_label:
            test_fold[in_group] = -1
        else:
            test_fold[in_group] = next_fold
            next_fold += 1
    return test_fold


def generate_train_dataset(df, user, ex_user, is_SVM = False):
    """Build a shuffled training matrix and set aside rows for testing.

    All rows of ``ex_user`` are held out, plus a random sample of
    ``int(0.25 * n - 1)`` rows from every other label. What remains is turned
    into a binary problem (``user`` vs. rest) and shuffled row-wise.

    Sampling and shuffling use the global pandas/NumPy random state, so
    results differ between runs unless that state is seeded by the caller.

    Args:
        df: Frame with feature columns, then ``user``, and a ``labels`` column
            holding the original user ids.
        user: Label treated as the positive class.
        ex_user: Label excluded from training entirely.
        is_SVM: When True the negative class is encoded as -1 instead of 0.

    Returns:
        ``(X, y, held_out_frames)`` where ``X`` is every column but the last
        of the remaining frame (after ``labels`` is dropped), ``y`` is the
        last column, and ``held_out_frames`` is a list of DataFrames.
    """
    excluded_rows = df[df.labels == ex_user].copy()
    held_out = [excluded_rows]
    remaining = df.copy().drop(excluded_rows.index, axis=0)

    for label in remaining.labels.unique():
        if label == ex_user:
            continue
        label_rows = remaining[remaining.labels == label]
        n_test = int((0.25 * label_rows.shape[0]) - 1)
        sampled = label_rows.sample(n_test).copy()
        held_out.append(sampled)
        remaining = remaining.drop(sampled.index, axis=0)

    train = split_users_into_two_classes(remaining.copy(), user)

    if is_SVM:
        train.loc[train.user == 0, 'user'] = -1

    matrix = train.drop("labels", axis=1).to_numpy().copy()
    np.random.shuffle(matrix)

    return matrix[:, :-1], matrix[:, -1], held_out


def generate_test_dataset(df_list, user, ex_user, is_SVM = False):
    """Assemble a test set with equal parts valid user, extracted user and others.

    The part size is the smallest of the three available row counts. Each
    part is drawn with ``DataFrame.sample`` (global random state). The
    ``user`` column is then rewritten as the binary target, and a short
    summary of the resulting composition is printed.

    Args:
        df_list: DataFrames to pool; each needs ``labels`` and ``user`` columns.
        user: Label of the positive class.
        ex_user: Label of the user that was excluded from training.
        is_SVM: When True the negative class is encoded as -1 instead of 0.

    Returns:
        ``(X_test, y_test)``: every column but the last, and the last column,
        of the pooled frame after ``labels`` is dropped.
    """
    pooled = pd.concat(df_list)

    is_valid = pooled.labels == user
    is_extracted = pooled.labels == ex_user

    valid_count = pooled[is_valid].shape[0]
    extracted_count = pooled[is_extracted].shape[0]
    others_count = sum(
        pooled[pooled.labels == label].shape[0]
        for label in pooled.labels.unique()
        if label != user and label != ex_user
    )
    part_size = min(valid_count, extracted_count, others_count)

    # Sampling order (valid, extracted, others) is kept so the sequence of
    # random draws is unchanged.
    balanced = pd.concat([
        pooled[is_valid].sample(part_size).copy(),
        pooled[is_extracted].sample(part_size).copy(),
        pooled[~pooled.labels.isin([user, ex_user])].sample(part_size).copy(),
    ])

    negative_label = -1 if is_SVM else 0
    balanced.loc[balanced.labels == user, "user"] = 1
    balanced.loc[balanced.labels != user, "user"] = negative_label

    print("True: ", balanced[balanced.user == 1].shape)
    print("Shape: ", balanced.shape)
    for label in balanced.labels.unique():
        print("Count ", label, ": ", balanced[balanced.labels == label].shape)

    matrix = balanced.drop("labels", axis=1).to_numpy().copy()
    X_test = matrix[:, :-1].copy()
    y_test = matrix[:, -1].copy()

    return X_test, y_test


def prepare_dataset(df, user, is_SVM=False):
    """Convert ``df`` into ``(X, y, group_labels)`` for cross-validation.

    Works on a copy of ``df``. The ``user`` column becomes the binary target
    (1 for ``user``, otherwise 0, or -1 when ``is_SVM`` is True), while the
    ``labels`` column is returned separately as the group label per sample.

    Returns:
        ``X`` (every column but the last after ``labels`` is dropped), ``y``
        (the last column) and ``group_labels`` (a NumPy copy of ``labels``).
    """
    binary_df = split_users_into_two_classes(df.copy(), user)

    group_labels = binary_df.labels.to_numpy().copy()
    target_df = binary_df.drop('labels', axis=1)

    if is_SVM:
        target_df.loc[target_df.user == 0, 'user'] = -1

    matrix = target_df.to_numpy().copy()
    return matrix[:, :-1], matrix[:, -1], group_labels


def create_file_for_results(data_type):
    """Make sure the JSON results file for ``data_type`` exists and return its path.

    The file lives in a ``_results`` directory under the current working
    directory and is named ``<data_type>_results.json``. A new file is
    initialised with ``{"stub": null}``; an existing file is left untouched.

    The folder path is built with ``os.path.join`` instead of a literal
    backslash, so on POSIX systems it refers to a real ``_results`` directory
    rather than a directory literally named ``.\\_results``. On Windows the
    resulting path is the same as before.

    Args:
        data_type: Prefix of the results file name.

    Returns:
        Path to the results file.
    """
    res_folder = os.path.join('.', '_results')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(res_folder, exist_ok=True)

    file = os.path.join(res_folder, data_type + '_results.json')
    if not os.path.exists(file):
        with open(file, 'w') as f:
            json.dump({'stub': None}, f)

    return file


def update_file_with_results(file_path, results_dict):
    """Deep-merge ``results_dict`` into the JSON document stored at ``file_path``.

    Nested mappings are merged key by key; any other value replaces what was
    stored under the same key. The file is then rewritten with sorted keys
    and two-space indentation.

    Args:
        file_path: Path of an existing JSON file whose top level is an object.
        results_dict: Possibly nested mapping with the new results.
    """
    import collections.abc

    def update(d, u):
        for k, v in u.items():
            if isinstance(v, collections.abc.Mapping):
                current = d.get(k)
                # A stored non-dict value (e.g. null) cannot be merged into;
                # start from an empty dict instead of failing with TypeError.
                if not isinstance(current, collections.abc.MutableMapping):
                    current = {}
                d[k] = update(current, v)
            else:
                d[k] = v
        return d

    with open(file_path, 'r') as f:
        res = json.load(f)

    res = update(res, results_dict)

    with open(file_path, 'w') as f:
        json.dump(res, f, sort_keys=True, indent=2)
        
        
def get_dict_with_results(json_path):
    """Read the JSON file at ``json_path`` and return the parsed content."""
    with open(json_path, 'r') as handle:
        return json.load(handle)


def get_dataframe(path, data_type, window_type, window_size):
    """Load the features for one data type/window and prepare its results file.

    Features are read from ``<path>/<window_type>/<window_size>``.

    Returns:
        ``(features_df, results_file_path)``.
    """
    window_dir = os.path.join(path, window_type, window_size)
    features_df = concat_dataframes(window_dir, data_type)
    results_file = create_file_for_results(data_type)
    return features_df, results_file


def drop_corr_columns(df, corr_coef):
    """Drop columns that are strongly correlated with an earlier column.

    Only the strict upper triangle of the absolute correlation matrix is
    inspected, so of two correlated columns the later one is removed. The
    ``user`` column is never dropped.

    Args:
        df: Frame whose columns can be correlated with ``DataFrame.corr``.
        corr_coef: Absolute correlation above which a column is dropped.

    Returns:
        ``(filtered_df, dropped_columns)``.
    """
    corr_matrix = df.corr().abs()
    # Builtin ``bool`` replaces the ``np.bool`` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    upper_mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
    upper_tri = corr_matrix.where(upper_mask)
    corr_cols = [
        column
        for column in upper_tri.columns
        if column != "user" and (upper_tri[column] > corr_coef).any()
    ]
    return df.drop(corr_cols, axis=1), corr_cols


def process_train_df(df, features, corr = 0.7, z = 3, prop = 0.1):
    """Run the cleaning pipeline on a raw feature frame.

    Steps, in order: keep only the ``features`` columns, drop rows that are
    entirely NaN, fill remaining NaN with 0, drop rows with ``count_mean == 0``
    (when that column exists), remove outlier rows, remove low-quality
    columns, then remove highly correlated columns.

    Args:
        df: Raw feature frame.
        features: Column names to keep.
        corr: Threshold passed to ``drop_corr_columns``.
        z: Z-score threshold passed to ``drop_bad_rows`` and ``drop_bad_cols``.
        prop: Proportion passed to ``drop_bad_cols``.

    Returns:
        ``(cleaned_df, dropped_columns)`` where ``dropped_columns`` lists the
        columns removed by the two column filters.
    """
    unwanted = df.columns.difference(features)
    cleaned = df.drop(unwanted, axis=1).dropna(how='all').fillna(0)

    if 'count_mean' in cleaned.columns:
        cleaned = cleaned[cleaned.count_mean != 0]

    cleaned = drop_bad_rows(cleaned, z)
    cleaned, low_quality_cols = drop_bad_cols(cleaned, z, prop)
    cleaned, correlated_cols = drop_corr_columns(cleaned, corr)

    return cleaned, low_quality_cols + correlated_cols

In [3]:
def model_cross_validation(results_file, model, df, model_tag, df_type, window_type, window_size, is_SVM = False):
    """Cross-validate ``model`` once per user and store the fold accuracies.

    Each label in ``df.labels`` is treated in turn as the valid user. The
    folds come from ``get_cv_split`` wrapped in ``PredefinedSplit``. The
    per-fold accuracies are merged into ``results_file`` under
    ``df_type / window_type / window_size / model_tag / cross_validation /
    valid_user / <user> / accuracy`` and a summary is printed.
    """
    separator = "--------------------------------------------------------------------------------"

    for user in df.labels.unique():
        print("Valid User: ", user)
        print(separator)

        X, y, group_labels = prepare_dataset(df, user, is_SVM)
        folds = PredefinedSplit(test_fold=get_cv_split(X, y, group_labels, user))

        cv_results = cross_validate(model, X, y, scoring='accuracy', cv=folds, n_jobs=-1)
        accuracy = cv_results['test_score']

        # Wrap the payload from the innermost key outwards.
        results = {"accuracy": accuracy.tolist()}
        for key in (str(user), "valid_user", "cross_validation",
                    model_tag, window_size, window_type, df_type):
            results = {key: results}

        update_file_with_results(results_file, results)

        print("CV accuracy list: ", accuracy)
        print("CV mean accuracy: ", np.mean(accuracy))
        print("CV min accuracy: ", np.min(accuracy))
        print("CV max accuracy: ", np.max(accuracy))

        print(separator)
        
        
def model_final_validation(results_file, model, df, model_tag, df_type, window_type, window_size, is_SVM = False):
    """Train and test ``model`` for every (valid user, extracted user) pair.

    For each pair the model is fitted on data that excludes the extracted
    user, then evaluated on a balanced test set built by
    ``generate_test_dataset``. True labels, predictions and the output of
    ``predict_proba`` are merged into ``results_file`` under
    ``df_type / window_type / window_size / model_tag / final_validation /
    valid_user / <user> / extracted_user / <ex_user>``.
    """
    separator = "--------------------------------------------------------------------------------"

    for user in df.labels.unique():
        print("Valid User: ", user)
        print(separator)

        for ex_user in df.labels.unique():
            if ex_user == user:
                continue

            X, y, df_for_test = generate_train_dataset(df, user, ex_user, is_SVM)
            model.fit(X, y)

            X_test, y_test = generate_test_dataset(df_for_test, user, ex_user, is_SVM)

            predict = model.predict(X_test)
            # predict_proba is used for every model, including the SVM case
            # (no decision_function branch).
            proba = model.predict_proba(X_test)

            # Wrap the payload from the innermost key outwards.
            results = {
                "test": y_test.tolist(),
                "predict": predict.tolist(),
                "proba": proba.tolist()
            }
            for key in (str(ex_user), "extracted_user", str(user), "valid_user",
                        "final_validation", model_tag, window_size, window_type, df_type):
                results = {key: results}

            update_file_with_results(results_file, results)

            print("Valid user = ", user, ", Extracted user = ", ex_user, "accuracy = ", accuracy_score(y_test, predict))
            print(separator)

### Learning settings
***

In [4]:
# Root folder of the extracted features, relative to the notebook. Built with
# os.path.join so it resolves on POSIX as well; on Windows the value is the
# same string as the previous backslash literal.
DATA_PATH = os.path.join('..', '..', 'scripts', '_features_all')

# Default data type for single-configuration runs.
DATA_TYPE = "bt"

# Default window type and window size for single-configuration runs.
WINDOW_TYPE = "rolling"
WINDOW_SIZE = "60s"

In [5]:
# Values swept by the cross-validation and final-validation loops below.
# DATA_TYPES entries are matched against feature file names and also name
# the per-type results file.
DATA_TYPES = ['wifi', 'bt', 'location']
# Sub-folder names under DATA_PATH, one per windowing scheme.
WINDOW_TYPES = ['rolling', 'sampling']
# Full list of window sizes, kept for reference; only '60s' is active.
# WINDOWS = ['5s', '10s', '30s', '60s', '90s', '120s', '240s', '600s']
WINDOWS = ['60s']

In [6]:
# Hyper-parameters for each model family evaluated in this notebook.
catboost_params = {
    'iterations': 100,
    'depth': 6,
    'loss_function': 'Logloss',
    'l2_leaf_reg': 1,
    'leaf_estimation_iterations': 5,
    'logging_level': 'Silent'
}

randomforest_params = {
    'n_estimators': 100,
    'criterion': 'gini',
    'max_depth': None,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    # 'auto' was removed for RandomForestClassifier in scikit-learn 1.3;
    # 'sqrt' is the value it stood for on classifiers and is accepted by
    # older releases too.
    'max_features': 'sqrt',
    'n_jobs': -1,
    'class_weight': 'balanced',
}

svc_params = {
    'C': 1,
    'kernel': 'rbf',
    'degree': 1,
    'gamma': 5,
    # Needed because the final validation calls predict_proba on every model.
    'probability': True
}

logreg_params = {
    'penalty': 'l2',
    'C': 0.01,
    'solver': 'newton-cg',
    'max_iter': 1000,
    'n_jobs': -1
}

# (estimator, tag) pairs; the tag labels the results and the tag 'SVC'
# switches the validation loops to the -1/1 target encoding.
MODELS = [
    (CatBoostClassifier(**catboost_params), "CatBoost"),
    (RandomForestClassifier(**randomforest_params), "RandomForest"),
#     (SVC(**svc_params), "SVC"),
    (LogisticRegression(**logreg_params), "LogReg")
]

***

## Cross-validation

In [7]:
# Cross-validate every model on every (data type, window type, window size)
# combination. The frame is loaded and cleaned once per combination and
# shared by all models.
for data_type in DATA_TYPES:
    for wnd_type in WINDOW_TYPES:
        for wnd in WINDOWS:
            df, RESULTS_FILE = get_dataframe(DATA_PATH, data_type, wnd_type, wnd)
            # Every loaded column is passed as a feature, so the column
            # filter inside process_train_df removes nothing here.
            features = df.columns.to_list()
            df, _ = process_train_df(df, features)
            # Keep the original user ids in `labels`; `user` is later
            # overwritten with the binary target for each valid user.
            df['labels'] = df['user']
            
            for model, tag in MODELS:
                print(data_type, wnd_type, wnd, tag)
                # The tag 'SVC' selects the -1/1 target encoding.
                model_cross_validation(RESULTS_FILE, model, df, tag, data_type, wnd_type, wnd, is_SVM=tag=='SVC')

  return (a - mns) / sstd


wifi rolling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
CV accuracy list:  [0.51829268 0.97714286 0.91791193 0.78436357 0.89304813 1.
 0.76681614]
CV mean accuracy:  0.8367964736121662
CV min accuracy:  0.5182926829268293
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
CV accuracy list:  [0.98029829 0.97714286 0.99282726 0.98325997 1.         0.98909091
 1.        ]
CV mean accuracy:  0.9889456109696367
CV min accuracy:  0.9771428571428571
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  3
--------------------------------------------------------------------------------
CV accuracy list:  [0.99779046 0.99542683 0.99681211 0.98877798 0.86631016 0.86909091
 0.70852018]
CV mean accuracy:  0.9175326616919646
CV 

CV accuracy list:  [0.99944762 0.98551829 0.99571429 0.98525603 0.999256   1.
 0.98654709]
CV mean accuracy:  0.9931056149640645
CV min accuracy:  0.9852560270970313
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  8
--------------------------------------------------------------------------------
CV accuracy list:  [0.99576505 0.9992378  0.96571429 1.         0.998822   0.74331551
 0.77090909]
CV mean accuracy:  0.9248233913776192
CV min accuracy:  0.7433155080213903
CV max accuracy:  1.0
--------------------------------------------------------------------------------


  return (a - mns) / sstd


wifi sampling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
CV accuracy list:  [0.72033898 0.96742671 0.98035363 0.83373206 0.97452229 0.99090909
 0.74479167]
CV mean accuracy:  0.8874392051016896
CV min accuracy:  0.7203389830508474
CV max accuracy:  0.990909090909091
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
CV accuracy list:  [0.99709302 0.98697068 0.98821218 0.99521531 1.         0.99090909
 0.99479167]
CV mean accuracy:  0.9933131366602866
CV min accuracy:  0.9869706840390879
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  3
--------------------------------------------------------------------------------
CV accuracy list:  [0.99273256 0.95762712 0.99607073 0.98325359 0.82165605 0.91818182
 0.88020833]
CV mean accuracy: 

CV accuracy list:  [1.         0.72033898 0.97068404 0.99017682 0.99641148 1.
 0.97395833]
CV mean accuracy:  0.9502242365735026
CV min accuracy:  0.7203389830508474
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  8
--------------------------------------------------------------------------------
CV accuracy list:  [0.98982558 1.         0.98045603 1.         0.99641148 0.93630573
 0.90909091]
CV mean accuracy:  0.9731556760403651
CV min accuracy:  0.9090909090909091
CV max accuracy:  1.0
--------------------------------------------------------------------------------


  return (a - mns) / sstd


bt rolling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
CV accuracy list:  [1.         1.         0.56211051 0.86022692 1.         0.89754922
 0.9248366 ]
CV mean accuracy:  0.8921033218751343
CV min accuracy:  0.5621105110095554
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
CV accuracy list:  [1.         0.96296296 1.         1.         1.         1.
 1.        ]
CV mean accuracy:  0.9947089947089947
CV min accuracy:  0.9629629629629629
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  3
--------------------------------------------------------------------------------
CV accuracy list:  [1.         0.48821549 0.7682454  0.89119902 0.99253466 0.90839695
 0.99019608]
CV mean accuracy:  0.8626839411138666
CV mi

CV accuracy list:  [1.         0.99326599 0.99222679 1.         0.98761116 0.9792037
 0.98366013]
CV mean accuracy:  0.9908525397173669
CV min accuracy:  0.9792036971205119
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  8
--------------------------------------------------------------------------------
CV accuracy list:  [1. 1. 1. 1. 1. 1. 1.]
CV mean accuracy:  1.0
CV min accuracy:  1.0
CV max accuracy:  1.0
--------------------------------------------------------------------------------


  return (a - mns) / sstd


bt sampling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
CV accuracy list:  [1.         1.         0.72666106 0.87281036 1.         0.91284404
 0.90909091]
CV mean accuracy:  0.9173437662087249
CV min accuracy:  0.7266610597140454
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
CV accuracy list:  [1.         0.96712329 1.         1.         1.         1.
 1.        ]
CV mean accuracy:  0.9953033268101762
CV min accuracy:  0.9671232876712329
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  3
--------------------------------------------------------------------------------
CV accuracy list:  [1.         0.36       0.81581161 0.89413557 0.99159664 0.93577982
 1.        ]
CV mean accuracy:  0.8567605184234349
CV m

CV accuracy list:  [1. 1. 1. 1. 1. 1. 1.]
CV mean accuracy:  1.0
CV min accuracy:  1.0
CV max accuracy:  1.0
--------------------------------------------------------------------------------
location rolling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
CV accuracy list:  [0.99061146 0.7221453  0.98480259 0.89240882 0.99012976 0.99229033
 0.99645138]
CV mean accuracy:  0.9384056646694281
CV min accuracy:  0.7221452967783624
CV max accuracy:  0.9964513839602555
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
CV accuracy list:  [0.99499277 0.9964922  0.92906106 0.88710252 0.96666207 0.9990598
 1.        ]
CV mean accuracy:  0.9676243438504428
CV min accuracy:  0.8871025224725775
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  3
-----

CV accuracy list:  [0.99877601 0.98314877 1.         0.89845369 0.90658536 0.96032343
 1.        ]
CV mean accuracy:  0.963898179790718
CV min accuracy:  0.8984536904192572
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  7
--------------------------------------------------------------------------------
CV accuracy list:  [0.99732948 1.         1.         0.99622722 0.97980438 0.97349531
 1.        ]
CV mean accuracy:  0.9924080541745658
CV min accuracy:  0.9734953064605191
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  8
--------------------------------------------------------------------------------
CV accuracy list:  [1.         1.         1.         0.89064244 0.98614026 0.98053562
 1.        ]
CV mean accuracy:  0.979616901725881
CV min accuracy:  0.890642435836123
CV max accuracy:  1.0
-------------------------------------------------------------

CV accuracy list:  [0.97424412 0.98321343 1.         0.93194292 0.93190661 0.93700787
 0.9701897 ]
CV mean accuracy:  0.9612149515377543
CV min accuracy:  0.9319066147859922
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  5
--------------------------------------------------------------------------------
CV accuracy list:  [0.99104143 0.99280576 0.98601399 0.82415519 0.95719844 0.95013123
 0.98373984]
CV mean accuracy:  0.9550122690495374
CV min accuracy:  0.8241551939924906
CV max accuracy:  0.9928057553956835
--------------------------------------------------------------------------------
Valid User:  6
--------------------------------------------------------------------------------
CV accuracy list:  [1.         1.         1.         0.9931164  0.99890231 0.98687664
 1.        ]
CV mean accuracy:  0.9969850487247831
CV min accuracy:  0.9868766404199475
CV max accuracy:  1.0
-------------------------------------------

## Final Validation

In [None]:
# Final validation of every model on every (data type, window type, window
# size) combination. The frame is loaded and cleaned once per combination
# and shared by all models.
for data_type in DATA_TYPES:
    for wnd_type in WINDOW_TYPES:
        for wnd in WINDOWS:
            df, RESULTS_FILE = get_dataframe(DATA_PATH, data_type, wnd_type, wnd)
            # Every loaded column is passed as a feature, so the column
            # filter inside process_train_df removes nothing here.
            features = df.columns.to_list()
            df, _ = process_train_df(df, features)
            # Keep the original user ids in `labels`; `user` is later
            # overwritten with the binary target for each valid user.
            df['labels'] = df['user']
                
            for model, tag in MODELS:
                print(data_type, wnd_type, wnd, tag)
                # The tag 'SVC' selects the -1/1 target encoding.
                model_final_validation(RESULTS_FILE, model, df, tag, data_type, wnd_type, wnd, is_SVM=tag=='SVC')

  return (a - mns) / sstd


wifi rolling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
True:  (1312, 23)
Shape:  (3936, 23)
Count  1 :  (1312, 23)
Count  2 :  (1312, 23)
Count  5 :  (952, 23)
Count  3 :  (33, 23)
Count  7 :  (16, 23)
Count  4 :  (266, 23)
Count  8 :  (36, 23)
Count  6 :  (9, 23)
Valid user =  1 , Extracted user =  2 accuracy =  0.7850609756097561
--------------------------------------------------------------------------------
True:  (700, 23)
Shape:  (2100, 23)
Count  1 :  (700, 23)
Count  3 :  (700, 23)
Count  5 :  (470, 23)
Count  8 :  (15, 23)
Count  4 :  (140, 23)
Count  6 :  (7, 23)
Count  7 :  (9, 23)
Count  2 :  (59, 23)
Valid user =  1 , Extracted user =  3 accuracy =  0.9471428571428572
--------------------------------------------------------------------------------
True:  (1356, 23)
Shape:  (4068, 23)
Count  1 :  (1356, 23)
Count  4 :  (1356, 23)
Count  5 :  (1146, 23)
Count  2 :  (97, 23)
Count  8 :  (36, 23)
Count  3 :  (5

Valid user =  4 , Extracted user =  2 accuracy =  0.942537909018356
--------------------------------------------------------------------------------
True:  (700, 23)
Shape:  (2100, 23)
Count  4 :  (700, 23)
Count  3 :  (700, 23)
Count  5 :  (472, 23)
Count  1 :  (161, 23)
Count  2 :  (43, 23)
Count  6 :  (5, 23)
Count  7 :  (7, 23)
Count  8 :  (12, 23)
Valid user =  4 , Extracted user =  3 accuracy =  0.9495238095238095
--------------------------------------------------------------------------------
True:  (1253, 23)
Shape:  (3759, 23)
Count  4 :  (1253, 23)
Count  5 :  (1253, 23)
Count  3 :  (111, 23)
Count  1 :  (815, 23)
Count  8 :  (66, 23)
Count  2 :  (190, 23)
Count  7 :  (41, 23)
Count  6 :  (30, 23)
Valid user =  4 , Extracted user =  5 accuracy =  0.7355679702048417
--------------------------------------------------------------------------------
True:  (187, 23)
Shape:  (561, 23)
Count  4 :  (187, 23)
Count  6 :  (187, 23)
Count  5 :  (130, 23)
Count  1 :  (32, 23)
Count  8 : 

True:  (67, 23)
Shape:  (201, 23)
Count  7 :  (67, 23)
Count  5 :  (67, 23)
Count  1 :  (34, 23)
Count  4 :  (28, 23)
Count  8 :  (2, 23)
Count  3 :  (2, 23)
Count  2 :  (1, 23)
Valid user =  7 , Extracted user =  5 accuracy =  0.9651741293532339
--------------------------------------------------------------------------------
True:  (67, 23)
Shape:  (201, 23)
Count  7 :  (67, 23)
Count  6 :  (67, 23)
Count  2 :  (3, 23)
Count  5 :  (31, 23)
Count  1 :  (10, 23)
Count  3 :  (2, 23)
Count  4 :  (20, 23)
Count  8 :  (1, 23)
Valid user =  7 , Extracted user =  6 accuracy =  0.9601990049751243
--------------------------------------------------------------------------------
True:  (67, 23)
Shape:  (201, 23)
Count  7 :  (67, 23)
Count  8 :  (67, 23)
Count  5 :  (34, 23)
Count  1 :  (19, 23)
Count  4 :  (12, 23)
Count  3 :  (1, 23)
Count  2 :  (1, 23)
Valid user =  7 , Extracted user =  8 accuracy =  0.9253731343283582
---------------------------------------------------------------------------

Valid user =  2 , Extracted user =  7 accuracy =  0.9563636363636364
--------------------------------------------------------------------------------
True:  (327, 23)
Shape:  (981, 23)
Count  2 :  (327, 23)
Count  8 :  (327, 23)
Count  4 :  (61, 23)
Count  1 :  (60, 23)
Count  5 :  (198, 23)
Count  3 :  (7, 23)
Count  6 :  (1, 23)
Valid user =  2 , Extracted user =  8 accuracy =  0.9612640163098879
--------------------------------------------------------------------------------
Valid User:  3
--------------------------------------------------------------------------------
True:  (174, 23)
Shape:  (522, 23)
Count  3 :  (174, 23)
Count  1 :  (174, 23)
Count  5 :  (128, 23)
Count  7 :  (2, 23)
Count  4 :  (32, 23)
Count  2 :  (9, 23)
Count  8 :  (3, 23)
Valid user =  3 , Extracted user =  1 accuracy =  0.8659003831417624
--------------------------------------------------------------------------------
True:  (174, 23)
Shape:  (522, 23)
Count  3 :  (174, 23)
Count  2 :  (174, 23)
Count  5 :

True:  (45, 23)
Shape:  (135, 23)
Count  6 :  (45, 23)
Count  1 :  (45, 23)
Count  4 :  (9, 23)
Count  8 :  (4, 23)
Count  5 :  (29, 23)
Count  2 :  (1, 23)
Count  3 :  (2, 23)
Valid user =  6 , Extracted user =  1 accuracy =  0.8444444444444444
--------------------------------------------------------------------------------
True:  (45, 23)
Shape:  (135, 23)
Count  6 :  (45, 23)
Count  2 :  (45, 23)
Count  1 :  (16, 23)
Count  5 :  (20, 23)
Count  4 :  (8, 23)
Count  3 :  (1, 23)
Valid user =  6 , Extracted user =  2 accuracy =  0.8222222222222222
--------------------------------------------------------------------------------
True:  (45, 23)
Shape:  (135, 23)
Count  6 :  (45, 23)
Count  3 :  (45, 23)
Count  5 :  (27, 23)
Count  1 :  (8, 23)
Count  4 :  (5, 23)
Count  2 :  (4, 23)
Count  8 :  (1, 23)
Valid user =  6 , Extracted user =  3 accuracy =  0.8592592592592593
--------------------------------------------------------------------------------
True:  (45, 23)
Shape:  (135, 23)
Coun

Valid user =  1 , Extracted user =  4 accuracy =  0.7765486725663717
--------------------------------------------------------------------------------
True:  (1356, 23)
Shape:  (4068, 23)
Count  1 :  (1356, 23)
Count  5 :  (1356, 23)
Count  4 :  (866, 23)
Count  2 :  (224, 23)
Count  3 :  (120, 23)
Count  8 :  (74, 23)
Count  7 :  (48, 23)
Count  6 :  (24, 23)
Valid user =  1 , Extracted user =  5 accuracy =  0.6551130776794494
--------------------------------------------------------------------------------
True:  (187, 23)
Shape:  (561, 23)
Count  1 :  (187, 23)
Count  6 :  (187, 23)
Count  5 :  (129, 23)
Count  4 :  (36, 23)
Count  3 :  (5, 23)
Count  2 :  (14, 23)
Count  7 :  (2, 23)
Count  8 :  (1, 23)
Valid user =  1 , Extracted user =  6 accuracy =  0.7985739750445633
--------------------------------------------------------------------------------
True:  (275, 23)
Shape:  (825, 23)
Count  1 :  (275, 23)
Count  7 :  (275, 23)
Count  5 :  (186, 23)
Count  4 :  (51, 23)
Count  3 :  (

Valid user =  4 , Extracted user =  6 accuracy =  0.8716577540106952
--------------------------------------------------------------------------------
True:  (275, 23)
Shape:  (825, 23)
Count  4 :  (275, 23)
Count  7 :  (275, 23)
Count  1 :  (66, 23)
Count  5 :  (172, 23)
Count  3 :  (14, 23)
Count  2 :  (16, 23)
Count  8 :  (5, 23)
Count  6 :  (2, 23)
Valid user =  4 , Extracted user =  7 accuracy =  0.6436363636363637
--------------------------------------------------------------------------------
True:  (446, 23)
Shape:  (1338, 23)
Count  4 :  (446, 23)
Count  8 :  (446, 23)
Count  1 :  (89, 23)
Count  5 :  (317, 23)
Count  2 :  (21, 23)
Count  6 :  (4, 23)
Count  3 :  (13, 23)
Count  7 :  (2, 23)
Valid user =  4 , Extracted user =  8 accuracy =  0.8923766816143498
--------------------------------------------------------------------------------
Valid User:  5
--------------------------------------------------------------------------------
True:  (1976, 23)
Shape:  (5928, 23)
Count  5

True:  (110, 23)
Shape:  (330, 23)
Count  8 :  (110, 23)
Count  1 :  (110, 23)
Count  3 :  (1, 23)
Count  5 :  (84, 23)
Count  2 :  (6, 23)
Count  6 :  (1, 23)
Count  4 :  (18, 23)
Valid user =  8 , Extracted user =  1 accuracy =  0.6909090909090909
--------------------------------------------------------------------------------
True:  (110, 23)
Shape:  (330, 23)
Count  8 :  (110, 23)
Count  2 :  (110, 23)
Count  1 :  (25, 23)
Count  5 :  (64, 23)
Count  6 :  (1, 23)
Count  3 :  (3, 23)
Count  4 :  (15, 23)
Count  7 :  (2, 23)
Valid user =  8 , Extracted user =  2 accuracy =  0.6727272727272727
--------------------------------------------------------------------------------
True:  (110, 23)
Shape:  (330, 23)
Count  8 :  (110, 23)
Count  3 :  (110, 23)
Count  5 :  (70, 23)
Count  4 :  (17, 23)
Count  1 :  (17, 23)
Count  2 :  (4, 23)
Count  7 :  (2, 23)
Valid user =  8 , Extracted user =  3 accuracy =  0.6818181818181818
------------------------------------------------------------------

  return (a - mns) / sstd


wifi sampling 60s CatBoost
Valid User:  1
--------------------------------------------------------------------------------
True:  (118, 23)
Shape:  (354, 23)
Count  1 :  (118, 23)
Count  2 :  (118, 23)
Count  6 :  (8, 23)
Count  5 :  (70, 23)
Count  4 :  (17, 23)
Count  3 :  (14, 23)
Count  8 :  (8, 23)
Count  7 :  (1, 23)
Valid user =  1 , Extracted user =  2 accuracy =  0.8248587570621468
--------------------------------------------------------------------------------
True:  (171, 23)
Shape:  (513, 23)
Count  1 :  (171, 23)
Count  3 :  (171, 23)
Count  4 :  (27, 23)
Count  5 :  (105, 23)
Count  7 :  (9, 23)
Count  8 :  (12, 23)
Count  6 :  (12, 23)
Count  2 :  (6, 23)
Valid user =  1 , Extracted user =  3 accuracy =  0.8966861598440545
--------------------------------------------------------------------------------
True:  (171, 23)
Shape:  (513, 23)
Count  1 :  (171, 23)
Count  4 :  (171, 23)
Count  5 :  (112, 23)
Count  2 :  (10, 23)
Count  6 :  (10, 23)
Count  8 :  (9, 23)
Count  3