In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, plot_roc_curve, make_scorer, roc_auc_score, f1_score
from sklearn import preprocessing
from scipy import stats
from sklearn.naive_bayes import CategoricalNB
from sklearn.model_selection import cross_validate, LeaveOneGroupOut, PredefinedSplit, GridSearchCV
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def make_dataframe_impl(df_count, data_dir=None):
    """Load the per-user broadcast logs and stack them into one frame.

    Reads ``broadcasts_1.csv`` ... ``broadcasts_<df_count>.csv`` from
    ``data_dir``, drops the ``timestamp`` column of every file and tags each
    row with the 1-based file number in a new ``user`` column (appended as the
    last column).

    Parameters
    ----------
    df_count : int
        Number of consecutive files to read, starting at 1.
    data_dir : str or os.PathLike, optional
        Directory that holds the CSV files. Defaults to
        ``../pipelines/_datasets/60s`` relative to the working directory.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``df_count`` is smaller than 1, i.e. there is nothing to load.
    """
    # Local import: the notebook's import cell does not bring in pathlib.
    from pathlib import Path

    if df_count < 1:
        raise ValueError("df_count must be at least 1")

    # Assemble the location from its parts instead of a "..\\pipelines\\..."
    # literal, which only resolves on Windows.
    if data_dir is None:
        data_dir = Path("..") / "pipelines" / "_datasets" / "60s"
    data_dir = Path(data_dir)

    dfs_list = []
    for i in range(1, df_count + 1):
        df = pd.read_csv(data_dir / ("broadcasts_" + str(i) + ".csv"))
        df = df.drop(["timestamp"], axis=1)

        # The file number doubles as the user id / class label.
        df["user"] = i
        dfs_list.append(df)

    return pd.concat(dfs_list, ignore_index=True)

In [3]:
def make_common_dataframe(df_count):
    """Build the combined all-users frame.

    Forwards ``df_count`` unchanged to ``make_dataframe_impl`` and hands back
    whatever that call returns.
    """
    common_df = make_dataframe_impl(df_count)
    return common_df

In [4]:
def resample(df):
    """Oversample every class in ``df.user`` up to the size of the largest one.

    The most frequent ``user`` value (first entry of ``value_counts()``) is
    kept as-is; every other value is sampled *with replacement* to the same
    number of rows, using a fixed ``random_state=42``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``user`` column holding the class labels.

    Returns
    -------
    pandas.DataFrame
        The majority-class rows followed by the oversampled rows of each other
        class. Original index labels are kept, so they may repeat. An empty
        input yields an empty copy.
    """
    # value_counts() is sorted by descending frequency, so position 0 is the
    # majority class; compute it once instead of once per zip() argument.
    counts = df.user.value_counts()
    if counts.empty:
        return df.copy()

    need_count = int(counts.iloc[0])
    sampling_dfs = [df[df.user == counts.index[0]]]

    for label in counts.index[1:]:
        df_over = df[df.user == label].sample(need_count, replace=True, random_state=42)
        sampling_dfs.append(df_over)

    # The original also built a reset_index() copy here and then discarded it;
    # the value actually returned was always this plain concatenation.
    return pd.concat(sampling_dfs)

def extract_delayed_user(df, user_label):
    """Partition ``df`` on its ``user`` column.

    Returns a pair ``(rows_of_user_label, all_other_rows)``. Both parts are
    produced by boolean filtering, so the frame passed in is left unchanged.
    """
    belongs_to_user = df["user"] == user_label
    return df[belongs_to_user], df[~belongs_to_user]

def split_users_into_two_classes(df, valid_user_label):
    """Collapse the ``user`` column into a binary target, in place.

    Rows whose ``user`` equals ``valid_user_label`` become ``1``; every other
    row becomes ``0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``user`` column. It is modified in place, so pass a copy
        if the original labels are still needed.
    valid_user_label
        The label that identifies the legitimate (positive-class) user.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for call chaining.
    """
    # Decide class membership BEFORE writing anything. Evaluating the second
    # mask after the first assignment mislabels every row as 1 when
    # valid_user_label is 0, because the impostors have just been set to 0.
    is_valid_user = df["user"] == valid_user_label
    df.loc[~is_valid_user, "user"] = 0
    df.loc[is_valid_user, "user"] = 1
    return df

In [5]:
def get_cv_split(X, y, group_labels, valid_user_label):
    """Build a ``test_fold`` array for ``PredefinedSplit`` from user groups.

    Every group other than ``valid_user_label`` becomes its own test fold,
    numbered 0, 1, 2, ... in ascending order of the group label. Samples of
    ``valid_user_label`` get ``-1``, which ``PredefinedSplit`` interprets as
    "never part of a test set, always in training".

    Parameters
    ----------
    X, y
        Unused; kept so existing call sites stay valid.
    group_labels : array-like of shape (n_samples,)
        Group (user) label of each sample.
    valid_user_label
        Label of the group that must stay in the training set of every fold.

    Returns
    -------
    numpy.ndarray of float, shape (n_samples,)
        Fold index per sample (``-1`` for the valid user's samples).

    Raises
    ------
    ValueError
        If fewer than two distinct groups are present (no meaningful split).
    """
    group_labels = np.asarray(group_labels)
    unique_groups = np.unique(group_labels)
    if unique_groups.shape[0] < 2:
        raise ValueError(
            "group_labels must contain at least 2 unique groups to build a split"
        )

    predefined_split_array = np.zeros(group_labels.shape[0])
    fold = 0
    for group in unique_groups:
        # A boolean mask selects exactly this group's samples. The previous
        # slice-based check (first index : last index) skipped the last
        # sample, broke on non-contiguous groups and treated one-sample
        # groups as the valid user because np.all() of an empty slice is True.
        members = group_labels == group
        if group == valid_user_label:
            predefined_split_array[members] = -1
        else:
            predefined_split_array[members] = fold
            fold += 1
    return predefined_split_array

In [6]:
# Load broadcasts_1.csv .. broadcasts_6.csv and stack them into one frame;
# the `user` column holds the file number (1..6) each row came from.
df = make_common_dataframe(6)

In [7]:
# df.drop('user', axis=1).boxplot()

In [8]:
# Fixed vocabulary of Android broadcast action names used to fit the label
# encoder for the `action` column. Each name is listed exactly once: the
# original list repeated 'android.intent.action.HEADSET_PLUG'. LabelEncoder
# de-duplicates its classes, so the integer codes are unaffected by the removal.
possible_broadcasts = [
    'android.accounts.LOGIN_ACCOUNTS_CHANGED',
    'android.accounts.action.ACCOUNT_REMOVED',
    'android.app.action.ACTION_PASSWORD_CHANGED',
    'android.app.action.ACTION_PASSWORD_EXPIRING',
    'android.app.action.ACTION_PASSWORD_FAILED',
    'android.app.action.ACTION_PASSWORD_SUCCEEDED',
    'android.app.action.AFFILIATED_PROFILE_TRANSFER_OWNERSHIP_COMPLETE',
    'android.app.action.APPLICATION_DELEGATION_SCOPES_CHANGED',
    'android.app.action.APP_BLOCK_STATE_CHANGED',
    'android.app.action.AUTOMATIC_ZEN_RULE_STATUS_CHANGED',
    'android.app.action.BUGREPORT_FAILED',
    'android.app.action.BUGREPORT_SHARE',
    'android.app.action.BUGREPORT_SHARING_DECLINED',
    'android.app.action.DATA_SHARING_RESTRICTION_APPLIED',
    'android.app.action.DATA_SHARING_RESTRICTION_CHANGED',
    'android.app.action.DEVICE_ADMIN_DISABLED',
    'android.app.action.DEVICE_ADMIN_DISABLE_REQUESTED',
    'android.app.action.DEVICE_ADMIN_ENABLED',
    'android.app.action.DEVICE_OWNER_CHANGED',
    'android.app.action.INTERRUPTION_FILTER_CHANGED',
    'android.app.action.INTERRUPTION_FILTER_CHANGED_INTERNAL',
    'android.app.action.LOCK_TASK_ENTERING',
    'android.app.action.LOCK_TASK_EXITING',
    'android.app.action.MANAGED_USER_CREATED',
    'android.app.action.NETWORK_LOGS_AVAILABLE',
    'android.app.action.NEXT_ALARM_CLOCK_CHANGED',
    'android.app.action.NOTIFICATION_CHANNEL_BLOCK_STATE_CHANGED',
    'android.app.action.NOTIFICATION_CHANNEL_GROUP_BLOCK_STATE_CHANGED',
    'android.app.action.NOTIFICATION_POLICY_ACCESS_GRANTED_CHANGED',
    'android.app.action.NOTIFICATION_POLICY_CHANGED',
    'android.app.action.NOTIFY_PENDING_SYSTEM_UPDATE',
    'android.app.action.PROFILE_OWNER_CHANGED',
    'android.app.action.PROFILE_PROVISIONING_COMPLETE',
    'android.app.action.SECURITY_LOGS_AVAILABLE',
    'android.app.action.SYSTEM_UPDATE_POLICY_CHANGED',
    'android.app.action.TRANSFER_OWNERSHIP_COMPLETE',
    'android.app.action.USER_ADDED',
    'android.app.action.USER_REMOVED',
    'android.app.action.USER_STARTED',
    'android.app.action.USER_STOPPED',
    'android.app.action.USER_SWITCHED',
    'android.appwidget.action.APPWIDGET_DELETED',
    'android.appwidget.action.APPWIDGET_DISABLED',
    'android.appwidget.action.APPWIDGET_ENABLED',
    'android.appwidget.action.APPWIDGET_HOST_RESTORED',
    'android.appwidget.action.APPWIDGET_RESTORED',
    'android.appwidget.action.APPWIDGET_UPDATE',
    'android.appwidget.action.APPWIDGET_UPDATE_OPTIONS',
    'android.bluetooth.a2dp.profile.action.ACTIVE_DEVICE_CHANGED',
    'android.bluetooth.a2dp.profile.action.AVRCP_CONNECTION_STATE_CHANGED',
    'android.bluetooth.a2dp.profile.action.CODEC_CONFIG_CHANGED',
    'android.bluetooth.a2dp.profile.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.a2dp.profile.action.PLAYING_STATE_CHANGED',
    'android.bluetooth.adapter.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.adapter.action.DISCOVERY_FINISHED',
    'android.bluetooth.adapter.action.DISCOVERY_STARTED',
    'android.bluetooth.adapter.action.LOCAL_NAME_CHANGED',
    'android.bluetooth.adapter.action.SCAN_MODE_CHANGED',
    'android.bluetooth.adapter.action.STATE_CHANGED',
    'android.bluetooth.device.action.ACL_CONNECTED',
    'android.bluetooth.device.action.ACL_DISCONNECTED',
    'android.bluetooth.device.action.ACL_DISCONNECT_REQUESTED',
    'android.bluetooth.device.action.ALIAS_CHANGED',
    'android.bluetooth.device.action.BATTERY_LEVEL_CHANGED',
    'android.bluetooth.device.action.BOND_STATE_CHANGED',
    'android.bluetooth.device.action.CLASS_CHANGED',
    'android.bluetooth.device.action.CONNECTION_ACCESS_CANCEL',
    'android.bluetooth.device.action.CONNECTION_ACCESS_REPLY',
    'android.bluetooth.device.action.CONNECTION_ACCESS_REQUEST',
    'android.bluetooth.device.action.FOUND',
    'android.bluetooth.device.action.MAS_INSTANCE',
    'android.bluetooth.device.action.NAME_CHANGED',
    'android.bluetooth.device.action.NAME_FAILED',
    'android.bluetooth.device.action.PAIRING_CANCEL',
    'android.bluetooth.device.action.PAIRING_REQUEST',
    'android.bluetooth.device.action.SDP_RECORD',
    'android.bluetooth.device.action.SILENCE_MODE_CHANGED',
    'android.bluetooth.device.action.UUID',
    'android.bluetooth.devicepicker.action.DEVICE_SELECTED',
    'android.bluetooth.devicepicker.action.LAUNCH',
    'android.bluetooth.headset.action.VENDOR_SPECIFIC_HEADSET_EVENT',
    'android.bluetooth.headset.profile.action.ACTIVE_DEVICE_CHANGED',
    'android.bluetooth.headset.profile.action.AUDIO_STATE_CHANGED',
    'android.bluetooth.headset.profile.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.hearingaid.profile.action.ACTIVE_DEVICE_CHANGED',
    'android.bluetooth.hearingaid.profile.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.hiddevice.profile.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.input.profile.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.input.profile.action.HANDSHAKE',
    'android.bluetooth.input.profile.action.IDLE_TIME_CHANGED',
    'android.bluetooth.input.profile.action.PROTOCOL_MODE_CHANGED',
    'android.bluetooth.input.profile.action.REPORT',
    'android.bluetooth.input.profile.action.VIRTUAL_UNPLUG_STATUS',
    'android.bluetooth.pan.profile.action.CONNECTION_STATE_CHANGED',
    'android.bluetooth.pbap.profile.action.CONNECTION_STATE_CHANGED',
    'android.content.pm.action.SESSION_COMMITTED',
    'android.content.pm.action.SESSION_UPDATED',
    'android.hardware.action.NEW_PICTURE',
    'android.hardware.action.NEW_VIDEO',
    'android.hardware.hdmi.action.OSD_MESSAGE',
    'android.hardware.input.action.QUERY_KEYBOARD_LAYOUTS',
    'android.hardware.usb.action.USB_ACCESSORY_ATTACHED',
    'android.hardware.usb.action.USB_ACCESSORY_DETACHED',
    'android.hardware.usb.action.USB_DEVICE_ATTACHED',
    'android.hardware.usb.action.USB_DEVICE_DETACHED',
    'android.intent.action.ACTION_IDLE_MAINTENANCE_END',
    'android.intent.action.ACTION_IDLE_MAINTENANCE_START',
    'android.intent.action.ACTION_POWER_CONNECTED',
    'android.intent.action.ACTION_POWER_DISCONNECTED',
    'android.intent.action.ACTION_PREFERRED_ACTIVITY_CHANGED',
    'android.intent.action.ACTION_SHUTDOWN',
    'android.intent.action.AIRPLANE_MODE',
    'android.intent.action.ALARM_CHANGED',
    'android.intent.action.APPLICATION_RESTRICTIONS_CHANGED',
    'android.intent.action.BATTERY_CHANGED',
    'android.intent.action.BATTERY_LOW',
    'android.intent.action.BATTERY_OKAY',
    'android.intent.action.BOOT_COMPLETED',
    'android.intent.action.CALL_DISCONNECT_CAUSE',
    'android.intent.action.CAMERA_BUTTON',
    'android.intent.action.CANCEL_ENABLE_ROLLBACK',
    'android.intent.action.CLEAR_DNS_CACHE',
    'android.intent.action.CLOSE_SYSTEM_DIALOGS',
    'android.intent.action.CONFIGURATION_CHANGED',
    'android.intent.action.CONTENT_CHANGED',
    'android.intent.action.DATA_SMS_RECEIVED',
    'android.intent.action.DATA_STALL_DETECTED',
    'android.intent.action.DATE_CHANGED',
    'android.intent.action.DEVICE_STORAGE_FULL',
    'android.intent.action.DEVICE_STORAGE_LOW',
    'android.intent.action.DEVICE_STORAGE_NOT_FULL',
    'android.intent.action.DEVICE_STORAGE_OK',
    'android.intent.action.DISTRACTING_PACKAGES_CHANGED',
    'android.intent.action.DOCK_EVENT',
    'android.intent.action.DOWNLOAD_COMPLETE',
    'android.intent.action.DOWNLOAD_NOTIFICATION_CLICKED',
    'android.intent.action.DREAMING_STARTED',
    'android.intent.action.DREAMING_STOPPED',
    'android.intent.action.DROPBOX_ENTRY_ADDED',
    'android.intent.action.DYNAMIC_SENSOR_CHANGED',
    'android.intent.action.EMERGENCY_CALLBACK_MODE_CHANGED',
    'android.intent.action.EMERGENCY_CALL_STATE_CHANGED',
    'android.intent.action.EXTERNAL_APPLICATIONS_AVAILABLE',
    'android.intent.action.EXTERNAL_APPLICATIONS_UNAVAILABLE',
    'android.intent.action.FACTORY_RESET',
    'android.intent.action.FETCH_VOICEMAIL',
    'android.intent.action.GTALK_CONNECTED',
    'android.intent.action.GTALK_DISCONNECTED',
    'android.intent.action.HEADSET_PLUG',
    'android.intent.action.INPUT_METHOD_CHANGED',
    'android.intent.action.INTENT_FILTER_NEEDS_VERIFICATION',
    'android.intent.action.LOCALE_CHANGED',
    'android.intent.action.LOCKED_BOOT_COMPLETED',
    'android.intent.action.MANAGE_PACKAGE_STORAGE',
    'android.intent.action.MASTER_CLEAR_NOTIFICATION',
    'android.intent.action.MEDIA_BAD_REMOVAL',
    'android.intent.action.MEDIA_BUTTON',
    'android.intent.action.MEDIA_CHECKING',
    'android.intent.action.MEDIA_EJECT',
    'android.intent.action.MEDIA_MOUNTED',
    'android.intent.action.MEDIA_NOFS',
    'android.intent.action.MEDIA_REMOVED',
    'android.intent.action.MEDIA_SCANNER_FINISHED',
    'android.intent.action.MEDIA_SCANNER_SCAN_FILE',
    'android.intent.action.MEDIA_SCANNER_STARTED',
    'android.intent.action.MEDIA_SHARED',
    'android.intent.action.MEDIA_UNMOUNTABLE',
    'android.intent.action.MEDIA_UNMOUNTED',
    'android.intent.action.MY_PACKAGE_REPLACED',
    'android.intent.action.MY_PACKAGE_SUSPENDED',
    'android.intent.action.MY_PACKAGE_UNSUSPENDED',
    'android.intent.action.NEW_OUTGOING_CALL',
    'android.intent.action.NEW_VOICEMAIL',
    'android.intent.action.PACKAGES_SUSPENDED',
    'android.intent.action.PACKAGES_UNSUSPENDED',
    'android.intent.action.PACKAGE_ADDED',
    'android.intent.action.PACKAGE_CHANGED',
    'android.intent.action.PACKAGE_DATA_CLEARED',
    'android.intent.action.PACKAGE_ENABLE_ROLLBACK',
    'android.intent.action.PACKAGE_FIRST_LAUNCH',
    'android.intent.action.PACKAGE_FULLY_REMOVED',
    'android.intent.action.PACKAGE_INSTALL',
    'android.intent.action.PACKAGE_NEEDS_INTEGRITY_VERIFICATION',
    'android.intent.action.PACKAGE_NEEDS_VERIFICATION',
    'android.intent.action.PACKAGE_REMOVED',
    'android.intent.action.PACKAGE_REPLACED',
    'android.intent.action.PACKAGE_RESTARTED',
    'android.intent.action.PACKAGE_UNSUSPENDED_MANUALLY',
    'android.intent.action.PACKAGE_VERIFIED',
    'android.intent.action.PHONE_STATE',
    'android.intent.action.PROVIDER_CHANGED',
    'android.intent.action.PROXY_CHANGE',
    'android.intent.action.QUERY_PACKAGE_RESTART',
    'android.intent.action.REBOOT',
    'android.intent.action.ROLLBACK_COMMITTED',
    'android.intent.action.SCREEN_OFF',
    'android.intent.action.SCREEN_ON',
    'android.intent.action.SERVICE_STATE',
    'android.intent.action.SIM_STATE_CHANGED',
    'android.intent.action.SPLIT_CONFIGURATION_CHANGED',
    'android.intent.action.SUB_DEFAULT_CHANGED',
    'android.intent.action.TIMEZONE_CHANGED',
    'android.intent.action.TIME_SET',
    'android.intent.action.TIME_TICK',
    'android.intent.action.UID_REMOVED',
    'android.intent.action.UMS_CONNECTED',
    'android.intent.action.UMS_DISCONNECTED',
    'android.intent.action.USER_PRESENT',
    'android.intent.action.USER_UNLOCKED',
    'android.intent.action.WALLPAPER_CHANGED',
    'android.media.ACTION_SCO_AUDIO_STATE_UPDATED',
    'android.media.AUDIO_BECOMING_NOISY',
    'android.media.INTERNAL_RINGER_MODE_CHANGED_ACTION',
    'android.media.MASTER_MUTE_CHANGED_ACTION',
    'android.media.RINGER_MODE_CHANGED',
    'android.media.SCO_AUDIO_STATE_CHANGED',
    'android.media.STREAM_DEVICES_CHANGED_ACTION',
    'android.media.STREAM_MUTE_CHANGED_ACTION',
    'android.media.VIBRATE_SETTING_CHANGED',
    'android.media.VOLUME_CHANGED_ACTION',
    'android.media.action.CLOSE_AUDIO_EFFECT_CONTROL_SESSION',
    'android.media.action.HDMI_AUDIO_PLUG',
    'android.media.action.MICROPHONE_MUTE_CHANGED',
    'android.media.action.OPEN_AUDIO_EFFECT_CONTROL_SESSION',
    'android.media.action.SPEAKERPHONE_STATE_CHANGED',
    'android.media.tv.action.CHANNEL_BROWSABLE_REQUESTED',
    'android.media.tv.action.INITIALIZE_PROGRAMS',
    'android.media.tv.action.PREVIEW_PROGRAM_ADDED_TO_WATCH_NEXT',
    'android.media.tv.action.PREVIEW_PROGRAM_BROWSABLE_DISABLED',
    'android.media.tv.action.WATCH_NEXT_PROGRAM_BROWSABLE_DISABLED',
    'android.net.conn.BACKGROUND_DATA_SETTING_CHANGED',
    'android.net.conn.CAPTIVE_PORTAL_TEST_COMPLETED',
    'android.net.conn.CONNECTIVITY_CHANGE',
    'android.net.conn.DATA_ACTIVITY_CHANGE',
    'android.net.conn.INET_CONDITION_ACTION',
    'android.net.conn.RESTRICT_BACKGROUND_CHANGED',
    'android.net.conn.TETHER_STATE_CHANGED',
    'android.net.nsd.STATE_CHANGED',
    'android.net.scoring.SCORER_CHANGED',
    'android.net.scoring.SCORE_NETWORKS',
    'android.net.sip.action.SIP_CALL_OPTION_CHANGED',
    'android.net.sip.action.SIP_INCOMING_CALL',
    'android.net.sip.action.SIP_REMOVE_PROFILE',
    'android.net.sip.action.SIP_SERVICE_UP',
    'android.net.sip.action.START_SIP',
    'android.net.wifi.BATCHED_RESULTS',
    'android.net.wifi.NETWORK_IDS_CHANGED',
    'android.net.wifi.RSSI_CHANGED',
    'android.net.wifi.SCAN_RESULTS',
    'android.net.wifi.STATE_CHANGE',
    'android.net.wifi.WIFI_STATE_CHANGED',
    'android.net.wifi.action.WIFI_NETWORK_SUGGESTION_POST_CONNECTION',
    'android.net.wifi.action.WIFI_SCAN_AVAILABILITY_CHANGED',
    'android.net.wifi.aware.action.WIFI_AWARE_STATE_CHANGED',
    'android.net.wifi.p2p.CONNECTION_STATE_CHANGE',
    'android.net.wifi.p2p.DISCOVERY_STATE_CHANGE',
    'android.net.wifi.p2p.PEERS_CHANGED',
    'android.net.wifi.p2p.STATE_CHANGED',
    'android.net.wifi.p2p.THIS_DEVICE_CHANGED',
    'android.net.wifi.rtt.action.WIFI_RTT_STATE_CHANGED',
    'android.net.wifi.supplicant.CONNECTION_CHANGE',
    'android.net.wifi.supplicant.STATE_CHANGE',
    'android.nfc.action.ADAPTER_STATE_CHANGED',
    'android.nfc.action.PREFERRED_PAYMENT_CHANGED',
    'android.nfc.action.TRANSACTION_DETECTED',
    'android.os.action.ACTION_EFFECTS_SUPPRESSOR_CHANGED',
    'android.os.action.DEVICE_IDLE_MODE_CHANGED',
    'android.os.action.LIGHT_DEVICE_IDLE_MODE_CHANGED',
    'android.os.action.POWER_SAVE_MODE_CHANGED',
    'android.os.action.POWER_SAVE_MODE_CHANGED_INTERNAL',
    'android.os.action.POWER_SAVE_MODE_CHANGING',
    'android.os.action.POWER_SAVE_TEMP_WHITELIST_CHANGED',
    'android.os.action.POWER_SAVE_WHITELIST_CHANGED',
    'android.os.action.UPDATE_EMERGENCY_NUMBER_DB',
    'android.provider.Telephony.MMS_DOWNLOADED',
    'android.provider.Telephony.SECRET_CODE',
    'android.provider.Telephony.SIM_FULL',
    'android.provider.Telephony.SMS_CARRIER_PROVISION',
    'android.provider.Telephony.SMS_CB_RECEIVED',
    'android.provider.Telephony.SMS_DELIVER',
    'android.provider.Telephony.SMS_RECEIVED',
    'android.provider.Telephony.SMS_REJECTED',
    'android.provider.Telephony.SMS_SERVICE_CATEGORY_PROGRAM_DATA_RECEIVED',
    'android.provider.Telephony.WAP_PUSH_DELIVER',
    'android.provider.Telephony.WAP_PUSH_RECEIVED',
    'android.provider.action.DEFAULT_SMS_PACKAGE_CHANGED',
    'android.provider.action.EXTERNAL_PROVIDER_CHANGE',
    'android.provider.action.SMS_EMERGENCY_CB_RECEIVED',
    'android.provider.action.SMS_MMS_DB_CREATED',
    'android.provider.action.SMS_MMS_DB_LOST',
    'android.provider.action.SYNC_VOICEMAIL',
    'android.security.STORAGE_CHANGED',
    'android.security.action.KEYCHAIN_CHANGED',
    'android.security.action.KEY_ACCESS_CHANGED',
    'android.security.action.TRUST_STORE_CHANGED',
    'android.service.controls.action.ADD_CONTROL',
    'android.settings.ENABLE_MMS_DATA_REQUEST',
    'android.speech.tts.TTS_QUEUE_PROCESSING_COMPLETED',
    'android.speech.tts.engine.TTS_DATA_INSTALLED',
    'android.telephony.action.AREA_INFO_UPDATED',
    'android.telephony.action.DEFAULT_SMS_SUBSCRIPTION_CHANGED',
    'android.telephony.action.DEFAULT_SUBSCRIPTION_CHANGED',
    'android.telephony.action.PRIMARY_SUBSCRIPTION_LIST_CHANGED',
    'android.telephony.action.REFRESH_SUBSCRIPTION_PLANS',
    'android.telephony.action.SECRET_CODE',
    'android.telephony.action.SERVICE_PROVIDERS_UPDATED',
    'android.telephony.action.SIM_APPLICATION_STATE_CHANGED',
    'android.telephony.action.SIM_CARD_STATE_CHANGED',
    'android.telephony.action.SIM_SLOT_STATUS_CHANGED',
    'android.telephony.action.SUBSCRIPTION_CARRIER_IDENTITY_CHANGED',
    'android.telephony.action.SUBSCRIPTION_PLANS_CHANGED',
    'android.telephony.action.SUBSCRIPTION_SPECIFIC_CARRIER_IDENTITY_CHANGED',
    'android.telephony.euicc.action.NOTIFY_CARRIER_SETUP_INCOMPLETE',
    'android.telephony.euicc.action.OTA_STATUS_CHANGED',
    'android.telephony.ims.action.WFC_IMS_REGISTRATION_ERROR',
    'com.android.intent.action.DISMISS_KEYBOARD_SHORTCUTS',
    'com.android.intent.action.SHOW_KEYBOARD_SHORTCUTS',
    'com.android.internal.intent.action.ACTION_FORBIDDEN_NO_SERVICE_AUTHORIZATION',
    'com.android.internal.provider.action.VOICEMAIL_SMS_RECEIVED',
]

In [9]:
# Fit the encoder on the fixed vocabulary in `possible_broadcasts` rather than
# on the loaded data, so an action name maps to the same integer code no matter
# which actions happen to occur in the logs.
le = LabelEncoder()
le.fit(possible_broadcasts)

LabelEncoder()

In [10]:
# Replace the action names with their integer codes, overwriting the column.
# NOTE(review): not idempotent - running this cell a second time feeds the
# already-encoded integers back into transform(), which presumably rejects
# them as unseen labels; reload `df` before re-running.
df['action'] = le.transform(df['action'])

In [11]:
# Naive Bayes classifier for categorical features; the same instance is
# re-fitted for every (valid user, extracted user) pair in the loop below.
model = CategoricalNB()

In [12]:
# Impostor test: for each extracted user, train a binary "valid user vs. rest"
# model on the remaining users and check how many of the extracted user's rows
# are rejected (predicted as class 0). All slicing below assumes `user` is the
# last column of `df`.
for user in df.user.unique():
    # The held-out rows depend only on `user`, so split once per outer
    # iteration instead of copying and re-splitting the full frame for every
    # valid user. extract_delayed_user() does not modify `df`.
    df1, df_without_user = extract_delayed_user(df, user)

    # Every held-out row belongs to an impostor, so the expected class is 0.
    # assign() builds a new frame rather than writing into a filtered slice.
    df1 = df1.assign(user=0)
    held_out_dataset = df1.to_numpy()
    X_test = held_out_dataset[:, :-1]
    y_test = held_out_dataset[:, -1]

    for valid_user in df.user.unique():
        if user == valid_user:
            continue

        print('---------------------------------------------------------------------------')
        print('Valid user: ', valid_user, 'Extracted user: ', user)
        print('---------------------------------------------------------------------------')

        # split_users_into_two_classes() relabels in place, hence the copy.
        df_ = split_users_into_two_classes(df_without_user.copy(), valid_user)
        # NOTE(review): resample(df_) was commented out in the original cell,
        # so the model is trained on the unbalanced classes.

        dataset = df_.to_numpy()
        X = dataset[:, :-1]
        y = dataset[:, -1]

        model.fit(X, y)

        preds_class = model.predict(X_test)
        # accuracy_score expects (y_true, y_pred); the value is the same either
        # way, but the documented order keeps the call unambiguous.
        print('Accuracy: ', accuracy_score(y_test, preds_class))

        print('---------------------------------------------------------------------------')
        print('---------------------------------------------------------------------------')

---------------------------------------------------------------------------
Valid user:  2 Extracted user:  1
---------------------------------------------------------------------------
Accuracy:  1.0
---------------------------------------------------------------------------
---------------------------------------------------------------------------
---------------------------------------------------------------------------
Valid user:  3 Extracted user:  1
---------------------------------------------------------------------------
Accuracy:  0.8475052199582404
---------------------------------------------------------------------------
---------------------------------------------------------------------------
---------------------------------------------------------------------------
Valid user:  4 Extracted user:  1
---------------------------------------------------------------------------
Accuracy:  0.9748722010223918
---------------------------------------------------------------

Accuracy:  0.9998105741707358
---------------------------------------------------------------------------
---------------------------------------------------------------------------
---------------------------------------------------------------------------
Valid user:  4 Extracted user:  5
---------------------------------------------------------------------------
Accuracy:  0.7460851995285401
---------------------------------------------------------------------------
---------------------------------------------------------------------------
---------------------------------------------------------------------------
Valid user:  6 Extracted user:  5
---------------------------------------------------------------------------
Accuracy:  0.8881545714766795
---------------------------------------------------------------------------
---------------------------------------------------------------------------
---------------------------------------------------------------------------
Valid 

## CategoricalNB CV

In [13]:
# Per-user cross-validation: each user in turn is the "valid" user (class 1),
# everybody else is class -1. The folds come from get_cv_split(), which is
# driven by the original per-row user ids saved in the `labels` column.
df["labels"] = df["user"]

CV_SVC_BIG_DICT = {}

for user in df["labels"].unique():
    print("Valid User: ", user)
    print("--------------------------------------------------------------------------------")

    # Binary target on a private copy: 1 for `user`, then 0 -> -1 for the rest.
    df_ = split_users_into_two_classes(df.copy(), user)
    df_.loc[df_.user == 0, "user"] = -1

    # Keep the original ids for the fold assignment, then drop them so they
    # cannot end up among the features.
    group_labels = df_["labels"].to_numpy().copy()
    df_ = df_.drop("labels", axis=1)

    dataset = df_.to_numpy().copy()
    X, y = dataset[:, :-1], dataset[:, -1]

    cv_split = PredefinedSplit(test_fold=get_cv_split(X, y, group_labels, user))
    scoring = ("accuracy", "balanced_accuracy")

    model = CategoricalNB()

    cv_results = cross_validate(model, X, y, scoring=scoring, cv=cv_split, n_jobs=-1)
    accuracy = cv_results["test_accuracy"]

    # Summary statistics are computed once and used for both storage and printing.
    mean_accuracy = np.mean(accuracy)
    max_accuracy = np.max(accuracy)
    min_accuracy = np.min(accuracy)

    CV_SVC_BIG_DICT[str(user)] = {
        "accuracy": accuracy.copy(),
        "mean_accuracy": mean_accuracy,
        "max_accuracy": max_accuracy,
        "min_accuracy": min_accuracy,
    }

    print("CV accuracy list: ", accuracy)
    print("CV mean accuracy: ", mean_accuracy)
    print("CV min accuracy: ", min_accuracy)
    print("CV max accuracy: ", max_accuracy)

    print("--------------------------------------------------------------------------------")

Valid User:  1
--------------------------------------------------------------------------------
CV accuracy list:  [0.99977205 0.76768429 0.97564625 0.97272268 1.        ]
CV mean accuracy:  0.9431650540962888
CV min accuracy:  0.7676842889054356
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
CV accuracy list:  [1.         1.         1.         0.99966324 1.        ]
CV mean accuracy:  0.9999326485940394
CV min accuracy:  0.999663242970197
CV max accuracy:  1.0
--------------------------------------------------------------------------------
Valid User:  3
--------------------------------------------------------------------------------
CV accuracy list:  [0.84750522 0.99680875 0.99912725 0.99981057 1.        ]
CV mean accuracy:  0.968650360371108
CV min accuracy:  0.8475052199582404
CV max accuracy:  1.0
---------------------------------

## CategoricalNB Final Validation

In [14]:
# Final hold-out validation. For every (valid user, extracted user) pair:
#   1. all rows of the extracted user plus ~25% of every other user's rows
#      are set aside as the test pool;
#   2. a binary model (valid user = 1, everyone else = -1) is fitted on the rest;
#   3. the test pool is rebalanced into three roughly equal parts - valid user,
#      extracted user, and the remaining users together - and scored.
df["labels"] = df["user"]

VALIDATION_SVC_BIG_DICT = {}

# One seeded generator drives every sample()/shuffle() call below, so the cell
# reproduces the same numbers on each run. The original drew from the unseeded
# global NumPy state.
rng = np.random.RandomState(42)

for user in df.labels.unique():
    print("Valid User: ", user)
    print("--------------------------------------------------------------------------------")

    VALIDATION_SVC_BIG_DICT[str(user)] = {}

    for ex_user in df.labels.unique():
        if ex_user == user:
            continue

        df_ = df.copy()

        # --- 1. Build the raw test pool -------------------------------------
        df_for_test = []

        # The extracted user is removed from training entirely.
        df__ = df_[df_.labels == ex_user].copy()
        df_for_test.append(df__)
        df_ = df_.drop(df__.index, axis=0)

        # ex_user's rows are already gone, so every label still present
        # contributes a hold-out sample (no extra `!= ex_user` check needed).
        for user_ in df_.labels.unique():
            test_size = int((0.25 * df_[df_.labels == user_].shape[0]) - 1)
            df__ = df_[df_.labels == user_].sample(test_size, random_state=rng).copy()
            df_for_test.append(df__)
            df_ = df_.drop(df__.index, axis=0)

        # --- 2. Train ---------------------------------------------------------
        # NOTE(review): resample() was commented out in the original cell, so
        # the model is trained on the unbalanced classes.
        df_ = split_users_into_two_classes(df_.copy(), user)
        df_.loc[df_.user == 0, 'user'] = -1

        df_ = df_.drop("labels", axis=1)

        dataset = df_.to_numpy().copy()
        rng.shuffle(dataset)

        X = dataset[:, :-1]
        y = dataset[:, -1]

        model = CategoricalNB()
        model.fit(X, y)

        # --- 3. Balance the test set -------------------------------------------
        test_df = pd.concat(df_for_test)

        valid_user_in_test_count = test_df[test_df.labels == user].shape[0]
        ex_user_in_test_count = test_df[test_df.labels == ex_user].shape[0]
        others_in_test_count = [
            test_df[test_df.labels == x].shape[0]
            for x in test_df.labels.unique()
            if x != user and x != ex_user
        ]

        # part_size: rows taken from the valid user and from the extracted user.
        # other_sample_size: rows taken from EACH remaining user. This is the
        # original three-way branch folded into one equivalent condition: the
        # "others" can fill a whole part only if the smallest of them, times
        # their number, exceeds what the valid/extracted users can supply.
        balanced_cap = min(valid_user_in_test_count, ex_user_in_test_count)
        if others_in_test_count:
            min_others_test_count = min(others_in_test_count)
            if min_others_test_count * len(others_in_test_count) > balanced_cap:
                part_size = balanced_cap
                other_sample_size = part_size // len(others_in_test_count) + 1
            else:
                part_size = min_others_test_count * len(others_in_test_count)
                other_sample_size = min_others_test_count
        else:
            # Only two users in the data: nothing to draw from "others".
            part_size = balanced_cap
            other_sample_size = 0

        new_df_parts = [
            test_df[test_df.labels == user].sample(part_size, random_state=rng).copy(),
            test_df[test_df.labels == ex_user].sample(part_size, random_state=rng).copy(),
        ]
        for x in test_df.labels.unique():
            if x != user and x != ex_user:
                new_df_parts.append(
                    test_df[test_df.labels == x].sample(other_sample_size, random_state=rng).copy()
                )

        test_df = pd.concat(new_df_parts)

        # Same binary encoding as the training target.
        test_df.loc[test_df.labels != user, "user"] = -1
        test_df.loc[test_df.labels == user, "user"] = 1

        test_df = test_df.drop("labels", axis=1)

        test_dataset = test_df.to_numpy().copy()
        X_test = test_dataset[:, :-1].copy()
        y_test = test_dataset[:, -1].copy()

        # Outer key is str(user), inner key is the raw ex_user value (kept as in
        # the original so existing lookups keep working).
        VALIDATION_SVC_BIG_DICT[str(user)][ex_user] = {}
        VALIDATION_SVC_BIG_DICT[str(user)][ex_user]["y_test"] = y_test.copy()
        VALIDATION_SVC_BIG_DICT[str(user)][ex_user]["y_predict"] = model.predict(X_test).copy()
        VALIDATION_SVC_BIG_DICT[str(user)][ex_user]["y_proba"] = model.predict_proba(X_test).copy()

        print("Valid user = ", user, ", Extracted user = ", ex_user, "accuracy = ",
              accuracy_score(VALIDATION_SVC_BIG_DICT[str(user)][ex_user]["y_test"],
                             VALIDATION_SVC_BIG_DICT[str(user)][ex_user]["y_predict"])
              )

        print("--------------------------------------------------------------------------------")

Valid User:  1
--------------------------------------------------------------------------------
Valid user =  1 , Extracted user =  2 accuracy =  0.6669867486076435
--------------------------------------------------------------------------------
Valid user =  1 , Extracted user =  3 accuracy =  0.6229114653351258
--------------------------------------------------------------------------------
Valid user =  1 , Extracted user =  4 accuracy =  0.67188400230459
--------------------------------------------------------------------------------
Valid user =  1 , Extracted user =  5 accuracy =  0.6650662569617822
--------------------------------------------------------------------------------
Valid user =  1 , Extracted user =  6 accuracy =  0.6670827731899366
--------------------------------------------------------------------------------
Valid User:  2
--------------------------------------------------------------------------------
Valid user =  2 , Extracted user =  1 accuracy =  0.66676810