#### Import

In [2]:
import pandas as pd
import pyshark as ps
import numpy as np
import time
import multiprocessing
from glob import glob
from datetime import timedelta
from tqdm.notebook import tqdm
import os

#Import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, accuracy_score, recall_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
import pickle
from sklearn.impute import SimpleImputer

from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report

#### Functions

In [3]:
def get_model():
    model = RandomForestClassifier(
        n_estimators=100,
        criterion='gini',
        # max_depth=5,
        min_samples_split=10,
        min_samples_leaf=2,
        min_weight_fraction_leaf=0.0,
        max_features='auto',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        # min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=-1,
        random_state=0,
        verbose=0,
        warm_start=False,
        class_weight='balanced'
    )
    return model

def filter_label(df, i):
    if i==0:
        df = df.loc[df['device'] != 'Smart Plug 2']
    elif i==1: 
        df = df.loc[(df['device'] == 'Amazon Echo') | (df['device'] == 'Belkin wemo motion sensor') | (df['device'] == 'Belkin Wemo switch') | (df['device'] == 'HP Printer') | 
        (df['device'] == 'Insteon Camera') | (df['device'] == 'Light Bulbs LiFX Smart Bulb') | (df['device'] == 'NEST Protect smoke alarm') | (df['device'] == 'Netatmo weather station') |
        (df['device'] == 'Samsung SmartCam') | (df['device'] == 'Smart Things') | (df['device'] == 'Triby Speaker') | (df['device'] == 'Withings Smart scale')]
        
    return df

def features_selection(df):
    df = df[['src_port', 'dst_port', 'src2dst_bytes', 'dst2src_bytes', 'src2dst_packets', 'dst2src_packets', 'udps.bidirectional_mean_piat', 'udps.bidirectional_std_piat', 'udps.bidirectional_variance_piat', 'udps.bidirectional_skew_piat', 'udps.bidirectional_kurtosis_piat', 
            'udps.bidirectional_mean_ps', 'udps.bidirectional_std_ps', 'udps.bidirectional_variance_ps', 'udps.bidirectional_skew_ps','udps.bidirectional_kurtosis_ps', 'bidirectional_duration_ms', 'protocol', 'requested_server_name', 'device']].copy()
    
    return df

def test_split(df):
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]
    convert_dict = {'requested_server_name': str}
    X = X.astype(convert_dict)
    X['requested_server_name'] = pd.factorize(X['requested_server_name'])[0]

    return X,y

In [3]:
# percent_missing = df_priv_1.isnull().sum() * 100 / len(df_priv_1)
# missing_value_df = pd.DataFrame({'column_name': df_priv_1.columns,
#                                  'percent_missing': percent_missing})
# missing_value_df.sort_values('percent_missing', inplace=True)

#### PRIVATE

##### Week 1

In [4]:
pickle_file = r'D:\Master\model\exp1-2\priv_1_time_x.sav'
clf_priv_1 = pickle.load(open(pickle_file, 'rb'))

In [5]:
df_priv_1 = pd.read_csv(r'D:\Master\datasets\exp1\Combined\Private\week_1.csv')
df_priv_1 = filter_label(df_priv_1, 0)
df_priv_1_cut = features_selection(df_priv_1)
X_priv_1, y_priv_1 = test_split(df_priv_1_cut)

  df_priv_1 = pd.read_csv(r'D:\Master\datasets\exp1\Combined\Private\week_1.csv')


In [6]:
feat_impts = [] 
for clf in clf_priv_1.estimators_:
    feat_impts.append(clf.feature_importances_)

# np.mean(feat_impts, axis=0)
importances = np.mean(feat_impts, axis=0)
# print(importances)

important_features = pd.Series(data=importances, index=X_priv_1.columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features)

dst_port                            0.175186
src2dst_bytes                       0.099664
src_port                            0.097000
requested_server_name               0.078038
udps.bidirectional_mean_ps          0.068098
udps.bidirectional_mean_piat        0.064811
bidirectional_duration_ms           0.051424
dst2src_bytes                       0.048034
udps.bidirectional_variance_piat    0.043182
udps.bidirectional_kurtosis_piat    0.039224
udps.bidirectional_std_piat         0.038515
udps.bidirectional_skew_piat        0.037931
udps.bidirectional_kurtosis_ps      0.032456
udps.bidirectional_variance_ps      0.031143
protocol                            0.029318
udps.bidirectional_std_ps           0.025320
udps.bidirectional_skew_ps          0.018221
dst2src_packets                     0.013634
src2dst_packets                     0.008799
dtype: float64


In [9]:
X_priv_1['requested_server_name']

0           0
1           0
2           1
3           0
4           0
         ... 
231421    159
231422    137
231423    139
231424      1
231425      1
Name: requested_server_name, Length: 231426, dtype: int64

In [10]:
rf_model = get_model()
start = time.time()
print('training....start time: ', start)
clf_priv_1 = rf_model.fit(X_priv_1, y_priv_1)
stop = time.time() 
print('time taken: ', stop-start)

pickle_file = r'D:\Master\model\exp1-2\priv_1_time_x.sav'
pickle.dump(clf_priv_1, open(pickle_file, 'wb'))

training....start time:  1672389991.7143617


  warn(


time taken:  5.722245216369629


In [7]:
y_pred_priv_1 = clf_priv_1.predict(X_priv_1)

In [8]:
print('accuracy: ', accuracy_score(y_priv_1, y_pred_priv_1))
print('f1: ', f1_score(y_priv_1, y_pred_priv_1, average='weighted'))
print('precision: ', precision_score(y_priv_1, y_pred_priv_1, average='weighted'))
print('recall: ', recall_score(y_priv_1, y_pred_priv_1, average='weighted'))

accuracy:  0.9982715857336687
f1:  0.9982726493820587
precision:  0.9982750201237695
recall:  0.9982715857336687


In [9]:
print(sorted(y_priv_1.unique()))
cm = confusion_matrix(y_priv_1, y_pred_priv_1, labels=sorted(y_priv_1.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[79331     0   144     8     0]
 [    0 26738     0    39     0]
 [   81     0 21082     1     0]
 [   13    80    33 75692     0]
 [    0     0     0     1 28183]]


In [10]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_1,y_priv_1)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.998816
Mi Air Purifier            0.997017
Mi Box 3                   0.991674
Mi Home Security Camera    0.999353
Smart Plug 1               1.000000
dtype: float64


##### Week 2

In [14]:
df_priv_2 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_2.csv')
df_priv_2 = filter_label(df_priv_2, 0)
df_priv_2_cut = features_selection(df_priv_2)
X_priv_2, y_priv_2 = test_split(df_priv_2_cut)

  df_priv_2 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_2.csv')


In [15]:
y_pred_priv_2 = clf_priv_1.predict(X_priv_2)

print('accuracy: ', accuracy_score(y_priv_2, y_pred_priv_2))
print('f1: ', f1_score(y_priv_2, y_pred_priv_2, average='weighted'))
print('precision: ', precision_score(y_priv_2, y_pred_priv_2, average='weighted'))
print('recall: ', recall_score(y_priv_2, y_pred_priv_2, average='weighted'))

accuracy:  0.9641943734015346
f1:  0.9638851607472578
precision:  0.9661962771196783
recall:  0.9641943734015346


In [16]:
print(sorted(y_priv_2.unique()))
cm = confusion_matrix(y_priv_2, y_pred_priv_2, labels=sorted(y_priv_2.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[108018      0    740     82      0]
 [  3178  30007   3159    262      0]
 [  2066      0  31447     91      2]
 [    49    143   1394 101204     66]
 [   100      0     45    103  38464]]


##### Week 3

In [17]:
df_priv_3 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_3.csv')
df_priv_3 = filter_label(df_priv_3, 0)
df_priv_3_cut = features_selection(df_priv_3)
X_priv_3, y_priv_3 = test_split(df_priv_3_cut)

  df_priv_3 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_3.csv')


In [18]:
y_pred_priv_3 = clf_priv_1.predict(X_priv_3)

print('accuracy: ', accuracy_score(y_priv_3, y_pred_priv_3))
print('f1: ', f1_score(y_priv_3, y_pred_priv_3, average='weighted'))
print('precision: ', precision_score(y_priv_3, y_pred_priv_3, average='weighted'))
print('recall: ', recall_score(y_priv_3, y_pred_priv_3, average='weighted'))

accuracy:  0.8871545378741069
f1:  0.8457691102154081
precision:  0.8980773669249261
recall:  0.8871545378741069


In [19]:
print(sorted(y_priv_3.unique()))
cm = confusion_matrix(y_priv_3, y_pred_priv_3, labels=sorted(y_priv_3.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[107508      0    649     23      0]
 [ 17249   1776  16983    970      0]
 [  1210      0  47508    121      0]
 [    43    195    141 100387      0]
 [     4      0      2     32  38593]]


##### Week 4

In [4]:
df_priv_4 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_4.csv')
df_priv_4 = filter_label(df_priv_4, 0)
df_priv_4_cut = features_selection(df_priv_4)
X_priv_4, y_priv_4 = test_split(df_priv_4_cut)

  df_priv_4 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_4.csv')


In [5]:
y_pred_priv_4 = clf_priv_1.predict(X_priv_4)

print('accuracy: ', accuracy_score(y_priv_4, y_pred_priv_4))
print('f1: ', f1_score(y_priv_4, y_pred_priv_4, average='weighted'))
print('precision: ', precision_score(y_priv_4, y_pred_priv_4, average='weighted'))
print('recall: ', recall_score(y_priv_4, y_pred_priv_4, average='weighted'))

accuracy:  0.7927731254476338
f1:  0.7885864675186397
precision:  0.8810857869442854
recall:  0.7927731254476338


In [23]:
print(sorted(y_priv_4.unique()))
cm = confusion_matrix(y_priv_4, y_pred_priv_4, labels=sorted(y_priv_4.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[68771     0 31024     8     0]
 [14150  6209 13826   725     0]
 [   45     0 24190    20     0]
 [   39   159   157 94932     0]
 [    0     0     7    22 36132]]


In [6]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_4,y_priv_4)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.828516
Mi Air Purifier            0.975031
Mi Box 3                   0.448298
Mi Home Security Camera    0.991902
Smart Plug 1               1.000000
dtype: float64


##### Week 5

In [7]:
df_priv_5 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_5.csv')
df_priv_5 = filter_label(df_priv_5, 0)
df_priv_5_cut = features_selection(df_priv_5)
X_priv_5, y_priv_5 = test_split(df_priv_5_cut)

  df_priv_5 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_5.csv')


In [13]:
y_pred_priv_5 = clf_priv_1.predict(X_priv_5)

print('accuracy: ', accuracy_score(y_priv_5, y_pred_priv_5))
print('f1: ', f1_score(y_priv_5, y_pred_priv_5, average='weighted'))
print('precision: ', precision_score(y_priv_5, y_pred_priv_5, average='weighted'))
print('recall: ', recall_score(y_priv_5, y_pred_priv_5, average='weighted'))

accuracy:  0.8924615200871863
f1:  0.9015307693554847
precision:  0.9444237110310703
recall:  0.8924615200871863


In [26]:
print(sorted(y_priv_5.unique()))
cm = confusion_matrix(y_priv_5, y_pred_priv_5, labels=sorted(y_priv_5.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[61768     0 27163     5     0]
 [    0 31912     5    35     0]
 [  124     0 25040   338     0]
 [  569   146   417 87032     0]
 [    0     0     2     9 33367]]


In [9]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_5,y_priv_5)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.988905
Mi Air Purifier            0.995446
Mi Box 3                   0.516142
Mi Home Security Camera    0.995573
Smart Plug 1               1.000000
dtype: float64


##### Week 6

In [10]:
df_priv_6 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_6.csv')
df_priv_6 = filter_label(df_priv_6, 0)
df_priv_6_cut = features_selection(df_priv_6)
X_priv_6, y_priv_6 = test_split(df_priv_6_cut)

  df_priv_6 = pd.read_csv(r'D:\Master\datasets\exp1\combined\Private\week_6.csv')


In [11]:
y_pred_priv_6 = clf_priv_1.predict(X_priv_6)

print('accuracy: ', accuracy_score(y_priv_6, y_pred_priv_6))
print('f1: ', f1_score(y_priv_6, y_pred_priv_6, average='weighted'))
print('precision: ', precision_score(y_priv_6, y_pred_priv_6, average='weighted'))
print('recall: ', recall_score(y_priv_6, y_pred_priv_6, average='weighted'))

accuracy:  0.7889407782415312
f1:  0.7922036536576897
precision:  0.8529480000410399
recall:  0.7889407782415312


In [29]:
print(sorted(y_priv_6.unique()))
cm = confusion_matrix(y_priv_6, y_pred_priv_6, labels=sorted(y_priv_6.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[60919     0 26724     5     0]
 [10314 11557  9400   529     0]
 [  738     1 29431   422     2]
 [ 8669   119    41 77980     0]
 [    1     0     4    21 33142]]


In [12]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_6,y_priv_6)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.755435
Mi Air Purifier            0.989723
Mi Box 3                   0.448643
Mi Home Security Camera    0.987626
Smart Plug 1               0.999940
dtype: float64


#### UNSW

##### Week 1

In [24]:
# pd.set_option('display.max_rows', None)
# missing_value_df

In [30]:
df_UNSW_1 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_1.csv')
df_UNSW_1 = filter_label(df_UNSW_1, 1)
df_UNSW_1_cut = features_selection(df_UNSW_1)
X_UNSW_1, y_UNSW_1 = test_split(df_UNSW_1_cut)

  df_UNSW_1 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_1.csv')


In [31]:
percent_missing = df_UNSW_1.isnull().sum() * 100 / len(df_UNSW_1)
missing_value_df = pd.DataFrame({'column_name': df_UNSW_1.columns,
                                 'percent_missing': percent_missing})
missing_value_df.sort_values('percent_missing', inplace=True)

In [32]:
missing_value_df.loc[missing_value_df['column_name'] == 'requested_server_name']

Unnamed: 0,column_name,percent_missing
requested_server_name,requested_server_name,83.158462


In [33]:
rf_model = get_model()
start = time.time()
print('training....start time: ', start)
clf_UNSW_1 = rf_model.fit(X_UNSW_1, y_UNSW_1)
stop = time.time() 
print('time taken: ', stop-start)

pickle_file = r'D:\Master\model\exp1-2\UNSW_1_time_x.sav'
pickle.dump(clf_UNSW_1, open(pickle_file, 'wb'))

training....start time:  1672390185.3567064


  warn(


time taken:  15.309503078460693


In [34]:
pickle_file = r'D:\Master\model\exp1-2\UNSW.sav'
model = pickle.load(open(pickle_file, 'rb'))

feat_impts = [] 
for clf in model.estimators_:
    feat_impts.append(clf.feature_importances_)

# np.mean(feat_impts, axis=0)
importances = np.mean(feat_impts, axis=0)
# print(importances)

important_features = pd.Series(data=importances, index=X_UNSW_1.columns)
important_features.sort_values(ascending=False,inplace=True)
print(important_features)

src_port                            0.141922
src2dst_bytes                       0.111639
dst_port                            0.098087
udps.bidirectional_std_piat         0.070470
udps.bidirectional_mean_ps          0.068256
udps.bidirectional_variance_piat    0.065977
udps.bidirectional_mean_piat        0.064407
bidirectional_duration_ms           0.058388
udps.bidirectional_std_ps           0.041959
udps.bidirectional_variance_ps      0.041059
dst2src_bytes                       0.035733
protocol                            0.032650
udps.bidirectional_skew_ps          0.031337
udps.bidirectional_kurtosis_piat    0.030743
dst2src_packets                     0.027821
requested_server_name               0.023013
src2dst_packets                     0.022968
udps.bidirectional_kurtosis_ps      0.019517
udps.bidirectional_skew_piat        0.014055
dtype: float64


In [35]:
y_pred_UNSW_1 = clf_UNSW_1.predict(X_UNSW_1)

In [36]:
print('accuracy: ', accuracy_score(y_UNSW_1, y_pred_UNSW_1))
print('f1: ', f1_score(y_UNSW_1, y_pred_UNSW_1, average='weighted'))
print('precision: ', precision_score(y_UNSW_1, y_pred_UNSW_1, average='weighted'))
print('recall: ', recall_score(y_UNSW_1, y_pred_UNSW_1, average='weighted'))

accuracy:  0.8244227353463588
f1:  0.8276778480920322
precision:  0.8668384075684372
recall:  0.8244227353463588


In [37]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_UNSW_1,y_UNSW_1)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Amazon Echo                    0.999421
Belkin Wemo switch             0.546219
Belkin wemo motion sensor      0.630656
HP Printer                     0.498279
Insteon Camera                 0.999879
Light Bulbs LiFX Smart Bulb    0.998037
NEST Protect smoke alarm       0.912424
Netatmo weather station        0.999313
Samsung SmartCam               0.999477
Smart Things                   0.999824
Triby Speaker                  0.999223
Withings Smart scale           0.956716
dtype: float64


In [38]:
print(sorted(y_UNSW_1.unique()))
cm = confusion_matrix(y_UNSW_1, y_pred_UNSW_1, labels=sorted(y_UNSW_1.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[84518     0     0     1     3     0     0     0     1     0     3     0]
 [    1 33374  1085 10514     0     0     0     0     1     1     1     0]
 [    0 27703 24658 10910     1     2     0     0    28     2     0     0]
 [    0     0     0 37647     0     0     2     0     0     0     0     1]
 [    4     1     0  1052 99169     0     2     3     3     0     2     2]
 [    0     0     0     0     0  4068     0     0     0     0     0     0]
 [    0     0     0     1     0     0    78     0     0     0     0     6]
 [    0     0     0     0     0     0   896 10185     0     0     1  1923]
 [   43    22 13356 15428     8     6     4     4 64965     0     4     0]
 [    0     0     0     0     0     0     0     0     0 17007  

##### Week 2

In [39]:
df_UNSW_2 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_2.csv')
df_UNSW_2 = filter_label(df_UNSW_2, 1)
df_UNSW_2_cut = features_selection(df_UNSW_2)
X_UNSW_2, y_UNSW_2 = test_split(df_UNSW_2_cut)

  df_UNSW_2 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_2.csv')


In [40]:
y_pred_UNSW_2 = clf_UNSW_1.predict(X_UNSW_2)

print('accuracy: ', accuracy_score(y_UNSW_2, y_pred_UNSW_2))
print('f1: ', f1_score(y_UNSW_2, y_pred_UNSW_2, average='weighted'))
print('precision: ', precision_score(y_UNSW_2, y_pred_UNSW_2, average='weighted'))
print('recall: ', recall_score(y_UNSW_2, y_pred_UNSW_2, average='weighted'))

accuracy:  0.8097784839042328
f1:  0.8117054087940014
precision:  0.8469819093286808
recall:  0.8097784839042328


In [41]:
print(sorted(y_UNSW_2.unique()))
cm = confusion_matrix(y_UNSW_2, y_pred_UNSW_2, labels=sorted(y_UNSW_2.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 97688      0      1      3     12     16      0      3      6      0
       6      0]
 [    12  33374   6524  12011     24      0      0      0     33     14
      10      0]
 [    11  35876  21458  11957     12      0      1      0     37      4
       6      0]
 [    98      0      6  41231    120      2     16      2     76      7
      76      1]
 [     9      0      1   4789 135089      0      0      0     32      0
       0      1]
 [    28      0      0      1      1  17652      0      2      8      0
       1      0]
 [     0      0      0      3      0      0     90     12      2      0
       0      8]
 [     1      0      0      0      1      0   1481   9171      0      0
       3   1741]
 [    80     13  12686  14

##### Week 3

In [42]:
df_UNSW_3 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_3.csv')
df_UNSW_3 = filter_label(df_UNSW_3, 1)
df_UNSW_3_cut = features_selection(df_UNSW_3)
X_UNSW_3, y_UNSW_3 = test_split(df_UNSW_3_cut)

  df_UNSW_3 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_3.csv')


In [43]:
y_pred_UNSW_3 = clf_UNSW_1.predict(X_UNSW_3)

print('accuracy: ', accuracy_score(y_UNSW_3, y_pred_UNSW_3))
print('f1: ', f1_score(y_UNSW_3, y_pred_UNSW_3, average='weighted'))
print('precision: ', precision_score(y_UNSW_3, y_pred_UNSW_3, average='weighted'))
print('recall: ', recall_score(y_UNSW_3, y_pred_UNSW_3, average='weighted'))

accuracy:  0.7990065117044288
f1:  0.802380169894106
precision:  0.8493915717548468
recall:  0.7990065117044288


In [44]:
print(sorted(y_UNSW_3.unique()))
cm = confusion_matrix(y_UNSW_3, y_pred_UNSW_3, labels=sorted(y_UNSW_3.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 92722      0      0      2      4      8      0      0      2      0
       4      0]
 [     4  31424   4096  17067      4      0      0      1     38      1
       1      0]
 [     5  32235  22471  17110      3      0      0      3     43      0
       1      0]
 [    49      0      2  43255     23      1      3      0     29      0
       6      0]
 [     0      0      0   4547 125096      0      0      0     10      0
       0      0]
 [     8      0      0      0      2  16648      0      0      4      0
       3      0]
 [     0      0      0      4      0      0     57      7      1      0
       0      5]
 [     0      0      0      0      0      0   1448   8973      0      0
       8   1720]
 [    49      1  10863  18

##### Week 4

In [45]:
df_UNSW_4 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_4.csv')
df_UNSW_4 = filter_label(df_UNSW_4, 1)
df_UNSW_4_cut = features_selection(df_UNSW_4)
X_UNSW_4, y_UNSW_4 = test_split(df_UNSW_4_cut)

  df_UNSW_4 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_4.csv')


In [46]:
y_pred_UNSW_4 = clf_UNSW_1.predict(X_UNSW_4)

print('accuracy: ', accuracy_score(y_UNSW_4, y_pred_UNSW_4))
print('f1: ', f1_score(y_UNSW_4, y_pred_UNSW_4, average='weighted'))
print('precision: ', precision_score(y_UNSW_4, y_pred_UNSW_4, average='weighted'))
print('recall: ', recall_score(y_UNSW_4, y_pred_UNSW_4, average='weighted'))

accuracy:  0.7876974336022523
f1:  0.7896980249797888
precision:  0.8418116845077106
recall:  0.7876974336022523


In [47]:
print(sorted(y_UNSW_4.unique()))
cm = confusion_matrix(y_UNSW_4, y_pred_UNSW_4, labels=sorted(y_UNSW_4.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 86885      0      1      0     25      4      0      1      9      0
      27      0]
 [     7  32974   3988  18287      4      0      0      2     14      5
       0      0]
 [     6  34159  30535  18260     18      0      0      2     22      2
       2      0]
 [     6      0      1  45367      3      1      1      0     25      0
      18      2]
 [    37      0      1   4821 129664      0      0      0     19      0
       2      0]
 [   141      0      0      0      2  17588      0      0      9      0
       2      0]
 [     0      0      0      3      0      0     86     10      1      0
       0      6]
 [     0      0      0      0      0      0    144   1133      0      0
       3    217]
 [    43      1  11486  19

##### Week 5

In [48]:
df_UNSW_5 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_5.csv')
df_UNSW_5 = filter_label(df_UNSW_5, 1)
df_UNSW_5_cut = features_selection(df_UNSW_5)
X_UNSW_5, y_UNSW_5 = test_split(df_UNSW_5_cut)

  df_UNSW_5 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_5.csv')


In [67]:
y_pred_UNSW_5 = clf_UNSW_1.predict(X_UNSW_5)

print('accuracy: ', accuracy_score(y_UNSW_5, y_pred_UNSW_5))
print('f1: ', f1_score(y_UNSW_5, y_pred_UNSW_5, average='weighted'))
print('precision: ', precision_score(y_UNSW_5, y_pred_UNSW_5, average='weighted'))
print('recall: ', recall_score(y_UNSW_5, y_pred_UNSW_5, average='weighted'))

accuracy:  0.8269095959695225
f1:  0.8326850926876856
precision:  0.8745980318977186
recall:  0.8269095959695225


In [50]:
print(sorted(y_UNSW_5.unique()))
cm = confusion_matrix(y_UNSW_5, y_pred_UNSW_5, labels=sorted(y_UNSW_5.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 99463      3      0    130     16     23      0      1    120      0
     111      0]
 [     5  17797   4900  18267      5      1      1      0     36     48
       2      0]
 [     5  13500  13886   7720      3      1      1      0      9      2
       1      0]
 [     4      1     19  45311     10      0      2      0      9      0
       5      1]
 [    54      0      3   4818 131979      0      0      0     36      0
      77      0]
 [    11      0      0      9      0  11763      0      0      4      0
      12      0]
 [     0      0      0      3      0      0     69      7      0      0
       0      7]
 [     0      0      0      0      0      0    542   3825      0      0
       3    700]
 [    68      0   4875  19

In [51]:
print(np.unique(y_pred_UNSW_5))
print(len(np.unique(y_pred_UNSW_5)))

labels_15=['Amazon Echo','Belkin Wemo switch','Belkin wemo motion sensor','Dropcam','HP Printer',
'Insteon Camera','Light Bulbs LiFX Smart Bulb','NEST Protect smoke alarm','Netatmo weather station','PIX-STAR Photo-frame','Samsung SmartCam','Smart Things','Triby Speaker','iHome Power Plug', 'Withings Smart scale']

['Amazon Echo' 'Belkin Wemo switch' 'Belkin wemo motion sensor'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale']
12


In [52]:
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report
labels_16= ['Amazon Echo','Belkin wemo motion sensor','Belkin Wemo switch','Dropcam','HP Printer','iHome Power Plug','Insteon Camera','Light Bulbs LiFX Smart Bulb','NEST Protect smoke alarm','Netatmo weather station','Netatmo Welcome','PIX-STAR Photo-frame','Samsung SmartCam','Smart Things','Triby Speaker','Withings Smart scale']
# multilabel_confusion_matrix(y_UNSW_5, y_pred_UNSW_5,labels=labels_15)
print(classification_report(y_UNSW_5, y_pred_UNSW_5,labels=sorted(y_UNSW_5.unique())))

                             precision    recall  f1-score   support

                Amazon Echo       1.00      1.00      1.00     99867
         Belkin Wemo switch       0.57      0.43      0.49     41062
  Belkin wemo motion sensor       0.59      0.40      0.47     35128
                 HP Printer       0.48      1.00      0.64     45362
             Insteon Camera       1.00      0.96      0.98    136967
Light Bulbs LiFX Smart Bulb       1.00      1.00      1.00     11799
   NEST Protect smoke alarm       0.11      0.80      0.20        86
    Netatmo weather station       1.00      0.75      0.86      5070
           Samsung SmartCam       0.99      0.64      0.78     66742
               Smart Things       0.99      0.71      0.83     12834
              Triby Speaker       0.93      0.91      0.92      2921
       Withings Smart scale       0.01      0.84      0.03        73

                   accuracy                           0.83    457911
                  macro avg     

In [53]:
print(sorted(y_UNSW_5.unique()))
cm = confusion_matrix(y_UNSW_5, y_pred_UNSW_5, labels=sorted(y_UNSW_5.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 99463      3      0    130     16     23      0      1    120      0
     111      0]
 [     5  17797   4900  18267      5      1      1      0     36     48
       2      0]
 [     5  13500  13886   7720      3      1      1      0      9      2
       1      0]
 [     4      1     19  45311     10      0      2      0      9      0
       5      1]
 [    54      0      3   4818 131979      0      0      0     36      0
      77      0]
 [    11      0      0      9      0  11763      0      0      4      0
      12      0]
 [     0      0      0      3      0      0     69      7      0      0
       0      7]
 [     0      0      0      0      0      0    542   3825      0      0
       3    700]
 [    68      0   4875  19

##### Week 6

In [54]:
df_UNSW_6 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_6.csv')
df_UNSW_6 = filter_label(df_UNSW_6, 1)
df_UNSW_6_cut = features_selection(df_UNSW_6)
X_UNSW_6, y_UNSW_6 = test_split(df_UNSW_6_cut)

  df_UNSW_6 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_6.csv')


In [55]:
y_pred_UNSW_6 = clf_UNSW_1.predict(X_UNSW_6)

print('accuracy: ', accuracy_score(y_UNSW_6, y_pred_UNSW_6))
print('f1: ', f1_score(y_UNSW_6, y_pred_UNSW_6, average='weighted'))
print('precision: ', precision_score(y_UNSW_6, y_pred_UNSW_6, average='weighted'))
print('recall: ', recall_score(y_UNSW_6, y_pred_UNSW_6, average='weighted'))

accuracy:  0.7801485720406087
f1:  0.7837019574932542
precision:  0.8354414110010167
recall:  0.7801485720406087


In [56]:
print(sorted(y_UNSW_6.unique()))
cm = confusion_matrix(y_UNSW_6, y_pred_UNSW_6, labels=sorted(y_UNSW_6.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 84715      0      0     17     11     12      0      0     33      1
      12      0]
 [     5  30529   6386  17568      5      2      4      0      1      9
       0      0]
 [     4  34738  29470  17685      5      4      3      0      8      0
       0      0]
 [     3      0     17  44210      6      5      8      0      4      0
      16      1]
 [   277      0      2   4701 128809      0      0      0     15      0
       4      0]
 [    20      0      1      2      2  16386      2      0      8      0
      10      0]
 [     0      0      0      2      0      0     79      9      0      0
       0      7]
 [     2      0      0      0      0      0   1402   8952      0      0
      14   1649]
 [    40      1  11488  18

In [57]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_UNSW_6,y_UNSW_6)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Amazon Echo                    0.995616
Belkin Wemo switch             0.532228
Belkin wemo motion sensor      0.622203
HP Printer                     0.429962
Insteon Camera                 0.999690
Light Bulbs LiFX Smart Bulb    0.997869
NEST Protect smoke alarm       0.930943
Netatmo weather station        0.998550
Samsung SmartCam               0.997890
Smart Things                   0.999077
Triby Speaker                  0.996176
Withings Smart scale           0.948792
dtype: float64


##### Week 7

In [58]:
df_UNSW_7 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_7.csv')
df_UNSW_7 = filter_label(df_UNSW_7, 1)
df_UNSW_7_cut = features_selection(df_UNSW_7)
X_UNSW_7, y_UNSW_7 = test_split(df_UNSW_7_cut)

  df_UNSW_7 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_7.csv')


In [59]:
y_pred_UNSW_7 = clf_UNSW_1.predict(X_UNSW_7)

print('accuracy: ', accuracy_score(y_UNSW_7, y_pred_UNSW_7))
print('f1: ', f1_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))
print('precision: ', precision_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))
print('recall: ', recall_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))

accuracy:  0.7846593056579957
f1:  0.7886625761818908
precision:  0.837094260172052
recall:  0.7846593056579957


In [60]:
labels_15=['Amazon Echo','Belkin Wemo switch','Belkin wemo motion sensor','HP Printer','Insteon Camera','Light Bulbs LiFX Smart Bulb',
'NEST Protect smoke alarm','Netatmo weather station','Samsung SmartCam','Smart Things','Triby Speaker', 'Withings Smart scale']

In [61]:
print(sorted(y_UNSW_7.unique()))
cm = confusion_matrix(y_UNSW_7, y_pred_UNSW_7, labels=sorted(y_UNSW_7.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 85159      0      0      8     44     37      0      0     75      0
       6      0]
 [    11  29233   8207  17999      5      0      0      0     25      7
       2      0]
 [     3  33886  31171  18334      9      0      0      0     35     11
       0      0]
 [    13      0      5  45087      7      1      2      0     42      0
       0      2]
 [     9      0      0   4822 132490      0      0      0      0      0
       1      0]
 [    12      0      0      0      1  17580      0      0      7      0
       1      0]
 [     0      0      0      2      0      0     79      9      0      0
       0      5]
 [     1      0      0      0      1      0   1039   9934      0      0
      10   1892]
 [    57      1  11503  19

##### Week 8

In [62]:
df_UNSW_8 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_8.csv')
df_UNSW_8 = filter_label(df_UNSW_8, 1)
df_UNSW_8_cut = features_selection(df_UNSW_8)
X_UNSW_8, y_UNSW_8 = test_split(df_UNSW_8_cut)

  df_UNSW_8 = pd.read_csv(r'D:\Master\datasets\exp1-2\combined\UNSW\week_8.csv')


In [63]:
y_pred_UNSW_8 = clf_UNSW_1.predict(X_UNSW_8)

print('accuracy: ', accuracy_score(y_UNSW_8, y_pred_UNSW_8))
print('f1: ', f1_score(y_UNSW_8, y_pred_UNSW_8, average='weighted'))
print('precision: ', precision_score(y_UNSW_8, y_pred_UNSW_8, average='weighted'))
print('recall: ', recall_score(y_UNSW_8, y_pred_UNSW_8, average='weighted'))

accuracy:  0.7952513393612084
f1:  0.8014496372189709
precision:  0.8499874337097802
recall:  0.7952513393612084


In [64]:
print(sorted(y_UNSW_8.unique()))
cm = confusion_matrix(y_UNSW_8, y_pred_UNSW_8, labels=sorted(y_UNSW_8.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[ 77400      0     16    266    560     25      9      2    677      0
     196      0]
 [    10  32769   4270  17934     12      0      0      1     82      7
      12      0]
 [    12  31388  46564  18244     23      0      2      0    336    454
      10      0]
 [    80      0      0  48098     61      0     14      0    629      0
      31      0]
 [     2      0      1   4816 133667      0      0      0      0      0
       1      0]
 [    16      0      0     11      1  18100      1      4      3      0
      21      0]
 [     0      0      0      2      0      0     72      6      0      0
       0      7]
 [     0      0      0      1      0      0    884  10303      0      0
       0   2042]
 [    65      2  11435  19

In [65]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_UNSW_8,y_UNSW_8)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Amazon Echo                    0.997513
Belkin Wemo switch             0.510747
Belkin wemo motion sensor      0.747584
HP Printer                     0.443553
Insteon Camera                 0.994924
Light Bulbs LiFX Smart Bulb    0.997905
NEST Protect smoke alarm       0.896552
Netatmo weather station        0.998353
Samsung SmartCam               0.959835
Smart Things                   0.977202
Triby Speaker                  0.980468
Withings Smart scale           0.964573
dtype: float64


In [66]:
print(classification_report(y_UNSW_8, y_pred_UNSW_8,labels=sorted(y_UNSW_8.unique())))

                             precision    recall  f1-score   support

                Amazon Echo       1.00      0.98      0.99     79151
         Belkin Wemo switch       0.51      0.59      0.55     55097
  Belkin wemo motion sensor       0.75      0.48      0.58     97033
                 HP Printer       0.44      0.98      0.61     48913
             Insteon Camera       0.99      0.97      0.98    138487
Light Bulbs LiFX Smart Bulb       1.00      1.00      1.00     18157
   NEST Protect smoke alarm       0.07      0.83      0.13        87
    Netatmo weather station       1.00      0.78      0.87     13230
           Samsung SmartCam       0.96      0.57      0.72     71909
               Smart Things       0.98      1.00      0.99     19763
              Triby Speaker       0.98      1.00      0.99     13965
       Withings Smart scale       0.03      0.97      0.06        70

                   accuracy                           0.80    555862
                  macro avg     