#### Import

In [2]:
import pandas as pd
import pyshark as ps
import numpy as np
import time
import multiprocessing
from glob import glob
from datetime import timedelta
from tqdm.notebook import tqdm
import os

#Import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, accuracy_score, recall_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
import pickle
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import r_regression

#### Functions

In [3]:
def get_model():
    model = RandomForestClassifier(
        n_estimators=100,
        criterion='gini',
        # max_depth=5,
        min_samples_split=10,
        min_samples_leaf=2,
        min_weight_fraction_leaf=0.0,
        max_features='auto',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        # min_impurity_split=None,
        bootstrap=True,
        oob_score=False,
        n_jobs=-1,
        random_state=0,
        verbose=0,
        warm_start=False,
        class_weight='balanced'
    )
    return model

def filter_label(df, i):
    if i==0:
        df = df.loc[df['label'] != 'Smart Plug 2']
    elif i==1: 
        df = df.loc[(df['label'] == 'Amazon Echo') | (df['label'] == 'Belkin wemo motion sensor') | (df['label'] == 'Belkin Wemo switch') | (df['label'] == 'HP Printer') | 
        (df['label'] == 'Insteon Camera') | (df['label'] == 'Light Bulbs LiFX Smart Bulb') | (df['label'] == 'NEST Protect smoke alarm') | (df['label'] == 'Netatmo weather station') |
        (df['label'] == 'Samsung SmartCam') | (df['label'] == 'Smart Things') | (df['label'] == 'Triby Speaker') | (df['label'] == 'Withings Smart scale')]

    df = df.ffill().bfill()
    return df

def features_selection(df):
    df = df[['avg_payload_1_second', 'fma5_sum', 'fma5_mean', 'fma5_variance', 'fma5_std','range_5', 'max_payload', 'label']].copy()
    df = df.fillna(method='ffill')
    
    return df

def features_full(df):
    df = df.copy()
    df = df.fillna(method='ffill')
    
    return df

def test_split(df):
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    return X,y

#### PRIVATE

##### Week 1

In [None]:
pickle_file = r'D:\Master\model\private_1_time.sav'
clf_priv = pickle.load(open(pickle_file, 'rb'))

In [5]:
df_priv_1 = pd.read_csv(r'D:\Master\datasets\main-exp\combined\Private\combined_1.csv')
df_priv_1 = filter_label(df_priv_1, 0)
df_priv_1_cut = features_selection(df_priv_1)
X_priv_1, y_priv_1 = test_split(df_priv_1_cut)

In [6]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
y=le.fit_transform(y_priv_1)
y

array([0, 0, 0, ..., 4, 4, 4])

In [53]:
X_priv_1

Unnamed: 0,avg_payload_1_second,fma5_sum,fma5_mean,fma5_variance,fma5_std,range_5,max_payload
0,30.666667,184.0,36.800000,423.200000,20.571825,46.0,1460
1,23.000000,138.0,27.600000,634.800000,25.195238,46.0,1460
2,0.000000,140.0,28.000000,654.000000,25.573424,48.0,1460
3,0.000000,173.0,34.600000,409.800000,20.243517,48.0,1460
4,23.000000,173.0,34.600000,409.800000,20.243517,48.0,1460
...,...,...,...,...,...,...,...
2510573,0.000000,821.0,164.200000,2832.200000,53.218418,119.0,405
2510574,0.000000,633.0,158.250000,3540.250000,59.500000,119.0,405
2510575,69.000000,445.0,148.333333,4720.333333,68.704682,119.0,405
2510576,0.000000,376.0,188.000000,0.000000,0.000000,0.0,405


In [56]:
regression = r_regression(X_priv_1, y)

In [58]:
regression

array([-0.10889866, -0.09048064, -0.09047919, -0.08384815, -0.08440375,
       -0.08955199, -0.41554773])

In [60]:
df_priv_1_cut_2 = features_full(df_priv_1)
X_priv_1_2, y_priv_1_2 = test_split(df_priv_1_cut_2)
X_priv_1_2.drop(columns=['IAT_dt'], inplace=True)
regression2 = r_regression(X_priv_1_2, y)

In [62]:
regression2

array([ 0.33386382, -0.08435976, -0.00230579, -0.09048064, -0.09047919,
       -0.08384815, -0.08440375, -0.09319095, -0.09318802, -0.1196786 ,
       -0.11808422, -0.09575241, -0.0957467 , -0.15615495, -0.16241966,
       -0.09784563, -0.09783839, -0.18997117, -0.21708066, -0.09986228,
       -0.09987003, -0.21866506, -0.28310784, -0.41554773, -0.16031851,
       -0.08955199, -0.129768  , -0.18642664, -0.27463348, -0.41938691,
        0.0686255 , -0.15739504, -0.14884152, -0.21672271, -0.12704045,
        0.0686255 , -0.15739504, -0.14884152, -0.21672271, -0.12704045,
       -0.03789975, -0.31626469, -0.30909709, -0.24539035, -0.26663051,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.22999457, -0.10889866, -0.09752004, -0.09280505])

In [7]:
rf_model = get_model()
start = time.time()
print('training....start time: ', start)
clf_priv = OneVsRestClassifier(rf_model, n_jobs=-1).fit(X_priv_1, y_priv_1)
stop = time.time() 
print('time taken: ', stop-start)

pickle_file = r'D:\Master\model\private_1_time.sav'
pickle.dump(clf_priv, open(pickle_file, 'wb'))

training....start time:  1672729095.4719448
time taken:  274.19202733039856


In [8]:
y_pred_priv_1 = clf_priv.predict(X_priv_1)

In [9]:
print('accuracy: ', accuracy_score(y_priv_1, y_pred_priv_1))
print('f1: ', f1_score(y_priv_1, y_pred_priv_1, average='weighted'))
print('precision: ', precision_score(y_priv_1, y_pred_priv_1, average='weighted'))
print('recall: ', recall_score(y_priv_1, y_pred_priv_1, average='weighted'))

accuracy:  0.9950202702325919
f1:  0.9950351139396658
precision:  0.995078365746062
recall:  0.9950202702325919


In [66]:
print(sorted(y_priv_1.unique()))
cm = confusion_matrix(y_priv_1, y_pred_priv_1, labels=sorted(y_priv_1.unique()))
print(cm)

['Alexa Echo Dot', 'Mi Air Purifier', 'Mi Box 3', 'Mi Home Security Camera', 'Smart Plug 1']
[[ 320665       0    1966     278       0]
 [      0   56289       0       0       0]
 [   7621       0 1417569    1390       0]
 [    750       0     497  524859       0]
 [      0       0       0       0  178694]]


In [14]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_1,y_priv_1)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.974559
Mi Air Purifier            1.000000
Mi Box 3                   0.998266
Mi Home Security Camera    0.996832
Smart Plug 1               1.000000
dtype: float64


In [5]:
# start = time.time()
# print('training....start time: ', start)
# clf_priv = rf_model.fit(X_priv_1, y_priv_1)
# stop = time.time() 
# print('time taken: ', stop-start)

training....start time:  1669102355.0590725


  warn(


time taken:  71.29257845878601


##### Week 2

In [15]:
df_priv_2 = pd.read_csv(r'D:\Master\datasets\main-exp\combined\Private\combined_2.csv')
df_priv_2 = filter_label(df_priv_2, 0)
df_priv_2_cut = features_selection(df_priv_2)
X_priv_2, y_priv_2 = test_split(df_priv_2_cut)

In [16]:
y_pred_priv_2 = clf_priv.predict(X_priv_2)

print('accuracy: ', accuracy_score(y_priv_2, y_pred_priv_2))
print('f1: ', f1_score(y_priv_2, y_pred_priv_2, average='weighted'))
print('precision: ', precision_score(y_priv_2, y_pred_priv_2, average='weighted'))
print('recall: ', recall_score(y_priv_2, y_pred_priv_2, average='weighted'))

accuracy:  0.959575199577924
f1:  0.9586834269914163
precision:  0.9616595154420772
recall:  0.959575199577924


In [70]:
print(y_priv_2.unique())
cm = confusion_matrix(y_priv_2, y_pred_priv_2)
print(cm)

['Alexa Echo Dot' 'Mi Air Purifier' 'Mi Box 3' 'Mi Home Security Camera'
 'Smart Plug 1']
[[ 410805       0   20215    2616       0]
 [      0   82923       0       0       0]
 [  34493       0 2242573    8179       0]
 [   3403       0    6471  615352       0]
 [  14420       0   10449   49318  198591]]


In [17]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_2,y_priv_2)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.887036
Mi Air Purifier            1.000000
Mi Box 3                   0.983711
Mi Home Security Camera    0.911005
Smart Plug 1               1.000000
dtype: float64


##### Week 3

In [19]:
df_priv_3 = pd.read_csv(r'D:\Master\datasets\main-exp\combined\Private\combined_3.csv')
df_priv_3 = filter_label(df_priv_3, 0)
df_priv_3_cut = features_selection(df_priv_3)
X_priv_3, y_priv_3 = test_split(df_priv_3_cut)

In [20]:
y_pred_priv_3 = clf_priv.predict(X_priv_3)

print('accuracy: ', accuracy_score(y_priv_3, y_pred_priv_3))
print('f1: ', f1_score(y_priv_3, y_pred_priv_3, average='weighted'))
print('precision: ', precision_score(y_priv_3, y_pred_priv_3, average='weighted'))
print('recall: ', recall_score(y_priv_3, y_pred_priv_3, average='weighted'))

accuracy:  0.9781055563129963
f1:  0.9783015279616893
precision:  0.9787169025931833
recall:  0.9781055563129963


In [73]:
print(y_priv_3.unique())
cm = confusion_matrix(y_priv_3, y_pred_priv_3)
print(cm)

['Alexa Echo Dot' 'Mi Air Purifier' 'Mi Box 3' 'Mi Home Security Camera'
 'Smart Plug 1']
[[ 347526       0   18748    2385       0]
 [      0  114512       0       0       0]
 [  38809       0 2144635   11739       0]
 [   2943       0    2137  571978       0]
 [      0       0       0       0  250546]]


In [21]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_3,y_priv_3)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.892745
Mi Air Purifier            1.000000
Mi Box 3                   0.990356
Mi Home Security Camera    0.975902
Smart Plug 1               1.000000
dtype: float64


##### Week 4

In [22]:
df_priv_4 = pd.read_csv(r'D:\Master\datasets\main-exp\combined\Private\combined_4.csv')
df_priv_4 = filter_label(df_priv_4, 0)
df_priv_4_cut = features_selection(df_priv_4)
X_priv_4, y_priv_4 = test_split(df_priv_4_cut)

In [23]:
y_pred_priv_4 = clf_priv.predict(X_priv_4)

print('accuracy: ', accuracy_score(y_priv_4, y_pred_priv_4))
print('f1: ', f1_score(y_priv_4, y_pred_priv_4, average='weighted'))
print('precision: ', precision_score(y_priv_4, y_pred_priv_4, average='weighted'))
print('recall: ', recall_score(y_priv_4, y_pred_priv_4, average='weighted'))

accuracy:  0.9673359568864814
f1:  0.9672490090693512
precision:  0.9673837866257929
recall:  0.9673359568864814


In [76]:
print(y_priv_4.unique())
cm = confusion_matrix(y_priv_4, y_pred_priv_4)
print(cm)

['Alexa Echo Dot' 'Mi Air Purifier' 'Mi Box 3' 'Mi Home Security Camera'
 'Smart Plug 1']
[[365443      0  17404   1834      0]
 [     0 111428      0      0      0]
 [ 26872      0 382424   6953      0]
 [  1922      0   1722 521561      0]
 [     0      0      0      0 298505]]


In [24]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_4,y_priv_4)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.926963
Mi Air Purifier            1.000000
Mi Box 3                   0.952370
Mi Home Security Camera    0.983432
Smart Plug 1               1.000000
dtype: float64


##### Week 5

In [25]:
df_priv_5 = pd.read_csv(r'D:\Master\datasets\main-exp\combined\Private\combined_5.csv')
df_priv_5 = filter_label(df_priv_5, 0)
df_priv_5_cut = features_selection(df_priv_5)
X_priv_5, y_priv_5 = test_split(df_priv_5_cut)

In [26]:
y_pred_priv_5 = clf_priv.predict(X_priv_5)

print('accuracy: ', accuracy_score(y_priv_5, y_pred_priv_5))
print('f1: ', f1_score(y_priv_5, y_pred_priv_5, average='weighted'))
print('precision: ', precision_score(y_priv_5, y_pred_priv_5, average='weighted'))
print('recall: ', recall_score(y_priv_5, y_pred_priv_5, average='weighted'))

accuracy:  0.9705760518385752
f1:  0.9707353781466348
precision:  0.9713660705752366
recall:  0.9705760518385752


In [79]:
print(y_priv_5.unique())
cm = confusion_matrix(y_priv_5, y_pred_priv_5)
print(cm)

['Alexa Echo Dot' 'Mi Air Purifier' 'Mi Box 3' 'Mi Home Security Camera'
 'Smart Plug 1']
[[308859      0  12465   1770      0]
 [     0  69207      0      0      0]
 [ 31612      0 759546   8050      0]
 [  2282      0   2280 506176      0]
 [     0      0      0      0 284536]]


In [27]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_5,y_priv_5)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.901112
Mi Air Purifier            1.000000
Mi Box 3                   0.980957
Mi Home Security Camera    0.980969
Smart Plug 1               1.000000
dtype: float64


##### Week 6

In [28]:
df_priv_6 = pd.read_csv(r'D:\Master\datasets\main-exp\combined\Private\combined_6.csv')
df_priv_6 = filter_label(df_priv_6, 0)
df_priv_6_cut = features_selection(df_priv_6)
X_priv_6, y_priv_6 = test_split(df_priv_6_cut)

In [29]:
y_pred_priv_6 = clf_priv.predict(X_priv_6)

print('accuracy: ', accuracy_score(y_priv_6, y_pred_priv_6))
print('f1: ', f1_score(y_priv_6, y_pred_priv_6, average='weighted'))
print('precision: ', precision_score(y_priv_6, y_pred_priv_6, average='weighted'))
print('recall: ', recall_score(y_priv_6, y_pred_priv_6, average='weighted'))

accuracy:  0.8968551786741732
f1:  0.8956480695219535
precision:  0.9157073979247665
recall:  0.8968551786741732


In [31]:
print(y_priv_6.unique())
cm = confusion_matrix(y_priv_6, y_pred_priv_6)
print(cm)

['Alexa Echo Dot' 'Mi Air Purifier' 'Mi Box 3' 'Mi Home Security Camera'
 'Smart Plug 1']
[[302731      0  11382   2173      0]
 [     0  87266      0      0      0]
 [ 35050      0 496600  13915      0]
 [  2430      0   2041 474913      0]
 [ 22449  61137  20848   8559 203470]]


In [30]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_priv_6,y_priv_6)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Alexa Echo Dot             0.834752
Mi Air Purifier            0.588034
Mi Box 3                   0.935444
Mi Home Security Camera    0.950663
Smart Plug 1               1.000000
dtype: float64


#### UNSW

##### Week 1

In [9]:
df_UNSW_1 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_1.csv')
df_UNSW_1 = filter_label(df_UNSW_1, 1)
df_UNSW_1_cut = features_selection(df_UNSW_1)
X_UNSW_1, y_UNSW_1 = test_split(df_UNSW_1_cut)

In [None]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
y=le.fit_transform(y_UNSW_1)
y

array([ 0,  0,  0, ..., 11, 11, 11])

In [None]:
regression = r_regression(X_UNSW_1, y)

In [None]:
X_UNSW_1

Unnamed: 0,avg_payload_1_second,fma5_sum,fma5_mean,fma5_variance,fma5_std,range_5,max_payload
0,159.000000,390.0,78.0,5106.0,71.456280,159.0,1460
1,0.000000,231.0,46.2,3722.7,61.013933,149.0,1460
2,0.000000,130.0,26.0,571.5,23.906066,48.0,1460
3,13.666667,137.0,27.4,633.8,25.175385,48.0,1460
4,20.500000,296.0,59.2,3511.7,59.259598,159.0,1460
...,...,...,...,...,...,...,...
1795012,60.000000,300.0,60.0,18000.0,134.164079,300.0,844
1795013,75.000000,300.0,75.0,22500.0,150.000000,300.0,844
1795014,100.000000,300.0,100.0,30000.0,173.205081,300.0,844
1795015,150.000000,300.0,150.0,45000.0,212.132034,300.0,844


In [None]:
regression

array([-0.08192188, -0.0444108 , -0.04437536, -0.19901667, -0.16614413,
       -0.16258816, -0.33862645])

In [None]:
df_UNSW_1_cut_2 = features_full(df_UNSW_1)
X_UNSW_1_2, y_UNSW_1_2 = test_split(df_UNSW_1_cut_2)
X_UNSW_1_2.drop(columns=['IAT_dt'], inplace=True)
regression2 = r_regression(X_UNSW_1_2, y)

In [None]:
X_UNSW_1_2

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,protocol,payload,IAT,src_port,dst_port,fma5_sum,fma5_mean,fma5_variance,...,mean_5,std_5,var_5,iat_sum_5,iat_mean_5,iat_std_5,iat_var_5,avg_payload_1_second,avg_payload_5_second,avg_payload_10_second
0,0.0,0.0,17,159,0.000000,60958,49317,390.0,78.0,5106.0,...,78.0,63.91244,4084.8,14.438524,2.887705,4.775385,22.804301,159.000000,154.00,154.00
1,1.0,1.0,17,149,2.132840,37244,33434,231.0,46.2,3722.7,...,78.0,63.91244,4084.8,14.438524,2.887705,4.775385,22.804301,0.000000,149.00,149.00
2,2.0,2.0,6,41,12.294982,443,46369,130.0,26.0,571.5,...,78.0,63.91244,4084.8,14.438524,2.887705,4.775385,22.804301,0.000000,0.00,0.00
3,3.0,3.0,6,0,0.010589,46369,443,137.0,27.4,633.8,...,78.0,63.91244,4084.8,14.438524,2.887705,4.775385,22.804301,13.666667,27.40,27.40
4,4.0,4.0,6,41,0.000113,46369,443,296.0,59.2,3511.7,...,78.0,63.91244,4084.8,14.438524,2.887705,4.775385,22.804301,20.500000,34.25,34.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795012,37173.0,293.0,6,0,0.196696,49153,80,300.0,60.0,18000.0,...,300.0,0.00000,0.0,2.317430,0.463486,0.635028,0.403260,60.000000,60.00,60.00
1795013,37173.0,294.0,6,0,0.444770,49153,80,300.0,75.0,22500.0,...,300.0,0.00000,0.0,2.317430,0.463486,0.635028,0.403260,75.000000,75.00,75.00
1795014,37173.0,295.0,6,0,0.275119,80,49153,300.0,100.0,30000.0,...,300.0,0.00000,0.0,2.317430,0.463486,0.635028,0.403260,100.000000,100.00,100.00
1795015,37173.0,296.0,6,0,0.016896,49153,80,300.0,150.0,45000.0,...,300.0,0.00000,0.0,2.317430,0.463486,0.635028,0.403260,150.000000,150.00,150.00


In [None]:
regression2

array([ 0.1527136 , -0.09989911,  0.154316  , -0.03028508,  0.12946485,
       -0.02085042, -0.23053817, -0.0444108 , -0.04437536, -0.19901667,
       -0.16614413,  0.24249149,  0.24247479,  0.14336797,  0.25848325,
       -0.33862645,  0.        , -0.16258816,  0.28824397,  0.28824397,
       -0.14041706,  0.08413867, -0.19598644, -0.19598644, -0.20573014,
       -0.06092093, -0.08192188, -0.05974673, -0.06065651])

In [12]:
rf_model = get_model()
start = time.time()
print('training....start time: ', start)
clf_UNSW_1 = OneVsRestClassifier(rf_model, n_jobs=-1).fit(X_UNSW_1, y_UNSW_1)
stop = time.time() 
print('time taken: ', stop-start)

# pickle_file = r'D:\Master\model\main-2\UNSW_1_time.sav'
# pickle.dump(clf_UNSW_1, open(pickle_file, 'wb'))

training....start time:  1672047339.3758469
time taken:  418.19317507743835


In [10]:
pickle_file = r'D:\Master\model\main-2\UNSW_1_time.sav'
clf_UNSW_1 = pickle.load(open(pickle_file, 'rb'))

In [11]:
y_pred_UNSW_1 = clf_UNSW_1.predict(X_UNSW_1)

In [12]:
print('accuracy: ', accuracy_score(y_UNSW_1, y_pred_UNSW_1))
print('f1: ', f1_score(y_UNSW_1, y_pred_UNSW_1, average='weighted'))
print('precision: ', precision_score(y_UNSW_1, y_pred_UNSW_1, average='weighted'))
print('recall: ', recall_score(y_UNSW_1, y_pred_UNSW_1, average='weighted'))

accuracy:  0.8912957370320169
f1:  0.8890137135795608
precision:  0.9045627458174119
recall:  0.8912957370320169


In [13]:
print(sorted(y_UNSW_1.unique()))
cm = confusion_matrix(y_UNSW_1, y_pred_UNSW_1, labels=sorted(y_UNSW_1.unique()))
print(cm)

['Amazon Echo', 'Belkin Wemo switch', 'Belkin wemo motion sensor', 'HP Printer', 'Insteon Camera', 'Light Bulbs LiFX Smart Bulb', 'NEST Protect smoke alarm', 'Netatmo weather station', 'Samsung SmartCam', 'Smart Things', 'Triby Speaker', 'Withings Smart scale']
[[239947     83    271    135      0      0      0      0      1      0
       0      0]
 [  1349 241458  25413      9     24      0      0      0      2      0
    5222      0]
 [  1850 127679 250992     57     72      0      0      0  18592      0
    8928      0]
 [   147     24     24  19599      0      0      0      0      1      0
       4      0]
 [     0     63    326      1 214510      0      0      0     12      0
      12      0]
 [     0      0      0      0      0  15491      0      0      0      0
       0      0]
 [     0      0      0      0      0      0   1386      0      0      0
       0      0]
 [     0      0      0      0      0      0      0  49656      0      0
       0      0]
 [     0     34   4077    

In [14]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_UNSW_1,y_UNSW_1)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Amazon Echo                    0.986247
Belkin Wemo switch             0.653732
Belkin wemo motion sensor      0.892705
HP Printer                     0.989699
Insteon Camera                 0.999152
Light Bulbs LiFX Smart Bulb    1.000000
NEST Protect smoke alarm       1.000000
Netatmo weather station        1.000000
Samsung SmartCam               0.952681
Smart Things                   1.000000
Triby Speaker                  0.756665
Withings Smart scale           1.000000
dtype: float64


##### Week 2

In [15]:
df_UNSW_2 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_2.csv')
df_UNSW_2 = filter_label(df_UNSW_2, 1)
df_UNSW_2_cut = features_selection(df_UNSW_2)
X_UNSW_2, y_UNSW_2 = test_split(df_UNSW_2_cut)

In [16]:
y_pred_UNSW_2 = clf_UNSW_1.predict(X_UNSW_2)

In [None]:
print('accuracy: ', accuracy_score(y_UNSW_2, y_pred_UNSW_2))
print('f1: ', f1_score(y_UNSW_2, y_pred_UNSW_2, average='weighted'))
print('precision: ', precision_score(y_UNSW_2, y_pred_UNSW_2, average='weighted'))
print('recall: ', recall_score(y_UNSW_2, y_pred_UNSW_2, average='weighted'))

accuracy:  0.8462149267183948
f1:  0.8384675577101052
precision:  0.8582497639709055
recall:  0.8462149267183948


In [None]:
print(len(y_UNSW_2.unique()), y_UNSW_2.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_2.unique())))
cm = confusion_matrix(y_UNSW_2, y_pred_UNSW_2)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[261446    240    618   2170      0      0      0      0      9      0
       0      0]
 [  1591 284629  45017    103    592      0      0      0    142      0
    7721      0]
 [  3023 164717 276152    142    419      0      0      0   7524      0
   11515      0]
 [     0      1     14  15422  51348      0      0      0      8      0
      28      0]
 [     0   1196   8067     75 274755      0      0      0   1282      0
     449      0]
 [     0      0      0      4      6  64085      0      0      0      0
       0      0]
 [     0      0      0      0      0      1   1563      0      0      0
       0      0]
 [     0      0      0      0      0      0      0  49767      0      0
       0      0]
 [     0     64   3164   

##### Week 3

In [None]:
df_UNSW_3 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_3.csv')
df_UNSW_3 = filter_label(df_UNSW_3, 1)
df_UNSW_3_cut = features_selection(df_UNSW_3)
X_UNSW_3, y_UNSW_3 = test_split(df_UNSW_3_cut)

In [None]:
y_pred_UNSW_3 = clf_UNSW_1.predict(X_UNSW_3)

print('accuracy: ', accuracy_score(y_UNSW_3, y_pred_UNSW_3))
print('f1: ', f1_score(y_UNSW_3, y_pred_UNSW_3, average='weighted'))
print('precision: ', precision_score(y_UNSW_3, y_pred_UNSW_3, average='weighted'))
print('recall: ', recall_score(y_UNSW_3, y_pred_UNSW_3, average='weighted'))

accuracy:  0.8668887479015894
f1:  0.864110653427212
precision:  0.8824212008777016
recall:  0.8668887479015894


In [None]:
print(len(y_UNSW_3.unique()), y_UNSW_3.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_3.unique())))
y_UNSW_3.unique()
cm = confusion_matrix(y_UNSW_3, y_pred_UNSW_3)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[233661    252    530    113      0      0      0      0      5      0
       0      0]
 [  5908 252761  35400    471    100      0      0      0     48      0
    4012      0]
 [ 11929 155539 251323   1188    788      0      0      0   2036      0
    4142      0]
 [     0      2     10   6981  15378      0      0      0      4      0
       7      0]
 [     0    437   2970     20 250834      0      0      0    551      0
     238      0]
 [     0      0      0      3      3  59234      0      0      0      0
       0      0]
 [     0      0      0      0      0      0    986      0      0      0
       0      0]
 [     0      0      0      0      0      0      0  40909      0      0
       0   7117]
 [     0      0      0   

##### Week 4

In [None]:
df_UNSW_4 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_4.csv')
df_UNSW_4 = filter_label(df_UNSW_4, 1)
df_UNSW_4_cut = features_selection(df_UNSW_4)
X_UNSW_4, y_UNSW_4 = test_split(df_UNSW_4_cut)

In [None]:
y_pred_UNSW_4 = clf_UNSW_1.predict(X_UNSW_4)

print('accuracy: ', accuracy_score(y_UNSW_4, y_pred_UNSW_4))
print('f1: ', f1_score(y_UNSW_4, y_pred_UNSW_4, average='weighted'))
print('precision: ', precision_score(y_UNSW_4, y_pred_UNSW_4, average='weighted'))
print('recall: ', recall_score(y_UNSW_4, y_pred_UNSW_4, average='weighted'))

accuracy:  0.8333375005519348
f1:  0.8272908525129302
precision:  0.8532758983667956
recall:  0.8333375005519348


In [None]:
print(len(y_UNSW_4.unique()), y_UNSW_4.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_4.unique())))
cm = confusion_matrix(y_UNSW_4, y_pred_UNSW_4)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[251196    178   1448   1455      0      0      0      0      8      0
       0      0]
 [  5876 259210  37006    126    109      0      0      0    110      0
    5784      0]
 [ 11581 173515 287794   1167   7714      0      0      0   4233      0
    6180      0]
 [     0      6     61  11740  38664      0      0      0     12      0
      70      0]
 [     0   1140   5588     42 252628      0      0      0   2476      0
     342      0]
 [     0      0      0      4      4  63222      0      0      0      0
       0      0]
 [     0      0      0      0      0      2   1793      0      0      0
       0      0]
 [     0      0      0      0      2      0      0   3697      0      0
       0   2229]
 [     0      0      0   

##### Week 5

In [None]:
df_UNSW_5 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_5.csv')
df_UNSW_5 = filter_label(df_UNSW_5, 1)
df_UNSW_5_cut = features_selection(df_UNSW_5)
X_UNSW_5, y_UNSW_5 = test_split(df_UNSW_5_cut)

In [None]:
y_pred_UNSW_5 = clf_UNSW_1.predict(X_UNSW_5)

print('accuracy: ', accuracy_score(y_UNSW_5, y_pred_UNSW_5))
print('f1: ', f1_score(y_UNSW_5, y_pred_UNSW_5, average='weighted'))
print('precision: ', precision_score(y_UNSW_5, y_pred_UNSW_5, average='weighted'))
print('recall: ', recall_score(y_UNSW_5, y_pred_UNSW_5, average='weighted'))

accuracy:  0.8470883037286802
f1:  0.859091416941374
precision:  0.8954672366983653
recall:  0.8470883037286802


In [None]:
print(len(y_UNSW_5.unique()), y_UNSW_5.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_5.unique())))
cm = confusion_matrix(y_UNSW_5, y_pred_UNSW_5)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[261325    150   1732    907      0      0      0      0      4      0
       0      0]
 [   655 114926  17485     32    247      0      0      0   1276      0
    3177      0]
 [  1326  73556 124159    183   4512      0      0      0   2717      0
    4049      0]
 [     0     11     73  11924   6265      0      0      0     20      0
      65      0]
 [     0   2011   7764    110 254012      0      0      0    519      0
     490      0]
 [     0      0      0      1      2  41379      0      0      0      0
       0      0]
 [     0      0      0      0      0      0   1281      0      0      0
       0      0]
 [     0      0      0      0      0      0      0  17880      1      0
       0   2861]
 [     0    119    192   

In [None]:
print(np.unique(y_pred_UNSW_5))
print(len(np.unique(y_pred_UNSW_5)))

labels_15=['Amazon Echo','Belkin Wemo switch','Belkin wemo motion sensor','Dropcam','HP Printer',
'Insteon Camera','Light Bulbs LiFX Smart Bulb','NEST Protect smoke alarm','Netatmo weather station','PIX-STAR Photo-frame','Samsung SmartCam','Smart Things','Triby Speaker','iHome Power Plug', 'Withings Smart scale']

['Amazon Echo' 'Belkin Wemo switch' 'Belkin wemo motion sensor'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale']
12


In [None]:
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report
labels_16= ['Amazon Echo','Belkin wemo motion sensor','Belkin Wemo switch','Dropcam','HP Printer','iHome Power Plug','Insteon Camera','Light Bulbs LiFX Smart Bulb','NEST Protect smoke alarm','Netatmo weather station','Netatmo Welcome','PIX-STAR Photo-frame','Samsung SmartCam','Smart Things','Triby Speaker','Withings Smart scale']
# multilabel_confusion_matrix(y_UNSW_5, y_pred_UNSW_5,labels=labels_15)
print(classification_report(y_UNSW_5, y_pred_UNSW_5,labels=labels_15))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                             precision    recall  f1-score   support

                Amazon Echo       0.99      0.99      0.99    264118
         Belkin Wemo switch       0.60      0.83      0.70    137798
  Belkin wemo motion sensor       0.82      0.59      0.69    210502
                    Dropcam       0.00      0.00      0.00         0
                 HP Printer       0.90      0.65      0.76     18358
             Insteon Camera       0.96      0.96      0.96    264906
Light Bulbs LiFX Smart Bulb       1.00      1.00      1.00     41382
   NEST Protect smoke alarm       1.00      1.00      1.00      1281
    Netatmo weather station       0.21      0.86      0.34     20742
       PIX-STAR Photo-frame       0.00      0.00      0.00         0
           Samsung SmartCam       0.98      0.74      0.84    254253
               Smart Things       1.00      1.00      1.00     77768
              Triby Speaker       0.50      0.92      0.65      8539
           iHome Power Plug      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
confusion_matrix(y_UNSW_5, y_pred_UNSW_5,labels=labels_15)

array([[261325,    150,   1732,      0,    907,      0,      0,      0,
             0,      0,      4,      0,      0,      0,      0],
       [   655, 114926,  17485,      0,     32,    247,      0,      0,
             0,      0,   1276,      0,   3177,      0,      0],
       [  1326,  73556, 124159,      0,    183,   4512,      0,      0,
             0,      0,   2717,      0,   4049,      0,      0],
       [     0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0],
       [     0,     11,     73,      0,  11924,   6265,      0,      0,
             0,      0,     20,      0,     65,      0,      0],
       [     0,   2011,   7764,      0,    110, 254012,      0,      0,
             0,      0,    519,      0,    490,      0,      0],
       [     0,      0,      0,      0,      1,      2,  41379,      0,
             0,      0,      0,      0,      0,      0,      0],
       [     0,      0,      0,      0,  

##### Week 6

In [None]:
df_UNSW_6 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_6.csv')
df_UNSW_6 = filter_label(df_UNSW_6, 1)
df_UNSW_6_cut = features_selection(df_UNSW_6)
X_UNSW_6, y_UNSW_6 = test_split(df_UNSW_6_cut)

In [None]:
y_pred_UNSW_6 = clf_UNSW_1.predict(X_UNSW_6)

print('accuracy: ', accuracy_score(y_UNSW_6, y_pred_UNSW_6))
print('f1: ', f1_score(y_UNSW_6, y_pred_UNSW_6, average='weighted'))
print('precision: ', precision_score(y_UNSW_6, y_pred_UNSW_6, average='weighted'))
print('recall: ', recall_score(y_UNSW_6, y_pred_UNSW_6, average='weighted'))

accuracy:  0.8367191914152652
f1:  0.8360383822947334
precision:  0.8482567883170801
recall:  0.8367191914152652


In [None]:
print(len(y_UNSW_6.unique()), y_UNSW_6.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_6.unique())))
cm = confusion_matrix(y_UNSW_6, y_pred_UNSW_6)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[218587   1587   1776   1200      1      0      0      0     10      0
       0      0]
 [     0 233849  62540      4    264      0      0      0   9916      0
    6502      0]
 [     0 169958 297681     14  12712      0      0      0   8024      0
    9931      0]
 [     0      7     24  16134      6      0      0      0      8      0
      11      0]
 [     0   2897   9575     89 251506      0      0      0    839      0
     522      0]
 [     0      0      1     11      7  58461      0      0      3      0
       0      0]
 [     0      0      0      0      0      1   1598      0      0      0
       0      0]
 [     0      0      0     93      5   2236     19  47366    203      0
     102      0]
 [     0      0      0   

##### Week 7

In [18]:
df_UNSW_7 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_7.csv')
df_UNSW_7 = filter_label(df_UNSW_7, 1)
df_UNSW_7_cut = features_selection(df_UNSW_7)
X_UNSW_7, y_UNSW_7 = test_split(df_UNSW_7_cut)

In [19]:
y_pred_UNSW_7 = clf_UNSW_1.predict(X_UNSW_7)

print('accuracy: ', accuracy_score(y_UNSW_7, y_pred_UNSW_7))
print('f1: ', f1_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))
print('precision: ', precision_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))
print('recall: ', recall_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))

accuracy:  0.8524064356933976
f1:  0.8512612248709478
precision:  0.871234202624825
recall:  0.8524064356933976


In [None]:
y_pred_UNSW_7 = clf_UNSW_1.predict(X_UNSW_7)

print('accuracy: ', accuracy_score(y_UNSW_7, y_pred_UNSW_7))
print('f1: ', f1_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))
print('precision: ', precision_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))
print('recall: ', recall_score(y_UNSW_7, y_pred_UNSW_7, average='weighted'))

accuracy:  0.8524064356933976
f1:  0.8512612248709478
precision:  0.871234202624825
recall:  0.8524064356933976


In [None]:
print(len(y_UNSW_7.unique()), y_UNSW_7.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_7.unique())))
cm = confusion_matrix(y_UNSW_7, y_pred_UNSW_7)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[240592    140   4098   1050      0      0      0      0     35      0
       0      0]
 [     0 265003  37421      9    262      0      0      0   1411      0
    9731      0]
 [     0 180958 288547     16  12747      0      0      0   7179      0
   10639      0]
 [     0     13     81  16315     25      0      0      0     16      0
      78      0]
 [     0   1696   8865     67 244542      0      0      0   1373      0
     434      0]
 [     0      0      0      2      2  61041      0      0      0      0
       0      0]
 [     0      0      0      0      0      2   1457      0      0      0
       0      0]
 [     0      0      0      0      0      0      0  45333      0   4588
       0      0]
 [     0     63   4588   

##### Week 8

In [3]:
df_UNSW_8 = pd.read_csv(r'D:\Master\datasets\main-exp-2\combined\UNSW\week_8.csv')
df_UNSW_8 = filter_label(df_UNSW_8, 1)
df_UNSW_8_cut = features_selection(df_UNSW_8)
X_UNSW_8, y_UNSW_8 = test_split(df_UNSW_8_cut)

In [6]:
y_pred_UNSW_8 = clf_UNSW_1.predict(X_UNSW_8)

print('accuracy: ', accuracy_score(y_UNSW_8, y_pred_UNSW_8))
print('f1: ', f1_score(y_UNSW_8, y_pred_UNSW_8, average='weighted'))
print('precision: ', precision_score(y_UNSW_8, y_pred_UNSW_8, average='weighted'))
print('recall: ', recall_score(y_UNSW_8, y_pred_UNSW_8, average='weighted'))

accuracy:  0.8305392083685423
f1:  0.8268317268596072
precision:  0.8496871342506306
recall:  0.8305392083685423


In [7]:
print(len(y_UNSW_8.unique()), y_UNSW_8.unique(), set(clf_UNSW_1.classes_).symmetric_difference(set(y_UNSW_8.unique())))
cm = confusion_matrix(y_UNSW_8, y_pred_UNSW_8)
print(cm)

12 ['Amazon Echo' 'Belkin wemo motion sensor' 'Belkin Wemo switch'
 'HP Printer' 'Insteon Camera' 'Light Bulbs LiFX Smart Bulb'
 'NEST Protect smoke alarm' 'Netatmo weather station' 'Samsung SmartCam'
 'Smart Things' 'Triby Speaker' 'Withings Smart scale'] set()
[[243354    533   5506   4405      1      0      0      0    734      0
       0      0]
 [     0 255731  39073      6    767      0      0      0   4988      0
    9071      0]
 [     0 131090 411176     56  60505      0      0      0  36847      0
   30747      0]
 [   376   4339   8404  16013   2635      0      0      0   5168      0
    9390      0]
 [     0   1092   8550     49 256119      0      0      0   2658      0
     519      0]
 [     0      0      0      1      6  75759      0      0     30      0
       1      0]
 [     0      0      0      0      0      1   1324      0      0      0
       0      0]
 [     0      0      0      0      0      0      0  49142      0      0
       0      0]
 [     0     23   2069   

In [8]:
def classwise_accuracy():
   a = pd.crosstab(y_pred_UNSW_8,y_UNSW_8)
   print(a.max(axis=1)/a.sum(axis=1))
classwise_accuracy()

row_0
Amazon Echo                    0.998457
Belkin Wemo switch             0.650818
Belkin wemo motion sensor      0.865610
HP Printer                     0.779753
Insteon Camera                 0.798904
Light Bulbs LiFX Smart Bulb    0.999987
NEST Protect smoke alarm       1.000000
Netatmo weather station        1.000000
Samsung SmartCam               0.851781
Smart Things                   1.000000
Triby Speaker                  0.444937
Withings Smart scale           1.000000
dtype: float64
