In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import json
from sklearn.preprocessing import MinMaxScaler
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import datetime as dt

In [2]:
# Read the raw event log for all devices (whitespace-separated columns).
col_names = ['date', 'time', 'sensor_id', 'val']

# Raw string: "\s" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions.
homeass_df = pd.read_csv('event_result.log', sep=r"\s+", names=col_names)

homeass_df.head()

Unnamed: 0,date,time,sensor_id,val
0,2007-10-25,15:04:24.590379,T001,21.5
1,2007-10-25,15:04:24.595221,AC01,ON
2,2007-10-25,15:04:24.590379,T002,20.5
3,2007-10-25,15:04:24.590379,T003,20.5
4,2007-10-25,15:13:22.086372,T002,20


In [3]:
# Of the two saved matrices, only event_trans_pro is loaded and used here;
# the event_single_pro load below is intentionally left commented out.
event_trans = np.load("event_trans_pro.npy")
# event_single = np.load("event_single_pro.npy")

In [4]:
# Converts a probability into a weight using a steep, shifted logistic curve.
def g_sigmoid(x, steepness=5000, midpoint=0.00021101498206372652):
    """Map a probability ``x`` to a weight between 0 and 1.

    Args:
        x: Probability to convert (scalar or NumPy array).
        steepness: Slope factor of the logistic curve; larger values make the
            transition around ``midpoint`` sharper. Defaults to the value the
            notebook has always used.
        midpoint: Value of ``x`` for which the weight is exactly 0.5.
            Defaults to the value the notebook has always used.

    Returns:
        ``1 / (1 + exp(steepness * (midpoint - x)))``.
    """
    return 1.0 / (1 + np.exp(steepness * (midpoint - x)))

In [5]:
# Given a sequence of event ids, return the weight obtained from the
# transition-probability matrix.
def get_multi_event_pro_2_w(input_event_list, event_trans):
    """Multiply the g_sigmoid weights of every consecutive transition.

    Args:
        input_event_list: Sequence of event ids, used as indices into
            ``event_trans``.
        event_trans: Matrix indexed as ``event_trans[previous_id][next_id]``.

    Returns:
        The product of ``g_sigmoid(event_trans[a][b])`` over all consecutive
        pairs ``(a, b)``; ``1`` when the sequence holds fewer than two events.
    """
    seq_len = len(input_event_list)

    # With fewer than two events there is no transition to score.
    if seq_len < 2:
        return 1

    # The weight of the first event on its own (event_single) is not used.
    weight = 1
    for pos in range(1, seq_len):
        prev_event = input_event_list[pos - 1]
        next_event = input_event_list[pos]
        weight *= g_sigmoid(event_trans[prev_event][next_event])

    return weight
    

In [6]:
# Every event name has a corresponding id that identifies it;
# the mapping is stored as JSON.
with open('event2id.json', 'r') as mapping_file:
    event2id = json.load(mapping_file)

In [7]:
# Output weight for the event sequence M031_1 -> M031_0 -> M031_1.

get_multi_event_pro_2_w([88, 89, 88], event_trans)

1.0

In [9]:
# Read the event sequence used as input for the anomaly-detection test;
# it is processed further in the following cells.
# NOTE(review): names= is passed without header=, so a header line in the CSV
# (if there is one) is read as a data row — confirm against the file.
col_names = ['datetime', 'flag', 'sensor_id', 'val']
 
test_attack_1_df = pd.read_csv('attacked_random_week.csv', sep=",", names=col_names)

In [10]:
# Remove the row labelled 0 — presumably the CSV's own header line that was
# read as data; TODO confirm. Re-running this cell is not idempotent.
test_attack_1_df = test_attack_1_df.drop(index=[0])

In [11]:
# Build the event name as "<sensor_id>_<val>"; this is the key looked up in event2id.
test_attack_1_df['event_name'] = test_attack_1_df['sensor_id']+ '_' + test_attack_1_df['val']

In [12]:

# Translate every event name into its id. Names that are missing from event2id
# get the sentinel id -1 and are collected in new_e_list.
nrow = test_attack_1_df.shape[0]
# Pull the column out once; indexing the frame with .iloc[i] inside the loop
# would build a whole row Series for every single lookup.
event_names = test_attack_1_df['event_name'].tolist()
event_id_list = []
new_e_list = []
for event_name in tqdm(event_names):
    try:
        event_id_list.append(event2id[event_name])
    except KeyError:
        event_id_list.append(-1)
        new_e_list.append(event_name)

100%|██████████| 13678/13678 [00:01<00:00, 6931.24it/s]


In [13]:
# Renumber the rows from 0 and discard the previous index labels.
test_attack_1_df = test_attack_1_df.reset_index(drop=True)

In [14]:
# event_id_list was built row by row in frame order, so assign it positionally.
# Wrapping it in pd.Series would align on index labels instead, silently
# shifting the ids and leaving NaN if the frame's index is not exactly 0..n-1.
test_attack_1_df['event_id'] = event_id_list

In [15]:
# Only the event id is needed from here on; drop the columns it was derived from.
test_attack_1_df = test_attack_1_df.drop(columns=['sensor_id', 'val', 'event_name'])
# Plain Python list of all event ids, in row order.
the_event_id_list = test_attack_1_df['event_id'].tolist()

In [16]:
# Length of one window when the event stream is split: one hour.
time_span = dt.timedelta(hours=1)
# Convert the datetime column to pandas datetimes so it can be compared
# against a start time plus time_span.
test_attack_1_df['datetime'] = pd.to_datetime(test_attack_1_df['datetime'])

In [17]:
def get_input_events_by_hour(whole_df, span=None):
    """Split an event frame into consecutive time windows.

    A window is closed as soon as a row is found whose ``datetime`` lies at
    least ``span`` after the first row of the current window; that row then
    becomes the first row of the next window. Windows are therefore anchored
    on events, not on clock-hour boundaries.

    Args:
        whole_df: DataFrame with a ``datetime`` column, in the order in which
            the events should be scanned.
        span: Window length (something that can be added to a ``datetime``
            value, e.g. ``datetime.timedelta``). When omitted, the
            module-level ``time_span`` is used.

    Returns:
        List of DataFrame slices of ``whole_df``, one per window, in order.
        An empty list when ``whole_df`` has no rows.
    """
    # An empty frame has no first timestamp to anchor a window on.
    if len(whole_df) == 0:
        return []

    window = time_span if span is None else span

    # Extract the column once instead of building a row Series per iteration.
    timestamps = whole_df['datetime'].tolist()

    chunks = []
    chunk_start = 0
    window_open = timestamps[0]

    for pos, stamp in enumerate(tqdm(timestamps)):
        if window_open + window <= stamp:
            chunks.append(whole_df.iloc[chunk_start:pos])
            chunk_start = pos
            window_open = stamp

    # Whatever remains after the last boundary forms the final window.
    chunks.append(whole_df.iloc[chunk_start:])

    return chunks

In [18]:
# Split the event sequence into a list of per-window DataFrames.
input_df_list = get_input_events_by_hour(test_attack_1_df)

100%|██████████| 13678/13678 [00:02<00:00, 4745.02it/s]


In [19]:
# Redefined sigmoid: a transition whose probability is 0 must produce a
# weight of exactly 0.
def sigmoid(x):
    """Turn a transition probability into a weight.

    Args:
        x: Scalar probability.

    Returns:
        ``0`` when ``x <= 0``; otherwise the logistic value
        ``1 / (1 + exp(5000 * (0.00021101498206372652 - x)))``.
    """
    # Guard first so non-positive probabilities never reach the logistic.
    if x <= 0:
        return 0

    shifted = 0.00021101498206372652 - x
    return 1.0 / (1 + np.exp(5000 * shifted))

In [20]:
# Spot-check a single entry of the transition matrix (row 1, column 10).
event_trans[1][10]

0.0198744769874477

In [21]:
# Uses the pairwise (previous -> current) transition probabilities of simple
# events to score how plausible the events in a window are.
def get_simple_weight_from_event(input_df, event_trans):
    """Multiply the transition weights of all consecutive events in a window.

    Every pair whose weight is 0 is printed as ``previous_id current_id``.

    Args:
        input_df: DataFrame with an ``event_id`` column; the id ``-1`` marks
            an event that has no known id.
        event_trans: Matrix indexed as ``event_trans[previous_id][current_id]``.

    Returns:
        The product of the per-transition weights; ``1`` when the window
        holds fewer than two events.
    """
    # Extract the column once instead of building a row Series per lookup.
    event_ids = input_df['event_id'].tolist()
    event_act_pro = 1

    # The first event has no predecessor, so scoring starts at the second one.
    for pre_event_id, the_event_id in zip(event_ids, event_ids[1:]):

        if pre_event_id == -1 or the_event_id == -1:
            # Unknown events must never index the matrix: -1 would silently
            # wrap around to its last row/column. Their weight is 0, the
            # same value sigmoid(0) yields.
            step_weight = 0
        else:
            step_weight = sigmoid(event_trans[pre_event_id][the_event_id])

        # Report every transition that received zero weight.
        if step_weight == 0:
            print(pre_event_id, the_event_id)

        event_act_pro *= step_weight

    return event_act_pro

In [22]:
# Score every window. Whenever a transition with zero weight is found,
# get_simple_weight_from_event prints the (previous id, current id) pair;
# such a transition is what causes a window to be detected as anomalous.
event_act_weight_list = []

for input_df in input_df_list:
    
    event_act_weight_list.append( get_simple_weight_from_event(input_df, event_trans) )

142 75
112 58
58 8
25 9
102 1881
1881 53
53 19
19 81
111 1879
1879 99
60 104
104 25
65 6
102 84
84 21
81 39
39 112
112 62
62 109
111 70
70 110
112 66
5 75
57 1878
1878 59
59 9
110 1883
1883 112
6 37
37 111
16 1881
1881 81
112 64
22 10
110 71
10 130
134 5
5 60
60 87
87 36
36 36
36 1
8 41
75 61
61 9
69 104
104 57
10 142
43 84
84 44
44 81
79 1
4 1879
1879 60
60 109
111 59
130 110
112 43
43 110
33 110
138 65
81 135
135 87
111 42
42 112
111 61
61 112
110 35
110 1883
1883 109
139 78
86 32
93 1879
1879 110
138 58
58 86
112 1885
1885 110
111 191
191 86
109 128
128 111
109 66
66 1879
1879 110
109 53
53 110
67 80
104 1879
1879 36
65 96
40 81
81 69
110 35
35 79
110 33
110 1878
1878 109
110 71
71 111
23 1881
1881 76
66 143
143 16
10 135
135 1880
1880 109
129 1
138 53
53 137
137 93
36 5
4 65
65 8
102 188
104 188
188 5
110 24
24 0
0 62
188 5
7 30
30 6
163 92
92 102
164 60
64 3
3 23
7 69
69 1883
1883 70
70 134
43 9
47 60
60 1
96 68
68 58
58 0
2 64
101 30
20 9
10 66
60 1
6 82
82 1878
1878 12
12 8
80 1

In [23]:
# Show the per-window weights (one entry per element of input_df_list).
event_act_weight_list

[0.016524519927554934,
 0.9999999999999998,
 0.9952018761383973,
 0.9695918779095855,
 0.4256785529130518,
 0.9999999999999996,
 1.0,
 0.9960254937964075,
 0.002640635363062261,
 0.09331441710716949,
 0.9999999999997389,
 0.9999614713884382,
 0.9995549393860388,
 0.9836733307575325,
 0.36938367285128354,
 0.7684301030187977,
 0.02674266468683991,
 0.9999999995248772,
 0.991779548404907,
 0.9999999999597886,
 0.9999999999999944,
 0.00013583840681461297,
 3.446437124776393e-05,
 0.00042128874537424564,
 0.009678021175812167,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0]