In [1]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import json
from sklearn.preprocessing import MinMaxScaler
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import datetime as dt


In [2]:
# Load the raw event log covering all devices (whitespace-separated columns)
col_names = ['date', 'time', 'sensor_id', 'val']

# Raw string: "\s" inside a normal string literal is an invalid escape
# sequence, which newer Python versions warn about at compile time.
homeass_df = pd.read_csv('event_result.log', sep=r"\s+", names=col_names)

homeass_df.head()

Unnamed: 0,date,time,sensor_id,val
0,2007-10-25,15:04:24.590379,T001,21.5
1,2007-10-25,15:04:24.595221,AC01,ON
2,2007-10-25,15:04:24.590379,T002,20.5
3,2007-10-25,15:04:24.590379,T003,20.5
4,2007-10-25,15:13:22.086372,T002,20


In [3]:
# Two matrices were saved; only event_trans_pro (the transition matrix) is used here
event_trans = np.load("event_trans_pro.npy")
# event_single = np.load("event_single_pro.npy")


In [4]:
# Converts a probability into a weight
def g_sigmoid(x, steepness=5000, midpoint=0.00021101498206372652):
    """Map a probability to a weight with a steep logistic curve.

    Args:
        x: Probability value to convert.
        steepness: Slope factor of the logistic. The default is the value
            that used to be hard-coded in the body.
        midpoint: Input that maps to exactly 0.5. The default is the value
            that used to be hard-coded in the body.

    Returns:
        ``1 / (1 + exp(steepness * (midpoint - x)))``.
    """
    return 1.0 / (1 + np.exp(steepness * (-x + midpoint)))

In [5]:
# Given an event-id sequence, return the weight derived from the transition matrix
def get_multi_event_pro_2_w(input_event_list, event_trans):
    """Multiply the ``g_sigmoid`` weights of every consecutive transition.

    Args:
        input_event_list: Indexable sequence of event ids.
        event_trans: Matrix indexed as ``event_trans[previous_id][next_id]``;
            each looked-up value is passed through ``g_sigmoid``.

    Returns:
        The product of the per-transition weights. A sequence with fewer
        than two events has no transition, so ``1`` is returned.
    """
    length = len(input_event_list)
    if length < 1:
        return 1

    weight = 1
    for pos in range(1, length):
        prev_id = input_event_list[pos - 1]
        next_id = input_event_list[pos]
        weight *= g_sigmoid(event_trans[prev_id][next_id])
    return weight
    


In [6]:
# Every event is identified by an id; load the event-name -> id mapping

with open('event2id.json') as mapping_file:
    event2id = json.load(mapping_file)

In [7]:
# An event that does not appear in the transition-probability matrix (this lookup raises KeyError)
event2id['M020_0']

KeyError: 'M020_0'

In [8]:
# The id of one particular event
event2id['L002_0']

136

In [9]:
# Output weight of the event sequence M031_1 -> M031_0 -> M031_1

get_multi_event_pro_2_w(([ 88, 89, 88]), event_trans)

1.0

In [10]:
# Load the input event sequence for the anomaly-detection test; later cells prepare it
col_names = ['datetime', 'flag', 'sensor_id', 'val']
 
test_attack_1_df = pd.read_csv('attacked_rub_house_from_window_new.csv', sep=",", names=col_names)

In [11]:
# Drop the row labelled 0 -- presumably the CSV's own header line, read as data because names= was passed (TODO confirm)
test_attack_1_df =  test_attack_1_df.drop([0], axis = 0)

In [12]:
# Inspect the frame after row 0 has been removed
test_attack_1_df

Unnamed: 0,datetime,flag,sensor_id,val
1,2008-01-11 06:01:40.434972,0,M034,1
2,2008-01-11 06:01:40.770873,0,M035,1
3,2008-01-11 06:01:42.302436,0,M035,0
4,2008-01-11 06:01:42.855271,0,M034,0
5,2008-01-11 06:01:45.377568,0,M035,1
6,2008-01-11 06:01:45.977405,0,M034,1
7,2008-01-11 06:01:46.393291,0,M035,0
8,2008-01-11 06:01:46.977122,0,M035,1
9,2008-01-11 06:01:52.671679,0,M035,0
10,2008-01-11 06:01:52.796683,0,M034,0


In [13]:
# Build the event name as "<sensor_id>_<val>", the key format looked up in event2id (e.g. 'L002_0')
test_attack_1_df['event_name'] = test_attack_1_df['sensor_id']+ '_' + test_attack_1_df['val']

In [14]:

nrow = test_attack_1_df.shape[0]
event_id_list = []  # one id per row; -1 marks an event name missing from event2id
new_e_list = []     # the event names that were missing from event2id
for i in tqdm(range(nrow)):

    event_name = test_attack_1_df.iloc[i]['event_name']
    if event_name in event2id:
        event_id_list.append(event2id[event_name])
    else:
        event_id_list.append(-1)
        new_e_list.append(event_name)

100%|██████████| 7648/7648 [00:00<00:00, 7723.58it/s]


In [15]:
# Inspect the ids mapped from the event names
event_id_list

[111,
 109,
 110,
 112,
 109,
 111,
 110,
 109,
 110,
 112,
 99,
 96,
 111,
 112,
 111,
 109,
 112,
 110,
 111,
 112,
 8,
 102,
 101,
 5,
 10,
 1,
 8,
 96,
 9,
 11,
 102,
 99,
 101,
 104,
 6,
 102,
 111,
 109,
 112,
 111,
 110,
 109,
 112,
 111,
 110,
 112,
 109,
 111,
 110,
 112,
 0,
 4,
 1,
 111,
 109,
 112,
 111,
 110,
 109,
 112,
 110,
 109,
 111,
 112,
 110,
 5,
 8,
 10,
 9,
 11,
 102,
 109,
 110,
 109,
 111,
 110,
 112,
 109,
 111,
 110,
 112,
 6,
 111,
 112,
 111,
 112,
 10,
 8,
 102,
 11,
 111,
 112,
 111,
 112,
 109,
 111,
 112,
 111,
 112,
 111,
 110,
 112,
 6,
 10,
 5,
 1,
 8,
 9,
 11,
 102,
 104,
 5,
 10,
 1,
 0,
 4,
 109,
 110,
 109,
 111,
 112,
 110,
 111,
 112,
 109,
 111,
 110,
 109,
 112,
 110,
 137,
 2,
 111,
 109,
 112,
 111,
 112,
 110,
 111,
 109,
 110,
 112,
 138,
 109,
 111,
 112,
 98,
 96,
 110,
 109,
 111,
 81,
 87,
 81,
 80,
 79,
 88,
 112,
 87,
 82,
 110,
 79,
 86,
 82,
 79,
 109,
 80,
 89,
 111,
 81,
 82,
 87,
 86,
 112,
 110,
 111,
 112,
 111,
 112,
 109,
 

In [16]:
# Renumber the rows from 0 (the old index is discarded)
test_attack_1_df = test_attack_1_df.reset_index(drop=True)

In [17]:
# Attach the ids as a column; pd.Series(list) carries a 0..n-1 index and is aligned to the frame by index label
test_attack_1_df['event_id'] = pd.Series(event_id_list)

In [18]:
# The processed data
test_attack_1_df

Unnamed: 0,datetime,flag,sensor_id,val,event_name,event_id
0,2008-01-11 06:01:40.434972,0,M034,1,M034_1,111
1,2008-01-11 06:01:40.770873,0,M035,1,M035_1,109
2,2008-01-11 06:01:42.302436,0,M035,0,M035_0,110
3,2008-01-11 06:01:42.855271,0,M034,0,M034_0,112
4,2008-01-11 06:01:45.377568,0,M035,1,M035_1,109
5,2008-01-11 06:01:45.977405,0,M034,1,M034_1,111
6,2008-01-11 06:01:46.393291,0,M035,0,M035_0,110
7,2008-01-11 06:01:46.977122,0,M035,1,M035_1,109
8,2008-01-11 06:01:52.671679,0,M035,0,M035_0,110
9,2008-01-11 06:01:52.796683,0,M034,0,M034_0,112


In [19]:
# Remove the columns that were only needed to derive event_id
test_attack_1_df = test_attack_1_df.drop(['sensor_id', 'val', 'event_name'], axis = 1)
# Plain Python list of the ids, in row order
the_event_id_list = test_attack_1_df.event_id.tolist()

In [20]:
# Window length used by get_input_events_by_hour: 3600 seconds
time_span = dt.timedelta(seconds=3600)
# Parse the datetime column into pandas datetimes so it can be compared with start_time + time_span
test_attack_1_df.datetime = pd.to_datetime(test_attack_1_df.datetime)

In [21]:
def get_input_events_by_hour(whole_df, span=None):
    """Split an event frame into consecutive time windows.

    A window starts at the timestamp of its first row. The first row whose
    timestamp is at least ``span`` later closes that window and opens the
    next one.

    Args:
        whole_df: Frame with a ``datetime`` column. The windowing compares
            rows in the order given, so it presumably expects rows sorted
            by time (TODO confirm with the caller).
        span: Window length added to a window's start time. When omitted,
            the module-level ``time_span`` is used, so existing calls keep
            their behaviour.

    Returns:
        List of row slices of ``whole_df`` in order. An empty frame yields
        an empty list.
    """
    if span is None:
        span = time_span

    nrow = whole_df.shape[0]
    if nrow == 0:
        # Row 0 used to be indexed unconditionally, raising IndexError here.
        return []

    # Read the column once instead of building a row Series on every iteration.
    timestamps = whole_df["datetime"]

    input_df_list = []
    window_first_row = 0
    start_time = timestamps.iloc[0]

    for i in tqdm(range(nrow)):

        temp_time = timestamps.iloc[i]

        if (start_time + span) <= temp_time:
            # Row i falls outside the current window: close it and start a new one at i.
            input_df_list.append(whole_df[window_first_row:i])
            window_first_row = i
            start_time = temp_time

    # Whatever remains after the last boundary forms the final window.
    input_df_list.append(whole_df[window_first_row:])

    return input_df_list

In [22]:
# Split the event sequence into windows
input_df_list = get_input_events_by_hour(test_attack_1_df)


100%|██████████| 7648/7648 [00:01<00:00, 4766.80it/s]


In [23]:
# Redefined sigmoid: a transition whose probability is 0 must produce a weight of exactly 0
def sigmoid(x, steepness=5000, midpoint=0.00021101498206372652):
    """Map a probability to a weight, clamping non-positive input to 0.

    Args:
        x: Probability value to convert.
        steepness: Slope factor of the logistic. The default is the value
            that used to be hard-coded in the body.
        midpoint: Input that maps to exactly 0.5. The default is the value
            that used to be hard-coded in the body.

    Returns:
        ``0`` when ``x <= 0``; otherwise
        ``1 / (1 + exp(steepness * (midpoint - x)))``.
    """
    if x <= 0:
        # The plain logistic would give a non-zero weight at x == 0.
        return 0

    return 1.0 / (1 + np.exp(steepness * (-x + midpoint)))

In [24]:
# Look up a single entry of the transition matrix (row 1, column 10)
event_trans[1][10]

0.0198744769874477

In [25]:
# Inspect the first window produced by the split
input_df_list[0]

Unnamed: 0,datetime,flag,event_id
0,2008-01-11 06:01:40.434972,0,111
1,2008-01-11 06:01:40.770873,0,109
2,2008-01-11 06:01:42.302436,0,110
3,2008-01-11 06:01:42.855271,0,112
4,2008-01-11 06:01:45.377568,0,109
5,2008-01-11 06:01:45.977405,0,111
6,2008-01-11 06:01:46.393291,0,110
7,2008-01-11 06:01:46.977122,0,109
8,2008-01-11 06:01:52.671679,0,110
9,2008-01-11 06:01:52.796683,0,112


In [26]:
# Score a window from the pairwise transition probabilities of its consecutive events
def get_simple_weight_from_event(input_df, event_trans):
    """Multiply the ``sigmoid`` weights of every consecutive event pair.

    Each pair whose weight is 0 is printed as ``previous_id current_id``,
    because such a pair drives the whole product to 0.

    Args:
        input_df: Frame with an ``event_id`` column; ``-1`` marks an event
            that has no row or column in ``event_trans``.
        event_trans: Matrix indexed as ``event_trans[previous_id][current_id]``.

    Returns:
        The product of the per-pair weights. A frame with fewer than two
        rows has no pair, so ``1`` is returned.
    """
    event_ids = input_df["event_id"].tolist()
    event_act_pro = 1

    for pre_event_id, the_event_id in zip(event_ids, event_ids[1:]):

        if pre_event_id == -1 or the_event_id == -1:
            # Check the sentinel before touching the matrix: indexing with -1
            # would silently read the last row/column instead of failing.
            step_weight = 0
        else:
            step_weight = sigmoid(event_trans[pre_event_id][the_event_id])

        if step_weight == 0:
            # Report the pair responsible for a zero weight.
            print(pre_event_id, the_event_id)

        event_act_pro *= step_weight

    return event_act_pro

In [27]:
# Any zero-probability transition is printed while scoring; it is the reason
# the event sequence gets detected as anomalous
event_act_weight_list = [
    get_simple_weight_from_event(window_df, event_trans)
    for window_df in input_df_list
]

12 78
78 27


In [28]:
# Look up a single entry of the transition matrix (row 10, column 1)
event_trans[10][1] 

0.19213226909920184

In [29]:
# Weight computed for each window, in window order
event_act_weight_list

[0.6600911714856048,
 0.9673843743578107,
 0.7260431258676441,
 0.9993712972880981,
 0.999999999980961,
 0.8402358647240006,
 1.0,
 0.046487738810873425,
 0.00035627512664981317,
 6.464230238079776e-10,
 0.0,
 0.23128806946054303,
 0.6481957234860948,
 0.9999978213979099,
 0.41234982453026287,
 0.2636911990476835]

In [31]:
# The window under test, which contains the break-in-through-the-window robbery, gets an output weight of 0
event_act_weight_list[10]

0.0

In [30]:
# Copy of the per-window weights, kept as a bare string literal (it has no effect besides being echoed)
'''
0.6600911714856048
0.9673843743578107
0.7260431258676441
0.9993712972880981
0.999999999980961
0.8402358647240006
1.0
0.046487738810873425
0.00035627512664981317
6.464230238079776e-10
0.0
0.23128806946054303
0.6481957234860948
0.9999978213979099
0.41234982453026287
0.2636911990476835
'''

'\n0.6600911714856048\n0.9673843743578107\n0.7260431258676441\n0.9993712972880981\n0.999999999980961\n0.8402358647240006\n1.0\n0.046487738810873425\n0.00035627512664981317\n6.464230238079776e-10\n0.0\n0.23128806946054303\n0.6481957234860948\n0.9999978213979099\n0.41234982453026287\n0.2636911990476835\n'