-
Notifications
You must be signed in to change notification settings - Fork 0
/
RRNS.py
117 lines (113 loc) · 4.32 KB
/
RRNS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
import generate_HMM_model as ghm
import random
def CALCULATE_INIT_DETECTOR_SET(normal_set,r_self,r_ab,e_max,init_iter):
    """Draw an initial set of random detectors that lie outside the self region.

    Points are sampled uniformly from the unit hypercube. A point counts as
    "self" when its Euclidean distance to the nearest row of ``normal_set``
    is at most ``r_self``. The fraction of the cube covered by self is first
    estimated by Monte Carlo sampling; the number of detectors is then the
    remaining volume divided by the volume of one detector, approximated by
    a hypercube of side ``2 * r_ab / sqrt(n_dims)``.

    Args:
        normal_set: 2-D array-like (ndarray, DataFrame, nested list) with one
            normal sample per row.
        r_self: Radius around each normal sample that is treated as self.
        r_ab: Detector radius, used only to size the detector count.
        e_max: Sampling continues while the error estimate is >= ``e_max``.
        init_iter: Sampling also continues while the number of samples drawn
            so far is <= ``init_iter``.

    Returns:
        A list of detectors, each a plain list of ``n_dims`` floats.

    Raises:
        ValueError: If ``e_max`` is not positive (the sampling loop could
            then never terminate).
    """
    if e_max <= 0:
        raise ValueError('e_max must be positive, got %s' % e_max)

    print('CALCULATE_INIT_DETECTOR_SET')
    normal = np.asarray(normal_set, dtype=float)
    n_samples, n_dims = normal.shape
    print(n_samples)

    def nearest_self_distance(point):
        # Distance from ``point`` (shape (1, n_dims)) to the closest normal
        # sample; broadcasting replaces the explicit ones-vector product.
        if n_samples == 0:
            return float("inf")
        return np.min(np.sqrt(np.sum((point - normal) ** 2, axis=1)))

    # --- Monte Carlo estimate of the self volume ---------------------------
    num_hits = 0
    m = 0
    e = float("inf")
    volum_s = 0
    while m <= init_iter or e >= e_max:
        m += 1
        x = np.random.rand(1, n_dims)  # random vector
        min_dis = nearest_self_distance(x)
        print(min_dis)
        if min_dis <= r_self:
            num_hits += 1
        # NOTE(review): the source indentation was lost; the estimate is
        # assumed to be refreshed on every sample, not only on hits.
        volum_s = num_hits / m
        # Standard error of a sampled proportion. The previous expression,
        # ((V - sqrt(V)) / m) ** (1/3), has a non-positive base and evaluated
        # to NaN for every 0 < V < 1, which silently disabled the e_max test.
        e = np.sqrt(volum_s * (1 - volum_s) / m)
    print('%s %s %s'%(volum_s,n_dims,r_ab))

    # --- Number of detectors needed to fill the non-self volume ------------
    # The whole side length is raised to the n_dims-th power; previously only
    # sqrt(n_dims) was, which is equivalent only when 2 * r_ab == 1.
    detector_volume = (2 * r_ab / n_dims ** 0.5) ** n_dims
    num_ab = int((1 - volum_s) / detector_volume)
    print('Abnorm sample number is %s'%num_ab)

    # --- Rejection-sample detectors outside the self region ----------------
    D_set = []
    for _ in range(num_ab):
        x = np.random.rand(1, n_dims)  # random vector
        while nearest_self_distance(x) < r_self:
            x = np.random.rand(1, n_dims)  # resample until outside self
        D_set.append(x.tolist()[0])
    print('All %s abnorm sample init well!'%num_ab)
    return D_set
def OPTIMIZE_DETECTOR_DISTRIBUTION(D_set,r_ab,normal_set,r_self,num_iter,theta_min,alpha,alpha_pert,beta):
    """Spread the detectors out with a simulated-annealing style search.

    In every outer iteration random detectors are perturbed to a new random
    position within ``r_pert`` of the old one. A move is accepted when it
    lowers the cost reported by ``CALCULATE_COST_DIFFERENCE``, or otherwise
    with probability ``exp(-delta / T)``. After each outer iteration the
    temperature ``T`` is multiplied by ``alpha`` and the perturbation radius
    by ``alpha_pert``.

    Args:
        D_set: 2-D array-like of detectors, one per row. A float ndarray is
            updated in place; any other input is converted to a new array.
        r_ab: Detector radius; the initial perturbation radius is ``2 * r_ab``.
        normal_set: Normal samples, passed through to the cost functions.
        r_self: Self radius, passed through to the cost functions.
        num_iter: Number of outer (cooling) iterations.
        theta_min: An outer iteration ends once ``num_ab * theta_min`` moves
            were accepted, or after more than ``2 * num_ab * theta_min``
            trials, whichever comes first.
        alpha: Multiplicative cooling factor for the temperature.
        alpha_pert: Multiplicative decay factor for the perturbation radius.
        beta: Weight of the self-covering term in the cost.

    Returns:
        The optimized detector array.
    """
    D_set = np.asarray(D_set, dtype=float)
    r_pert = 2 * r_ab
    num_ab = len(D_set)
    n_dims = D_set.shape[1] if num_ab else 0
    T = CALCULATE_INIT_T(D_set, r_ab, normal_set, r_self, r_pert, beta)

    min_accepted = num_ab * theta_min
    max_steps = 2 * num_ab * theta_min

    for i in range(num_iter):
        receive_num = 0
        steps = 0
        random_receive = 0
        while receive_num < min_accepted and steps <= max_steps:
            # Count every trial. The counter was previously never advanced,
            # so the step cap had no effect and the loop could run forever
            # when moves were rarely accepted.
            steps += 1
            index = int(random.random() * num_ab)

            # Rejection-sample a candidate inside the perturbation ball.
            # NOTE(review): candidates come from the whole unit cube, so this
            # can take long once r_pert has shrunk - consider a local sampler.
            x = np.random.rand(1, n_dims)
            while distance_two_vector(x, D_set[index]) > r_pert:
                x = np.random.rand(1, n_dims)

            deta_c = CALCULATE_COST_DIFFERENCE(D_set, index, x, r_ab, normal_set, r_self, beta)
            # print('deta_c is %s'%deta_c)
            if deta_c < 0:
                accepted = True
            elif np.exp((-1 * deta_c) / T) > random.random():
                # Uphill move accepted by chance.
                accepted = True
                random_receive += 1
            else:
                accepted = False

            if accepted:
                receive_num += 1
                D_set[index] = x
                print('receive_num %s'%receive_num)

        print('num_iter %s'%i)
        print('random_receive %s'%random_receive)
        print('receive_num %s'%receive_num)
        T = alpha * T
        r_pert = alpha_pert * r_pert
    return D_set
def distance_two_vector(array1,array2):
    """Return the Euclidean distance between two vectors.

    Both arguments may be ndarrays or plain sequences (lists, tuples); they
    are converted to float arrays first, so two lists no longer raise a
    ``TypeError``. The shapes only need to be broadcastable, e.g. ``(1, d)``
    against ``(d,)``.

    Args:
        array1: First vector.
        array2: Second vector.

    Returns:
        The square root of the summed squared element-wise differences.
    """
    diff = np.asarray(array1, dtype=float) - np.asarray(array2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
def CALCULATE_INIT_T(D_set,r_ab,normal_set,r_self,r_pert,beta):
    """Return the initial temperature: the total cost of the detector set.

    The cost is the sum of ``Overlapping_v_D`` over every unordered pair of
    detectors plus ``beta`` times the sum of ``selfcovering`` over all
    detectors.

    Args:
        D_set: 2-D array-like of detectors, one per row. Nested lists (as
            returned by ``CALCULATE_INIT_DETECTOR_SET``) are accepted too.
        r_ab: Detector radius.
        normal_set: Normal samples.
        r_self: Self radius.
        r_pert: Unused; kept so existing callers keep working.
        beta: Weight of the self-covering term.

    Returns:
        ``overlapping + beta * self_covering``.
    """
    detectors = np.asarray(D_set, dtype=float)

    # Each detector is compared only with the ones after it, so every pair
    # is counted exactly once.
    over_lapping = 0
    for i in range(detectors.shape[0] - 1):
        over_lapping += Overlapping_v_D(detectors[i], detectors[i + 1:], r_ab)
    # NOTE(review): the source indentation was lost; the totals are assumed
    # to be reported once after each loop rather than on every iteration.
    print('overlapping is %s'%over_lapping)

    self_covering = 0
    for detector in detectors:
        self_covering += selfcovering(detector, normal_set, r_ab, r_self)
    print('selfcovering is %s'%self_covering)

    return over_lapping + beta * self_covering
def CALCULATE_COST_DIFFERENCE(D_set,index,d,r_ab,normal_set,r_self,beta):
    """Return the change in cost if detector ``index`` were moved to ``d``.

    The cost of a single detector is its overlap with all *other* detectors
    plus ``beta`` times its self-covering. The detector being moved is left
    out of both overlap sums: previously the old cost contained the
    detector's overlap with itself (a constant 1) and the new cost contained
    the candidate's overlap with the very position it is about to vacate,
    which biased the difference towards acceptance.

    Args:
        D_set: 2-D array-like of detectors, one per row.
        index: Row of the detector that would be replaced.
        d: Candidate position for that detector.
        r_ab: Detector radius.
        normal_set: Normal samples.
        r_self: Self radius.
        beta: Weight of the self-covering term.

    Returns:
        ``new_cost - old_cost``; negative when the move improves the set.
    """
    detectors = np.asarray(D_set, dtype=float)
    others = np.delete(detectors, index, axis=0)
    # Work on copies so helper functions can never reshape caller data.
    current = np.array(detectors[index], dtype=float)
    candidate = np.array(d, dtype=float)

    pre_value = (Overlapping_v_D(current, others, r_ab)
                 + beta * selfcovering(current, normal_set, r_ab, r_self))
    cur_value = (Overlapping_v_D(candidate, others, r_ab)
                 + beta * selfcovering(candidate, normal_set, r_ab, r_self))
    return cur_value - pre_value
def Overlapping_v_D(v,D_set,r_ab):
    """Return the summed Gaussian overlap between ``v`` and each row of ``D_set``.

    Each row ``d`` contributes ``exp(-||v - d||^2 / r_ab^2)``.

    Unlike the previous version this does not assign to ``v.shape`` (which
    reshaped the caller's array in place), does not build an n-by-n identity
    matrix just to obtain a column of ones, and accepts plain sequences.

    Args:
        v: A single vector with as many elements as ``D_set`` has columns.
        D_set: 2-D array-like of detectors, one per row.
        r_ab: Detector radius that scales the overlap kernel.

    Returns:
        The sum of the overlap terms; ``0.0`` for an empty ``D_set``.
    """
    detectors = np.asarray(D_set, dtype=float)
    if detectors.size == 0:
        return 0.0
    # reshape() returns a new view, leaving the caller's array untouched.
    point = np.asarray(v, dtype=float).reshape(1, detectors.shape[1])
    sq_dist = np.sum((point - detectors) ** 2, axis=1)
    return np.sum(np.exp(-sq_dist / (r_ab ** 2)))
def selfcovering(v,normal_set,r_ab,r_self):
    """Return how strongly detector ``v`` covers the normal samples.

    Each row ``s`` of ``normal_set`` contributes
    ``exp(-||v - s||^2 / ((r_ab + r_self) / 2)^2)``.

    Unlike the previous version this does not assign to ``v.shape`` (which
    reshaped the caller's array in place), does not build an n-by-n identity
    matrix just to obtain a column of ones, and accepts plain sequences as
    well as ndarrays and DataFrames.

    Args:
        v: A single vector with as many elements as ``normal_set`` has columns.
        normal_set: 2-D array-like of normal samples, one per row.
        r_ab: Detector radius.
        r_self: Self radius.

    Returns:
        The sum of the covering terms; ``0.0`` for an empty ``normal_set``.
    """
    samples = np.asarray(normal_set, dtype=float)
    if samples.size == 0:
        return 0.0
    # reshape() returns a new view, leaving the caller's array untouched.
    point = np.asarray(v, dtype=float).reshape(1, samples.shape[1])
    sq_dist = np.sum((point - samples) ** 2, axis=1)
    width = (r_ab + r_self) / 2
    return np.sum(np.exp(-sq_dist / (width ** 2)))
if __name__ == '__main__':
    # Script entry point: load the stored normal samples and the stored
    # initial detectors, optimize the detector distribution and hand every
    # optimized detector to ghm.save_abnormal_sample_likeli.
    normal_samples = pd.read_csv('likelihood_vector/normal_set.csv', header=None)

    # Disabled earlier stage, kept for reference:
    # ghm.random_generate_vector(800,10)
    # D_set=CALCULATE_INIT_DETECTOR_SET(_set,r_self=0.6,r_ab=0.5,e_max=0.1,init_iter=20)
    # print('save all %s abnorm detector'%len(D_set))
    # for d in D_set:
    #     ghm.save_abnormal_sample_likeli(d)
    initial_detectors = pd.read_csv('likelihood_vector/abnormal_set.csv', header=None)

    # Disabled diagnostic, kept for reference:
    # T=CALCULATE_INIT_T(np.array(D_set),0.8,_set,0.6,1.6,2)
    # print('T is %s' %T )
    optimized_detectors = OPTIMIZE_DETECTOR_DISTRIBUTION(
        np.array(initial_detectors),
        r_ab=0.5,
        normal_set=normal_samples,
        r_self=0.6,
        num_iter=10,
        theta_min=0.05,
        alpha=0.7,
        alpha_pert=0.8,
        beta=20,
    )

    print('save all %s OPTIMIZED abnorm detector'%len(optimized_detectors))
    for detector in optimized_detectors:
        ghm.save_abnormal_sample_likeli(detector,'OPTIMIZED_abnormal.csv')