In [3]:
import sys
sys.path.insert(0, '..')
sys.path.insert(0, 'plots')

import os

from IPython.display import display
from IPython.display import clear_output
import ipywidgets as widgets


import json
from SplunkIntelOptimized import SplunkIntelOptimized
from sources.SplunkDatasetNew import SplunkDatasetNew
from sources.SplunkFileSource import SplunkFileSource

import plotly as py
import pandas as pd
import numpy as np
import plotNew as plot

# Notebook-wide setup: initialise plotly's offline notebook mode and turn on
# the HTML representation of pandas objects.
py.offline.init_notebook_mode()
pd.set_option('display.notebook_repr_html', True)

# File names offered by the data-source dropdown; filled by create_widgets().
data_sources_names = []

# Widget handles shared by the handlers in this cell. They remain None until
# create_widgets() builds the widgets (new_time_range_picker is declared here
# but is not assigned anywhere in this cell).
data_source_picker = time_range_picker = event_picker = None
event_text_area = threshold_picker = new_time_range_picker = None


#create widgets
def create_widgets():
    """Build the input widgets and publish them through module globals.

    Every file name in ``../../data_prod`` that ends with ``.json`` is
    appended to ``data_sources_names``; the data-source dropdown, time-range
    slider, threshold slider, event dropdown and event text area are then
    created. The widgets are neither displayed nor wired to handlers here.
    """
    global data_source_picker, time_range_picker, event_picker
    global event_text_area, threshold_picker, new_time_range_picker

    data_sources_names.extend(
        entry for entry in os.listdir("../../data_prod")
        if entry.endswith(".json")
    )

    # Keyword arguments shared by both dropdowns and by both sliders.
    # button_style: 'success', 'info', 'warning', 'danger' or ''
    dropdown_common = dict(disabled=False, button_style='')
    slider_common = dict(
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        slider_color='white',
        color='black',
    )

    data_source_picker = widgets.Dropdown(
        options=data_sources_names,
        description='Data Source:',
        **dropdown_common
    )

    # Placeholder bounds; initialize() replaces them once the data is loaded.
    time_range_picker = widgets.IntRangeSlider(
        value=[1, 2],
        min=0,
        max=100,
        step=1,
        description='Time:',
        readout_format='i',
        **slider_common
    )

    threshold_picker = widgets.FloatSlider(
        value=0.8,
        min=0,
        max=1,
        step=0.1,
        description='Threshold:',
        readout_format='.1f',
        **slider_common
    )

    # Placeholder options; change_event_handler() replaces them after a run.
    event_picker = widgets.Dropdown(
        options=[1, 2, 3],
        description='Event Number:',
        **dropdown_common
    )

    text_layout = widgets.Layout(height='40px', width='800px')
    event_text_area = widgets.Textarea(value='TA: height=40px',
                                       layout=text_layout)

    
def initialize(change):
    """Reset the time-range slider for the selected data source and re-run.

    Loads the events of the file chosen in ``data_source_picker``, finds the
    largest ``logCollectionMinute`` among them, makes the time slider span
    ``[0, largest minute]`` and then calls ``change_event_handler``.

    Args:
        change: notification passed by the widget observer (or ``None`` when
            called directly); it is not used.
    """
    all_events = list(
        SplunkFileSource.load_prod_data('../../data_prod/' + data_source_picker.value))

    # Keep the running maximum as an int: storing the raw value would compare
    # an int against whatever type the file holds on the next iteration.
    minutes = 0
    for event in all_events:
        minute = int(event.get('logCollectionMinute'))
        if minute > minutes:
            minutes = minute

    # Widen the bounds before assigning the value; a value above the current
    # max would otherwise be clamped to the old bound.
    time_range_picker.min = 0
    time_range_picker.max = minutes
    time_range_picker.value = [0, minutes]

    change_event_handler(None)
    
    
# Dataset shared by the handlers below; change_event_handler() rebinds it to
# the result of the most recent analysis run.
splunkDataset = SplunkDatasetNew()
# Argument list handed to SplunkIntelOptimized.parse(); cleared and refilled
# on every run by change_event_handler().
splunkIntelargs = []

def event_picker_change_handler(change):
    """Show the event selected in ``event_picker`` as JSON in the text area.

    ``change`` is the observer notification and is not used; the selection is
    read from ``event_picker.value`` and used as an index into the events of
    the current ``splunkDataset``.
    """
    events = splunkDataset.get_all_events_for_notebook()
    selected_event = events[event_picker.value]
    event_text_area.value = json.dumps(selected_event)

def split(input, length, size):
    """Flatten a text and break it into ``<br>``-separated chunks.

    Newlines and tab + "at" sequences are replaced by a single space, then
    chunks of ``size`` characters are taken starting at offsets
    ``0, size, 2 * size, ...`` below ``length``.

    Args:
        input: the text to format.
        length: upper bound (exclusive) for chunk start offsets; values
            beyond the flattened text are clamped so no empty chunks are
            emitted. The final chunk is not truncated to ``length``.
        size: number of characters per chunk; must be non-zero.

    Returns:
        The chunks joined with ``'<br>'`` (an empty string when no chunk
        starts below ``length``).
    """
    # str.replace returns a new string, so the result has to be kept.
    text = input.replace('\n', ' ').replace('\tat', ' ')
    length = min(length, len(text))
    return '<br>'.join(text[start:start + size]
                       for start in range(0, length, size))

def get_tool_tips(all_events):
    """Return one HTML tooltip string per event, in input order.

    Each tooltip consists of the text of ``event[0]`` as formatted by
    ``split`` (limited to a single chunk of at most 100 characters), the
    event's position in ``all_events`` labelled ``id`` and ``event[3]``
    labelled ``cluster``.
    """
    def describe(position, record):
        # One tooltip: message preview, then index, then cluster value.
        preview = split(record[0], min(100, len(record[0])), 100)
        return (preview
                + '<br> id = ' + str(position)
                + '<br> cluster = ' + str(record[3]))

    return [describe(position, record)
            for position, record in enumerate(all_events)]
    
# Handle data source change
def change_event_handler(change):
    """Re-run the analysis over the selected time range and redraw the plots.

    For every minute in the range chosen on ``time_range_picker`` (both ends
    inclusive) a fresh ``SplunkDatasetNew`` is loaded for that single minute,
    with the same minute used for the control and the test window, and is
    analysed by ``SplunkIntelOptimized`` with the threshold taken from
    ``threshold_picker``. The output of each run is written to
    ``result.json`` and passed to the next minute's load as the previous
    output file. After the last minute the scatter plots and histograms are
    drawn and the options of ``event_picker`` are refreshed.

    Side effects: clears the cell output, rebinds the global
    ``splunkDataset``, clears and refills ``splunkIntelargs`` and overwrites
    ``result.json`` in the working directory.

    Args:
        change: notification passed by the widget observer (or ``None`` when
            called directly); it is not used.
    """
    global splunkDataset

    first_minute = time_range_picker.value[0]
    last_minute = time_range_picker.value[1]

    # Host groups passed to load_prod_file; presumably the control hosts and
    # the test host -- TODO confirm against SplunkDatasetNew.load_prod_file.
    control_hosts = ('ip-172-31-12-78', 'ip-172-31-15-177',
                     'ip-172-31-2-144', 'ip-172-31-12-51')
    test_hosts = ('ip-172-31-4-253',)

    clear_output()

    prev_out_file = None  # there is no earlier result before the first minute
    for minute in range(first_minute, last_minute + 1):
        del splunkIntelargs[:]
        splunkDataset = SplunkDatasetNew()

        # Progress output: the single-minute control and test windows.
        print(minute, minute)
        print(minute, minute)
        splunkDataset.load_prod_file('../../data_prod/' + data_source_picker.value,
                                     [minute, minute],
                                     [minute, minute],
                                     list(control_hosts),
                                     list(test_hosts),
                                     prev_out_file)

        splunkIntelargs.append('--sim_threshold')
        splunkIntelargs.append(str(float(threshold_picker.value)))

        print(splunkIntelargs)

        splunkIntel = SplunkIntelOptimized(splunkDataset,
                                           SplunkIntelOptimized.parse(splunkIntelargs))
        splunkDataset = splunkIntel.run()

        # The context manager closes the file even if the write fails.
        # NOTE(review): get_output_for_notebook_as_json is accessed without a
        # call, as before -- presumably a property; confirm.
        with open("result.json", "w") as result_file:
            result_file.write(splunkDataset.get_output_for_notebook_as_json)

        # Feed this minute's result into the next minute's load.
        prev_out_file = './result.json'

    xy_matrix, tooltips, labels, _sizes = splunkDataset.control_scatter_plot()
    plot.scatter_plot_groups(xy_matrix, labels, tooltips,
                             ['control', 'test', 'test-anomaly', 'test-unx-freq'],
                             ['blue', 'green', 'red', 'orange'])

    # NOTE: earlier experiments with splunkDataset.count_scatter_plot() and a
    # dump of splunkDataset.get_anom_clusters() were disabled here and are
    # not rendered.

    xy_matrix, tooltips, labels, clusters = splunkDataset.control_scatter_plot_4d()
    plot.scatter_plot_groups_4d(xy_matrix, labels, clusters, tooltips,
                                ['blue', 'orange', 'red'])

    hist_data = splunkDataset.count_hist_plot()
    for _key, value in hist_data.items():
        plot.hist_plot(value.get('control'), value.get('test'))

    # Offer every event index of the final dataset in the event dropdown.
    event_picker.options = list(range(len(splunkDataset.get_all_events_for_notebook())))
     

# Build the widgets, then run the first analysis before any observer is
# attached.
create_widgets()
initialize(None)

# Set up event handlers: each picker reacts to changes of its "value" trait.
for picker, handler in (
        (data_source_picker, initialize),
        (time_range_picker, change_event_handler),
        (event_picker, event_picker_change_handler),
        (threshold_picker, change_event_handler),
):
    picker.observe(handler, names="value")

# Show the widgets, one horizontal row per group.
for row in (
        [data_source_picker],
        [time_range_picker, threshold_picker],
        [event_picker, event_text_area],
):
    display(widgets.HBox(row))

2017-07-18 15:15:20,599 INFO loading file ../../data_prod/splunkLogs.json
2017-07-18 15:15:20,698 INFO Running using file source
2017-07-18 15:15:20,699 INFO Start vectorization....
2017-07-18 15:15:20,701 INFO setting min_df = 1 and max_df = 1.0


(0, 0)
(0, 0)
([0, 0], [0, 0], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 15)


2017-07-18 15:15:20,880 INFO Running kemans with k = 8
2017-07-18 15:15:21,079 INFO Running kemans with k = 12
2017-07-18 15:15:21,381 INFO Running kemans with k = 14
2017-07-18 15:15:21,730 INFO Running kemans with k = 15
2017-07-18 15:15:22,117 INFO found k = 15


('test count = ', 1)


2017-07-18 15:15:22,426 INFO Detect Count Anomalies....
2017-07-18 15:15:22,428 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-18 15:15:22,430 INFO done
2017-07-18 15:15:22,436 INFO loading file ../../data_prod/splunkLogs.json
2017-07-18 15:15:22,540 INFO loading file ./result.json
2017-07-18 15:15:22,556 INFO Running using file source
2017-07-18 15:15:22,558 INFO Start vectorization....
2017-07-18 15:15:22,560 INFO setting min_df = 1 and max_df = 1.0


(1, 1)
(1, 1)
([1, 1], [1, 1], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)

2017-07-18 15:15:23,013 INFO Running kemans with k = 15
2017-07-18 15:15:23,375 INFO found k = 15
2017-07-18 15:15:23,377 INFO Running kemans with k = 7
2017-07-18 15:15:23,563 INFO Running kemans with k = 11
2017-07-18 15:15:23,837 INFO Running kemans with k = 13
2017-07-18 15:15:24,180 INFO Running kemans with k = 14



('test count = ', 1)


2017-07-18 15:15:25,060 INFO Detect Count Anomalies....
2017-07-18 15:15:25,062 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:25,064 INFO done
2017-07-18 15:15:25,074 INFO loading file ../../data_prod/splunkLogs.json
2017-07-18 15:15:25,174 INFO loading file ./result.json
2017-07-18 15:15:25,194 INFO Running using file source
2017-07-18 15:15:25,196 INFO Start vectorization....
2017-07-18 15:15:25,198 INFO setting min_df = 1 and max_df = 1.0


(2, 2)
(2, 2)
([2, 2], [2, 2], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:25,626 INFO Running kemans with k = 15
2017-07-18 15:15:26,008 INFO found k = 15
2017-07-18 15:15:26,011 INFO Running kemans with k = 7
2017-07-18 15:15:26,202 INFO Running kemans with k = 11
2017-07-18 15:15:26,496 INFO Running kemans with k = 13
2017-07-18 15:15:26,861 INFO Running kemans with k = 14


('test count = ', 1)


2017-07-18 15:15:27,750 INFO Detect Count Anomalies....
2017-07-18 15:15:27,752 INFO Using ThreeSigmaClassifier for cluster 8
2017-07-18 15:15:27,754 INFO done
2017-07-18 15:15:27,766 INFO loading file ../../data_prod/splunkLogs.json
2017-07-18 15:15:27,851 INFO loading file ./result.json
2017-07-18 15:15:27,875 INFO Running using file source
2017-07-18 15:15:27,876 INFO Start vectorization....
2017-07-18 15:15:27,878 INFO setting min_df = 1 and max_df = 1.0


(3, 3)
(3, 3)
([3, 3], [3, 3], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:28,295 INFO Running kemans with k = 15
2017-07-18 15:15:28,678 INFO found k = 15
2017-07-18 15:15:28,679 INFO Running kemans with k = 7
2017-07-18 15:15:28,877 INFO Running kemans with k = 11
2017-07-18 15:15:29,167 INFO Running kemans with k = 13
2017-07-18 15:15:29,487 INFO Running kemans with k = 14


('test count = ', 1)


2017-07-18 15:15:30,372 INFO Detect Count Anomalies....
2017-07-18 15:15:30,374 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:30,377 INFO done
2017-07-18 15:15:30,396 INFO loading file ../../data_prod/splunkLogs.json
2017-07-18 15:15:30,494 INFO loading file ./result.json
2017-07-18 15:15:30,520 INFO Running using file source
2017-07-18 15:15:30,521 INFO Start vectorization....
2017-07-18 15:15:30,523 INFO setting min_df = 1 and max_df = 1.0


(4, 4)
(4, 4)
([4, 4], [4, 4], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:30,946 INFO Running kemans with k = 15
2017-07-18 15:15:31,314 INFO found k = 15
2017-07-18 15:15:31,316 INFO Running kemans with k = 7
2017-07-18 15:15:31,502 INFO Running kemans with k = 11
2017-07-18 15:15:31,778 INFO Running kemans with k = 13
2017-07-18 15:15:32,117 INFO Running kemans with k = 14


('test count = ', 1)


2017-07-18 15:15:33,019 INFO Detect Count Anomalies....
2017-07-18 15:15:33,022 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:33,025 INFO done
2017-07-18 15:15:33,042 INFO loading file ../../data_prod/splunkLogs.json
2017-07-18 15:15:33,140 INFO loading file ./result.json
2017-07-18 15:15:33,170 INFO Running using file source
2017-07-18 15:15:33,172 INFO Start vectorization....
2017-07-18 15:15:33,174 INFO setting min_df = 1 and max_df = 1.0


(5, 5)
(5, 5)
([5, 5], [5, 5], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:33,591 INFO Running kemans with k = 15
2017-07-18 15:15:33,942 INFO found k = 15
2017-07-18 15:15:33,943 INFO Running kemans with k = 7
2017-07-18 15:15:34,140 INFO Running kemans with k = 11
2017-07-18 15:15:34,422 INFO Running kemans with k = 13
2017-07-18 15:15:34,751 INFO Running kemans with k = 14


('test count = ', 18)


2017-07-18 15:15:36,207 INFO Running kemans with k = 1
2017-07-18 15:15:36,228 INFO Running kemans with k = 2
2017-07-18 15:15:36,276 INFO found k = 2
2017-07-18 15:15:36,277 INFO Detect Count Anomalies....
2017-07-18 15:15:36,279 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:36,282 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:15:36,285 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:15:36,288 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:15:36,291 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:15:36,294 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:36,296 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:36,299 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:15:36,302 INFO Using ThreeSigmaClassifier for cluster 8
2017-07-18 15:15:36,305 INFO Using ThreeSigmaClassifier for cluster 9
2017-07-18 15:15:36,307 INFO Using ThreeSigmaClassifier for cluster 10
2017-07-18 15:15:36,31

[30, 63, 63, 65, 64, 51, 30, 64, 63, 65, 64, 52, 30, 64, 63, 66, 64, 52, 31, 64, 64, 65, 65, 51]
[96]
[-1]
[65, 64, 64, 31, 64, 52, 65, 64, 64, 31, 64, 52, 65, 64, 64, 31, 64, 52, 65, 63, 64, 31, 64, 52]
[96]
[-1]
(6, 6)
(6, 6)
([6, 6], [6, 6], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)

2017-07-18 15:15:36,859 INFO Running kemans with k = 15
2017-07-18 15:15:37,152 INFO found k = 15
2017-07-18 15:15:37,154 INFO Running kemans with k = 7
2017-07-18 15:15:37,338 INFO Running kemans with k = 11
2017-07-18 15:15:37,632 INFO Running kemans with k = 13
2017-07-18 15:15:37,960 INFO Running kemans with k = 14



('test count = ', 34)


2017-07-18 15:15:40,069 INFO Running kemans with k = 2
2017-07-18 15:15:40,116 INFO found k = 2
2017-07-18 15:15:40,118 INFO Running kemans with k = 1
2017-07-18 15:15:40,139 INFO Detect Count Anomalies....
2017-07-18 15:15:40,141 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:40,145 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:15:40,148 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:15:40,150 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:15:40,154 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:15:40,157 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:40,161 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:40,163 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:15:40,165 INFO Using ThreeSigmaClassifier for cluster 8
2017-07-18 15:15:40,167 INFO Using ThreeSigmaClassifier for cluster 9
2017-07-18 15:15:40,170 INFO Using ThreeSigmaClassifier for cluster 10
2017-07-18 15:15:40,17

[51, 30, 63, 63, 65, 64, 51, 52, 30, 64, 63, 65, 64, 50, 52, 30, 64, 63, 66, 64, 50, 51, 31, 64, 64, 65, 65, 51]
[ 96 135]
[-1, -1]
[31, 64, 64, 65, 64, 52, 50, 31, 64, 64, 65, 64, 52, 51, 31, 64, 64, 65, 64, 52, 51, 30, 64, 63, 65, 64, 52, 50]
[ 96   3 135]
[-1, 1, -1]
[65, 64, 64, 31, 64, 52, 50, 65, 64, 64, 31, 64, 52, 51, 65, 64, 64, 31, 64, 52, 51, 65, 63, 64, 31, 64, 52, 50]
[ 96 132]
[-1, -1]
(7, 7)
(7, 7)
([7, 7], [7, 7], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])


2017-07-18 15:15:40,447 INFO Running using file source
2017-07-18 15:15:40,450 INFO Start vectorization....
2017-07-18 15:15:40,452 INFO setting min_df = 1 and max_df = 1.0


['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:40,868 INFO Running kemans with k = 15
2017-07-18 15:15:41,234 INFO found k = 15
2017-07-18 15:15:41,235 INFO Running kemans with k = 7
2017-07-18 15:15:41,429 INFO Running kemans with k = 11
2017-07-18 15:15:41,706 INFO Running kemans with k = 13
2017-07-18 15:15:42,034 INFO Running kemans with k = 14


('test count = ', 36)


2017-07-18 15:15:44,208 INFO Running kemans with k = 3
2017-07-18 15:15:44,263 INFO found k = 3
2017-07-18 15:15:44,264 INFO Running kemans with k = 1
2017-07-18 15:15:44,288 INFO Running kemans with k = 2
2017-07-18 15:15:44,327 INFO found k = 2
2017-07-18 15:15:44,328 INFO Detect Count Anomalies....
2017-07-18 15:15:44,330 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:44,333 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:15:44,337 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:15:44,339 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:15:44,341 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:15:44,345 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:44,349 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:44,351 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:15:44,353 INFO Using ThreeSigmaClassifier for cluster 8
2017-07-18 15:15:44,356 INFO Using ThreeSigmaClassifier for cluster

[50, 65, 64, 64, 31, 64, 52, 49, 51, 65, 64, 64, 31, 64, 52, 48, 51, 65, 64, 64, 31, 64, 52, 48, 50, 65, 63, 64, 31, 64, 52, 49]
[132  96 129]
[-1, -1, -1]
[51, 30, 63, 63, 65, 64, 51, 48, 52, 30, 64, 63, 65, 64, 50, 48, 52, 30, 64, 63, 66, 64, 50, 48, 51, 31, 64, 64, 65, 65, 51, 48]
[ 96 135 129]
[-1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 31, 64, 64, 65, 64, 52, 51, 49, 31, 64, 64, 65, 64, 52, 51, 49, 30, 64, 63, 65, 64, 52, 50, 48]
[ 96   3 135 126]
[-1, 1, -1, -1]
(8, 8)
(8, 8)
([8, 8], [8, 8], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:44,935 INFO Running kemans with k = 15
2017-07-18 15:15:45,301 INFO found k = 15
2017-07-18 15:15:45,302 INFO Running kemans with k = 7
2017-07-18 15:15:45,477 INFO Running kemans with k = 11
2017-07-18 15:15:45,760 INFO Running kemans with k = 13
2017-07-18 15:15:46,091 INFO Running kemans with k = 14


('test count = ', 38)


2017-07-18 15:15:48,313 INFO Running kemans with k = 4
2017-07-18 15:15:48,401 INFO found k = 4
2017-07-18 15:15:48,402 INFO Running kemans with k = 2
2017-07-18 15:15:48,452 INFO found k = 2
2017-07-18 15:15:48,454 INFO Running kemans with k = 1
2017-07-18 15:15:48,483 INFO Detect Count Anomalies....
2017-07-18 15:15:48,486 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:48,489 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:15:48,494 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:15:48,497 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:15:48,499 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:15:48,502 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:48,505 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:48,510 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:15:48,515 INFO Using ThreeSigmaClassifier for cluster 8
2017-07-18 15:15:48,518 INFO Using ThreeSigmaClassifier for cluster

[50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 50, 65, 63, 64, 31, 64, 52, 49, 51]
[132  96 129 138]
[-1, -1, -1, -1]
[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 30, 64, 63, 65, 64, 50, 48, 52, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 31, 64, 64, 65, 65, 51, 48, 52]
[ 96 135 129 138]
[-1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 31, 64, 64, 65, 64, 52, 51, 49, 51, 31, 64, 64, 65, 64, 52, 51, 49, 51, 30, 64, 63, 65, 64, 52, 50, 48, 52]
[ 96   3 135 126 138]
[-1, 1, -1, -1, -1]
(9, 9)
(9, 9)
([9, 9], [9, 9], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])


2017-07-18 15:15:48,704 INFO Running using file source
2017-07-18 15:15:48,706 INFO Start vectorization....
2017-07-18 15:15:48,708 INFO setting min_df = 1 and max_df = 1.0


['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:49,135 INFO Running kemans with k = 15
2017-07-18 15:15:49,590 INFO found k = 15
2017-07-18 15:15:49,592 INFO Running kemans with k = 7
2017-07-18 15:15:49,816 INFO Running kemans with k = 11
2017-07-18 15:15:50,135 INFO Running kemans with k = 13
2017-07-18 15:15:50,526 INFO Running kemans with k = 14


('test count = ', 40)


2017-07-18 15:15:53,115 INFO Running kemans with k = 5
2017-07-18 15:15:53,276 INFO found k = 5
2017-07-18 15:15:53,278 INFO Running kemans with k = 2
2017-07-18 15:15:53,352 INFO found k = 2
2017-07-18 15:15:53,353 INFO Running kemans with k = 1
2017-07-18 15:15:53,393 INFO Detect Count Anomalies....
2017-07-18 15:15:53,395 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:53,398 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:15:53,404 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:15:53,407 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:15:53,411 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:15:53,414 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:53,419 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:53,421 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:15:53,427 INFO Using ThreeSigmaClassifier for cluster 8
2017-07-18 15:15:53,430 INFO Using ThreeSigmaClassifier for cluster

[50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 50, 65, 63, 64, 31, 64, 52, 49, 51, 51]
[132  96 129 138 135]
[-1, -1, -1, -1, -1]
[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 52, 30, 64, 63, 65, 64, 50, 48, 52, 51, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 51, 31, 64, 64, 65, 65, 51, 48, 52, 51]
[ 96 135 129 138 138]
[-1, -1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 51, 31, 64, 64, 65, 64, 52, 51, 49, 51, 51, 31, 64, 64, 65, 64, 52, 51, 49, 51, 52, 30, 64, 63, 65, 64, 52, 50, 48, 52, 51]
[ 96   3 135 126 138 135]
[-1, 1, -1, -1, -1, -1]
(10, 10)
(10, 10)
([10, 10], [10, 10], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])


2017-07-18 15:15:53,636 INFO Running using file source
2017-07-18 15:15:53,638 INFO Start vectorization....
2017-07-18 15:15:53,642 INFO setting min_df = 1 and max_df = 1.0


['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:15:54,136 INFO Running kemans with k = 15
2017-07-18 15:15:54,534 INFO found k = 15
2017-07-18 15:15:54,536 INFO Running kemans with k = 7
2017-07-18 15:15:54,729 INFO Running kemans with k = 11
2017-07-18 15:15:55,077 INFO Running kemans with k = 13
2017-07-18 15:15:55,448 INFO Running kemans with k = 14


('test count = ', 42)


2017-07-18 15:15:57,998 INFO Running kemans with k = 6
2017-07-18 15:15:58,131 INFO found k = 6
2017-07-18 15:15:58,133 INFO Running kemans with k = 3
2017-07-18 15:15:58,205 INFO found k = 3
2017-07-18 15:15:58,207 INFO Running kemans with k = 1
2017-07-18 15:15:58,237 INFO Running kemans with k = 2
2017-07-18 15:15:58,280 INFO found k = 2
2017-07-18 15:15:58,281 INFO Detect Count Anomalies....
2017-07-18 15:15:58,283 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:15:58,285 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:15:58,288 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:15:58,290 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:15:58,293 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:15:58,295 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:15:58,297 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:15:58,302 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:15:58,305 INFO Using ThreeS

[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 51, 52, 30, 64, 63, 65, 64, 50, 48, 52, 51, 51, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 52, 51, 31, 64, 64, 65, 65, 51, 48, 52, 51, 51]
[ 96 135 129 138 138 138]
[-1, -1, -1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 51, 52, 31, 64, 64, 65, 64, 52, 51, 49, 51, 51, 52, 31, 64, 64, 65, 64, 52, 51, 49, 51, 52, 51, 30, 64, 63, 65, 64, 52, 50, 48, 52, 51, 52]
[ 96   3 135 126 138 135 135]
[-1, 1, -1, -1, -1, -1, -1]
[50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 52, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 50, 65, 63, 64, 31, 64, 52, 49, 51, 51, 52]
[132  96 129 138 135 135]
[-1, -1, -1, -1, -1, -1]
(11, 11)
(11, 11)
([11, 11], [11, 11], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])
['--sim_threshold', '0.8']


2017-07-18 15:15:58,501 INFO Running using file source
2017-07-18 15:15:58,504 INFO Start vectorization....
2017-07-18 15:15:58,506 INFO setting min_df = 1 and max_df = 1.0


('control count = ', 30)


2017-07-18 15:15:58,892 INFO Running kemans with k = 15
2017-07-18 15:15:59,246 INFO found k = 15
2017-07-18 15:15:59,247 INFO Running kemans with k = 7
2017-07-18 15:15:59,429 INFO Running kemans with k = 11
2017-07-18 15:15:59,708 INFO Running kemans with k = 13
2017-07-18 15:16:00,047 INFO Running kemans with k = 14


('test count = ', 44)


2017-07-18 15:16:02,558 INFO Running kemans with k = 7
2017-07-18 15:16:02,710 INFO found k = 7
2017-07-18 15:16:02,712 INFO Running kemans with k = 3
2017-07-18 15:16:02,783 INFO found k = 3
2017-07-18 15:16:02,785 INFO Running kemans with k = 1
2017-07-18 15:16:02,815 INFO Running kemans with k = 2
2017-07-18 15:16:02,856 INFO found k = 2
2017-07-18 15:16:02,858 INFO Detect Count Anomalies....
2017-07-18 15:16:02,861 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:16:02,863 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:16:02,868 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:16:02,871 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:16:02,873 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:16:02,877 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:16:02,879 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:16:02,882 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:16:02,884 INFO Using ThreeS

[50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 52, 50, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 51, 50, 65, 63, 64, 31, 64, 52, 49, 51, 51, 52, 50]
[132  96 129 138 135 135 135]
[-1, -1, -1, -1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 51, 52, 50, 31, 64, 64, 65, 64, 52, 51, 49, 51, 51, 52, 50, 31, 64, 64, 65, 64, 52, 51, 49, 51, 52, 51, 50, 30, 64, 63, 65, 64, 52, 50, 48, 52, 51, 52, 50]
[ 96   3 135 126 138 135 135 135]
[-1, 1, -1, -1, -1, -1, -1, -1]
[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 51, 51, 52, 30, 64, 63, 65, 64, 50, 48, 52, 51, 51, 51, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 52, 51, 51, 31, 64, 64, 65, 65, 51, 48, 52, 51, 51, 51]
[ 96 135 129 138 138 138 132]
[-1, -1, -1, -1, -1, -1, -1]
(12, 12)
(12, 12)
([12, 12], [12, 12], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])


2017-07-18 15:16:03,098 INFO Running using file source
2017-07-18 15:16:03,100 INFO Start vectorization....
2017-07-18 15:16:03,104 INFO setting min_df = 1 and max_df = 1.0


['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:16:03,542 INFO Running kemans with k = 15
2017-07-18 15:16:03,927 INFO found k = 15
2017-07-18 15:16:03,930 INFO Running kemans with k = 7
2017-07-18 15:16:04,118 INFO Running kemans with k = 11
2017-07-18 15:16:04,404 INFO Running kemans with k = 13
2017-07-18 15:16:04,729 INFO Running kemans with k = 14


('test count = ', 46)


2017-07-18 15:16:07,331 INFO Running kemans with k = 8
2017-07-18 15:16:07,517 INFO found k = 8
2017-07-18 15:16:07,519 INFO Running kemans with k = 4
2017-07-18 15:16:07,613 INFO found k = 4
2017-07-18 15:16:07,615 INFO Running kemans with k = 2
2017-07-18 15:16:07,666 INFO found k = 2
2017-07-18 15:16:07,668 INFO Running kemans with k = 1
2017-07-18 15:16:07,703 INFO Detect Count Anomalies....
2017-07-18 15:16:07,705 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:16:07,707 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:16:07,711 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:16:07,714 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:16:07,716 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:16:07,721 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:16:07,724 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:16:07,726 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:16:07,728 INFO Using ThreeS

[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 51, 51, 51, 52, 30, 64, 63, 65, 64, 50, 48, 52, 51, 51, 51, 51, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 52, 51, 51, 51, 31, 64, 64, 65, 65, 51, 48, 52, 51, 51, 51, 51]
[ 96 135 129 138 138 138 132 135]
[-1, -1, -1, -1, -1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 51, 52, 50, 52, 31, 64, 64, 65, 64, 52, 51, 49, 51, 51, 52, 50, 52, 31, 64, 64, 65, 64, 52, 51, 49, 51, 52, 51, 50, 52, 30, 64, 63, 65, 64, 52, 50, 48, 52, 51, 52, 50, 52]
[ 96   3 135 126 138 135 135 135 138]
[-1, 1, -1, -1, -1, -1, -1, -1, -1]
[50, 50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 52, 52, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 52, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 52, 50, 50, 65, 63, 64, 31, 64, 52, 49, 51, 51, 52, 52]
[135 132  96 129 138 135 135 138]
[-1, -1, -1, -1, -1, -1, -1, -1]
(13, 13)
(13, 13)
([13, 13], [13, 13], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-144', 'ip-172-31-12-51'], ['ip-172-31-4-253'])


2017-07-18 15:16:07,943 INFO Running using file source
2017-07-18 15:16:07,945 INFO Start vectorization....
2017-07-18 15:16:07,947 INFO setting min_df = 1 and max_df = 1.0


['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:16:08,321 INFO Running kemans with k = 15
2017-07-18 15:16:08,666 INFO found k = 15
2017-07-18 15:16:08,668 INFO Running kemans with k = 7
2017-07-18 15:16:08,849 INFO Running kemans with k = 11
2017-07-18 15:16:09,146 INFO Running kemans with k = 13
2017-07-18 15:16:09,485 INFO Running kemans with k = 14


('test count = ', 48)


2017-07-18 15:16:12,276 INFO Running kemans with k = 9
2017-07-18 15:16:12,489 INFO found k = 9
2017-07-18 15:16:12,490 INFO Running kemans with k = 4
2017-07-18 15:16:12,584 INFO found k = 4
2017-07-18 15:16:12,586 INFO Running kemans with k = 2
2017-07-18 15:16:12,634 INFO found k = 2
2017-07-18 15:16:12,636 INFO Running kemans with k = 1
2017-07-18 15:16:12,671 INFO Detect Count Anomalies....
2017-07-18 15:16:12,673 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:16:12,675 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:16:12,679 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:16:12,683 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:16:12,686 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:16:12,689 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:16:12,692 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:16:12,695 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:16:12,697 INFO Using ThreeS

[50, 50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 52, 52, 51, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 52, 50, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 52, 50, 50, 50, 65, 63, 64, 31, 64, 52, 49, 51, 51, 52, 52, 50]
[135 132  96 129 138 135 135 138 132]
[-1, -1, -1, -1, -1, -1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 51, 52, 50, 52, 51, 31, 64, 64, 65, 64, 52, 51, 49, 51, 51, 52, 50, 52, 50, 31, 64, 64, 65, 64, 52, 51, 49, 51, 52, 51, 50, 52, 50, 30, 64, 63, 65, 64, 52, 50, 48, 52, 51, 52, 50, 52, 51]
[ 96   3 135 126 138 135 135 135 138 135]
[-1, 1, -1, -1, -1, -1, -1, -1, -1, -1]
[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 51, 51, 51, 50, 52, 30, 64, 63, 65, 64, 50, 48, 52, 51, 51, 51, 51, 51, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 52, 51, 51, 51, 51, 31, 64, 64, 65, 65, 51, 48, 52, 51, 51, 51, 51, 51]
[ 96 135 129 138 138 138 132 135 135]
[-1, -1, -1, -1, -1, -1, -1, -1, -1]
(14, 14)
(14, 14)
([14, 14], [14, 14], ['ip-172-31-12-78', 'ip-172-31-15-177', 'ip-172-31-2-1

2017-07-18 15:16:12,934 INFO Running using file source
2017-07-18 15:16:12,936 INFO Start vectorization....
2017-07-18 15:16:12,937 INFO setting min_df = 1 and max_df = 1.0


['--sim_threshold', '0.8']
('control count = ', 30)


2017-07-18 15:16:13,353 INFO Running kemans with k = 15
2017-07-18 15:16:13,733 INFO found k = 15
2017-07-18 15:16:13,734 INFO Running kemans with k = 7
2017-07-18 15:16:13,939 INFO Running kemans with k = 11
2017-07-18 15:16:14,222 INFO Running kemans with k = 13
2017-07-18 15:16:14,574 INFO Running kemans with k = 14


('test count = ', 50)


2017-07-18 15:16:17,276 INFO Running kemans with k = 10
2017-07-18 15:16:17,516 INFO found k = 10
2017-07-18 15:16:17,518 INFO Running kemans with k = 5
2017-07-18 15:16:17,649 INFO found k = 5
2017-07-18 15:16:17,651 INFO Running kemans with k = 2
2017-07-18 15:16:17,706 INFO found k = 2
2017-07-18 15:16:17,707 INFO Running kemans with k = 1
2017-07-18 15:16:17,752 INFO Detect Count Anomalies....
2017-07-18 15:16:17,754 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-18 15:16:17,756 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-18 15:16:17,759 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-18 15:16:17,763 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-18 15:16:17,766 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-18 15:16:17,770 INFO Using ThreeSigmaClassifier for cluster 5
2017-07-18 15:16:17,772 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-18 15:16:17,775 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-18 15:16:17,777 INFO Using Thre

[50, 50, 65, 64, 64, 31, 64, 52, 49, 51, 51, 52, 52, 51, 51, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 52, 50, 52, 51, 51, 65, 64, 64, 31, 64, 52, 48, 51, 52, 51, 52, 50, 52, 50, 50, 65, 63, 64, 31, 64, 52, 49, 51, 51, 52, 52, 50, 51]
[135 132  96 129 138 135 135 138 132 135]
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
[31, 64, 64, 65, 64, 52, 50, 48, 52, 51, 52, 50, 52, 51, 52, 31, 64, 64, 65, 64, 52, 51, 49, 51, 51, 52, 50, 52, 50, 51, 31, 64, 64, 65, 64, 52, 51, 49, 51, 52, 51, 50, 52, 50, 51, 30, 64, 63, 65, 64, 52, 50, 48, 52, 51, 52, 50, 52, 51, 52]
[ 96   3 135 126 138 135 135 135 138 135 135]
[-1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
[51, 30, 63, 63, 65, 64, 51, 48, 52, 52, 51, 51, 51, 50, 52, 52, 30, 64, 63, 65, 64, 50, 48, 52, 51, 51, 51, 51, 51, 52, 52, 30, 64, 63, 66, 64, 50, 48, 52, 51, 52, 51, 51, 51, 52, 51, 31, 64, 64, 65, 65, 51, 48, 52, 51, 51, 51, 51, 51, 52]
[ 96 135 129 138 138 138 132 135 135 138]
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
