In [2]:
import sys
sys.path.insert(0, '..')
sys.path.insert(0, 'plots')

import os

from IPython.display import display
from IPython.display import clear_output
import ipywidgets as widgets


import json
from SplunkIntelOptimized import SplunkIntelOptimized
from sources.SplunkDatasetNew import SplunkDatasetNew
from sources.SplunkFileSource import SplunkFileSource

import plotly as py
import pandas as pd
import numpy as np
import plotNew as plot

# Notebook-wide setup: switch plotly to offline notebook mode and let pandas
# render objects as HTML.
py.offline.init_notebook_mode()
pd.set_option('display.notebook_repr_html', True)

# File names offered by the data-source dropdown; filled by create_widgets().
data_sources_names = []

# Widget handles. They start out as None; create_widgets() assigns the ones
# it builds.
(data_source_picker,
 time_range_picker,
 event_picker,
 event_text_area,
 threshold_picker,
 new_time_range_picker) = (None,) * 6


#create widgets
def create_widgets():
    """Build the notebook's input widgets and store them in module globals.

    Lists ``../../data_prod`` for ``.json`` files and offers them in the
    data-source dropdown, then creates the time-range slider, the threshold
    slider, the event-number dropdown and the event text area. The sliders
    and the event dropdown are created with placeholder ranges/options.
    """
    global data_source_picker, time_range_picker, event_picker
    global event_text_area, threshold_picker, new_time_range_picker

    # Replace the list contents in place instead of appending: re-running the
    # cell would otherwise add every file a second time. os.listdir() returns
    # names in arbitrary order, so sort them to keep the dropdown stable.
    data_sources_names[:] = sorted(
        name for name in os.listdir("../../data_prod")
        if name.endswith(".json"))

    data_source_picker = widgets.Dropdown(
            options=data_sources_names,
            description='Data Source:',
            disabled=False,
            button_style='' # 'success', 'info', 'warning', 'danger' or ''
        )

    # Placeholder bounds for the time range slider.
    time_range_picker = widgets.IntRangeSlider(
                value=[1, 2],
                min=0,
                max=100,
                step=1,
                description='Time:',
                disabled=False,
                continuous_update=False,
                orientation='horizontal',
                readout=True,
                readout_format='i',
                slider_color='white',
                color='black'
        )

    # The slider value is passed on as '--sim_threshold' by
    # change_event_handler().
    threshold_picker = widgets.FloatSlider(
                value=0.8,
                min=0,
                max=1,
                step=0.1,
                description='Threshold:',
                disabled=False,
                continuous_update=False,
                orientation='horizontal',
                readout=True,
                readout_format='.1f',
                slider_color='white',
                color='black'
        )

    # Placeholder options for the event dropdown.
    event_picker = widgets.Dropdown(
            options=[1,2,3],
            description='Event Number:',
            disabled=False,
            button_style='' # 'success', 'info', 'warning', 'danger' or ''
        )

    text_area_layout = widgets.Layout(height='40px', width='800px')
    event_text_area = widgets.Textarea(value='TA: height=40px', layout=text_area_layout)

    
def initialize(change):
    """Fit the time-range slider to the selected data source and re-run the analysis.

    Loads the events of the file chosen in the data-source dropdown, finds the
    largest 'logCollectionMinute' among them, resets the time-range slider to
    span 0..that minute, and then calls change_event_handler().

    Args:
        change: widget change notification (None when called directly); unused.
    """
    events = SplunkFileSource.load_data('../../data_prod/' + data_source_picker.value)

    # Keep the running maximum as an int. Storing the raw field value (as the
    # comparison-only int() conversion used to do) could leave a non-int in
    # `minutes` and break both later comparisons and the slider bounds.
    minutes = 0
    for event in events:
        raw_minute = event.get('logCollectionMinute')
        if raw_minute is None:
            # Events without the field cannot extend the range; skip them.
            continue
        minutes = max(minutes, int(raw_minute))

    # Widen/narrow the bounds before assigning the value: a value assigned
    # while the old `max` is still in force is validated against that old
    # bound, so a range above it would not be applied as requested.
    time_range_picker.min = 0
    time_range_picker.max = minutes
    time_range_picker.value = [0, minutes]

    change_event_handler(None)
    
    
# Dataset backing the plots and the event picker; change_event_handler()
# rebinds this global on every run.
splunkDataset = SplunkDatasetNew()
# Argument list handed to SplunkIntelOptimized.parse(); cleared and refilled
# in place by change_event_handler().
splunkIntelargs = []

def event_picker_change_handler(change):
    """Show the event selected in the event picker, as JSON, in the text area.

    Args:
        change: widget change notification; unused.
    """
    events = splunkDataset.get_all_events_for_notebook()
    selected_event = events[event_picker.value]
    event_text_area.value = json.dumps(selected_event)

def split(input, length, size):
    """Flatten a message to one line and break it into '<br>'-joined chunks.

    Every newline and every "tab followed by 'at'" sequence in `input` is
    replaced by a single space. The first `length` characters of the result
    are then cut into pieces of `size` characters and joined with '<br>'.

    Args:
        input: the text to format.
        length: number of leading characters to keep; values larger than the
            flattened text are capped to its length.
        size: chunk width in characters (must be non-zero, as required by
            range()).

    Returns:
        The chunks joined with '<br>'; an empty string when nothing is kept.
    """
    # Strings are immutable: str.replace() returns a new string, so the
    # result has to be kept (it used to be discarded, making both calls no-ops).
    text = input.replace('\n', ' ').replace('\tat', ' ')
    # Replacing the three-character '\tat' shortens the text, and callers may
    # pass a length measured on the raw input; cap it so no empty trailing
    # chunks (and stray '<br>') are produced.
    limit = min(length, len(text))
    return '<br>'.join([text[start:start + size] for start in range(0, limit, size)])

def get_tool_tips(all_events):
    """Build one HTML tooltip string per event.

    Events are indexed positionally: item 0 is the text, which is passed to
    split() with a limit of at most 100 characters, and item 3 is shown as
    the cluster. The event's position in `all_events` is shown as its id.

    Returns:
        A list of tooltip strings, in the same order as `all_events`.
    """
    def describe(position, record):
        text = record[0]
        shown = split(text, min(100, len(text)), 100)
        return shown + '<br> id = ' + str(position) + '<br> cluster = ' + str(record[3])

    return [describe(position, record) for position, record in enumerate(all_events)]
    
# Handle data source change
def change_event_handler(change):
    """Re-run the analysis minute by minute over the selected range and redraw the plots.

    For every minute in the time-range slider's selection (both ends
    included), a fresh SplunkDatasetNew is loaded from the selected data file
    with single-minute control and test windows. When the dataset reports new
    data, SplunkIntelOptimized is run with the threshold slider's value and
    its output is written to ``result.json``, which is then passed to the
    next minute's load. After the loop, the scatter plots and histograms are
    drawn from the last dataset and the event picker's options are refreshed.

    Args:
        change: widget change notification (None when called directly); unused.
    """
    global splunkDataset

    # Control and test windows both come from the same slider selection, so a
    # single counter drives the loop.
    first_minute = time_range_picker.value[0]
    last_minute = time_range_picker.value[1]

    clear_output()

    data_file = '../../data_prod/' + data_source_picker.value

    prev_out_file = None
    for minute in range(first_minute, last_minute + 1):

        # Reset the shared argument list in place and start from a new dataset.
        del splunkIntelargs[:]
        splunkDataset = SplunkDatasetNew()

        # Progress output: the (start, end) of the control and test windows.
        print(minute, minute)
        print(minute, minute)
        splunkDataset.load_prod_file(data_file,
                                     [minute, minute],
                                     [minute, minute],
                                     ['ip-172-31-28-126'], ['ip-172-31-19-157'],
                                     prev_out_file)

        if not splunkDataset.new_data:
            continue

        splunkIntelargs.append('--sim_threshold')
        splunkIntelargs.append(str(float(threshold_picker.value)))

        print(splunkIntelargs)

        splunkIntel = SplunkIntelOptimized(splunkDataset, SplunkIntelOptimized.parse(splunkIntelargs))
        splunkDataset = splunkIntel.run()

        # NOTE(review): get_output_for_notebook_as_json is accessed without
        # call parentheses - presumably a property; verify against
        # SplunkDatasetNew.
        # The context manager closes the file even if the write raises.
        with open("result.json", "w") as file_object:
            file_object.write(splunkDataset.get_output_for_notebook_as_json)

        # Feed this minute's result into the next minute's load.
        prev_out_file = './result.json'

    xy_matrix, tooltips, labels,sizes = splunkDataset.control_scatter_plot()
    plot.scatter_plot_groups(xy_matrix, labels, tooltips, ['control', 'test', 'test-anomaly', 'test-unx-freq'], ['blue','green','red', 'orange'])

    #xy_matrix, tooltips, labels, sizes = splunkDataset.count_scatter_plot()

    #new_sizes = []
    #for size in sizes:
     #   new_sizes.append((3 + len(str(size)))**2)

    #print(new_sizes)

    #plot.scatter_plot_groups(xy_matrix, labels, tooltips, ['test', 'test-anomaly'], ['green','red'], new_sizes)


    #for key,data in splunkDataset.get_anom_clusters().items():
     #   for host,anom in data.items():
     #       print(host, anom.get('text'))

    xy_matrix, tooltips, labels, clusters = splunkDataset.control_scatter_plot_4d()
    plot.scatter_plot_groups_4d(xy_matrix, labels, clusters, tooltips, ['blue','orange','red'])

    hist_data = splunkDataset.count_hist_plot()
    for key,value in hist_data.items():
        plot.hist_plot(value.get('control'), value.get('test'))

    # One option per event index, so the picker can address every event.
    event_picker.options = list(range(len(splunkDataset.get_all_events_for_notebook())))
     

# Build the widgets, then run the analysis once before any observer is
# attached.
create_widgets()
initialize(None)

# Set up event handlers: each widget's "value" trait drives one callback.
for _widget, _handler in (
        (data_source_picker, initialize),
        (time_range_picker, change_event_handler),
        (event_picker, event_picker_change_handler),
        (threshold_picker, change_event_handler)):
    _widget.observe(_handler, names="value")

# Show the widgets, one horizontal box per row.
for _row in (
        [data_source_picker],
        [time_range_picker, threshold_picker],
        [event_picker, event_text_area]):
    display(widgets.HBox(_row))

2017-07-20 21:44:36,020 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:36,023 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:36,026 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:36,030 INFO Running using file source
2017-07-20 21:44:36,031 INFO Start vectorization....
2017-07-20 21:44:36,031 INFO setting min_df = 1 and max_df = 1.0
2017-07-20 21:44:36,073 INFO Running kemans with k = 2
2017-07-20 21:44:36,095 INFO Running kemans with k = 3
2017-07-20 21:44:36,131 INFO Running kemans with k = 4
2017-07-20 21:44:36,185 INFO found k = 4


(0, 0)
(0, 0)
([0, 0], [0, 0], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
(1, 1)
(1, 1)
([1, 1], [1, 1], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
(2, 2)
(2, 2)
([2, 2], [2, 2], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 4)
('test count = ', 4)


2017-07-20 21:44:36,324 INFO Detect Count Anomalies....
2017-07-20 21:44:36,325 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:36,326 INFO Using ZeroDeviationClassifier for cluster 1
2017-07-20 21:44:36,327 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:36,328 INFO Using ZeroDeviationClassifier for cluster 3
2017-07-20 21:44:36,329 INFO done
2017-07-20 21:44:36,331 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:36,335 INFO loading file ./result.json
2017-07-20 21:44:36,337 INFO Running using file source
2017-07-20 21:44:36,338 INFO Start vectorization....
2017-07-20 21:44:36,339 INFO setting min_df = 1 and max_df = 1.0
2017-07-20 21:44:36,399 INFO Running kemans with k = 3
2017-07-20 21:44:36,426 INFO Running kemans with k = 5
2017-07-20 21:44:36,472 INFO found k = 5
2017-07-20 21:44:36,472 INFO Running kemans with k = 4


(3, 3)
(3, 3)
([3, 3], [3, 3], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 6)
('test count = ', 6)


2017-07-20 21:44:36,739 INFO Detect Count Anomalies....
2017-07-20 21:44:36,741 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:36,742 INFO Using ZeroDeviationClassifier for cluster 1
2017-07-20 21:44:36,744 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:36,746 INFO Using ZeroDeviationClassifier for cluster 3
2017-07-20 21:44:36,747 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:36,748 INFO done
2017-07-20 21:44:36,751 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:36,755 INFO loading file ./result.json
2017-07-20 21:44:36,759 INFO Running using file source
2017-07-20 21:44:36,760 INFO Start vectorization....
2017-07-20 21:44:36,761 INFO setting min_df = 1 and max_df = 1.0
2017-07-20 21:44:36,836 INFO Running kemans with k = 6
2017-07-20 21:44:36,908 INFO Running kemans with k = 9


(4, 4)
(4, 4)
([4, 4], [4, 4], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 11)


2017-07-20 21:44:37,000 INFO Running kemans with k = 10
2017-07-20 21:44:37,096 INFO found k = 10


('test count = ', 11)


2017-07-20 21:44:37,320 INFO Detect Count Anomalies....
2017-07-20 21:44:37,321 INFO Using ThreeSigmaClassifier for cluster 0
2017-07-20 21:44:37,322 INFO Using ZeroDeviationClassifier for cluster 1
2017-07-20 21:44:37,324 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:37,325 INFO Using ZeroDeviationClassifier for cluster 3
2017-07-20 21:44:37,326 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:37,328 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:37,329 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:37,331 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:37,332 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:37,333 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:37,334 INFO done
2017-07-20 21:44:37,337 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:37,340 INFO loading file ./result.json
2017-07-20 21:44:37,344 INFO Running using file source
2

(5, 5)
(5, 5)
([5, 5], [5, 5], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 12)


2017-07-20 21:44:37,578 INFO Running kemans with k = 11
2017-07-20 21:44:37,764 INFO found k = 11
2017-07-20 21:44:37,765 INFO Running kemans with k = 10
2017-07-20 21:44:37,935 INFO found k = 10


('test count = ', 10)


2017-07-20 21:44:38,188 INFO Detect Count Anomalies....
2017-07-20 21:44:38,190 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-20 21:44:38,191 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:38,193 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-20 21:44:38,197 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:38,198 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:38,200 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:38,201 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:38,203 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:38,206 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:38,208 INFO done
2017-07-20 21:44:38,211 INFO loading file ../../data_prod/prodOut.json
2017-07-20 21:44:38,216 INFO loading file ./result.json
2017-07-20 21:44:38,221 INFO Running using file source
2017-07-20 21:44:38,223 INFO Start vectorization....
2017-07-20 21:44:38,224 

(6, 6)
(6, 6)
([6, 6], [6, 6], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 14)


2017-07-20 21:44:38,528 INFO Running kemans with k = 13
2017-07-20 21:44:38,661 INFO found k = 13
2017-07-20 21:44:38,662 INFO Running kemans with k = 12
2017-07-20 21:44:38,911 INFO Detect Count Anomalies....
2017-07-20 21:44:38,912 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:38,913 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:38,914 INFO Using ZeroDeviationClassifier for cluster 3
2017-07-20 21:44:38,916 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:38,918 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:38,919 INFO Using ThreeSigmaClassifier for cluster 6
2017-07-20 21:44:38,921 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:38,923 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:38,925 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:38,926 INFO Using ZeroDeviationClassifier for cluster 11
2017-07-20 21:44:38,927 INFO Using ThreeSigmaClassifier for cl

('test count = ', 13)
(7, 7)
(7, 7)
([7, 7], [7, 7], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 17)

2017-07-20 21:44:39,001 INFO Running kemans with k = 9
2017-07-20 21:44:39,126 INFO Running kemans with k = 13
2017-07-20 21:44:39,270 INFO found k = 13
2017-07-20 21:44:39,271 INFO Running kemans with k = 11
2017-07-20 21:44:39,419 INFO Running kemans with k = 12
2017-07-20 21:44:39,774 INFO Detect Count Anomalies....
2017-07-20 21:44:39,775 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:39,776 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:39,777 INFO Using ZeroDeviationClassifier for cluster 3
2017-07-20 21:44:39,778 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:39,779 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:39,781 INFO Using ThreeSigmaClassifier for cluster 7
2017-07-20 21:44:39,782 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:39,783 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:39,785 INFO Using ZeroDeviationClassifier for cluster 10
2017-07-20 21:44:39,786 


('test count = ', 15)
(8, 8)
(8, 8)


2017-07-20 21:44:39,796 INFO loading file ./result.json
2017-07-20 21:44:39,800 INFO Running using file source
2017-07-20 21:44:39,802 INFO Start vectorization....
2017-07-20 21:44:39,804 INFO setting min_df = 1 and max_df = 1.0
2017-07-20 21:44:39,861 INFO Running kemans with k = 9
2017-07-20 21:44:39,967 INFO Running kemans with k = 14


([8, 8], [8, 8], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 18)


2017-07-20 21:44:40,134 INFO Running kemans with k = 16
2017-07-20 21:44:40,318 INFO found k = 16
2017-07-20 21:44:40,319 INFO Running kemans with k = 15
2017-07-20 21:44:40,711 INFO Detect Count Anomalies....
2017-07-20 21:44:40,712 INFO Using ZeroDeviationClassifier for cluster 1
2017-07-20 21:44:40,712 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:40,713 INFO Using ZeroDeviationClassifier for cluster 3
2017-07-20 21:44:40,714 INFO Using ThreeSigmaClassifier for cluster 4
2017-07-20 21:44:40,716 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:40,717 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:40,718 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:40,720 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:40,721 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:40,722 INFO Using ZeroDeviationClassifier for cluster 10
2017-07-20 21:44:40,724 INFO Using ZeroDeviationClassifier for

('test count = ', 16)
(9, 9)
(9, 9)
([9, 9], [9, 9], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']


2017-07-20 21:44:40,742 INFO Running using file source
2017-07-20 21:44:40,743 INFO Start vectorization....
2017-07-20 21:44:40,744 INFO setting min_df = 1 and max_df = 1.0
2017-07-20 21:44:40,817 INFO Running kemans with k = 10
2017-07-20 21:44:40,959 INFO Running kemans with k = 15


('control count = ', 19)


2017-07-20 21:44:41,181 INFO Running kemans with k = 17
2017-07-20 21:44:41,421 INFO found k = 17
2017-07-20 21:44:41,421 INFO Running kemans with k = 16


('test count = ', 17)


2017-07-20 21:44:41,795 INFO Detect Count Anomalies....
2017-07-20 21:44:41,797 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:41,798 INFO Using ZeroDeviationClassifier for cluster 1
2017-07-20 21:44:41,800 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-20 21:44:41,801 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-20 21:44:41,802 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:41,803 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:41,805 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:41,806 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:41,807 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:41,808 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:41,809 INFO Using ZeroDeviationClassifier for cluster 10
2017-07-20 21:44:41,810 INFO Using ZeroDeviationClassifier for cluster 11
2017-07-20 21:44:41,811 INFO Using ZeroDeviationClassifier for cluster 1

(10, 10)
(10, 10)
([10, 10], [10, 10], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 21)


2017-07-20 21:44:42,070 INFO Running kemans with k = 16
2017-07-20 21:44:42,217 INFO Running kemans with k = 19
2017-07-20 21:44:42,391 INFO Running kemans with k = 20
2017-07-20 21:44:42,620 INFO found k = 20


('test count = ', 19)


2017-07-20 21:44:42,994 INFO Detect Count Anomalies....
2017-07-20 21:44:42,996 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:42,998 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-20 21:44:43,000 INFO Using ZeroDeviationClassifier for cluster 2
2017-07-20 21:44:43,002 INFO Using ThreeSigmaClassifier for cluster 3
2017-07-20 21:44:43,004 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:43,006 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:43,008 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:43,010 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:43,012 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:43,013 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:43,016 INFO Using ZeroDeviationClassifier for cluster 10
2017-07-20 21:44:43,018 INFO Using ZeroDeviationClassifier for cluster 11
2017-07-20 21:44:43,021 INFO Using ZeroDeviationClassifier for cluster 1

(11, 11)
(11, 11)
([11, 11], [11, 11], ['ip-172-31-28-126'], ['ip-172-31-19-157'])
['--sim_threshold', '0.8']
('control count = ', 24)


2017-07-20 21:44:43,386 INFO Running kemans with k = 18
2017-07-20 21:44:43,596 INFO Running kemans with k = 21
2017-07-20 21:44:43,791 INFO Running kemans with k = 23
2017-07-20 21:44:44,027 INFO found k = 23
2017-07-20 21:44:44,028 INFO Running kemans with k = 22
2017-07-20 21:44:44,308 INFO found k = 22


('test count = ', 22)


2017-07-20 21:44:44,761 INFO Detect Count Anomalies....
2017-07-20 21:44:44,763 INFO Using ZeroDeviationClassifier for cluster 0
2017-07-20 21:44:44,764 INFO Using ThreeSigmaClassifier for cluster 1
2017-07-20 21:44:44,765 INFO Using ThreeSigmaClassifier for cluster 2
2017-07-20 21:44:44,767 INFO Using ZeroDeviationClassifier for cluster 4
2017-07-20 21:44:44,769 INFO Using ZeroDeviationClassifier for cluster 5
2017-07-20 21:44:44,772 INFO Using ZeroDeviationClassifier for cluster 6
2017-07-20 21:44:44,774 INFO Using ZeroDeviationClassifier for cluster 7
2017-07-20 21:44:44,776 INFO Using ZeroDeviationClassifier for cluster 8
2017-07-20 21:44:44,777 INFO Using ZeroDeviationClassifier for cluster 9
2017-07-20 21:44:44,779 INFO Using ZeroDeviationClassifier for cluster 10
2017-07-20 21:44:44,781 INFO Using ZeroDeviationClassifier for cluster 11
2017-07-20 21:44:44,782 INFO Using ZeroDeviationClassifier for cluster 12
2017-07-20 21:44:44,784 INFO Using ZeroDeviationClassifier for cluster 

Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.
