In [3]:
import sys
sys.path.insert(0, '..')
sys.path.insert(0, 'plots')

import os

from IPython.display import display
from IPython.display import clear_output
import ipywidgets as widgets


import json
from SplunkIntelOptimized import SplunkIntelOptimized
from sources.SplunkDatasetNew import SplunkDatasetNew
from sources.SplunkFileSource import SplunkFileSource

import plotly as py
import pandas as pd
import numpy as np
import plotNew as plot

# Notebook-wide setup: initialise plotly's offline notebook mode and turn on
# pandas' HTML repr for notebook display.
py.offline.init_notebook_mode()
pd.set_option('display.notebook_repr_html', True)

data_sources_names = []

# Placeholders that create_widgets() replaces with real widgets.
event_picker = event_text_area = threshold_picker = None

# Placeholders that nothing in this cell assigns.
data_source_picker = time_range_picker = new_time_range_picker = None


# Create widgets
def create_widgets():
    """Build the notebook controls and store them in module-level names.

    Assigns ``threshold_picker`` (a float slider from 0 to 1 in steps of
    0.1, starting at 0.9), ``event_picker`` (a dropdown with placeholder
    options) and ``event_text_area`` (a fixed-size text area).
    Returns nothing.
    """
    global threshold_picker, event_picker, event_text_area

    slider_settings = dict(
        value=0.9,
        min=0,
        max=1,
        step=0.1,
        description='Threshold:',
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='.1f',
        slider_color='white',
        color='black',
    )
    threshold_picker = widgets.FloatSlider(**slider_settings)

    dropdown_settings = dict(
        options=[1, 2, 3],
        description='Event Number:',
        disabled=False,
        button_style='',  # 'success', 'info', 'warning', 'danger' or ''
    )
    event_picker = widgets.Dropdown(**dropdown_settings)

    text_area_layout = widgets.Layout(height='40px', width='800px')
    event_text_area = widgets.Textarea(
        value='TA: height=40px',
        layout=text_area_layout,
    )

    
def initialize(change):
    """Run the analysis once by invoking ``change_event_handler``.

    ``change`` is accepted for handler-signature compatibility but is not
    used; the handler is always called with ``None``.
    """
    change_event_handler(change=None)

def event_picker_change_handler(change):
    """Show the event selected in ``event_picker`` as JSON in the text area.

    Reads the module-level name ``splunkDataset``, which must be defined
    before this handler fires, and indexes the sequence returned by its
    ``get_all_events_for_notebook()`` with the dropdown's current value.

    ``change`` is the notification passed by the widget observer; it is
    not used because the current value is read from the widget directly.
    """
    selected = event_picker.value
    # With no selection the value is None (e.g. an empty options list);
    # indexing with it would raise TypeError inside the widget callback.
    if selected is None:
        return

    events = splunkDataset.get_all_events_for_notebook()
    event_text_area.value = json.dumps(events[selected])

def split(input, length, size):
    """Flatten ``input`` to one line and break it into ``<br>``-joined chunks.

    Newlines and ``'\\tat'`` markers are each replaced with a single space,
    then the first ``length`` characters of the result are cut into pieces
    of at most ``size`` characters and joined with ``'<br>'``.

    Args:
        input: Text to format.
        length: Maximum number of characters to include.
        size: Maximum number of characters per chunk; must be non-zero.

    Returns:
        The chunked string; empty when there is nothing to include.
    """
    # str.replace returns a new string, so the result has to be kept.
    text = input.replace('\n', ' ').replace('\tat', ' ')
    # Replacing '\tat' shortens the text; clamp so that a caller-supplied
    # length based on the raw text does not produce empty trailing chunks.
    limit = min(length, len(text))
    return '<br>'.join(text[start:start + size] for start in range(0, limit, size))

def get_tool_tips(all_events):
    """Build one HTML tooltip string per event.

    Each tooltip is the first 100 characters (at most) of ``event[0]``
    formatted by ``split``, followed by the event's position in
    ``all_events`` as its id and ``event[3]`` as its cluster.

    Returns a list of tooltip strings in the same order as ``all_events``.
    """
    def _tooltip(position, record):
        message = record[0]
        preview = split(message, min(100, len(message)), 100)
        return (preview
                + '<br> id = ' + str(position)
                + '<br> cluster = ' + str(record[3]))

    return [_tooltip(position, record)
            for position, record in enumerate(all_events)]
    
# Handle data source change
def change_event_handler(change):
    """Re-run the analysis with the current threshold and redraw all plots.

    For each step of the control/test counters this loads the previous-run
    control and test JSON files into a fresh ``SplunkDatasetNew``. When the
    dataset reports new data, it runs ``SplunkIntelOptimized`` with
    ``--sim_threshold`` taken from ``threshold_picker`` and writes the
    notebook output to ``result.json``, which is passed to the next load as
    the previous output file.

    After the loop the final dataset is published as the module-level
    ``splunkDataset``, the scatter, 4-D scatter and histogram plots are
    drawn, and ``event_picker.options`` is reset to one index per event.

    ``change`` is the widget notification (or ``None`` when called
    directly); it is not used.
    """
    # event_picker_change_handler reads the module-level splunkDataset, so
    # the dataset produced here has to be published there.
    global splunkDataset

    dataset = SplunkDatasetNew()
    splunkIntelargs = []

    control_start = 1
    test_start = 1

    control_end = 1
    test_end = 1

    clear_output()

    prev_out_file = None
    while control_start <= control_end or test_start < test_end:

        del splunkIntelargs[:]
        dataset = SplunkDatasetNew()

        print(control_start, control_start)
        print(test_start, test_start)
        # NOTE(review): each [n, n] pair looks like a [start, end] window
        # for the loader - confirm against load_prod_file_prev_run.
        dataset.load_prod_file_prev_run(
            '/Users/sriram_parthasarathy/wings/python/splunk_intelligence/data_prod/prev_run/control.json',
            [control_start, control_start],
            '/Users/sriram_parthasarathy/wings/python/splunk_intelligence/data_prod/prev_run/test.json',
            [test_start, test_start], prev_out_file)

        if dataset.new_data:

            splunkIntelargs.append('--sim_threshold')
            splunkIntelargs.append(str(float(threshold_picker.value)))

            print(splunkIntelargs)

            splunkIntel = SplunkIntelOptimized(dataset, SplunkIntelOptimized.parse(splunkIntelargs))
            dataset = splunkIntel.run()

            # NOTE(review): the original passed this attribute to write()
            # without calling it. Whether it is a method or a property is
            # not visible from here, so call it only when it is callable.
            output = dataset.get_output_for_notebook_as_json
            if callable(output):
                output = output()

            # The context manager closes the file even if write() raises.
            with open("result.json", "w") as file_object:
                file_object.write(output)

            prev_out_file = './result.json'

        control_start = control_start + 1
        test_start = test_start + 1

    # Publish before touching event_picker.options below: changing the
    # options can fire event_picker_change_handler once it is observed.
    splunkDataset = dataset

    xy_matrix, tooltips, labels, sizes = dataset.control_scatter_plot()
    plot.scatter_plot_groups(xy_matrix, labels, tooltips, ['control', 'test', 'test-anomaly', 'test-unx-freq'], ['blue','green','red', 'orange'])

    #xy_matrix, tooltips, labels, sizes = splunkDataset.count_scatter_plot()

    #new_sizes = []
    #for size in sizes:
     #   new_sizes.append((3 + len(str(size)))**2)

    #print(new_sizes)

    #plot.scatter_plot_groups(xy_matrix, labels, tooltips, ['test', 'test-anomaly'], ['green','red'], new_sizes)


    #for key,data in splunkDataset.get_anom_clusters().items():
     #   for host,anom in data.items():
     #       print(host, anom.get('text'))

    xy_matrix, tooltips, labels, clusters = dataset.control_scatter_plot_4d()
    plot.scatter_plot_groups_4d(xy_matrix, labels, clusters, tooltips, ['blue','orange','red'])

    hist_data = dataset.count_hist_plot()
    for value in hist_data.values():
        plot.hist_plot(value.get('control'), value.get('test'))

    event_picker.options = list(range(len(dataset.get_all_events_for_notebook())))
     

# Build the widgets and store them in the module-level names used below.
create_widgets()

# Initialize: run the analysis once. This is done before the observers are
# attached, so the event_picker.options assignment made during this first
# run does not invoke event_picker_change_handler.
initialize(None)

# Set up event handlers: react to changes of each widget's "value" trait.
event_picker.observe(event_picker_change_handler, names="value")
threshold_picker.observe(change_event_handler, names="value")



# Show widgets: the threshold slider on one row, the event picker and its
# text area on the next.
display(widgets.HBox([threshold_picker]))
display(widgets.HBox([event_picker, event_text_area]))

2017-08-08 16:29:40,577 INFO loading file /Users/sriram_parthasarathy/wings/python/splunk_intelligence/data_prod/prev_run/control.json
2017-08-08 16:29:40,613 INFO loading file /Users/sriram_parthasarathy/wings/python/splunk_intelligence/data_prod/prev_run/test.json
2017-08-08 16:29:40,631 INFO Running analysis
2017-08-08 16:29:40,640 INFO Start vectorization....
2017-08-08 16:29:40,640 INFO setting min_df = 1 and max_df = 0.99


(1, 1)
(1, 1)
('/Users/sriram_parthasarathy/wings/python/splunk_intelligence/data_prod/prev_run/control.json', [1, 1], '/Users/sriram_parthasarathy/wings/python/splunk_intelligence/data_prod/prev_run/test.json', [1, 1])
['--sim_threshold', '0.9']
('control count = ', 1024)
('control count = ', 1024)


2017-08-08 16:29:46,143 INFO Running kemans with k = 100
2017-08-08 16:29:49,967 INFO found k = 100
2017-08-08 16:29:49,969 INFO Running kemans with k = 50
2017-08-08 16:29:53,186 INFO found k = 50
2017-08-08 16:29:53,189 INFO Running kemans with k = 25
2017-08-08 16:29:56,305 INFO found k = 25
2017-08-08 16:29:56,308 INFO Running kemans with k = 12
2017-08-08 16:29:56,522 INFO Running kemans with k = 18
2017-08-08 16:29:59,534 INFO found k = 18
2017-08-08 16:29:59,537 INFO Running kemans with k = 15
2017-08-08 16:29:59,772 INFO found k = 15
2017-08-08 16:29:59,774 INFO Running kemans with k = 13
2017-08-08 16:29:59,984 INFO Running kemans with k = 14
2017-08-08 16:30:00,194 INFO Finish kemans....
2017-08-08 16:30:00,196 INFO Detect unknown events


('test count = ', 452)


2017-08-08 16:30:03,364 INFO Finish detect unknown events
2017-08-08 16:30:03,370 INFO Create anomalous clusters
2017-08-08 16:30:03,901 INFO Running kemans with k = 49
2017-08-08 16:30:04,209 INFO found k = 49
2017-08-08 16:30:04,212 INFO Running kemans with k = 24
2017-08-08 16:30:04,423 INFO found k = 24
2017-08-08 16:30:04,426 INFO Running kemans with k = 12
2017-08-08 16:30:04,636 INFO found k = 12
2017-08-08 16:30:04,639 INFO Running kemans with k = 6
2017-08-08 16:30:04,849 INFO found k = 6
2017-08-08 16:30:04,852 INFO Running kemans with k = 3
2017-08-08 16:30:05,065 INFO found k = 3
2017-08-08 16:30:05,067 INFO Running kemans with k = 1
2017-08-08 16:30:05,273 INFO Running kemans with k = 2
2017-08-08 16:30:05,539 INFO Finish create anomolous clusters
2017-08-08 16:30:05,542 INFO Detect Count Anomalies....
2017-08-08 16:30:05,543 INFO Using ZeroDeviationClassifier for cluster 0
2017-08-08 16:30:05,544 INFO Using ZeroDeviationClassifier for cluster 1
2017-08-08 16:30:05,546 INF