In [1]:
import os
import time
import copy
import json
from datetime import datetime
from os import listdir
from os.path import isfile, join
from graphviz import Digraph
from graphviz import Source

In [2]:
class BrologAnalyzer:
    """Correlate one "target" Bro log record with records from sibling log files.

    Every log file in ``directory`` is read as JSON lines (one JSON object per
    line). ``target`` is the file name of the log whose records are used as
    reference events; all other regular files in the directory are searched
    for records that follow a reference event within a time window.
    """

    # Connection fields a record must carry to take part in the correlation.
    _CONN_KEYS = ('id.orig_h', 'id.orig_p', 'id.resp_h', 'id.resp_p')

    def __init__(self, directory, target):
        """Remember the log directory and the file name of the target log."""
        self._bro_dir = directory
        self._bro_target = target

    @staticmethod
    def _conn_fields(record):
        """Return (orig_h, orig_p, resp_h, resp_p) of a parsed record.

        Raises KeyError when a field is missing and TypeError when ``record``
        is not a mapping (e.g. the line held a JSON list or number).
        """
        return tuple(record[key] for key in BrologAnalyzer._CONN_KEYS)

    def _get_entries(self, fname):
        """Return the lines of ``fname`` (inside the log directory), stripped."""
        # join() instead of string concatenation so the directory works with
        # or without a trailing path separator.
        with open(join(self._bro_dir, fname)) as f:
            return [line.strip() for line in f]

    def _get_other_topics(self):
        """Return the names of all regular files in the directory except the target.

        When the target is ``conn.log``, ``tds.log`` is left out as well.
        A target that is not present in the directory is simply not excluded
        (no error), since callers may pass target records as strings.
        """
        fnames = [f for f in listdir(self._bro_dir)
                  if isfile(join(self._bro_dir, f)) and f != self._bro_target]
        if self._bro_target == "conn.log" and "tds.log" in fnames:
            fnames.remove("tds.log")
        return fnames

    def _get_target_tscut(self, target_log_str, tscut):
        """Collect records from the other logs that closely follow the target.

        Parameters
        ----------
        target_log_str : str
            One JSON-encoded record; must contain ``ts`` and the four
            ``id.*`` connection fields (KeyError otherwise).
        tscut : number
            Width of the time window, in the same unit as ``ts``
            (presumably seconds -- confirm against the log format).

        Returns
        -------
        list of str
            The raw (stripped) lines whose ``ts`` satisfies
            ``0 < ts - target_ts < tscut``. Lines that are not valid JSON or
            that lack ``ts`` / any ``id.*`` field are skipped.
        """
        target = json.loads(target_log_str)
        tstarget = target['ts']
        # Fail fast if the target itself is not a connection-style record.
        self._conn_fields(target)

        res = []
        for fname in self._get_other_topics():
            for entry in self._get_entries(fname):
                try:
                    entry_dict = json.loads(entry)
                except ValueError:
                    # Not JSON (blank line, header, truncated record): ignore.
                    continue

                try:
                    tsentry = entry_dict['ts']
                    # Only records that describe a connection are of interest.
                    self._conn_fields(entry_dict)
                except (KeyError, TypeError):
                    continue

                diff = tsentry - tstarget
                if 0 < diff < tscut:
                    res.append(entry)
        return res

    def _get_graph(self, outdir, outfname, target_log_str, res_log_str_list):
        """Render the target connection (red) and related connections (blue).

        One node is created per host and one edge per record, directed from
        the originator host to the responder host. Each port is attached to
        the end of the edge that touches its own host.

        Returns the ``Digraph`` on success, or ``None`` (after printing a
        message) when rendering fails.
        """
        dot = Digraph(comment='test graph')

        def add_connection(log_str, color, node_attrs):
            orig_h, orig_p, resp_h, resp_p = self._conn_fields(json.loads(log_str))
            dot.node(orig_h, str(orig_h), **node_attrs)
            dot.node(resp_h, str(resp_h), **node_attrs)
            # The edge runs orig -> resp, so the tail is the originator and
            # the head is the responder; label each end with its own port.
            dot.edge(orig_h, resp_h, constraint='false',
                     taillabel=str(orig_p), headlabel=str(resp_p),
                     labelfontsize="8", len="3.0", color=color)

        add_connection(target_log_str, 'red', {'color': 'red'})
        for res_log_str in res_log_str_list:
            add_connection(res_log_str, 'blue', {})

        try:
            dot.render(outdir + '/' + outfname, view=False)
            return dot
        except Exception as exc:
            # Rendering is best-effort (e.g. the Graphviz binary may be
            # missing); report the reason instead of hiding it.
            print("render failed!", exc)
            return None

In [3]:
# Analyzer over the logs in ../data/brologs/03-28/, with conn.log as the target log.
# Alternative target for the same directory: "tds_sqlbatch.log".
myBrologAnalyzer = BrologAnalyzer(
    directory="../data/brologs/03-28/",
    target="conn.log",
)

#myBrologAnalyzer = BrologAnalyzer("../data/brologs/03-22-13/", "tds_sqlbatch.log")
#targets = myBrologAnalyzer._get_entries(myBrologAnalyzer._bro_target)
#myBrologAnalyzer._get_entries("conn.log")

#for thistarget in targets:
#    if json.loads(thistarget)['id.resp_p'] == 1433:
        #print(thistarget)
#        test_res = myBrologAnalyzer._get_target_tscut(thistarget, 1)
        
#myBrologAnalyzer._get_other_topics()
#03-22-13
#test_target_log_str = '{"ts":1521699403.756506,"uid":"C4Y1Xt1AqErW7gd1Kl","id.orig_h":"192.168.3.61","id.orig_p":61617,"id.resp_h":"172.16.1.58","id.resp_p":1433,"sqlbatch":"\\u0016\\u0012\\u0002\\u00fc\\u0088u\\u0001select * from EXT_ORDER_NEW\\u000d\\u000aWhere UPDSTATUS IS NUL"}'
#03-28
#test_target_log_str = '{"ts":1522298700.448996,"uid":"CJt0DqsKHR4gQU03e","id.orig_h":"172.16.3.106","id.orig_p":51300,"id.resp_h":"172.16.3.104","id.resp_p":1433,"sqlbatch":"\\u0016\\u0012\\u0002\\u0001SELECT TOP 1 * FROM [INTClient"}'
#test_target_log_str = '{"ts":1522298698.48293,"uid":"CP32sg2ZDTgkC1V2c4","id.orig_h":"172.16.3.106","id.orig_p":51705,"id.resp_h":"172.16.3.123","id.resp_p":1433,"proto":"tcp","duration":99.982921,"orig_bytes":1,"resp_bytes":0,"conn_state":"SF","missed_bytes":0,"history":"DadAFf","orig_pkts":10,"orig_ip_bytes":452,"resp_pkts":10,"resp_ip_bytes":452,"tunnel_parents":[]}'
#test_res_log_str_list = myBrologAnalyzer._get_target_tscut(test_target_log_str, 1)
#print(len(test_res_log_str_list))


In [4]:
#res_dot = myBrologAnalyzer._get_graph("graph_plots", "test_graph.gv", test_target_log_str, test_res_log_str_list)
#print(str(1))

In [5]:
# 1-based event numbers for which _gen_graph_batch writes output.
# An empty set makes _gen_graph_batch skip every event.
# NOTE(review): how these particular numbers were chosen is not recorded here.
good_labels = {9, 10, 12, 19, 26, 42, 55, 523}
def _gen_graph_batch(thisBrologAnalyzer, tscut, port_target, labels=None):
    """Render a graph and a text report for selected target events.

    Parameters
    ----------
    thisBrologAnalyzer : BrologAnalyzer
        Supplies the target log (``_bro_target``), the correlation
        (``_get_target_tscut``) and the rendering (``_get_graph``).
    tscut : number
        Time window handed to ``_get_target_tscut``; also part of the output
        directory name ``graph_tscut_<tscut>``.
    port_target : int or str
        Only target records whose ``id.resp_p`` equals this port are
        considered.
    labels : collection of int, optional
        1-based event numbers (position among the port-filtered records) to
        process. Defaults to the module-level ``good_labels``.

    For each selected event, ``graph_<i>_<port>.gv`` is rendered and a
    sidecar ``graph_<i>_<port>.gv.txt`` listing the target and the related
    records is written. The sidecar is overwritten on each run, so re-running
    does not duplicate its content.
    """
    if labels is None:
        labels = good_labels
    outdir = "graph_tscut_" + str(tscut)
    wanted_port = str(port_target)

    def _is_target(line):
        # Compare the parsed field rather than searching for a substring:
        # a substring test for port 1433 would also accept 14330, and would
        # depend on the exact whitespace of the JSON encoder.
        try:
            record = json.loads(line)
        except ValueError:
            return False
        return (isinstance(record, dict)
                and 'id.resp_p' in record
                and str(record['id.resp_p']) == wanted_port)

    raw_targets = thisBrologAnalyzer._get_entries(thisBrologAnalyzer._bro_target)
    targets = [x for x in raw_targets if _is_target(x)]
    print(len(targets))

    for i, thistarget in enumerate(targets, start=1):
        print("processing event " + str(i) + "...")
        if i not in labels:
            continue

        outfname = "graph_" + str(i) + "_" + str(port_target) + ".gv"
        thisres_list = thisBrologAnalyzer._get_target_tscut(thistarget, tscut)
        thisBrologAnalyzer._get_graph(outdir, outfname, thistarget, thisres_list)

        # Do not rely on the renderer having created the directory.
        os.makedirs(outdir, exist_ok=True)
        with open(outdir + '/' + outfname + '.txt', 'w') as the_file:
            the_file.write("Target: \n")
            the_file.write(thistarget)
            the_file.write("\n")
            the_file.write("\n")
            the_file.write("Others: \n")
            for x in thisres_list:
                the_file.write(x)
                the_file.write("\n")

#_gen_graph_batch(myBrologAnalyzer, 1, 1433)
#_gen_graph_batch(myBrologAnalyzer, 5, 1433)

In [40]:
def _ddn_json_reformat_js(path, fname, outdir="../d3js/ddn/"):
    """Convert a ddn JSON export into a ``{"nodes": [...], "links": [...]}`` dict.

    Parameters
    ----------
    path : str
        Directory containing the input file.
    fname : str
        Input file name. The file holds a JSON object whose ``nodes`` and
        ``edges`` values are themselves JSON-encoded strings of
        column -> {row key -> value} mappings.
    outdir : str, optional
        Directory that receives ``d3js_<fname>``; created if missing.

    Returns
    -------
    dict
        ``nodes`` is a list of ``{"name", "group"}`` dicts (from the
        ``node_id`` and ``type`` columns, in ``iid`` row order). ``links`` is
        a list of ``{"source", "target", "label"}`` dicts, one per edge whose
        ``iid_from`` and ``iid_to`` both occur in the nodes' ``iid`` column;
        the label is ``<proto>_<port>_<total_bytes>``. The same dict is
        written to the output file as JSON.
    """
    with open(os.path.join(path, fname)) as f:
        data = json.load(f)
    nodes = json.loads(data['nodes'])
    edges = json.loads(data['edges'])

    nodes_list = [
        {"name": nodes["node_id"][n], "group": nodes["type"][n]}
        for n in nodes["iid"]
    ]

    # iid value -> row key of the node that carries it.
    inv_node_iid_map = {v: k for k, v in nodes["iid"].items()}

    edges_list = []
    for e in edges["ddn_id"]:
        # str() on every part so numeric ports/protocols do not break the join.
        thislabel = "_".join(
            str(edges[column][e]) for column in ("proto", "port", "total_bytes")
        )
        iid_from = edges["iid_from"][e]
        iid_to = edges["iid_to"][e]
        if iid_from in inv_node_iid_map and iid_to in inv_node_iid_map:
            # NOTE(review): source/target are the nodes' row keys as ints; this
            # matches positions in nodes_list only if the row keys are
            # "0", "1", ... in order -- confirm against the exporter.
            edges_list.append({
                "source": int(inv_node_iid_map[iid_from]),
                "target": int(inv_node_iid_map[iid_to]),
                "label": thislabel,
            })

    ddn_d3_res = {"nodes": nodes_list, "links": edges_list}

    if outdir:
        os.makedirs(outdir, exist_ok=True)
    with open(os.path.join(outdir, "d3js_" + fname), 'w') as fp:
        json.dump(ddn_d3_res, fp)
    return ddn_d3_res

# Convert both ddn files; each call writes its own "d3js_<fname>" output file.
# ddn_dict_d3js is reassigned, so it ends up holding the ddn_pii.json result.
ddn_dict_d3js = _ddn_json_reformat_js(path="../data/ddn/", fname="ddn_hr.json")
ddn_dict_d3js = _ddn_json_reformat_js(path="../data/ddn/", fname="ddn_pii.json")
#print(type(ddn_dict_d3js))
#print(ddn_dict_d3js)

#### The end of this file