In [2]:
import os
import time
import copy
import json
from datetime import datetime
from os import listdir
from os.path import isfile, join
from graphviz import Digraph
from graphviz import Source

In [2]:
class BrologAnalyzer:

    def __init__(self, directory, target):
        self._bro_dir = directory
        self._bro_target = target
        
    def _get_entries(self, fname):
        with open(self._bro_dir + fname) as f:
            entries = f.readlines()
        entries = [x.strip() for x in entries]
        #print(len(entries))
        return entries
    
    def _get_other_topics(self):
        fnames = [f for f in listdir(self._bro_dir) if isfile(join(self._bro_dir, f))]
        fnames.remove(self._bro_target)
        if self._bro_target == "conn.log":
            try:
                fnames.remove("tds.log")
            except:
                pass
        #print(fnames)
        return fnames
    
    def _get_target_tscut(self, target_log_str, tscut):
        res = []
        tstarget = json.loads(target_log_str)['ts']
        orig_h_target = json.loads(target_log_str)['id.orig_h']
        orig_p_target = json.loads(target_log_str)['id.orig_p']
        resp_h_target = json.loads(target_log_str)['id.resp_h']
        resp_p_target = json.loads(target_log_str)['id.resp_p']
        
        #print(tstarget)
        #print(orig_h_target, orig_p_target)
        #print(resp_h_target, resp_p_target)

        fnames = self._get_other_topics()
        for fname in fnames:
            entries = self._get_entries(fname)
            #print(fname)
            #print(len(entries))
            for entry in entries:
                try:
                    entry_dict = json.loads(entry)
                except:
                    continue

                try:
                    tsentry = entry_dict['ts']
                    orig_h_entry = json.loads(entry)['id.orig_h']
                    orig_p_entry = json.loads(entry)['id.orig_p']
                    resp_h_entry = json.loads(entry)['id.resp_h']
                    resp_p_entry = json.loads(entry)['id.resp_p']
                except:
                    continue

                #if_related = (orig_h_entry==resp_h_target and orig_p_entry==resp_p_target) or (resp_h_entry==resp_h_target and resp_p_entry==resp_p_target)
                diff = tsentry - tstarget
                if diff > 0 and diff < tscut:
                    #print(entry)
                    res.append(entry)
        return res
    
    def _get_graph(self, outdir, outfname, target_log_str, res_log_str_list):       
        dot = Digraph(comment='test graph')
        orig_h_target = json.loads(target_log_str)['id.orig_h']
        orig_p_target = json.loads(target_log_str)['id.orig_p']
        resp_h_target = json.loads(target_log_str)['id.resp_h']
        resp_p_target = json.loads(target_log_str)['id.resp_p']
        #print(type(orig_h_target))
        #print(type(orig_p_target))
        dot.node(orig_h_target, str(orig_h_target), color='red')
        dot.node(resp_h_target, str(resp_h_target), color='red')
        dot.edge(orig_h_target, resp_h_target, constraint='false', headlabel = str(orig_p_target), taillabel = str(resp_p_target), labelfontsize="8", len="3.0", color='red')
        
        for res_log_str in res_log_str_list:
            orig_h_res = json.loads(res_log_str)['id.orig_h']
            orig_p_res = json.loads(res_log_str)['id.orig_p']
            resp_h_res = json.loads(res_log_str)['id.resp_h']
            resp_p_res = json.loads(res_log_str)['id.resp_p']
            dot.node(orig_h_res, str(orig_h_res))
            dot.node(resp_h_res, str(resp_h_res))
            dot.edge(orig_h_res, resp_h_res, constraint='false', headlabel = str(orig_p_res), taillabel = str(resp_p_res), labelfontsize="8", len="3.0", color='blue')
        
        try:
            dot.render(outdir + '/' + outfname, view=False)  
            return dot
        except:
            print("render failed!")
            return None

In [3]:
#myBrologAnalyzer = BrologAnalyzer("../data/brologs/03-28/", "tds_sqlbatch.log")
myBrologAnalyzer = BrologAnalyzer("../data/brologs/03-28/", "conn.log")

#myBrologAnalyzer = BrologAnalyzer("../data/brologs/03-22-13/", "tds_sqlbatch.log")
#targets = myBrologAnalyzer._get_entries(myBrologAnalyzer._bro_target)
#myBrologAnalyzer._get_entries("conn.log")

#for thistarget in targets:
#    if json.loads(thistarget)['id.resp_p'] == 1433:
        #print(thistarget)
#        test_res = myBrologAnalyzer._get_target_tscut(thistarget, 1)
        
#myBrologAnalyzer._get_other_topics()
#03-22-13
#test_target_log_str = '{"ts":1521699403.756506,"uid":"C4Y1Xt1AqErW7gd1Kl","id.orig_h":"192.168.3.61","id.orig_p":61617,"id.resp_h":"172.16.1.58","id.resp_p":1433,"sqlbatch":"\\u0016\\u0012\\u0002\\u00fc\\u0088u\\u0001select * from EXT_ORDER_NEW\\u000d\\u000aWhere UPDSTATUS IS NUL"}'
#03-28
#test_target_log_str = '{"ts":1522298700.448996,"uid":"CJt0DqsKHR4gQU03e","id.orig_h":"172.16.3.106","id.orig_p":51300,"id.resp_h":"172.16.3.104","id.resp_p":1433,"sqlbatch":"\\u0016\\u0012\\u0002\\u0001SELECT TOP 1 * FROM [INTClient"}'
#test_target_log_str = '{"ts":1522298698.48293,"uid":"CP32sg2ZDTgkC1V2c4","id.orig_h":"172.16.3.106","id.orig_p":51705,"id.resp_h":"172.16.3.123","id.resp_p":1433,"proto":"tcp","duration":99.982921,"orig_bytes":1,"resp_bytes":0,"conn_state":"SF","missed_bytes":0,"history":"DadAFf","orig_pkts":10,"orig_ip_bytes":452,"resp_pkts":10,"resp_ip_bytes":452,"tunnel_parents":[]}'
#test_res_log_str_list = myBrologAnalyzer._get_target_tscut(test_target_log_str, 1)
#print(len(test_res_log_str_list))


In [4]:
#res_dot = myBrologAnalyzer._get_graph("graph_plots", "test_graph.gv", test_target_log_str, test_res_log_str_list)
#print(str(1))

In [5]:
good_labels = set([9,10,12,19,26,42,55,523])
#good_labels = set()
def _gen_graph_batch(thisBrologAnalyzer, tscut, port_target):
    i = 0
    outdir = "graph_tscut_" + str(tscut)
    raw_targets = thisBrologAnalyzer._get_entries(thisBrologAnalyzer._bro_target)
    targets = [x for x in raw_targets if ('"id.resp_p":' + str(port_target)) in x]
    print(len(targets))
    for thistarget in targets:
        i += 1
        print("processing event " + str(i) + "...")
        if i in good_labels:
            outfname = "graph_" + str(i) + "_" + str(port_target) + ".gv"
            thisres_list = thisBrologAnalyzer._get_target_tscut(thistarget, tscut)
            thisBrologAnalyzer._get_graph(outdir, outfname, thistarget, thisres_list)
            with open(outdir + '/' + outfname + '.txt', 'a') as the_file:
                the_file.write("Tagert: \n")
                the_file.write(thistarget)
                the_file.write("\n")
                the_file.write("\n")
                the_file.write("Ohters: \n")
                for x in thisres_list:
                    the_file.write(x)
                    the_file.write("\n")
                the_file.close()
        else:
            continue

#_gen_graph_batch(myBrologAnalyzer, 1, 1433)
#_gen_graph_batch(myBrologAnalyzer, 5, 1433)

In [6]:
def _ddn_json_reformat_js(path, fname):
    nodes_list = []
    edges_list = []
    with open(path + fname) as f:
        data = json.load(f)
        nodes = json.loads(data['nodes'])
        nodes_list = []
        #em... let's try to add some icons...
        for n in nodes["iid"].keys():
            nodes_list.append({"name": nodes["node_id"][n], "group": nodes["type"][n]})
        
        #"iid"
        #nodes["iid"]
        inv_node_iid_map = {v: k for k, v in nodes["iid"].items()}
        #print(inv_node_iid_map)
        
        edges = json.loads(data['edges'])
        for e in edges["ddn_id"].keys():
            thislabel = "_".join([edges["proto"][e], edges["port"][e], str(edges["total_bytes"][e])])
            #print(inv_node_id_map[edges["iid_from"][e]])
            #print(edges["iid_to"][e])
            iid_from = edges["iid_from"][e]
            iid_to = edges["iid_to"][e]
            if iid_from in inv_node_iid_map.keys() and iid_to in inv_node_iid_map.keys():
                #print(iid_from, iid_to)
                edges_list.append({"source": int(inv_node_iid_map[iid_from]), "target": int(inv_node_iid_map[iid_to]), "label": thislabel})
    #return nodes_list
    #return edges_list
    ddn_d3_res = {"nodes": nodes_list, "links": edges_list}
    
    with open("../d3js/ddn/" + "d3js_" + fname, 'w') as fp:
        json.dump(ddn_d3_res, fp)
    return ddn_d3_res

ddn_dict_d3js = _ddn_json_reformat_js("../data/ddn/", "ddn_hr.json")
ddn_dict_d3js = _ddn_json_reformat_js("../data/ddn/", "ddn_pii.json")
#print(type(ddn_dict_d3js))
#print(ddn_dict_d3js)

#### New section for ddn api

In [10]:
import ast
import copy
from functools import reduce
import json

import pandas as pd

In [2]:
def load_ddn_json(fs_name):
    with open(fs_name) as f:
        #ddn_raw = f.read()
        ddn_raw = ast.literal_eval(f.read())
        #print(type(ddn_raw))
        #print(ddn_raw)
        ddn_df = pd.DataFrame(ddn_raw)
        return ddn_df

test_ddn_df = load_ddn_json("data/ddn_test.json")
test_ddn_df

Unnamed: 0,applications,datastores,ddnId,ddnName,users
0,"[{'uri': 'youtube.com', 'appName': 'youtube.co...","[{'nodeType': 'FILE', 'nodeId': '192.168.7.110...",5a105e8b9d40e1329780d62ea2265d8a,test1,"[{'ip': '192.168.7.188', 'lastSeenTime': '1', ..."
1,"[{'uri': 'youtube.com', 'appName': 'youtube.co...","[{'nodeType': 'FILE', 'nodeId': '192.168.7.110...",ad0234829205b9033196ba818f7a872b,test2,"[{'ip': '192.168.7.188', 'lastSeenTime': '1', ..."


In [3]:
def pandas_explode(df, column_to_explode):
    """
    Similar to Hive's EXPLODE function, take a column with iterable elements, and flatten the iterable to one element 
    per observation in the output table

    :param df: A dataframe to explod
    :type df: pandas.DataFrame
    :param column_to_explode: 
    :type column_to_explode: str
    :return: An exploded data frame
    :rtype: pandas.DataFrame
    """

    # Create a list of new observations
    new_observations = list()

    # Iterate through existing observations
    for row in df.to_dict(orient='records'):

        # Take out the exploding iterable
        explode_values = row[column_to_explode]
        del row[column_to_explode]

        # Create a new observation for every entry in the exploding iterable & add all of the other columns
        for explode_value in explode_values:

            # Deep copy existing observation
            new_observation = copy.deepcopy(row)

            # Add one (newly flattened) value from exploding iterable
            new_observation[column_to_explode] = explode_value

            # Add to the list of new observations
            new_observations.append(new_observation)

    # Create a DataFrame
    return_df = pd.DataFrame(new_observations)

    # Return
    return return_df

In [4]:
def __get_node_df(ddn_raw, node_type, node_id_tag):
    """
    input ddn raw dict list from ddn pb, node type we deal with(can be applications, users or datastores)
    """
    nodes_raw = [{'ddn_id': ddn['ddnId'], 'ddn_name': ddn['ddnName'], 'nodes': ddn[node_type]} for ddn in ddn_raw]
    nodes_df = pd.DataFrame(nodes_raw)
    #assign node_type
    nodes_df['node_type'] = node_type
    nodes_df = pandas_explode(nodes_df, "nodes")
    nodes_df = nodes_df.rename(index=str, columns={"nodes": "node"})
    #extract node_id from node column
    nodes_df['node_id'] = nodes_df['node'].apply(lambda x: x[node_id_tag] )
    #extract node_attr from node column
    nodes_df['node_attr'] = nodes_df['node'].apply(lambda x: x)
    #drop the original node column
    nodes_df = nodes_df.drop(columns=['node'])
    return nodes_df

def _ddn_pb_to_nodes_df():
    with open("data/ddn_test.json") as f:
        #ddn_raw = f.read()
        ddn_raw = ast.literal_eval(f.read())
    
    usr_nodes_df = __get_node_df(ddn_raw, 'users', 'ip')
    app_nodes_df = __get_node_df(ddn_raw, 'applications', 'nodeId')
    dat_nodes_df = __get_node_df(ddn_raw, 'datastores', 'nodeId')

    nodes_df = pd.concat([usr_nodes_df, app_nodes_df, dat_nodes_df], axis=0, sort=False)

    #return usr_nodes_df
    #return app_nodes_df
    #return dat_nodes_df
    return nodes_df

test_node_df = _ddn_pb_to_nodes_df()
res = test_node_df.to_dict('records')
print(res)
print(type(res))

[{'ddn_id': '5a105e8b9d40e1329780d62ea2265d8a', 'ddn_name': 'test1', 'node_type': 'users', 'node_id': '192.168.7.188', 'node_attr': {'ip': '192.168.7.188', 'lastSeenTime': '1', 'edges': [{'idFrom': '192.168.7.188', 'idTo': 'youtube.com/12.3.4.52131'}], 'attributes': [{'attrName': 'user_access', 'attrType': 'ACCESS_GROUP', 'attrValue': 'contractor'}], 'nodeType': 'USER', 'nodeId': '192.168.7.188'}}, {'ddn_id': '5a105e8b9d40e1329780d62ea2265d8a', 'ddn_name': 'test1', 'node_type': 'users', 'node_id': '192.168.7.166', 'node_attr': {'ip': '192.168.7.166', 'lastSeenTime': '1', 'edges': [{'idFrom': '192.168.7.166', 'idTo': 'youtube.com/12.3.4.52131'}], 'attributes': [{'attrName': 'user_access', 'attrType': 'ACCESS_GROUP', 'attrValue': 'executive'}], 'nodeType': 'USER', 'nodeId': '192.168.7.166'}}, {'ddn_id': 'ad0234829205b9033196ba818f7a872b', 'ddn_name': 'test2', 'node_type': 'users', 'node_id': '192.168.7.188', 'node_attr': {'ip': '192.168.7.188', 'lastSeenTime': '1', 'edges': [{'idFrom': '

In [17]:
def __get_edge_df(ddn_raw):
    """
    input ddn raw dict list from ddn pb, node type we deal with(can be applications, users or datastores)
    """
    edges_raw = [{'from_ddn_id': ddn['ddnId'], 'to_ddn_id': ddn['ddnId'],
                      'edges': reduce(lambda x, y: x+y, [node['inEdges'] for node in ddn['applications']])} for ddn in ddn_raw]
    
    edges_raw.extend([{'from_ddn_id': ddn['ddnId'], 'to_ddn_id': ddn['ddnId'],
                      'edges': reduce(lambda x, y: x+y, [node['outEdges'] for node in ddn['applications']])} for ddn in ddn_raw])
    print(edges_raw)
    edges_df = pd.DataFrame(edges_raw)
    edges_df = pandas_explode(edges_df, "edges")
    edges_df = edges_df.rename(index=str, columns={"edges": "edge"})
    
    #extract from_node_id and to_node_id from edge column
    edges_df['from_node_id'] = edges_df['edge'].apply(lambda x: x['idFrom'])
    edges_df['to_node_id'] = edges_df['edge'].apply(lambda x: x['idTo'])
    #extract edge_attr from edge column
    edges_df['edge_attr'] = edges_df['edge'].apply(lambda x: x)
    #drop the original edge column
    edges_df = edges_df.drop(columns=['edge'])
    return edges_df

def _ddn_pb_to_edges_df():
    with open("data/ddn_test.json") as f:
        #ddn_raw = f.read()
        ddn_raw = ast.literal_eval(f.read())
    #let's assume we can get edge information from applications
    edges_df = __get_edge_df(ddn_raw)
    return edges_df

test_edge_df = _ddn_pb_to_edges_df()
test_edge_df

[{'from_ddn_id': '5a105e8b9d40e1329780d62ea2265d8a', 'to_ddn_id': '5a105e8b9d40e1329780d62ea2265d8a', 'edges': [{'idFrom': '192.168.7.188', 'idTo': 'youtube.com/12.3.4.52131'}, {'idFrom': '192.168.7.166', 'idTo': 'youtube.com/12.3.4.52131'}]}, {'from_ddn_id': 'ad0234829205b9033196ba818f7a872b', 'to_ddn_id': 'ad0234829205b9033196ba818f7a872b', 'edges': [{'idFrom': '192.168.7.188', 'idTo': 'youtube.com/12.3.4.52131'}, {'idFrom': '192.168.7.166', 'idTo': 'youtube.com/12.3.4.52131'}]}, {'from_ddn_id': '5a105e8b9d40e1329780d62ea2265d8a', 'to_ddn_id': '5a105e8b9d40e1329780d62ea2265d8a', 'edges': [{'idFrom': 'youtube.com/12.3.4.52131', 'idTo': '192.168.7.110:8080/helios/data/store/'}]}, {'from_ddn_id': 'ad0234829205b9033196ba818f7a872b', 'to_ddn_id': 'ad0234829205b9033196ba818f7a872b', 'edges': [{'idFrom': 'youtube.com/12.3.4.52131', 'idTo': '192.168.7.110:8080/helios/data/store/'}]}]


Unnamed: 0,from_ddn_id,to_ddn_id,from_node_id,to_node_id,edge_attr
0,5a105e8b9d40e1329780d62ea2265d8a,5a105e8b9d40e1329780d62ea2265d8a,192.168.7.188,youtube.com/12.3.4.52131,"{'idFrom': '192.168.7.188', 'idTo': 'youtube.c..."
1,5a105e8b9d40e1329780d62ea2265d8a,5a105e8b9d40e1329780d62ea2265d8a,192.168.7.166,youtube.com/12.3.4.52131,"{'idFrom': '192.168.7.166', 'idTo': 'youtube.c..."
2,ad0234829205b9033196ba818f7a872b,ad0234829205b9033196ba818f7a872b,192.168.7.188,youtube.com/12.3.4.52131,"{'idFrom': '192.168.7.188', 'idTo': 'youtube.c..."
3,ad0234829205b9033196ba818f7a872b,ad0234829205b9033196ba818f7a872b,192.168.7.166,youtube.com/12.3.4.52131,"{'idFrom': '192.168.7.166', 'idTo': 'youtube.c..."
4,5a105e8b9d40e1329780d62ea2265d8a,5a105e8b9d40e1329780d62ea2265d8a,youtube.com/12.3.4.52131,192.168.7.110:8080/helios/data/store/,"{'idFrom': 'youtube.com/12.3.4.52131', 'idTo':..."
5,ad0234829205b9033196ba818f7a872b,ad0234829205b9033196ba818f7a872b,youtube.com/12.3.4.52131,192.168.7.110:8080/helios/data/store/,"{'idFrom': 'youtube.com/12.3.4.52131', 'idTo':..."


#### ddn view

In [1]:
import pandas as pd
from cassandra.cluster import Cluster

def cs_select(cs_host_list, csql_str):
    cluster = Cluster(cs_host_list)
    session = cluster.connect()
    res = session.execute(csql_str)
    cluster.shutdown()
    return res

In [11]:
query = "select * from api.ddn_node_linkage"

res_df = pd.DataFrame(list(cs_select(['192.168.7.110', '192.168.7.111'], query)))
res_df = res_df.groupby(['ddn_id', 'node_type'])['node_id'].count().reset_index().rename(columns={'node_id':'node_cnt'})
print(type(res_df))
res_df

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,ddn_id,node_type,node_cnt
0,290c332d1efec714faa4bb975a075ae8,1,18
1,290c332d1efec714faa4bb975a075ae8,2,20
2,290c332d1efec714faa4bb975a075ae8,3,2
3,54d752f95f5627ffbf66731317db0d20,1,2
4,54d752f95f5627ffbf66731317db0d20,2,20
5,54d752f95f5627ffbf66731317db0d20,3,3
6,54d752f95f5627ffbf66731317db0d20,4,11
7,5b382afdbbb1c127acf105a7d45041c4,1,1
8,5b382afdbbb1c127acf105a7d45041c4,3,3
9,5b382afdbbb1c127acf105a7d45041c4,4,11


In [18]:
def cs_to_pandas(cs_host_list, csql_str):
    res_df = pd.DataFrame(list(cs_select(cs_host_list, csql_str)))
    #res_df = res_df.groupby(['ddn_id', 'node_type'])['node_id'].count().reset_index().rename(columns={'node_id':'node_cnt'})
    return res_df

In [21]:
#query = "select * from api.user_node"
this_ddn_id = '54d752f95f5627ffbf66731317db0d20'
query = "select node_id, node_type from api.ddn_node_linkage where ddn_id = '" + this_ddn_id + "' allow filtering"

res_df = cs_to_pandas(['192.168.7.110', '192.168.7.111'], query)
res_df
#print(res_df.columns.values)
#print(type(res_df.columns.values))
node_id_tag = "user_id"
attr_col_names = [col_name for col_name in res_df.columns if col_name != node_id_tag]
print(attr_col_names)
print(type(attr_col_names))
res_df


['node_id', 'node_type']
<class 'list'>


Unnamed: 0,node_id,node_type
0,/data/form1.pdf,2
1,/data/form10.pdf,2
2,/data/form2.pdf,2
3,/data/form3.pdf,2
4,/data/form4.pdf,2
5,/data/form5.pdf,2
6,/data/form6.pdf,2
7,/data/form7.pdf,2
8,/data/form8.pdf,2
9,/data/form9.pdf,2


#### ddn nodes

In [2]:
import pandas as pd
from cassandra.cluster import Cluster

def cs_select(cs_host_list, csql_str):
    cluster = Cluster(cs_host_list)
    session = cluster.connect()
    res = session.execute(csql_str)
    session.shutdown()
    cluster.shutdown()
    return res

def cs_to_pandas(cs_host_list, csql_str):
    res_df = pd.DataFrame(list(cs_select(cs_host_list, csql_str)))
    return res_df

In [3]:
import ast
import copy
from functools import reduce
import json

import pandas as pd

def __normalize_node_df(df_raw, node_id_tag):
    """
    input: raw node pandas dataframe that loaded from cassandra directly, node_id tag
    output: a normalized node pandas dataframe, schema, node_id and node_attr
    """
    #rename node_id_tag to be node_id
    nodes_df = df_raw.rename(index=str, columns={node_id_tag: "node_id"})
    #extract node_attr from node column except for node_id_tag
    attr_col_names = [col_name for col_name in df_raw.columns if col_name != node_id_tag]
    nodes_df['node_attr'] = nodes_df.apply(lambda row: dict(zip(attr_col_names, [row[attr_col_name] for attr_col_name in attr_col_names])), axis=1)
    #nodes_df['node_attr'] = nodes_df.apply(lambda row: {attr_col_names[0]: row[attr_col_names[0]]}, axis=1)
    #nodes_df['node_attr'] = nodes_df.apply(lambda row: dict(zip('aaa', 'bbb')), axis=1)
    nodes_df = nodes_df.drop(columns=attr_col_names)
    return nodes_df

def cs_q_to_nodes_df(linkage_nodes_df,
                     usr_df_raw,
                     app_df_raw,
                     dat_df_raw):

    usr_nodes_df = __normalize_node_df(usr_df_raw, 'user_id')
    app_nodes_df = __normalize_node_df(app_df_raw, 'node_id')
    dat_nodes_df = __normalize_node_df(dat_df_raw, 'tf_loc')

    nodes_df = pd.concat([usr_nodes_df, app_nodes_df, dat_nodes_df], axis=0, sort=False)
    return nodes_df

In [4]:
CS_HOST_LIST = ['192.168.7.110', '192.168.7.111']
query = "select * from api.user_node"
usr_df_raw = cs_to_pandas(CS_HOST_LIST, query)
query = "select * from api.application_node"
app_df_raw = cs_to_pandas(CS_HOST_LIST, query)
query = "select * from api.tf_node"
dat_df_raw = cs_to_pandas(CS_HOST_LIST, query)

usr_nodes_df = __normalize_node_df(usr_df_raw, 'user_id')
app_nodes_df = __normalize_node_df(app_df_raw, 'node_id')
dat_nodes_df = __normalize_node_df(dat_df_raw, 'tf_loc')

nodes_df = pd.concat([usr_nodes_df, app_nodes_df, dat_nodes_df], axis=0)
#nodes_df = dat_nodes_df

nodes_df

Unnamed: 0,node_id,node_attr
0,71.19.251.142,{'attr_value': 'unknown_user'}
1,199.66.91.185,{'attr_value': 'unknown_user'}
2,185.217.117.140,{'attr_value': 'unknown_user'}
3,192.168.7.208,{'attr_value': 'unknown_user'}
4,192.168.7.114,{'attr_value': 'unknown_user'}
5,142.44.173.131,{'attr_value': 'unknown_user'}
6,185.217.117.116,{'attr_value': 'unknown_user'}
7,185.217.68.208,{'attr_value': 'unknown_user'}
8,185.212.171.21,{'attr_value': 'unknown_user'}
9,84.16.224.37,{'attr_value': 'unknown_user'}


In [6]:
this_ddn_id = '54d752f95f5627ffbf66731317db0d20'
query = "select node_id, node_type from api.ddn_node_linkage where ddn_id = '" + this_ddn_id + "' allow filtering"
linkage_nodes_df = cs_to_pandas(['192.168.7.110', '192.168.7.111'], query)
#linkage_nodes_df = linkage_nodes_df.set_index('node_id')
linkage_nodes_df.empty

False

In [60]:
linkage_nodes_df.set_index('node_id').join(nodes_df.set_index('node_id'), how='inner')

Unnamed: 0_level_0,node_type,node_attr
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1
/data/form10.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/data/form2.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/data/form4.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/data/form5.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/data/form7.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/data/form8.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/data/form9.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/form10.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/form2.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."
/form3.pdf,2,"{'ip': '192.168.8.71', 'port': 80, 'user_agent..."


#### The end of this file