In [None]:
from alarms import *
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

### Enter the input variables

In [None]:
# Duration threshold in seconds (two minutes); the preprocessing cell keeps only
# alarms whose TimeDelta is strictly greater than this value.
filter_short_alarms = 60 * 2

# Section 0: Reading CSV files and Preprocessing (filtering)

In [None]:
# files = ["haziran2019.csv","march2019.csv","mayis2019.csv","nisan2019.csv"]
files = ["mayis2019.csv"]
files = ["formatted-pre-2-"+f for f in files]
files_operator = ["MayOperation_v2.xls"]
path = "./data/new/"


# Reading alarms data

f = files[0]
print("==================== File : {} =============".format(f))
input_fname = f
df_csv_alarms = pd.read_csv(path + input_fname, low_memory=False, parse_dates=["StartTime", "EndTime"])
# Alarm duration in seconds. The vectorised .dt accessor yields NaN for rows whose
# StartTime/EndTime is missing (NaT) instead of raising; NaN then fails the
# "> filter_short_alarms" comparison below, so such rows are simply filtered out.
df_csv_alarms["TimeDelta"] = (df_csv_alarms["EndTime"] - df_csv_alarms["StartTime"]).dt.total_seconds()
# Keep only the alarms that lasted strictly longer than the configured threshold.
df_alarms_filtered = df_csv_alarms[df_csv_alarms["TimeDelta"] > filter_short_alarms]


# Reading operator data
f = files_operator[0]
print("==================== File : {} =============".format(f))
cols = ["MachineName","SourceName","EventTime","Message","Severity","Mask","NewState","EventType","EventCategory","AckReq","ActorID","Area","Attributes"]
df_excel_operator = pd.read_excel(path+"/operator-action/"+f,usecols=cols)
# Collapse runs of whitespace (and strip the ends) in every column that holds text.
# A column is treated as text when any of its cells is a str, rather than judging by
# row 0 only, and non-string cells (e.g. NaN) are passed through untouched.
for col in df_excel_operator.columns:
    if df_excel_operator[col].apply(lambda v: isinstance(v, str)).any():
        df_excel_operator[col] = df_excel_operator[col].apply(
            lambda v: " ".join(v.split()) if isinstance(v, str) else v
        )

# print(type(df_excel_operator["EventTime"][0]))

def changeDate(d):
    """Clean up a timestamp string and hand it to ``parse``.

    Every occurrence of ".000000000" is removed and every "/" is replaced by "-"
    before parsing.

    Args:
        d: timestamp text (must support ``str.replace``).

    Returns:
        Whatever ``parse`` returns for the cleaned text.
        NOTE(review): ``parse`` is not defined in this notebook; presumably it is
        ``dateutil.parser.parse`` brought in by ``from alarms import *`` - confirm.
    """
    cleaned = d.replace(".000000000", "").replace("/", "-")
    return parse(cleaned)
# Convert the operator EventTime strings using changeDate.
df_excel_operator["EventTime"] = df_excel_operator["EventTime"].apply(changeDate)

# Sanity checks: each data set must contain exactly one MachineName, and it must be
# the same machine in both.
alarm_machines = df_alarms_filtered["MachineName"].unique()
operator_machines = df_excel_operator["MachineName"].unique()
assert len(alarm_machines) == 1, "expected exactly one MachineName in the filtered alarms"
assert len(operator_machines) == 1, "expected exactly one MachineName in the operator actions"
assert alarm_machines[0] == operator_machines[0], "alarms and operator actions refer to different machines"

talrms = len(df_alarms_filtered["SourceName"].unique())
toperator = len(df_excel_operator["SourceName"].unique())

# Build the operator SourceName lookup once; the previous code recomputed unique()
# and scanned it linearly for every alarm SourceName. NaN is left out of the set so
# that, as before, a missing SourceName is never reported as common.
operator_sources = set(df_excel_operator["SourceName"].dropna().unique())

# SourceNames shared with the operator actions: first for the raw alarms, then for
# the duration-filtered alarms (order follows first appearance in the alarm data).
temp_commons = [s for s in df_csv_alarms["SourceName"].unique() if s in operator_sources]
commons = [s for s in df_alarms_filtered["SourceName"].unique() if s in operator_sources]

print(" >> # of unique SourceNames in Alarms {}, Unique SourceNames in Operator Actions {}".format(talrms,toperator))
print(" >> # of Common SourceName bw filtered alarms and non-filtered alarms {} {} with operator action, respectively".format(len(commons),len(temp_commons)))
print(" >> # of filtered Alarms: {}, # of Operator Actions: {}".format(df_alarms_filtered.shape[0],df_excel_operator.shape[0]))
print("Raw Alarms ",df_csv_alarms.shape[0])
print(" >> Complete")

# Case 2: Analyze the Common SourceNames (Alarms SourceNames and Operator Action Source Names)

In [None]:
# >>> from pyvis.network import Network
# >>> import networkx as nx
# >>> nx_graph = nx.cycle_graph(10)
# >>> nx_graph.nodes[1]['title'] = 'Number 1'
# >>> nx_graph.nodes[1]['group'] = 1
# >>> nx_graph.nodes[3]['title'] = 'I belong to a different group!'
# >>> nx_graph.nodes[3]['group'] = 10
# >>> nx_graph.add_node(20, size=20, title='couple', group=2)
# >>> nx_graph.add_node(21, size=15, title='couple', group=2)
# >>> nx_graph.add_edge(20, 21, weight=5)
# >>> nx_graph.add_node(25, size=25, label='lonely', title='lonely node', group=3)
# >>> nt = Network("500px", "500px")
# populates the nodes and edges data structures
# >>> nt.from_nx(nx_graph)
# >>> nt.show("nx.html")

### Constructing Graph




In [None]:
def case2Graph(df_alarms,df_operator, common_sources):
    """Build an undirected graph linking each common SourceName to its operator node.

    For every name in ``common_sources`` two nodes are created: the name itself
    (color "Green") and ``"Operator->" + name`` (color "Orange"), joined by one edge.
    Each node carries a ``count`` attribute (number of rows with that SourceName in
    the alarms frame or the operator frame respectively), a ``size`` that starts at
    10 and grows by 0.2 per row, and a ``title`` of the form ``"<node>:<count>"``.

    Args:
        df_alarms: DataFrame with a "SourceName" column (alarms).
        df_operator: DataFrame with a "SourceName" column (operator actions).
        common_sources: iterable of SourceName strings present in both frames.

    Returns:
        The populated ``nx.Graph``. A short row-count line and a graph summary line
        are printed as a side effect.
    """
    g = nx.Graph() # Undirected graph
    # Only rows whose SourceName is in the common set take part in the counts.
    df_operator = df_operator.loc[df_operator["SourceName"].isin(common_sources)]
    df_alarms = df_alarms.loc[df_alarms["SourceName"].isin(common_sources)]
    print(">> Alarms:{}, Operator Actions:{}".format(df_alarms.shape[0], df_operator.shape[0]))

    # Alarm nodes are added before operator nodes so the node order stays the same.
    for s in common_sources:
        g.add_node(s,size=10,count=0,color="Green", group = 1)

    for s in common_sources:
        g.add_node("Operator->"+s,size=10,count=0,color="Orange", group = 1)

    for s in df_alarms["SourceName"]:
        if g.has_node(s):
            g.nodes[s]["size"] += 0.2
            g.nodes[s]["count"] += 1

    for s in df_operator["SourceName"]:
        operator_node = "Operator->"+s
        if g.has_node(operator_node):
            g.nodes[operator_node]["size"] += 0.2
            g.nodes[operator_node]["count"] += 1

    for s in common_sources:
        g.add_edge(s,"Operator->"+s)

    for s in list(g.nodes):
        g.nodes[s]["title"] = "{}:{}".format(s,g.nodes[s]["count"])

    # nx.info() was removed in NetworkX 3.0, so the summary is assembled directly
    # from the graph instead of calling nx.classes.function.info(g).
    print(">> {} with {} nodes and {} edges".format(type(g).__name__, g.number_of_nodes(), g.number_of_edges()))

    return g


# Build the Case 2 graph from the duration-filtered alarms and the operator actions,
# using the SourceNames common to both.
ug = case2Graph(df_alarms_filtered, df_excel_operator, commons) 


### Orange Nodes are operator action nodes

In [None]:
# Render the Case 2 graph with pyvis inside the notebook. The page is written to its
# own HTML file: sharing "nt.html" with another visualisation cell makes the later
# cell overwrite the file this cell's output frame points at.
nt = Network("500px", "100%", notebook=True)
nt.from_nx(ug)
# nt.show_buttons()
# nt.repulsion()
nt.show("nt_case2.html")

# Case 3: Relating alarms with actions

### Process data to cross check this use case.

In [None]:
def case3RelatingAlarmsWithActions(df_alarms,df_actions, common_sources, filter1 = (60*60) * 5, edge_weight_cutoff = 100):
    """Build a directed graph from operator actions to the alarms active at that time.

    Nodes: one per alarm SourceName (color "Black") and one ``"Operator->" + name``
    node per operator-action SourceName (color "Orange"); ``count`` is the number of
    rows seen for that name, and operator nodes also grow in ``size`` by 0.2 per
    repeat. Edges: for every (action, alarm) pair where the action's EventTime lies
    in ``(StartTime, EndTime]`` of the alarm and the alarm ends less than ``filter1``
    seconds after the action, the edge operator-node -> alarm-node has its
    ``weight`` incremented. Nodes get a ``"<node>:<count>"`` title and edges an
    ``"<edge>:<weight>"`` title; finally edges whose weight is not above
    ``edge_weight_cutoff`` are removed.

    Args:
        df_alarms: DataFrame with "SourceName", "StartTime" and "EndTime" columns.
        df_actions: DataFrame with "SourceName" and "EventTime" columns.
        common_sources: accepted for interface compatibility; not used here.
        filter1: maximum number of seconds between the action and the alarm's end.
        edge_weight_cutoff: edges with weight <= this value are dropped
            (default 100, the value that was previously hard-coded).

    Returns:
        The resulting ``nx.DiGraph``. Row counts and a graph summary are printed.
    """
    print(">> Alarms:{}, Operator Actions:{}".format(df_alarms.shape[0], df_actions.shape[0]))

    g = nx.DiGraph() # Directed graph

    #---------- Adding Nodes----------------
    for s in df_alarms["SourceName"]:
        if g.has_node(s):
            # Alarm nodes keep a constant size; only the occurrence count grows.
            g.nodes[s]["count"] += 1
        else:
            g.add_node(s,size=10,count=1,color="Black", group = 1)

    for s in df_actions["SourceName"]:
        operator_node = "Operator->"+s
        if g.has_node(operator_node):
            g.nodes[operator_node]["size"] += 0.2
            g.nodes[operator_node]["count"] += 1
        else:
            g.add_node(operator_node,size=10,count=1,color="Orange", group = 1)

    #------------------ ADDING EDGES --------------------------
    alarms_by_etime = sorted(df_alarms.to_dict(orient="records"), key=lambda arg: arg["EndTime"])
    actions_by_time = sorted(df_actions.to_dict(orient="records"), key=lambda arg: arg["EventTime"])

    for action in actions_by_time:
        event_time = action["EventTime"]
        operator_node = "Operator->"+action["SourceName"]
        for alarm in alarms_by_etime:
            # The action must fall inside the alarm interval (start exclusive, end
            # inclusive) and the alarm must end within filter1 seconds of it.
            if event_time > alarm["StartTime"] and event_time <= alarm["EndTime"] and (alarm["EndTime"]-event_time).total_seconds() < filter1:
                alarm_node = alarm["SourceName"]
                if g.has_edge(operator_node, alarm_node):
                    g.edges[operator_node, alarm_node]["weight"] += 1
                else:
                    g.add_edge(operator_node, alarm_node, weight=1)

    for s in list(g.nodes):
        g.nodes[s]["title"] = "{}:{}".format(s,g.nodes[s]["count"])

    for e in list(g.edges):
        g.edges[e]["title"] = "{}:{}".format(e,g.edges[e]["weight"])

    # Drop weak relations: anything seen edge_weight_cutoff times or fewer.
    remove_edges = [edge for edge in list(g.edges) if g.edges[edge]["weight"] <= edge_weight_cutoff]
    g.remove_edges_from(remove_edges)

    # nx.info() was removed in NetworkX 3.0, so the summary is assembled directly
    # from the graph instead of calling nx.classes.function.info(g).
    print(">> {} with {} nodes and {} edges".format(type(g).__name__, g.number_of_nodes(), g.number_of_edges()))
    return g

# Build the Case 3 directed graph (operator action -> alarm) from the
# duration-filtered alarms and the operator actions, with the default thresholds.
dG_case3 = case3RelatingAlarmsWithActions(df_alarms_filtered, df_excel_operator, commons) 


### Orange nodes are the operator action nodes

In [None]:
# Render the Case 3 graph with pyvis inside the notebook. The page is written to its
# own HTML file: sharing "nt.html" with another visualisation cell makes whichever
# cell runs last overwrite the file the other cell's output frame points at.
nt = Network("500px", "100%", notebook=True)
nt.from_nx(dG_case3)
nt.show_buttons()
# nt.repulsion()
nt.show("nt_case3.html")

In [None]:
# Per-node summary of the Case 3 graph: occurrence count plus weighted out-, in- and
# total degree.
nodes_dict = {
    node: {
        "count": dG_case3.nodes[node]["count"],
        "outd": dG_case3.out_degree(node, "weight"),
        "ind": dG_case3.in_degree(node, "weight"),
        "totald": dG_case3.degree(node, "weight"),
    }
    for node in list(dG_case3.nodes)
}

# Order by weighted in-degree (largest first) and keep only nodes whose count and
# total weighted degree both exceed 20.
nodes_dict = dict(
    (name, stats)
    for name, stats in sorted(nodes_dict.items(), key=lambda item: item[1]["ind"], reverse=True)
    if stats["count"] > 20 and stats["totald"] > 20
)
print(nodes_dict)

# Case 4 (Extra): Find the most important SourceName in the Graph using PageRank or Centrality Algos
Operator data is not considered here.
 

In [None]:
def case4FindImportantSensorNodes(df_alarms, common_sources, tfilter1 = 60):
    """Build a directed graph of alarms that start shortly after one another.

    Nodes: one per alarm SourceName, with ``count`` (number of rows), ``size``
    (5, plus 0.1 per repeat) and a ``"<name>:<count>"`` title. Edges: alarms are
    ordered by StartTime; for each alarm, every later alarm that starts within
    ``tfilter1`` seconds, has a different SourceName, and either ends no later than
    the earlier alarm or ends less than ``tfilter1`` seconds after it, adds 1 to the
    weight of the edge earlier -> later. Each edge's title is its weight. Nodes left
    without any edge are removed.

    Args:
        df_alarms: DataFrame with "SourceName", "StartTime" and "EndTime" columns.
        common_sources: accepted for interface compatibility; not used here.
        tfilter1: time window in seconds applied to both start and end times.

    Returns:
        The resulting ``nx.DiGraph``. The row count and a graph summary are printed.
    """
    print(">> Alarms:{} ".format(df_alarms.shape[0]))
    G = nx.DiGraph() # Directed Graph

    # Adding Nodes
    for s in df_alarms["SourceName"]:
        if G.has_node(s):
            G.nodes[s]["size"] += 0.1
            G.nodes[s]["count"] += 1
        else:
            G.add_node(s,size=5,count =1)

    for s in list(G.nodes):
        G.nodes[s]["title"] = "{}:{}".format(s,G.nodes[s]["count"])

    start_records = sorted(df_alarms.to_dict(orient="records"), key=lambda arg: arg["StartTime"])
    total = len(start_records)
    for i in range(total):
        prevd = start_records[i]
        # Index-based inner loop (no slicing) so the early break keeps the scan short.
        for j in range(i+1, total):
            nextd = start_records[j]
            # Records are sorted by StartTime: once one alarm starts too late, all
            # the following ones do as well.
            if (nextd["StartTime"]-prevd["StartTime"]).total_seconds() > tfilter1:
                break
            if nextd["SourceName"] == prevd["SourceName"] or not (nextd["StartTime"] >= prevd["StartTime"]):
                continue
            if nextd["EndTime"] <= prevd["EndTime"] or (nextd["EndTime"]-prevd["EndTime"]).total_seconds() < tfilter1:
                if G.has_edge(prevd["SourceName"],nextd["SourceName"]):
                    G.edges[prevd["SourceName"],nextd["SourceName"]]["weight"] += 1
                else:
                    G.add_edge(prevd["SourceName"],nextd["SourceName"],color="Red",weight=1)

    for edge in list(G.edges):
        G.edges[edge]["title"] = "{}".format(G.edges[edge]["weight"])

    G.remove_nodes_from(list(nx.isolates(G)))
    # nx.info() was removed in NetworkX 3.0, so the summary is assembled directly
    # from the graph instead of calling nx.classes.function.info(G).
    print(">> {} with {} nodes and {} edges".format(type(G).__name__, G.number_of_nodes(), G.number_of_edges()))
    return G

# Build the Case 4 directed alarm-to-alarm graph from the duration-filtered alarms,
# using the default 60-second window.
dG_case4 = case4FindImportantSensorNodes(df_alarms_filtered, commons)


### Important SourceNames

In [None]:
# PageRank on the Case 4 graph using the edge weights; scores are formatted to four
# decimals and listed from highest to lowest (first 50 shown).
result = nx.algorithms.link_analysis.pagerank_alg.pagerank(dG_case4, weight="weight", max_iter=100000)
result = dict(
    (node, float(format(score, '.4f')))
    for node, score in sorted(result.items(), key=lambda item: item[1], reverse=True)
)
print(">> Page Rank (Highest to Lowest) :", list(result.items())[:50])

print("              --------------------------------------------------")

# Eigenvector centrality on the same graph, reported the same way.
result = nx.eigenvector_centrality(dG_case4, weight="weight")
result = dict(
    (node, float(format(score, '.4f')))
    for node, score in sorted(result.items(), key=lambda item: item[1], reverse=True)
)
print(">> Eigenvector Centrality (Highest to Lowest) :", list(result.items())[:50])

# result = nx.closeness_centrality(G)
# result = {k:float(format(v, '.4f')) for k,v in sorted(result.items(), key=lambda arg: arg[1], reverse=True)}
# print("Closeness Centrality:",list(result.items())[:50])
# # closeness_centrality

### Important nodes based on incoming and outgoing edges

outgoing-> triggering other alarms;

incoming -> triggered after another alarm

In [None]:
# HITS on the Case 4 graph: h holds the hub scores, a the authority scores.
h, a = nx.hits(dG_case4)

# Hub scores, four decimals, highest first (first 50 shown).
result = dict(
    (node, float(format(score, '.4f')))
    for node, score in sorted(h.items(), key=lambda item: item[1], reverse=True)
)
print(">> Hub => Outgoing Edges: Based on out_degree (max to min):", list(result.items())[:50])

print("              --------------------------------------------------")

# Authority scores, reported the same way.
result = dict(
    (node, float(format(score, '.4f')))
    for node, score in sorted(a.items(), key=lambda item: item[1], reverse=True)
)
print("Auth => Incoming Edges: Based on in_degree (max to min):", list(result.items())[:50])


### Degree Analysis

In [None]:
# Per-node summary of the Case 4 graph: occurrence count plus weighted out-, in- and
# total degree.
nodes_dict = {
    node: {
        "count": dG_case4.nodes[node]["count"],
        "outd": dG_case4.out_degree(node, "weight"),
        "ind": dG_case4.in_degree(node, "weight"),
        "totald": dG_case4.degree(node, "weight"),
    }
    for node in list(dG_case4.nodes)
}

# Order by weighted in-degree (largest first) and keep only nodes seen more than
# 4 times whose total weighted degree exceeds 1.
nodes_dict = dict(
    (name, stats)
    for name, stats in sorted(nodes_dict.items(), key=lambda item: item[1]["ind"], reverse=True)
    if stats["count"] > 4 and stats["totald"] > 1
)
print(nodes_dict)

### Visualization

In [None]:
# undirected = ipycytoscape.CytoscapeWidget()
# undirected.graph.add_graph_from_networkx(G )
# undirected


In [None]:
# nt.enable_physics(False)
# nt.set_options('{nodes: {shape: "dot",size: 30,font: {size: 32,color: "#ffffff"},borderWidth: 2},edges: {width: 2}}')
# nt.toggle_physics(False)
# nt.toggle_stabilization(False)


### Old

In [None]:
# ranks_dict_alarms = df_common_alarms["SourceName"].value_counts()
# ranks_dict_actions =df_common_operator["SourceName"].value_counts()

# for sname in sorted(df_common_alarms["SourceName"].unique()):
#     if ranks_dict_actions[sname]== ranks_dict_alarms[sname]:
#         print(">{}, Operator = {}, Alarms = {}".format(sname, ranks_dict_actions[sname], ranks_dict_alarms[sname]))




In [None]:
# def getDF (df, col, values_arra):
#     return df.loc[df[col].isin(values_arra)]


In [None]:
# sname = "47FIC1569"

# getDF(df_common_alarms,"SourceName", [sname]).sort_values(by=["StartTime"])

In [None]:
# getDF(df_common_operator,"SourceName", [sname]).sort_values(by=["EventTime"])

In [None]:
# cols = ["MachineName","SourceName","EventTime", "Message","MessageType","Quality","Condition","Mask","NewState","Status"]
# df_normal = pd.read_csv(path+"formatted-pre-1-mayis2019.csv",parse_dates=["EventTime"],usecols=cols)
# getDF(df_normal,"SourceName",[sname]).sort_values(by=["EventTime"])