## 1. Environment

In [None]:
import re
import os
import sys
import math
import pickle
import textwrap
import heapq
from collections import Counter
from itertools import cycle, islice
from typing import (
    Callable, List, Tuple, Sequence, Set,
    Hashable, Dict, Iterable, Union, Pattern, Optional
)

import networkx as nx
import matplotlib.pyplot as plt
from tqdm import tqdm

import DAG_functions as dag_func

## 2. Preprocessing

In [None]:
# Root of the Drain_CPD_LLM working tree. The default is the original author's
# checkout; set the DRAIN_CPD_LLM_ROOT environment variable to run the notebook
# from another machine without editing this cell.
PROJECT_ROOT = os.environ.get(
    'DRAIN_CPD_LLM_ROOT',
    '/Users/qingyuanzheng/Documents/VScode/Agent/WF_objects/Drain_CPD_LLM',
)

# Name of the capture being analysed and the Drain configuration tag that is
# embedded in the result file names.
DATASET_NAME = 'gnb_10m_20m'
DRAIN_VARIANT = 'Drain_mid'

# Pickled signal object consumed via pickle.load in the loading cell.
sig_file_path = os.path.join(
    PROJECT_ROOT, 'results', 'time_series', f'{DATASET_NAME}({DRAIN_VARIANT}).pkl'
)
# Raw text log, read line by line.
log_file = os.path.join(PROJECT_ROOT, 'log', f'{DATASET_NAME}.log')
# Cluster listing text file; its lines carry "ID=<n>" entries parsed further down.
log_cluster_path = os.path.join(
    PROJECT_ROOT, 'results', 'cluster_results', f'{DATASET_NAME}({DRAIN_VARIANT}).txt'
)

In [None]:
# Regular expressions that recognise the bracketed layer tag of a log line;
# whitespace between the layer name and the closing bracket is tolerated.
layer_patterns = dict(
    PDCP=r'\[PDCP\s*\]',
    RLC=r'\[RLC\s*\]',
    MAC=r'\[MAC\s*\]',
)

# NOTE: pickle.load can execute arbitrary code while deserialising, so
# sig_file_path must only ever point at a file you produced yourself.
with open(sig_file_path, 'rb') as sig_fh:
    signals = pickle.load(sig_fh)

# Keep the raw log in memory as a list of lines (line endings preserved).
with open(log_file, 'r', encoding='utf-8') as log_fh:
    lines = list(log_fh)

# NOTE(review): the second argument is presumably the number of UEs to scan
# (the recorded output shows "Scanning UE 0" and "Scanning UE 1") - confirm
# against DAG_functions.get_ue_idx_list.
ue_idx_list = dag_func.get_ue_idx_list(lines, 2)

Scanning UE 0


Scanning UE 0: 100%|██████████| 3319232/3319232 [00:02<00:00, 1597752.71it/s]


Scanning UE 1


Scanning UE 1: 100%|██████████| 3319232/3319232 [00:02<00:00, 1244163.68it/s]


In [None]:
def tag_select(all_paths_MG, log_cluster_path):
    """Look up the cluster-file entries that belong to a model graph.

    The file at ``log_cluster_path`` is read line by line and handed, together
    with ``all_paths_MG``, to ``dag_func.extract_log_tuples``. The result is
    then passed through ``dag_func.trim_after_last_period``.

    Args:
        all_paths_MG: Graph object forwarded unchanged to
            ``dag_func.extract_log_tuples``.
        log_cluster_path: Path of the cluster listing text file.

    Returns:
        A ``(filtered_logs_tuples, matched_logs_tuples)`` pair: the trimmed
        tuples first, the untrimmed matches second.
    """
    with open(log_cluster_path) as cluster_fh:
        cluster_lines = list(cluster_fh)

    matched = dag_func.extract_log_tuples(cluster_lines, all_paths_MG)
    trimmed = dag_func.trim_after_last_period(matched)
    return trimmed, matched


# Read the cluster listing once at notebook level; classify_ids() below
# consumes these lines.
with open(log_cluster_path) as cluster_fh:
    log_cluster_lines = list(cluster_fh)

# Pulls the numeric event ID and the first bracketed component tag that
# follows it out of one cluster-listing line.
pattern = re.compile(
    r'ID=(\d+)'              # group 1: digits after "ID="
    r'.*?'                   # shortest possible gap up to the next "["
    r'\[\s*([^\]\s]+)\s*\]'  # group 2: tag inside [...], padding ignored
)

# Component tag -> reporting category. Tags that are missing here are
# reported as 'OTHER' by classify_ids().
component_to_category = {
    name: name for name in ('PDCP', 'GTPU', 'RLC', 'MAC', 'SCHED')
}


def classify_ids(log_cluster_lines):
    """Group the event IDs found in cluster-listing lines by component.

    Lines that ``pattern`` does not match are skipped. For every other line
    the ID is converted to ``int`` and appended to the list of the category
    that ``component_to_category`` assigns to the line's component tag;
    unmapped tags go to the "other" list.

    Args:
        log_cluster_lines: Iterable of text lines.

    Returns:
        Six lists in the order
        ``(pdcp_ids, gtp_u_ids, rlc_ids, mac_ids, sched_ids, other_ids)``,
        each preserving the order in which the IDs were encountered.
    """
    order = ('PDCP', 'GTPU', 'RLC', 'MAC', 'SCHED', 'OTHER')
    buckets = {category: [] for category in order}

    for entry in log_cluster_lines:
        hit = pattern.search(entry)
        if hit is None:
            continue
        id_text, component = hit.groups()
        category = component_to_category.get(component, 'OTHER')
        # A category without its own bucket is treated like 'OTHER'.
        buckets.get(category, buckets['OTHER']).append(int(id_text))

    return tuple(buckets[category] for category in order)


# Split the cluster IDs by component and show one line per category.
(pdcp_ids, gtp_u_ids, rlc_ids,
 mac_ids, sched_ids, other_ids) = classify_ids(log_cluster_lines)

for _label, _ids in (
    ("PDCP:", pdcp_ids),
    ("GTPU:", gtp_u_ids),
    ("RLC: ", rlc_ids),
    ("MAC: ", mac_ids),
    ("SCHED:", sched_ids),
    ("OTHER:", other_ids),
):
    print(_label, _ids)


PDCP: [192, 191, 193, 194, 195, 197, 71, 124, 70, 72, 73, 74, 75, 93, 94, 115, 121, 122, 123, 126, 127, 114, 118, 119, 32, 33, 128, 138, 142, 120, 135, 136, 137, 141, 143, 144, 145, 146, 201, 202, 203, 204, 222, 223, 236, 237, 238, 239]
GTPU: [196, 3, 133, 134, 257, 2, 198, 199, 200]
RLC:  [175, 177, 178, 179, 174, 176, 185, 186, 187, 188, 183, 189, 190, 172, 173, 181, 182, 184, 207, 206, 226, 225, 209, 220, 228, 240, 247, 248, 36, 35, 88, 90, 42, 63, 65, 66, 67, 68, 69, 76, 77, 78, 79, 80, 81, 64, 82, 89, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 108, 109, 111, 112, 113, 110, 253, 254, 22, 23, 41, 20, 21, 30, 34, 43, 44, 45, 147, 148, 154, 255, 256, 205, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 221, 230, 231, 232, 233, 234, 235]
MAC:  [61, 180, 91, 224, 227, 243, 208, 242, 246, 244, 60, 249, 130, 107, 245, 171, 250, 105, 46, 19, 24, 25, 26, 27, 28, 29, 149, 150, 151, 152, 47, 161, 229, 252]
SCHED: [17, 11, 14, 158, 50, 241, 156, 251, 51, 83]
OTHER: [6, 7, 8, 1, 4, 5, 9, 10

In [None]:
# Optional: pick out the entry for a single UE (here the first element of
# ue_idx_list) when only that UE should be inspected.
# NOTE(review): ue0_idx_list is not referenced by the cells visible below,
# which pass the complete ue_idx_list to graph_final - confirm it is still needed.
ue0_idx_list = ue_idx_list[0]

## 3. Build Graph

In [13]:
def graph_final(ue_idx_lists, lines, layer_patterns, window_size, step, log_cluster_path,
                layers: Sequence[str] = ('RLC', 'PDCP'),
                prune_threshold: float = 0.3,
                accuracy_th: float = 0.98,
                max_iteration: int = 30,
                max_ues: Optional[int] = 1):
    """Build, prune, refine and draw the per-layer model graphs of each UE.

    For every processed UE the log indices are split by layer with
    ``dag_func.classify_ue_layers``. For each requested layer the function
    then runs the following ``dag_func`` steps on the layer's signal:
    windowing, DAG generation, DAG filtering, confidence computation, pruning,
    model selection and model refinement. The unrefined and the refined model
    are each drawn with ``dag_func.draw_layered_fsm_note`` and the refinement
    accuracy curve is plotted with ``dag_func.plot_model_refine_score``.

    Note:
        The function reads the notebook-level ``signals`` object as a global;
        the cell that loads it must have been executed first.

    Args:
        ue_idx_lists: Iterable with one index collection per UE.
        lines: Raw log lines, forwarded to ``dag_func.classify_ue_layers``.
        layer_patterns: Layer name -> regex mapping, forwarded to
            ``dag_func.classify_ue_layers``.
        window_size: Window length passed to ``dag_func.sliding_windows_step``.
        step: Step passed to ``dag_func.sliding_windows_step``.
        log_cluster_path: Path of the cluster listing read by ``tag_select``.
        layers: Layer names to process, in order. Each must be a key of the
            mapping returned by ``dag_func.classify_ue_layers``.
        prune_threshold: ``threshold`` given to ``dag_func.prune_graphs``.
        accuracy_th: ``accuracy_th`` given to ``dag_func.model_refinement``.
        max_iteration: ``max_iteration`` given to ``dag_func.model_refinement``.
        max_ues: Maximum number of UEs to process. The default of 1 keeps the
            original behaviour of stopping after the first UE; ``None``
            processes every entry of ``ue_idx_lists``.

    Returns:
        Five lists, each holding one entry per processed (UE, layer) pair in
        processing order:
        ``(refined_all_paths_MGs, all_paths_MG_list, refined_Ms,
        pruned_graphs_list, filtered_logs_tuples_list)``.
    """
    refined_all_paths_MGs = []
    refined_Ms = []
    pruned_graphs_list = []
    filtered_logs_tuples_list = []
    all_paths_MG_list = []

    # Both drawings (before and after refinement) use the same layout settings.
    fsm_draw_kwargs = dict(
        prog="dot",
        rankdir="TB",
        nodesep=0.5,
        ranksep=0.8,
        figsize=(80, 70),
        node_size=3000,
        font_size=17,
        arrowsize=60,
        wrap_width=27,
        y_offset_pts=-35,
    )

    for idx, uex_idx_list in enumerate(ue_idx_lists):
        if max_ues is not None and idx >= max_ues:
            break

        print("!!!!!!!!!!! Processing UEX idx:", idx)

        uex_layer_idx = dag_func.classify_ue_layers(uex_idx_list, lines, layer_patterns)
        print("Layer seperation finished")

        uex_sig = dag_func.select_uex_sig(uex_idx_list, signals)
        print("UE signal selection finished")
        print(f"UE {idx} signal length:", len(uex_sig))

        # Select every layer's signal up front, keeping the layer name next to
        # its signal so that later log messages name the right layer.
        layer_signals = [
            (layer, dag_func.select_uex_sig(uex_layer_idx[layer], signals))
            for layer in layers
        ]
        print("UE layer signal selection finished")
        for layer, layer_sig in layer_signals:
            print(f"UE {idx} {layer} layer signal length:", len(layer_sig))

        # The whole-UE slicing does not depend on the layer, so it is computed
        # once per UE; only its length is reported.
        uex_sig_slices = dag_func.sliding_windows_step(uex_sig, window_size, step)
        print("uex sig slices length:", len(uex_sig_slices))

        for layer, layer_sig in layer_signals:
            print(f"Processing {layer} layer")

            layer_slices = dag_func.sliding_windows_step(layer_sig, window_size, step)
            print(f"uex {layer} slices length:", len(layer_slices))

            print(f"Generating DAG for uex {layer} slices...")
            # Work on a copy so the windowed slices themselves stay untouched.
            traces = layer_slices.copy()
            graphs_list, node_supports, edge_supports = dag_func.generate_DAG(traces)
            print("DAG generation finished")

            _all_dag, dag_flags = dag_func.check_graphs_are_dag(graphs_list, False)
            print("DAG checking finished")

            graph_list, node_support_list, edge_support_list = dag_func.filter_graphs(
                dag_flags, graphs_list, node_supports, edge_supports
            )
            print("Numbers of initial sub graphs", len(graph_list))
            print("Filtering finished")

            forward_confidence, backward_confidence = dag_func.compute_all_confidences(
                node_support_list,
                edge_support_list,
                dag_func.compute_confidences
            )
            print("Confidence calculation finished")

            print("Pruning...")
            pruned_graphs = dag_func.prune_graphs(
                graph_list,
                forward_confidence,
                backward_confidence,
                threshold=prune_threshold,
                prune_causality_graph=dag_func.prune_causality_graph
            )
            print("Pruning finished")

            print("Model selection...")
            all_paths = dag_func.model_selection(pruned_graphs, cutoff=None)
            print("Model selection finished")

            all_paths_MG = dag_func.plot_model_paths(all_paths, layout="stair", print_graph=False)
            print("Model paths plotting finished")
            num_edges = dag_func.count_edges_in_graph(all_paths_MG)
            print(f"all_paths_MG has {num_edges} Edges")

            # Only the trimmed tuples are needed for the node annotations.
            filtered_logs_tuples, _matched_logs_tuples = tag_select(all_paths_MG, log_cluster_path)

            print("Model evaluation...")

            print("Sorting paths...")
            sorted_paths = dag_func.compute_scores_multi(
                pruned_graphs, forward_confidence, backward_confidence
            )

            print("Model refinement...")
            refined_M, _final_AR, acc_list = dag_func.model_refinement(
                sorted_paths, all_paths, traces,
                accuracy_th=accuracy_th, max_iteration=max_iteration
            )

            refined_all_paths_MG = dag_func.plot_model_paths(
                refined_M, layout="stair", print_graph=False
            )

            print("Drawing FSM...")
            dag_func.draw_layered_fsm_note(all_paths_MG, filtered_logs_tuples, **fsm_draw_kwargs)
            dag_func.draw_layered_fsm_note(refined_all_paths_MG, filtered_logs_tuples, **fsm_draw_kwargs)

            # Prepend a zero so the plotted curve starts at 0
            # (presumably the accuracy before any refinement step - TODO confirm).
            acc_list.insert(0, 0)
            dag_func.plot_model_refine_score(acc_list)

            all_paths_MG_list.append(all_paths_MG)
            refined_all_paths_MGs.append(refined_all_paths_MG)
            refined_Ms.append(refined_M)
            pruned_graphs_list.append(pruned_graphs)
            filtered_logs_tuples_list.append(filtered_logs_tuples)

    return refined_all_paths_MGs, all_paths_MG_list, refined_Ms, pruned_graphs_list, filtered_logs_tuples_list
        
            

In [None]:
refined_all_paths_MGs, all_paths_MG_list, refined_Ms, pruned_graphs_list, filtered_logs_tuples_list=graph_final(ue_idx_lists = ue_idx_list, lines = lines, layer_patterns = layer_patterns, window_size = 2000, step = 1500, log_cluster_path = log_cluster_path)