In [None]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from pgmpy.estimators import PC
import argparse

def read_log_lines(log_path, encoding="utf-8"):
    """Read a log file and return its non-blank lines with surrounding whitespace removed.

    Args:
        log_path: Path of the text log file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding.

    Returns:
        list[str]: One entry per non-blank line, in file order, each stripped of
        leading and trailing whitespace.

    Raises:
        FileNotFoundError: If ``log_path`` does not exist (raised by ``open``).
    """
    # errors="replace" keeps a single undecodable byte from aborting the whole read;
    # the offending byte shows up as U+FFFD in the returned line instead.
    with open(log_path, 'r', encoding=encoding, errors="replace") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]

def build_windowed_dataset(log_lines, window_size=5):
    """Turn a sequence of log lines into a binary window-by-event table.

    A window of ``window_size`` consecutive lines is slid over ``log_lines`` one
    line at a time. Each window is reduced to the set of distinct lines it
    contains, and the sets are encoded with ``MultiLabelBinarizer``.

    Args:
        log_lines: Sequence of log lines (events) in order.
        window_size: Number of consecutive lines per window; must be >= 1.

    Returns:
        pandas.DataFrame: One row per window and one column per entry of the
        binarizer's ``classes_``. When ``log_lines`` has fewer than
        ``window_size`` entries no windows are generated.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    # A zero or negative size would silently produce empty or nonsensical windows.
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")

    # A set keeps each event at most once per window.
    windows = [
        set(log_lines[start:start + window_size])
        for start in range(len(log_lines) - window_size + 1)
    ]

    mlb = MultiLabelBinarizer()
    binary_matrix = mlb.fit_transform(windows)
    return pd.DataFrame(binary_matrix, columns=mlb.classes_)

def run_pc(df, significance_level=0.01):
    """Run pgmpy's PC estimator on ``df`` and return the estimated model.

    Args:
        df: Data handed to ``PC(data=...)``.
        significance_level: Forwarded unchanged to ``PC.estimate``.

    Returns:
        Whatever ``PC.estimate`` returns; callers in this file only rely on it
        exposing ``edges()``.
    """
    print("Executando algoritmo PC...")
    return PC(data=df).estimate(significance_level=significance_level)

def main(log_path, window_size, output_path):
    """Run the full pipeline: read the log, build windows, run PC, report and save edges.

    Args:
        log_path: Path of the input log file.
        window_size: Number of consecutive log lines per window.
        output_path: Path of the CSV file that receives the detected edges
            under a ``source,target`` header.
    """
    import csv  # function-scope import keeps this block self-contained

    log_lines = read_log_lines(log_path)
    df = build_windowed_dataset(log_lines, window_size)

    print(f"Dataset montado com {df.shape[0]} janelas e {df.shape[1]} eventos únicos.")
    model = run_pc(df)

    # Materialize once so the console and the file report the same edges in the same order.
    edges = list(model.edges())

    print("Arestas causais detectadas:")
    for edge in edges:
        print(f"{edge[0]} --> {edge[1]}")

    # Save the result. Node names are raw log lines and may contain commas or
    # quotes, so rows go through csv.writer, which quotes them as needed; a
    # hand-built "a,b" line would yield a malformed CSV in that case.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["source", "target"])
        for edge in edges:
            writer.writerow([edge[0], edge[1]])
    print(f"Resultado salvo em: {output_path}")

if __name__ == "__main__":
    # Command-line entry point: collect the three options and hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument("--log_path", required=True, help="Arquivo de log de entrada")
    cli.add_argument("--window_size", type=int, default=5, help="Tamanho da janela de eventos")
    cli.add_argument("--output_path", default="pc_result.csv", help="Arquivo de saída com as arestas causais")
    options = cli.parse_args()

    main(
        log_path=options.log_path,
        window_size=options.window_size,
        output_path=options.output_path,
    )

# Usage: python pc_logs.py --log_path path/to/your.log --window_size 5 --output_path resultado.csv


In [1]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from pgmpy.estimators import PC
import argparse

def read_log_lines(log_path, encoding="utf-8"):
    """Read a log file and return its non-blank lines with surrounding whitespace removed.

    Args:
        log_path: Path of the text log file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding.

    Returns:
        list[str]: One entry per non-blank line, in file order, each stripped of
        leading and trailing whitespace.

    Raises:
        FileNotFoundError: If ``log_path`` does not exist (raised by ``open``).
    """
    # errors="replace" keeps a single undecodable byte from aborting the whole read;
    # the offending byte shows up as U+FFFD in the returned line instead.
    with open(log_path, 'r', encoding=encoding, errors="replace") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]

def build_windowed_dataset(log_lines, window_size=5):
    """Turn a sequence of log lines into a binary window-by-event table.

    A window of ``window_size`` consecutive lines is slid over ``log_lines`` one
    line at a time. Each window is reduced to the set of distinct lines it
    contains, and the sets are encoded with ``MultiLabelBinarizer``.

    Args:
        log_lines: Sequence of log lines (events) in order.
        window_size: Number of consecutive lines per window; must be >= 1.

    Returns:
        pandas.DataFrame: One row per window and one column per entry of the
        binarizer's ``classes_``. When ``log_lines`` has fewer than
        ``window_size`` entries no windows are generated.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    # A zero or negative size would silently produce empty or nonsensical windows.
    if window_size < 1:
        raise ValueError(f"window_size must be a positive integer, got {window_size!r}")

    # A set keeps each event at most once per window.
    windows = [
        set(log_lines[start:start + window_size])
        for start in range(len(log_lines) - window_size + 1)
    ]

    mlb = MultiLabelBinarizer()
    binary_matrix = mlb.fit_transform(windows)
    return pd.DataFrame(binary_matrix, columns=mlb.classes_)

def run_pc(df, significance_level=0.01):
    """Run pgmpy's PC estimator on ``df`` and return the estimated model.

    Args:
        df: Data handed to ``PC(data=...)``.
        significance_level: Forwarded unchanged to ``PC.estimate``.

    Returns:
        Whatever ``PC.estimate`` returns; callers in this file only rely on it
        exposing ``edges()``.
    """
    print("Executando algoritmo PC...")
    estimator = PC(data=df)
    return estimator.estimate(significance_level=significance_level)

def main(log_path, window_size, output_path):
    """Run the full pipeline: read the log, build windows, run PC, report and save edges.

    Args:
        log_path: Path of the input log file.
        window_size: Number of consecutive log lines per window.
        output_path: Path of the CSV file that receives the detected edges
            under a ``source,target`` header.
    """
    import csv  # function-scope import keeps this block self-contained

    log_lines = read_log_lines(log_path)
    df = build_windowed_dataset(log_lines, window_size)

    print(f"Dataset montado com {df.shape[0]} janelas e {df.shape[1]} eventos únicos.")
    model = run_pc(df)

    # Materialize once so the console and the file report the same edges in the same order.
    edges = list(model.edges())

    print("Arestas causais detectadas:")
    for edge in edges:
        print(f"{edge[0]} --> {edge[1]}")

    # Save the result. Node names are raw log lines and may contain commas or
    # quotes, so rows go through csv.writer, which quotes them as needed; a
    # hand-built "a,b" line would yield a malformed CSV in that case.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["source", "target"])
        for edge in edges:
            writer.writerow([edge[0], edge[1]])
    print(f"Resultado salvo em: {output_path}")

# ===== Version meant to run inside a notebook / IPython ====
import csv  # needed only for writing the edge list below

# NOTE(review): the recorded run of this cell failed with FileNotFoundError for
# this absolute path; point it at an existing log file before re-running.
log_path = "/logs/logs_teste.log"
window_size = 5
output_path = "resultado.csv"

log_lines = read_log_lines(log_path)
df = build_windowed_dataset(log_lines, window_size)

print(f"Dataset montado com {df.shape[0]} janelas e {df.shape[1]} eventos únicos.")
model = run_pc(df)

print("Arestas causais detectadas:")
for edge in model.edges():
    print(f"{edge[0]} --> {edge[1]}")

# Node names are raw log lines and may contain commas or quotes, so rows go
# through csv.writer (which quotes them as needed) rather than a hand-built
# "a,b" string that would produce a malformed CSV.
with open(output_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f, lineterminator="\n")
    writer.writerow(["source", "target"])
    for edge in model.edges():
        writer.writerow([edge[0], edge[1]])
print(f"Resultado salvo em: {output_path}")



FileNotFoundError: [Errno 2] No such file or directory: '/logs/logs_teste.log'