In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from torch.utils.data import Dataset, DataLoader
import os
import matplotlib.pyplot as plt
import csv

import torch
import captum
from captum.attr import IntegratedGradients
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict
import re
import json
import joblib
import pandas as pd
import pickle
import networkx as nx
import pandas as pd
import numpy as np
import os



# Set device (GPU if available)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

device = torch.device("cpu")
print(f"Using device: {device}")

import warnings
warnings.filterwarnings("ignore", message=".*weights_only=False.*")

  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


In [2]:
# **Define LSTM Model**
class LSTMForecaster(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMForecaster, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

    def forward(self, x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(device)
        out, _ = self.lstm(x, (h_0, c_0))
        out = self.fc(out[:, -1, :])
        return out

def autoregressive_forecast(model, initial_sequence, steps_ahead=5, device='cpu'):
    # Ensure input is a torch tensor of shape (1, 60, M)
    if isinstance(initial_sequence, np.ndarray):
        initial_sequence = torch.tensor(initial_sequence, dtype=torch.float32)
    
    if len(initial_sequence.shape) == 2:
        initial_sequence = initial_sequence.unsqueeze(0)

    initial_sequence = initial_sequence.to(device)
    
    generated_sequence = []

    current_sequence = initial_sequence.clone()

    for _ in range(steps_ahead):
        with torch.no_grad():
            # Predict next step
            next_step = model(current_sequence)  # (1, M)
        
        generated_sequence.append(next_step.squeeze(0))  # (M,)
        
        # Prepare input for next step by appending prediction and removing oldest timestep
        next_step_expanded = next_step.unsqueeze(1)  # (1, 1, M)
        current_sequence = torch.cat((current_sequence[:, 1:, :], next_step_expanded), dim=1)  # (1, 60, M)

    return torch.stack(generated_sequence)  # (steps_ahead, M)

# === JVGAN COMPONENTS ===
class JVGANEncoder(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(JVGANEncoder, self).__init__()
        self.lstm = nn.LSTM(input_dim, 128, batch_first=True)
        self.fc_mu = nn.Linear(128, latent_dim)
        self.fc_logvar = nn.Linear(128, latent_dim)

    def forward(self, x):
        _, (h_n, _) = self.lstm(x)
        h = h_n.squeeze(0)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        std = torch.exp(0.5 * logvar)
        z = mu + std * torch.randn_like(std)
        return z, mu, logvar

class JVGANDecoder(nn.Module):
    def __init__(self, latent_dim, input_dim, sequence_length=65):
        super(JVGANDecoder, self).__init__()
        self.latent_to_hidden = nn.Linear(latent_dim, 128)
        self.lstm = nn.LSTM(128, 128, batch_first=True)
        self.output_layer = nn.Linear(128, input_dim)
        self.sequence_length = sequence_length

    def forward(self, z):
        h0 = self.latent_to_hidden(z).unsqueeze(1).repeat(1, self.sequence_length, 1)
        lstm_out, _ = self.lstm(h0)
        return self.output_layer(lstm_out)

class JVGANDiscriminator(nn.Module):
    def __init__(self, input_dim):
        super(JVGANDiscriminator, self).__init__()
        self.lstm = nn.LSTM(input_dim, 128, batch_first=True)
        self.classifier = nn.Sequential(
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        _, (h_n, _) = self.lstm(x)
        return self.classifier(h_n.squeeze(0))

class JVGAN(nn.Module):
    def __init__(self, input_dim, latent_dim=32, sequence_length=65):
        super(JVGAN, self).__init__()
        self.encoder = JVGANEncoder(input_dim, latent_dim)
        self.decoder = JVGANDecoder(latent_dim, input_dim, sequence_length)
        self.discriminator = JVGANDiscriminator(input_dim)

    def forward(self, x):
        z, mu, logvar = self.encoder(x)
        x_recon = self.decoder(z)
        d_real = self.discriminator(x)
        d_fake = self.discriminator(x_recon.detach())
        return x_recon, d_real, d_fake, mu, logvar

import torch.nn.functional as F


def detect_anomaly(x, model, threshold):
    model.eval()
    with torch.no_grad():
        x_recon, _, _, _, _ = model(x)
        last_real = x[:, -1, :]
        last_recon = x_recon[:, -1, :]
        error = F.mse_loss(last_recon, last_real, reduction='none').mean(dim=1)
        return error > threshold, error

def get_result(target_df, CU, DU):
    """
    Compare target DataFrame with original dataset to calculate accuracy.
    Args:
        target_df (pd.DataFrame): DataFrame with CU/DU targets.
        CU (int): CU index.
        DU (int): DU index.
    Returns:
        accuracy (float): Accuracy of the CU/DU targets against the original dataset.
    """
    original_dataframe = pd.read_csv(f'dataset_srscu{CU}_srsdu{DU}.csv')

    score = 0

    for df in target_df.itertuples():
        timestamp = df.Timestamp
        srscu_stepStress = df.srscu_stepStress
        srsdu_stepStress = df.srsdu_stepStress
        
        # Find the corresponding row in the original DataFrame
        original_row = original_dataframe[original_dataframe['Timestamp'] == timestamp]
        
        # compare the values of original srscu_stressType and srsdu_stressType with the new values
        if not original_row.empty:
            original_srscu_stressType = original_row['srscu_stressType'].values[0]
            original_srsdu_stressType = original_row['srsdu_stressType'].values[0]
            
            # if one match score 1 if both 2 and calculare accuracy accordingly
            if(srscu_stepStress==original_srscu_stressType):
                score += 1
            if(srsdu_stepStress==original_srsdu_stressType):
                score += 1
    # Calculate accuracy# === Step 1: Load the full causal graph ===
    with open("causal_graph.gpickle", "rb") as f:
        full_graph = pickle.load(f)
    accuracy = score / (2 * len(target_df))  # Each anomaly can contribute up to 2 points
    print(f"Accuracy of CU/DU targets against original dataset: {accuracy * 100:.2f}%")
    print(f"Score = {score/2}, Total Anomalies = {len(target_df)}")

    return accuracy

# **Data Preprocessing**
# Handle missing values
def preprocessing(features):
    """
    Preprocess the features DataFrame by handling missing values and duplicates.
    """
    # Handle missing values
    features_Timestamp = {'Timestamp': features['Timestamp'].copy()}  # Preserve Timestamp for later
    features = features.drop(columns=['Timestamp'])

    features = features.apply(lambda x: x.fillna(0) if x.isna().all() else x)
    # column_names = features.columns

    threshold = 0.6 * len(features)
    features = features.loc[:, ~features.columns.duplicated()]  # Remove duplicates

    for col in features.columns:
        nan_count = features[col].isna().sum()
        if int(nan_count) > threshold:  # Explicit scalar conversion
            mode_value = features[col].mode().iloc[0] if not features[col].mode().empty else 0
            features[col].fillna(mode_value, inplace=True)

    numeric_cols = features.select_dtypes(include=[np.number]).columns
    features[numeric_cols] = features[numeric_cols].fillna(features[numeric_cols].mean())
    
    # return pd.DataFrame(features, columns=column_names, index=features_Timestamp)
    return features, features_Timestamp

def LSTM_inference(LSTM_model, pca_features_input, pca_features_output, steps_ahead=5):
    full_sequences_with_predictions = []

    for i, sequence in enumerate(pca_features_input):
        temp_columns = sequence.columns.difference(['Timestamp'])
        sequence = sequence.drop(columns=['Timestamp']).values  # Ensure sequence is only numeric data
        predicted_future = autoregressive_forecast(LSTM_model, sequence, steps_ahead=steps_ahead)  # (5, M)
        
        # Convert input sequence and prediction to tensors if not already
        if isinstance(sequence, np.ndarray):
            sequence_tensor = torch.tensor(sequence, dtype=torch.float32)
        else:
            sequence_tensor = sequence

        # Concatenate original 60 input samples with 5 predicted ones → (65, M)
        full_sequence = torch.cat([sequence_tensor, predicted_future.cpu()], dim=0)
        full_sequence = pd.DataFrame(full_sequence.numpy(), columns=temp_columns, index = pd.Index(list(pca_features_input[i].index) + list(pca_features_output[i].index)))  # Convert to DataFrame for consistency
        full_sequences_with_predictions.append(full_sequence)
        # print(full_sequence.numpy())

    # Final shape: (num_sequences, 65, M)
    full_sequences_with_predictions = np.array([df.to_numpy() for df in full_sequences_with_predictions])

    print("Shape of sequences with input + predictions:", full_sequences_with_predictions.shape)

    return full_sequences_with_predictions



# === Step 2: Define wrapper model returning last timestep ===
class LastStepDecoder(nn.Module):
    def __init__(self, jvgan_model):
        super().__init__()
        self.encoder = jvgan_model.encoder
        self.decoder = jvgan_model.decoder

    def forward(self, x):
        z, _, _ = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon[:, -1, :]  # (batch_size, M)


def RCA(is_anomaly, full_sequences_with_predictions, JVGAN_model, original_feature_names, CU, DU, device='cpu'):
    """
    Perform Integrated Gradients (IG) for anomaly detection.
    Args:
        is_anomaly (pd.DataFrame): DataFrame with anomaly detection results.
        full_sequences_with_predictions (np.ndarray): Full sequences with predictions.
        model (JVGAN): The JVGAN model used for reconstruction.
        original_feature_names (list): List of original feature names.
        device (str): Device to run the computations on ('cpu' or 'cuda').
    """
    # Ensure model is in evaluation mode
    # === Step 3: Select Anomalous Samples ===
    anomaly_indices = torch.nonzero(torch.tensor(is_anomaly['Anomaly Detected'])).squeeze().cpu().numpy()
    if anomaly_indices.ndim == 0:
        anomaly_indices = anomaly_indices.reshape(1)

    # Get corresponding DataFrame index values
    anomaly_df_indices = is_anomaly.index[anomaly_indices]

    max_explain = min(100, len(anomaly_indices))
    samples_to_explain = torch.tensor(full_sequences_with_predictions[anomaly_indices[:max_explain]],
                                    dtype=torch.float32).to(device)

    # === Step 4: Setup model and IG explainer ===
    wrapped_decoder = LastStepDecoder(JVGAN_model).to(device).eval()
    ig = IntegratedGradients(wrapped_decoder)
    baseline = torch.zeros_like(samples_to_explain)

    # === Step 5: Compute attributions ===
    all_attributions = []

    for i in range(samples_to_explain.size(0)):
        input_sample = samples_to_explain[i].unsqueeze(0)         # (1, 65, M)
        baseline_sample = baseline[i].unsqueeze(0)                # (1, 65, M)

        with torch.no_grad():
            recon = wrapped_decoder(input_sample)                 # (1, M)
            original = input_sample[:, -1, :]                     # (1, M)
            recon_error = torch.abs(recon - original)             # (1, M)
            target_feature = torch.argmax(recon_error).item()     # int

        # Compute IG for the selected output index
        attr, _ = ig.attribute(input_sample, baselines=baseline_sample,
                            target=target_feature,
                            return_convergence_delta=True)

        # Attribution shape: (1, 65, M), extract last step
        attr_last_step = attr.squeeze(0)[-1].detach().cpu().numpy()
        all_attributions.append(attr_last_step)

        # === Step 6: Print Top 10 Features ===
        top_10_idx = np.argsort(np.abs(attr_last_step))[-10:][::-1]
        # print(f"\nTop 10 impacting features using Integrated Gradients for Anomalous Sample #{i+1}:")
        # for rank, idx in enumerate(top_10_idx, 1):
        #     print(f"{rank}. {original_feature_names[idx]} | IG Value: {attr_last_step[idx]:.4f}")

    # === Step 7: Save IG attributions to CSV ===
    ig_df = pd.DataFrame(all_attributions, columns=original_feature_names, index=anomaly_df_indices)
    ig_df.to_csv(f'integrated_gradients_srscu{CU}_srsdu{DU}.csv', index=True)
    return ig_df


def create_sequences(features_pca, targets, look_back, look_forward, CU, DU):
    """
    Create LSTM-ready sequences from a DataFrame.
    """
    pca_features_input, pca_features_output, input_targets, output_targets = [], [], [], []

    for i in range(len(features_pca) - look_back - look_forward + 1):
        pca_features_input.append(features_pca.iloc[i:(i + look_back)])
        pca_features_output.append(features_pca.iloc[(i + look_back):i + look_back + look_forward ])

        input_targets.append(targets.iloc[i:(i + look_back)].values)
        output_targets.append(targets.iloc[(i + look_back):i + look_back + look_forward ])  # assuming this is a scalar/class label

    return pca_features_input, pca_features_output, input_targets, output_targets


def inverse_pca(full_sequences_with_predictions, pca, look_back, look_forward):
    # Flatten from (num_sequences, 65, reduced_dim) → (num_sequences * 65, reduced_dim)
    full_sequences_with_predictions = full_sequences_with_predictions.reshape(-1, full_sequences_with_predictions.shape[-1])
    print("Shape before inverse PCA:", full_sequences_with_predictions.shape)

    # Inverse PCA transformation → (num_sequences * 65, original_feature_dim)
    full_sequences_with_predictions = pca.inverse_transform(full_sequences_with_predictions)
    print("Shape after inverse PCA:", full_sequences_with_predictions.shape)

    # Reshape back to (num_sequences, 65, original_feature_dim)
    original_feature_dim = pca.inverse_transform(np.zeros((1, pca.n_components_))).shape[1]

    full_sequences_with_predictions = full_sequences_with_predictions.reshape(
        -1, look_back + look_forward, original_feature_dim
    )
    print("Final shape after inverse PCA:", full_sequences_with_predictions.shape)
    
    return full_sequences_with_predictions

def JVGAN_anomaly_detection(model, full_sequences_with_predictions, output_targets, threshold):
    is_anomaly, recon_errors = detect_anomaly(torch.tensor(full_sequences_with_predictions, dtype=torch.float32), model, threshold)
    anomaly_indices = [df.iloc[-1]['Timestamp'] for df in output_targets]
    is_anomaly = pd.DataFrame(is_anomaly.numpy(), columns=['Anomaly Detected'], index=anomaly_indices)


    true_labels = [1 if (val.iloc[-1][f'srscu_stressType'] != 0 or val.iloc[-1][f'srsdu_stressType'] != 0) else 0 for val in output_targets]
    # Calculate accuracy
    # accuracy = np.mean(is_anomaly.numpy() == true_labels)
    predicted_labels = []
    for val in np.array(is_anomaly['Anomaly Detected']):
        if(val==False):
            predicted_labels.append(0)
        else:
            predicted_labels.append(1)
    accuracy = np.mean(predicted_labels == true_labels)

    print(f"Accuracy: {accuracy * 100:.2f}%")
    # Calculate F1 score
    f1 = f1_score(true_labels, np.array(is_anomaly['Anomaly Detected']))
    print(f"F1 Score: {f1:.2f}")
    # Print classification report
    print(classification_report(true_labels, np.array(is_anomaly['Anomaly Detected']), target_names=['Normal', 'Anomalous']))
    # Print confusion matrix
    conf_matrix = confusion_matrix(true_labels, np.array(is_anomaly['Anomaly Detected']))
    print("Confusion Matrix:")
    print(conf_matrix)
    # Save the results to a CSV file
    results_df = pd.DataFrame({
        'True Label': true_labels,
        'Timestamp': anomaly_indices,
        'Predicted Label': np.array(is_anomaly['Anomaly Detected']),
        'Reconstruction Error': recon_errors.numpy()
    })
    results_df.to_csv(f'results_srscu{CU}_srsdu{DU}.csv', index=False)

    return is_anomaly, recon_errors, accuracy, f1, results_df

# === Step 2: Define function to get subgraph and root causes ===
def get_subgraph_and_roots(graph, top_features):
    sub_G = graph.subgraph(top_features).copy()
    root_nodes = [n for n in sub_G.nodes if sub_G.in_degree(n) == 0]
    return sub_G, root_nodes

def get_root_features(ig_df, original_feature_names, full_graph, CU, DU, save_dir="anomaly_subgraphs", save_subgraphs=True):
    """
    Analyze Integrated Gradients (IG) and causal graph to find root causes for each anomaly.
    Args:
        ig_df (pd.DataFrame): DataFrame with IG values.
        original_feature_names (list): List of original feature names.
        full_graph (nx.Graph): Full causal graph.
        CU (int): CU index.
        DU (int): DU index.
    Returns:
        anomaly_subgraphs (dict): Dictionary of subgraphs for each anomaly.
        root_cause_records (list): List of root cause records.
    """
    # === Step 3: Analyze IG and causal graph for each anomaly ===
    anomaly_subgraphs = {}
    root_cause_records = []

    for i, row in ig_df.iterrows():
        anomaly_id = f"anomaly_{i}"
        
        # Step 3.1: Top 10 features by IG magnitude
        abs_values = np.abs(row.values)
        top_10_idx = np.argsort(abs_values)[-10:][::-1]
        top_features = [original_feature_names[idx] for idx in top_10_idx]

        # Step 3.2: Build causal subgraph and get root causes
        subgraph, roots = get_subgraph_and_roots(full_graph, top_features)
        anomaly_subgraphs[anomaly_id] = subgraph

        if roots:
            for rank, r in enumerate(roots, 1):
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": r,
                    "rank": rank
                })
        else:
            cu_count, du_count,app_count = 0, 0, 0
            for r in top_features:
                if "srscu" in r:
                    cu_count += 1
                elif "srsdu" in r:
                    du_count += 1
                elif "Application" in r:
                    app_count += 1
            if cu_count > du_count and cu_count > app_count:
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": "srscu",
                    "rank": 1
                })
            elif du_count > cu_count and du_count > app_count:
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": "srsdu",
                    "rank": 1
                })
            elif app_count > cu_count and app_count > du_count:
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": "Application",
                    "rank": 1
                })
            elif(cu_count == du_count):
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": "srscu",
                    "rank": 1
                })
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": "srsdu",
                    "rank": 2
                })
            else:
                root_cause_records.append({
                    "anomaly_id": anomaly_id,
                    "Timestamp": row.name,  # Use the index as Timestamp
                    "root_cause": "node",
                    "rank": 1
                })

            # root_cause_records.append({
            #     "anomaly_id": anomaly_id,
            #     "root_cause": [if "srscu" in r for r in top_features],
            #     "rank": 1
            # })

    # === Step 4: Save root causes and optionally subgraphs ===
    root_df = pd.DataFrame(root_cause_records)
    root_df.to_csv(f'root_causes_top10_srscu{CU}_srsdu{DU}.csv', index=False)
    print(f"Saved root causes per anomaly to: root_causes_top10_srscu{CU}_srsdu{DU}.csv")

    if(save_subgraphs==True):
        # Optional: Save each subgraph individually as a .gpickle
        save_dir = "anomaly_subgraphs"
        os.makedirs(save_dir, exist_ok=True)
        for anomaly_id, sub_G in anomaly_subgraphs.items():
            with open(os.path.join(save_dir, f"{anomaly_id}_subgraph.gpickle"), "wb") as f:
                pickle.dump(sub_G, f)

    return anomaly_subgraphs, root_df


def generate_labels(root_df, CU, DU):
    """
    Generate binary targets based on root cause counts.
    Args:
        root_df (pd.DataFrame): DataFrame with root causes.
        CU (int): CU index.
        DU (int): DU index.
    Returns:
        target_df (pd.DataFrame): DataFrame with binary targets.
    """
    # === Step 5: Generate binary targets from root cause counts ===
    grouped = root_df.groupby('anomaly_id')['root_cause'].apply(list).reset_index()

    target_records = []

    for _, row in grouped.iterrows():
        anomaly_id = row['anomaly_id']
        causes = row['root_cause']

        srscu_count = sum(1 for c in causes if 'srscu' in c)
        srsdu_count = sum(1 for c in causes if 'srsdu' in c)
        
        app_count = sum(1 for c in causes if 'Application' in c)

        app_count += sum(1 for c in causes if 'node' in c)


        srscu_count += app_count  # Treat Application as srscu for target generation
        srsdu_count += app_count  # Treat Application as srsdu for target generation

        if srscu_count > srsdu_count:
            target_srscu, target_srsdu = 1, 0
            
        elif srsdu_count > srscu_count:
            target_srscu, target_srsdu = 0, 1
        else:  # equal counts or both 0
            target_srscu, target_srsdu = 1, 1

        target_records.append({
            'anomaly_id': anomaly_id,
            "Timestamp": row.name,  # Use the index as Timestamp
            'srscu_stepStress': target_srscu,
            'srsdu_stepStress': target_srsdu
        })




    # Convert to DataFrame
    target_df = pd.DataFrame(target_records)
    target_df.to_csv(f'cu_du_targets_srscu{CU}_srsdu{DU}.csv', index=False)
    print(f"Saved CU/DU targets to: cu_du_targets_srscu{CU}_srsdu{DU}.csv")

    return target_df

def apply_pca(features, scaler, pca, features_Timestamp):
    """
    Apply MinMax scaling and PCA transformation to the features DataFrame.
    """
        # Scale the new features using the same scaler
    features_scaled = scaler.transform(features)


    # Apply PCA transformation using the loaded PCA model
    features_pca = pca.transform(features_scaled)


    # Convert to DataFrame
    features_pca = pd.DataFrame(features_pca, columns=[f'Reduced-{i+1}' for i in range(pca.n_components_)])
    features_pca['Timestamp'] = features_Timestamp['Timestamp'].values  # Add Timestamp back
    features_pca.index = features.index
    
    return features_pca 



In [3]:
# Load models
scaler = joblib.load('minmax_scaler.pkl')
pca = joblib.load('pca_model.pkl')


# Load LSTM the model
LSTM_model_path = 'lstm_models/lstm_fold1_RMSprop.pt'
LSTM_model = torch.load(LSTM_model_path, map_location=device)
LSTM_model.eval()



# === Step 1: Load the full causal graph ===
with open("causal_graph.gpickle", "rb") as f:
    full_graph = pickle.load(f)


# load the JVGAN model
JVGAN_model_path = 'jvgan_models/jvgan_fold1.pt'
# testing of jvgan model
JVGAN_model = torch.load(JVGAN_model_path, map_location=device)
# Validation
JVGAN_model.eval()

JVGAN(
  (encoder): JVGANEncoder(
    (lstm): LSTM(344, 128, batch_first=True)
    (fc_mu): Linear(in_features=128, out_features=32, bias=True)
    (fc_logvar): Linear(in_features=128, out_features=32, bias=True)
  )
  (decoder): JVGANDecoder(
    (latent_to_hidden): Linear(in_features=32, out_features=128, bias=True)
    (lstm): LSTM(128, 128, batch_first=True)
    (output_layer): Linear(in_features=128, out_features=344, bias=True)
  )
  (discriminator): JVGANDiscriminator(
    (lstm): LSTM(344, 128, batch_first=True)
    (classifier): Sequential(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): LeakyReLU(negative_slope=0.2)
      (2): Linear(in_features=64, out_features=1, bias=True)
      (3): Sigmoid()
    )
  )
)

In [4]:
NoOfCUs = 4
NoOfDUs = 4

# Creating Topology
topology = {}

# Form the graph where srscu0 connects to srsdu0, srscu1 to srsdu1, and so on
for i in range(min(NoOfCUs, NoOfDUs)):  # Prevent index errors
    topology[f"srscu{i}"] = [f"srsdu{i}"]

UEsOfDUs = {}

for i in range(NoOfDUs):
    UEsOfDUs[f"srsdu{i}"] = []


# Display the graph
print(f'Topology is as follows: \n{topology}', end="\n\n")

# Load dataset
dataset = pd.read_csv('small_sample.csv')
# dataset = dataset[:int(0.01*len(dataset))]

dataset.index = dataset['Timestamp']
dataset = dataset.drop(columns=['Timestamp'])

# for column in dataset.columns:
#     if 'PCI' in column:
#         print(column)

# Dictionary to store for each PCI:
# a list of RNTIs and a list of metric types
pci_info_map = defaultdict(lambda: {'rntis': set(), 'metrics': set()})


# Process each column
for column in dataset.columns:
    match = re.match(r'PCI-(\d+)_RNTI-(\d+)_([a-zA-Z0-9_]+)', column)
    if match:
        pci = match.group(1)
        rnti = match.group(2)
        metric = match.group(3)
        pci_info_map[pci]['rntis'].add(rnti)
        pci_info_map[pci]['metrics'].add(metric)

# # Print results
# for pci, info in pci_info_map.items():
#     print(f"PCI-{pci}:")
#     print(f"  RNTIs: {sorted(info['rntis'])}")
#     print(f"  Metrics: {sorted(info['metrics'])}\n\t   Length: {len(info['metrics'])}")


application_features = pci_info_map['1'][ 'metrics']
print(f"Application Features: {application_features}")


Topology is as follows: 
{'srscu0': ['srsdu0'], 'srscu1': ['srsdu1'], 'srscu2': ['srsdu2'], 'srscu3': ['srsdu3']}

Application Features: {'pusch_ta_ns', 'bsr', 'ul_nof_nok', 'pucch_ta_ns', 'dl_bs', 'dl_nof_nok', 'ul_nof_ok', 'ri', 'ul_brate', 'cqi', 'srs_ta_ns', 'pusch_snr_db', 'dl_brate', 'dl_nof_ok', 'ta_ns', 'dl_mcs', 'ul_mcs', 'pucch_snr_db'}


In [5]:
# import argparse


# parser = argparse.ArgumentParser(description="Run script with optional CU and DU values.")
# parser.add_argument("--cu", type=int, default=0, help="CU value (default: 0)")
# parser.add_argument("--du", type=int, default=0, help="DU value (default: 0)")

# args = parser.parse_args()

# CU, DU = args.cu, args.du

CU = 0
DU = 0

# Load dataset
dataset = pd.read_csv(f'dataset_srscu{CU}_srsdu{DU}.csv')
# dataset = dataset[:int(0.01*len(dataset))]

dataset.index = dataset['Timestamp']
# dataset = dataset.drop(columns=['Timestamp'])

dataset.head()

Unnamed: 0_level_0,Timestamp,"((node_memory_MemTotal_bytes{instance=""node-exporter:9100"", job=""node""} - node_memory_MemFree_bytes{instance=""node-exporter:9100"", job=""node""}) / node_memory_MemTotal_bytes{instance=""node-exporter:9100"", job=""node""}) * 100","((node_memory_SwapTotal_bytes{instance=""node-exporter:9100"",job=""node""} - node_memory_SwapFree_bytes{instance=""node-exporter:9100"",job=""node""}) / (node_memory_SwapTotal_bytes{instance=""node-exporter:9100"",job=""node""})) * 100","((sum(container_memory_usage_bytes{name=""srscu"",instance=""cadvisor:8080""}) by (instance) - sum(container_memory_cache{name=""srscu"",instance=""cadvisor:8080""}) by (instance)) / sum(machine_memory_bytes{instance=""cadvisor:8080""}) by (instance)) * 100","((sum(container_memory_usage_bytes{name=""srsdu"",instance=""cadvisor:8080""}) by (instance) - sum(container_memory_cache{name=""srsdu"",instance=""cadvisor:8080""}) by (instance)) / sum(machine_memory_bytes{instance=""cadvisor:8080""}) by (instance)) * 100","(1 - (node_memory_MemAvailable_bytes{instance=""node-exporter:9100"", job=""node""} / node_memory_MemTotal_bytes{instance=""node-exporter:9100"", job=""node""})) * 100","(node_memory_SwapTotal_bytes{instance=""node-exporter:9100"",job=""node""} - node_memory_SwapFree_bytes{instance=""node-exporter:9100"",job=""node""})","100 * (1 - avg(rate(node_cpu_seconds_total{mode=""idle"", instance=""node-exporter:9100""}[10s])))","100 - ((node_filesystem_avail_bytes{instance=""node-exporter:9100"",job=""node"",device!~'rootfs'} * 100) / node_filesystem_size_bytes{instance=""node-exporter:9100"",job=""node"",device!~'rootfs'})","100 - ((node_filesystem_avail_bytes{instance=""node-exporter:9100"",job=""node"",mountpoint=""/"",fstype!=""rootfs""} * 100) / node_filesystem_size_bytes{instance=""node-exporter:9100"",job=""node"",mountpoint=""/"",fstype!=""rootfs""})",...,PCI-4_Average_pusch_ta_ns,PCI-4_Average_pucch_ta_ns,PCI-4_Summation_ul_nof_ok,PCI-4_Summation_ul_nof_nok,PCI-4_Summation_dl_nof_nok,PCI-4_Summation_ul_brate,PCI-4_Summation_dl_bs,PCI-4_Summation_bsr,PCI-4_Summation_dl_brate,PCI-4_Summation_dl_nof_ok
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-05-23 12:58:38.769,2025-05-23 12:58:38.769,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-23 12:58:39.769,2025-05-23 12:58:39.769,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-23 12:58:40.770,2025-05-23 12:58:40.770,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-23 12:58:41.819,2025-05-23 12:58:41.819,,,0.408751,0.454471,,,84.108545,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-23 12:58:42.820,2025-05-23 12:58:42.820,,,0.408947,0.454973,,,75.533914,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
features = dataset.drop(columns=[f"srscu_stepStress", f"srscu_stressType", f"srsdu_stepStress", f"srsdu_stressType"])
original_feature_names = features.columns
targets = dataset[[f"srscu_stepStress", f"srscu_stressType", f"srsdu_stepStress", f"srsdu_stressType", "Timestamp"]]

print(targets.head())

                         srscu_stepStress  srscu_stressType  srsdu_stepStress  \
Timestamp                                                                       
2025-05-23 12:58:38.769                 0                 0                 0   
2025-05-23 12:58:39.769                 0                 0                 0   
2025-05-23 12:58:40.770                 0                 0                 0   
2025-05-23 12:58:41.819                 0                 0                 0   
2025-05-23 12:58:42.820                 0                 0                 0   

                         srsdu_stressType                Timestamp  
Timestamp                                                           
2025-05-23 12:58:38.769                 0  2025-05-23 12:58:38.769  
2025-05-23 12:58:39.769                 0  2025-05-23 12:58:39.769  
2025-05-23 12:58:40.770                 0  2025-05-23 12:58:40.770  
2025-05-23 12:58:41.819                 0  2025-05-23 12:58:41.819  
2025-05-23 12:58:4

In [7]:
true_labels = targets[[f"srscu_stressType", f"srsdu_stressType"]].values
output = [] 
for val in true_labels:
    if val[0] != 0 or val[1] != 0:
        output.append(1)
    else:
        output.append(0)

# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import classification_report
# import numpy as np

# index = int(0.8*len(features))
# # Reassign train/test sets properly for classifier training
# X_train, X_test = features[:index], features[index:]  # features was re-assigned above
# y_train, y_test = output[:index], output[index:]    # output was re-assigned above

# # Initialize and train the Random Forest classifier
# rf = RandomForestClassifier(n_estimators=100, random_state=42,  class_weight='balanced')
# rf.fit(X_train.drop(columns=['Timestamp']), y_train)

# # Predict on the test set

# y_pred = rf.predict(X_test.drop(columns=['Timestamp']))

# # Print classification report
# print(classification_report(y_test, y_pred))

# # Print confusion matrix
# conf_matrix = confusion_matrix(y_test, y_pred)
# print("Confusion Matrix:")
# print(conf_matrix)

In [8]:
features, features_Timestamp = preprocessing(features)

In [9]:
features_pca = apply_pca(features, scaler, pca, features_Timestamp)



In [10]:
LOOK_BACK, LOOK_FORWARD = 60, 5

pca_features_input , pca_features_output, input_targets, output_targets = create_sequences(features_pca, targets, LOOK_BACK, LOOK_FORWARD, CU, DU)

print(f"Input shape: {np.array(pca_features_input).shape}")
print(f"Output shape: {np.array(output_targets).shape}")

print(output_targets[0])

Input shape: (1435, 60, 6)
Output shape: (1435, 5, 5)
                         srscu_stepStress  srscu_stressType  srsdu_stepStress  \
Timestamp                                                                       
2025-05-23 12:59:49.257                 0                 0                 0   
2025-05-23 12:59:50.591                 0                 0                 0   
2025-05-23 12:59:51.923                 0                 0                 0   
2025-05-23 12:59:53.154                 0                 0                 0   
2025-05-23 12:59:54.297                 0                 0                 0   

                         srsdu_stressType                Timestamp  
Timestamp                                                           
2025-05-23 12:59:49.257                 0  2025-05-23 12:59:49.257  
2025-05-23 12:59:50.591                 0  2025-05-23 12:59:50.591  
2025-05-23 12:59:51.923                 0  2025-05-23 12:59:51.923  
2025-05-23 12:59:53.154          

In [11]:
full_sequences_with_predictions = LSTM_inference(LSTM_model, pca_features_input, pca_features_output, steps_ahead=LOOK_FORWARD)

Shape of sequences with input + predictions: (1435, 65, 5)


In [12]:
full_sequences_with_predictions = inverse_pca(full_sequences_with_predictions, pca, look_back=LOOK_BACK, look_forward=LOOK_FORWARD)

Shape before inverse PCA: (93275, 5)
Shape after inverse PCA: (93275, 344)
Final shape after inverse PCA: (1435, 65, 344)


In [13]:
# Define the threshold for anomaly detection
threshold = 0.05  # Adjust based on your dataset and requirements

is_anomaly, recon_errors, accuracy, f1, results_df = JVGAN_anomaly_detection(JVGAN_model, full_sequences_with_predictions, output_targets, threshold)

Accuracy: 0.00%
F1 Score: 0.11
              precision    recall  f1-score   support

      Normal       0.17      0.93      0.29       252
   Anomalous       0.80      0.06      0.11      1183

    accuracy                           0.21      1435
   macro avg       0.49      0.50      0.20      1435
weighted avg       0.69      0.21      0.14      1435

Confusion Matrix:
[[ 235   17]
 [1113   70]]


In [14]:
# original_feature_names = features.columns.tolist()  # after inverse_transform

# import shap
# import torch
# import numpy as np
# import torch.nn as nn

# # === Step 1: Wrap Decoder for SHAP ===
# class LastStepDecoder(nn.Module):
#     def __init__(self, jvgan_model):
#         super().__init__()
#         self.encoder = jvgan_model.encoder
#         self.decoder = jvgan_model.decoder

#     def forward(self, x):
#         z, _, _ = self.encoder(x)
#         x_recon = self.decoder(z)
#         return x_recon[:, -1, :]  # Only last timestep reconstruction

# # === Step 2: Select Anomalous Samples ===
# anomaly_indices = torch.nonzero(is_anomaly).squeeze().cpu().numpy()
# if anomaly_indices.ndim == 0:
#     anomaly_indices = anomaly_indices.reshape(1)

# # Limit to a small number for SHAP (e.g., first 5–10 anomalies)
# max_explain = min(10, len(anomaly_indices))
# samples_to_explain = torch.tensor(full_sequences_with_predictions[anomaly_indices[:max_explain]], dtype=torch.float32).to(device)
# print("Initialization started")

# # === Step 3: Wrap Model & Initialize SHAP Explainer ===
# wrapped_decoder = LastStepDecoder(model).to(device)

# explainer = shap.GradientExplainer(wrapped_decoder, samples_to_explain)
# # Time taken for initialization
# print("Initialization done. SHAP Explainer ready to compute values.")

# # === Step 4: Compute SHAP Values ===
# shap_values = explainer.shap_values(samples_to_explain)  # List with one element: (N, M)
# print("computation of SHAP values done.")

# # === Step 5: Display Top 10 Features ===
# for i, shap_val in enumerate(shap_values[0]):
#     top_10_idx = np.argsort(np.abs(shap_val))[-10:][::-1]
#     print(f"\nTop 10 impacting features for Anomalous Sample #{i+1}:")
#     for rank, idx in enumerate(top_10_idx, start=1):
#         print(f"{rank}. {original_feature_names[idx]} | SHAP value: {shap_val[idx]:.4f}")
# # === Step 6: Visualize SHAP Values ===
# shap.summary_plot(shap_values[0], samples_to_explain.cpu().numpy(), feature_names=original_feature_names, max_display=10)
# # Save the SHAP values to a CSV file
# shap_df = pd.DataFrame(shap_values[0], columns=original_feature_names)
# shap_df.to_csv(f'shap_values_srscu{CU}_srsdu{DU}.csv', index=False)

In [15]:
# === Step 1: Extract original feature names ===
original_feature_names = features.columns.tolist()

# Run the RCA function
ig_df = RCA(is_anomaly, full_sequences_with_predictions, JVGAN_model, original_feature_names, CU, DU, device=device)

  anomaly_indices = torch.nonzero(torch.tensor(is_anomaly['Anomaly Detected'])).squeeze().cpu().numpy()


In [16]:
# import networkx as nx
# import os
# import pickle

# # === Step 8: Load causal graph ===
# causal_graph_path = "causal_graph.gpickle"
# if not os.path.exists(causal_graph_path):
#     raise FileNotFoundError("Causal graph not found! Please run the Granger causality code first.")

# with open("causal_graph.gpickle", "rb") as f:
#     G = pickle.load(f)


# # === Step 9: Root cause tracing function ===
# def find_root_causes(graph, feature):
#     """
#     For a given feature, walk backward in the graph to find root causes
#     (nodes with no incoming edges that can reach this node).
#     """
#     root_causes = set()

#     def dfs(node, visited):
#         if node in visited:
#             return
#         visited.add(node)
#         preds = list(graph.predecessors(node))
#         if not preds:
#             root_causes.add(node)
#         else:
#             for pred in preds:
#                 dfs(pred, visited)

#     dfs(feature, set())
#     return list(root_causes)

# # === Step 10: Extract top IG features and rank root causes ===
# ranked_root_cause_rows = []

# for i, row in ig_df.iterrows():
#     anomaly_id = f"anomaly_{i+1:03d}"
#     abs_values = np.abs(row.values)
#     top_10_idx = np.argsort(abs_values)[-10:][::-1]
#     top_features = [original_feature_names[idx] for idx in top_10_idx]

#     root_cause_counter = {}

#     for f in top_features:
#         if f in G.nodes:
#             root_causes = find_root_causes(G, f)
#             for root in root_causes:
#                 root_cause_counter[root] = root_cause_counter.get(root, 0) + 1
#         else:
#             continue

#     if not root_cause_counter:
#         ranked_root_cause_rows.append({
#             "anomaly_id": anomaly_id,
#             "root_cause": "None",
#             "rank": 1,
#             "count": 0
#         })
#         continue

#     # Rank by frequency (descending)
#     sorted_roots = sorted(root_cause_counter.items(), key=lambda x: -x[1])
#     for rank, (rc, count) in enumerate(sorted_roots, 1):
#         ranked_root_cause_rows.append({
#             "anomaly_id": anomaly_id,
#             "root_cause": rc,
#             "rank": rank,
#             "count": count
#         })

# # === Step 11: Save ranked root causes ===
# ranked_df = pd.DataFrame(ranked_root_cause_rows)
# ranked_df.to_csv(f'ranked_root_causes_srscu{CU}_srsdu{DU}.csv', index=False)
# print(f"\nSaved ranked root causes to: ranked_root_causes_srscu{CU}_srsdu{DU}.csv")


In [17]:
# Run the root cause analysis
anomaly_subgraphs, root_df = get_root_features(ig_df, original_feature_names, full_graph, CU, DU, save_dir="anomaly_subgraphs", save_subgraphs=True)

Saved root causes per anomaly to: root_causes_top10_srscu0_srsdu0.csv


In [18]:
target_df = generate_labels(root_df, CU, DU)

Saved CU/DU targets to: cu_du_targets_srscu0_srsdu0.csv


In [19]:
accuracy = get_result(target_df, CU, DU)

Accuracy of CU/DU targets against original dataset: 0.00%
Score = 0.0, Total Anomalies = 87


In [20]:
NoOfCUs = 4
NoOfDUs = 4

# Creating Topology
topology = {}

# Form the graph where srscu0 connects to srsdu0, srscu1 to srsdu1, and so on
for i in range(min(NoOfCUs, NoOfDUs)):  # Prevent index errors
    topology[f"srscu{i}"] = [f"srsdu{i}"]

UEsOfDUs = {}

for i in range(NoOfDUs):
    UEsOfDUs[f"srsdu{i}"] = []


# Display the graph
print(f'Topology is as follows: \n{topology}', end="\n\n")

# Load dataset
dataset = pd.read_csv('small_sample.csv')
# dataset = dataset[:int(0.01*len(dataset))]

dataset.index = dataset['Timestamp']
dataset = dataset.drop(columns=['Timestamp'])

# for column in dataset.columns:
#     if 'PCI' in column:
#         print(column)

# Dictionary to store for each PCI:
# a list of RNTIs and a list of metric types
pci_info_map = defaultdict(lambda: {'rntis': set(), 'metrics': set()})


# Process each column
for column in dataset.columns:
    match = re.match(r'PCI-(\d+)_RNTI-(\d+)_([a-zA-Z0-9_]+)', column)
    if match:
        pci = match.group(1)
        rnti = match.group(2)
        metric = match.group(3)
        pci_info_map[pci]['rntis'].add(rnti)
        pci_info_map[pci]['metrics'].add(metric)

# # Print results
# for pci, info in pci_info_map.items():
#     print(f"PCI-{pci}:")
#     print(f"  RNTIs: {sorted(info['rntis'])}")
#     print(f"  Metrics: {sorted(info['metrics'])}\n\t   Length: {len(info['metrics'])}")


application_features = pci_info_map['1'][ 'metrics']
print(f"Application Features: {application_features}")


Topology is as follows: 
{'srscu0': ['srsdu0'], 'srscu1': ['srsdu1'], 'srscu2': ['srsdu2'], 'srscu3': ['srsdu3']}

Application Features: {'pusch_ta_ns', 'bsr', 'ul_nof_nok', 'pucch_ta_ns', 'dl_bs', 'dl_nof_nok', 'ul_nof_ok', 'ri', 'ul_brate', 'cqi', 'srs_ta_ns', 'pusch_snr_db', 'dl_brate', 'dl_nof_ok', 'ta_ns', 'dl_mcs', 'ul_mcs', 'pucch_snr_db'}
