In [39]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from torch.utils.data import Dataset, DataLoader
import os
import matplotlib.pyplot as plt
import csv

import torch
import captum
from captum.attr import IntegratedGradients
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict
import re
import json



# Run on CUDA when torch reports a usable GPU; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [40]:
# Number of CU and DU nodes in the deployment.
NoOfCUs = 4
NoOfDUs = 4

# One-to-one links: srscu0 -> [srsdu0], srscu1 -> [srsdu1], ...
# Only as many links as the smaller of the two counts, so no CU points at a
# DU index that does not exist.
topology = {f"srscu{idx}": [f"srsdu{idx}"] for idx in range(min(NoOfCUs, NoOfDUs))}

# Every DU starts with an empty list of UEs.
UEsOfDUs = {f"srsdu{idx}": [] for idx in range(NoOfDUs)}

print(f'Topology is as follows: \n{topology}', end="\n\n")

# Load the samples and use the Timestamp column as the row index.
# (To experiment on a subset, slice the frame here, e.g. the first 1% of rows.)
dataset = pd.read_csv('small_sample.csv').set_index('Timestamp')

# For each PCI found in the column names, collect the RNTIs seen under it and
# the metric names reported for it. Columns are expected to look like
# "PCI-<digits>_RNTI-<digits>_<metric>"; anything else is skipped.
pci_info_map = defaultdict(lambda: {'rntis': set(), 'metrics': set()})
column_pattern = re.compile(r'PCI-(\d+)_RNTI-(\d+)_([a-zA-Z0-9_]+)')

for column in dataset.columns:
    match = column_pattern.match(column)
    if match is None:
        continue
    pci, rnti, metric = match.groups()
    entry = pci_info_map[pci]
    entry['rntis'].add(rnti)
    entry['metrics'].add(metric)

# The metric names recorded under PCI "1" are taken as the application features.
# Note this is the set stored in pci_info_map itself, not a copy.
application_features = pci_info_map['1']['metrics']
print(f"Application Features: {application_features}")


Topology is as follows: 
{'srscu0': ['srsdu0'], 'srscu1': ['srsdu1'], 'srscu2': ['srsdu2'], 'srscu3': ['srsdu3']}

Application Features: {'ul_nof_ok', 'ul_nof_nok', 'dl_nof_nok', 'ul_brate', 'pusch_snr_db', 'pucch_snr_db', 'dl_bs', 'ta_ns', 'bsr', 'cqi', 'dl_brate', 'srs_ta_ns', 'dl_mcs', 'ul_mcs', 'ri', 'dl_nof_ok', 'pusch_ta_ns', 'pucch_ta_ns'}


In [41]:
# Build one aggregated dataset per (CU, DU) link of the topology.
#
# For every link this cell:
#   1. loads the selected feature names from features_{CU}_{DU}.json,
#   2. collapses the per-RNTI columns of every PCI into one column per metric
#      (row-wise mean for the metrics listed below, row-wise sum for the rest),
#   3. strips the CU/DU instance number from the selected feature names, and
#   4. writes the result to dataset_{CU}_{DU}.csv.

# Refresh the PCI -> {RNTIs, metrics} map from the dataset's column names.
# This does not depend on the CU/DU being processed, so it is done once here
# instead of once per link (adding to the sets is idempotent).
column_pattern = re.compile(r'PCI-(\d+)_RNTI-(\d+)_([a-zA-Z0-9_]+)')
for column in dataset.columns:
    match = column_pattern.match(column)
    if match:
        pci, rnti, metric = match.groups()
        pci_info_map[pci]['rntis'].add(rnti)
        pci_info_map[pci]['metrics'].add(metric)

# Metrics that are averaged across the RNTIs of a PCI; every other metric
# recorded for that PCI is summed instead.
average_application_metrics = ['dl_mcs', 'srs_ta_ns', 'pusch_snr_db', 'cqi', 'ri', 'ul_mcs', 'pucch_snr_db', 'ta_ns', 'pusch_ta_ns', 'pucch_ta_ns']

for CU, connected_DUs in topology.items():
    for DU in connected_DUs:
        # Feature (column) names selected for this CU/DU pair.
        with open(f'features_{CU}_{DU}.json', 'r') as f:
            loaded_features = json.load(f)

        # Work on a copy so the shared `dataset` frame is never modified.
        samples = dataset[loaded_features].copy()
        present_columns = set(samples.columns)

        # Aggregate whole columns at once. The previous row-by-row version
        # dropped the per-RNTI columns while handling the first row, so every
        # later row found no source columns and was written as 0.
        aggregated_columns = {}   # new column name -> float array, one value per row
        consumed_columns = []     # per-RNTI columns replaced by an aggregate
        for pci, info in pci_info_map.items():
            rntis = sorted(info['rntis'])
            # Same column order as before: averages first (list order), then sums.
            plan = [('Average', name) for name in average_application_metrics]
            plan += [('Summation', name) for name in info['metrics']
                     if name not in average_application_metrics]

            for label, metric_name in plan:
                candidates = [f'PCI-{pci}_RNTI-{rnti}_{metric_name}' for rnti in rntis]
                rnti_columns = [name for name in candidates if name in present_columns]

                if not rnti_columns:
                    # No source column in this pair's feature set: keep the
                    # column, filled with zeros, as the original code did.
                    values = np.zeros(len(samples), dtype=float)
                elif label == 'Average':
                    # skipna=False mirrors np.mean on the raw values (NaN propagates).
                    values = samples[rnti_columns].mean(axis=1, skipna=False).to_numpy(dtype=float)
                else:
                    # skipna=False mirrors np.sum on the raw values (NaN propagates).
                    values = samples[rnti_columns].sum(axis=1, skipna=False).to_numpy(dtype=float)

                aggregated_columns[f'PCI-{pci}_{label}_{metric_name}'] = values
                consumed_columns.extend(rnti_columns)

        # Swap the per-RNTI columns for their aggregates in a single concat;
        # this avoids inserting columns one at a time into a wide frame.
        samples = pd.concat(
            [
                samples.drop(columns=consumed_columns),
                pd.DataFrame(aggregated_columns, index=samples.index),
            ],
            axis=1,
        )

        CU_number = int(CU.replace('srscu', ''))
        DU_number = int(DU.replace('srsdu', ''))
        print(f"Processing features for CU: {CU} (Number: {CU_number}) and DU: {DU} (Number: {DU_number})")

        # Make feature names independent of the instance number so the saved
        # datasets share column names: srscu<N> -> srscu, srsdu<N> -> srsdu,
        # PCI-<N+1> -> Application. Only the first matching rule is applied.
        # NOTE(review): the mapping is keyed on the names in loaded_features, so
        # the aggregated "PCI-*_Average_*" / "PCI-*_Summation_*" columns built
        # above keep their PCI prefix. Confirm whether they should be renamed too.
        # NOTE(review): these are substring matches, so "srscu1" would also match
        # inside "srscu10"; harmless with the 4 CUs/DUs configured in this notebook.
        rename_dict = {}
        for feature in loaded_features:
            new_name = feature
            if f'srscu{CU_number}' in feature:
                new_name = feature.replace(f'srscu{CU_number}', 'srscu')
            elif f'srsdu{DU_number}' in feature:
                new_name = feature.replace(f'srsdu{DU_number}', 'srsdu')
            elif f'PCI-{DU_number+1}' in feature:
                new_name = feature.replace(f'PCI-{DU_number+1}', 'Application')
            rename_dict[feature] = new_name

        samples = samples.rename(columns=rename_dict)

        # Pass the path so pandas opens the file itself and controls newline
        # handling, rather than writing through a text-mode handle.
        samples.to_csv(f'dataset_{CU}_{DU}.csv', index=True, header=True)
        print(f"Dataset for {CU} and {DU} saved with features: {samples.columns.tolist()}\n")

Processing features for CU: srscu0 (Number: 0) and DU: srsdu0 (Number: 0)
Dataset for srscu0 and srsdu0 saved with features: ['((node_memory_MemTotal_bytes{instance="node-exporter:9100", job="node"} - node_memory_MemFree_bytes{instance="node-exporter:9100", job="node"}) / node_memory_MemTotal_bytes{instance="node-exporter:9100", job="node"}) * 100', '((node_memory_SwapTotal_bytes{instance="node-exporter:9100",job="node"} - node_memory_SwapFree_bytes{instance="node-exporter:9100",job="node"}) / (node_memory_SwapTotal_bytes{instance="node-exporter:9100",job="node"})) * 100', '((sum(container_memory_usage_bytes{name="srscu",instance="cadvisor:8080"}) by (instance) - sum(container_memory_cache{name="srscu",instance="cadvisor:8080"}) by (instance)) / sum(machine_memory_bytes{instance="cadvisor:8080"}) by (instance)) * 100', '((sum(container_memory_usage_bytes{name="srsdu",instance="cadvisor:8080"}) by (instance) - sum(container_memory_cache{name="srsdu",instance="cadvisor:8080"}) by (instan