In [203]:
import pandas as pd
from prometheus_api_client import PrometheusConnect, MetricSnapshotDataFrame
from datetime import datetime
import os
import re

In [204]:
# Create a connection to Prometheus and verify that the server really answers.
# Building the client object is not treated as proof of connectivity: the
# success message is printed only after an explicit connectivity check.
PROMETHEUS_URL = "http://localhost:9090"

try:
    prom = PrometheusConnect(url=PROMETHEUS_URL, disable_ssl=True)
    # check_prometheus_connection() issues a request to the server and reports
    # whether it answered successfully; network failures raise and are
    # reported by the except branch below.
    if prom.check_prometheus_connection():
        print("Connected to Prometheus")
    else:
        print(f"Error connecting to Prometheus: no successful response from {PROMETHEUS_URL}")
except Exception as e:
    print(f"Error connecting to Prometheus: {e}")

Connected to Prometheus


In [205]:
# Query window and sampling resolution used for every Prometheus range query.
TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
step = "1s"

# Both bounds are parsed into naive datetime objects: the trailing "Z" is
# matched as a literal character by the format, no tzinfo is attached.
start_time = datetime.strptime("2025-03-19T20:25:00Z", TIME_FORMAT)
end_time = datetime.strptime("2025-03-19T20:33:00Z", TIME_FORMAT)

In [206]:
# Queries to fetch, grouped by the exporter they come from.

# Open5GS metrics (plain metric names, no label selector)
open5gs_metrics = [
    "fivegs_amffunction_amf_authreject",
    "fivegs_amffunction_amf_authreq",
    "fivegs_amffunction_rm_reginitsucc",  # registration success
    "fivegs_ep_n3_gtp_indatapktn3upf",  # incoming data packets
    "fivegs_ep_n3_gtp_outdatapktn3upf",  # outgoing data packets
    "fivegs_upffunction_upf_sessionnbr",  # session number
    "ues_active",  # active UEs
]

# NetData metrics, each narrowed with a label selector
# (bridge interface traffic and one cgroup's CPU usage)
netdata_queries = [
    'netdata_net_packets_packets_persec_average{device="br-02c136a167f8", dimension="received"}',
    'netdata_net_packets_packets_persec_average{device="br-02c136a167f8", dimension="sent"}',
    "netdata_net_net_kilobits_persec_average{device='br-02c136a167f8', dimension='received'}",
    "netdata_net_net_kilobits_persec_average{device='br-02c136a167f8', dimension='sent'}",
    "netdata_cgroup_cpu_percentage_average{cgroup_name='cd2487a23764'}",
]

# Full list of queries, in the same order as they are fetched
metrics = open5gs_metrics + netdata_queries

In [207]:
# Accumulator list (not a DataFrame): the fetch loop appends one DataFrame
# per returned series, and the list is concatenated into `final_df` afterwards.
df_list = []

In [208]:
import pytz  # For timezone conversion

# Define your local timezone (change this if necessary)
LOCAL_TZ = pytz.timezone("Europe/Bratislava")  # Change if needed

# Start from an empty accumulator so that re-running this cell does not
# append every series a second time.
df_list = []


def _series_name(labels, fallback):
    """Build a readable series name from a Prometheus label set.

    Args:
        labels: Mapping of label name to label value for one returned series.
        fallback: Name to use when the series carries no ``__name__`` label
            (the query string is passed in by the caller).

    Returns:
        The base metric name, suffixed with ``device`` and ``dimension`` when
        both are present, with ``device`` alone, or with ``cgroup_name``.
        Any other labels are ignored.
    """
    base_metric_name = labels.get("__name__", fallback)
    device = labels.get("device")
    dimension = labels.get("dimension")
    cgroup_name = labels.get("cgroup_name")

    if device and dimension:
        return f"{base_metric_name}_{device}_{dimension}"
    if device:
        return f"{base_metric_name}_{device}"
    if cgroup_name:
        return f"{base_metric_name}_{cgroup_name}"
    return base_metric_name


# Fetch metrics and transform timestamps
for metric in metrics:
    try:
        response = prom.custom_query_range(
            metric, start_time=start_time, end_time=end_time, step=step
        )

        # Ensure response is not empty
        if not response:
            print(f"⚠️ Warning: No data for metric {metric}")
            continue

        # Process each returned series
        for entry in response:
            metric_name = _series_name(entry.get("metric", {}), metric)

            values = entry.get("values")
            if not isinstance(values, list):
                print(f"⚠️ Warning: No valid values found for {metric}")
                continue

            # Timestamps are truncated to whole seconds (int) so they line up
            # with the second-resolution log timestamps used later on.
            # fromtimestamp(..., tz=UTC) replaces the deprecated
            # datetime.utcfromtimestamp() and yields the same aware datetime.
            extracted_values = [
                (
                    datetime.fromtimestamp(int(ts), tz=pytz.utc).astimezone(LOCAL_TZ),
                    float(val),
                )
                for ts, val in values
            ]

            # One long-format frame per series: timestamp, value, metric_name
            metric_df = pd.DataFrame(extracted_values, columns=["timestamp", "value"])
            metric_df["metric_name"] = metric_name
            df_list.append(metric_df)

    except Exception as e:
        # Best effort: report the failing query and carry on with the next one
        print(f"❌ Error fetching {metric}: {e}")


In [209]:
# Combine the per-series frames into one long-format frame.
# Fail loudly when nothing was fetched: otherwise `final_df` would be left
# undefined (or stale from an earlier run) and the following cells would
# either crash with a NameError or silently process old data.
if not df_list:
    raise ValueError(
        "No metric data was fetched; check the Prometheus connection, "
        "the query window and the metric names."
    )
final_df = pd.concat(df_list, ignore_index=True)

In [210]:
# Drop the trailing UTC offset (e.g. "+01:00" or "-05:00") from the timestamp
# so that the values become naive local times, comparable with the log
# timestamps. The pattern accepts both signs and is anchored at the end of the
# string, so it keeps working when LOCAL_TZ is changed to a zone west of UTC.
final_df['timestamp'] = final_df['timestamp'].astype(str).str.replace(r'[+-]\d{2}:\d{2}$', '', regex=True)

In [211]:
# Preview the first rows of the long-format metrics frame
# (columns: timestamp, value, metric_name).
final_df.head()

Unnamed: 0,timestamp,value,metric_name
0,2025-03-19 20:28:05,0.0,fivegs_amffunction_amf_authreject
1,2025-03-19 20:28:06,0.0,fivegs_amffunction_amf_authreject
2,2025-03-19 20:28:07,0.0,fivegs_amffunction_amf_authreject
3,2025-03-19 20:28:08,0.0,fivegs_amffunction_amf_authreject
4,2025-03-19 20:28:09,0.0,fivegs_amffunction_amf_authreject


In [212]:
# Sample lines the pattern below is meant to match:
#   03/19 11:20:11.151: [amf] INFO: ngap_server() [172.22.0.10]:38412 (../src/amf/ngap-sctp.c:61)
#   03/19 11:20:11.154: [sctp] INFO: AMF initialize...done (../src/amf/app.c:33)
#   03/19 11:20:11.174: [sbi] INFO: [bd5d91d4-04ab-41f0-8871-a9dc3c5ef804] NF registered [Heartbeat:10s] (../lib/sbi/nf-sm.c:208)
#   03/19 11:20:11.179: [sbi] INFO: NF EndPoint(addr) setup [172.22.0.12:7777] (../lib/sbi/nnrf-handler.c:949)
#
# Capture groups, in order:
#   1. timestamp "MM/DD HH:MM:SS.mmm" (no year)
#   2. application tag inside the square brackets
#   3. log level
#   4. rest of the line (the message)
log_pattern = re.compile(
    r"(\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{3}):\s+\[(\w+)\]\s+(\w+):\s*(.+)"
)

# Directory that holds the log files to parse
log_dir = "../log/"

# Parsed log records are collected here as dicts
log_data = list()

In [213]:
# Parse every log file in `log_dir` and keep the records that fall inside the
# [start_time, end_time] query window (compared at one-second resolution).

# Start from an empty list so that re-running this cell does not duplicate
# records (and still works after `log_data` has been turned into a DataFrame).
log_data = []

# Output format of the stored timestamps: second resolution, no milliseconds,
# matching the format of the Prometheus timestamps.
window_fmt = "%Y-%m-%d %H:%M:%S"

# sorted() makes the record order independent of the file system's listing order
for log_file in sorted(os.listdir(log_dir)):
    log_path = os.path.join(log_dir, log_file)

    # Skip sub-directories and other non-regular entries; open() would fail on them
    if not os.path.isfile(log_path):
        continue

    with open(log_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            match = log_pattern.match(line)
            if not match:
                continue

            timestamp_str, application, log_level, log_message = match.groups()

            # The log lines carry no year. Prepend the year of the query
            # window *before* parsing: parsing a year-less date defaults to
            # 1900, which rejects "02/29" and is deprecated in recent Python.
            log_dt = datetime.strptime(
                f"{start_time.year}/{timestamp_str}", "%Y/%m/%d %H:%M:%S.%f"
            )

            # 🔹 Remove milliseconds to match Prometheus format
            log_dt = log_dt.replace(microsecond=0)

            # Keep only records inside the query window (bounds inclusive)
            if start_time <= log_dt <= end_time:
                log_data.append({
                    "timestamp": log_dt.strftime(window_fmt),
                    "application": application,
                    "log_level": log_level,
                    "log_message": log_message
                })


In [214]:
# Turn the parsed records into a DataFrame. The columns are given explicitly
# so that the frame keeps its schema even when no log line matched the window
# (an empty record list would otherwise produce a frame without a "timestamp"
# column and break the following cells).
log_data = pd.DataFrame(
    log_data,
    columns=["timestamp", "application", "log_level", "log_message"],
)
log_data.head()

Unnamed: 0,timestamp,application,log_level,log_message
0,2025-03-19 20:27:59,app,INFO,Configuration: '/open5gs/install/etc/open5gs/s...
1,2025-03-19 20:27:59,app,INFO,File Logging: '/open5gs/install/var/log/open5g...
2,2025-03-19 20:27:59,pfcp,INFO,pfcp_server() [172.22.0.6]:8805 (../lib/pfcp/p...
3,2025-03-19 20:27:59,pfcp,INFO,ogs_pfcp_connect() [172.22.0.5]:8805 (../lib/p...
4,2025-03-19 20:27:59,gtp,INFO,gtp_server() [172.22.0.6]:2152 (../lib/gtp/pat...


In [215]:
# Convert the string timestamps of the log records to pandas datetimes
log_timestamps = pd.to_datetime(log_data["timestamp"])
log_data = log_data.assign(timestamp=log_timestamps)
log_data.head(1)

Unnamed: 0,timestamp,application,log_level,log_message
0,2025-03-19 20:27:59,app,INFO,Configuration: '/open5gs/install/etc/open5gs/s...


In [216]:
# Convert the metric timestamps to pandas datetimes as well, so both frames
# share the same timestamp type
metric_timestamps = pd.to_datetime(final_df["timestamp"])
final_df = final_df.assign(timestamp=metric_timestamps)
final_df.head(1)

Unnamed: 0,timestamp,value,metric_name
0,2025-03-19 20:28:05,0.0,fivegs_amffunction_amf_authreject


Save log_data and final_df
--------------------------
Write both DataFrames to CSV files (`log_data.csv` and `metrics_data.csv`).

In [217]:
# Persist the parsed logs and the long-format metrics as CSV (no index column)
for frame, csv_name in ((log_data, "log_data.csv"), (final_df, "metrics_data.csv")):
    frame.to_csv(csv_name, index=False)

In [218]:
# Working aliases used by the merge steps below (same objects, no copy).
# NOTE: `metrics` is rebound here from the list of query strings to the
# metrics DataFrame; re-run the query-list cell before fetching again.
logs, metrics = log_data, final_df

In [219]:
# Collapse duplicate (timestamp, metric_name) pairs to their mean value
netdata_aggregated = (
    metrics
    .groupby(["timestamp", "metric_name"])["value"]
    .mean()
    .reset_index()
)

# Long -> wide: one row per timestamp, one column per metric
wide_metrics = netdata_aggregated.pivot(
    index="timestamp", columns="metric_name", values="value"
)

# Give every metric column a "_value" suffix (plain, unnamed column index)
# and turn the timestamp index back into a regular column
netdata_pivot = wide_metrics.set_axis(
    [f"{name}_value" for name in wide_metrics.columns], axis=1
).reset_index()

print("✅ NetData metrics aggregated and pivoted successfully!")
netdata_pivot.head(1)

✅ NetData metrics aggregated and pivoted successfully!


Unnamed: 0,timestamp,fivegs_amffunction_amf_authreject_value,fivegs_amffunction_amf_authreq_value,fivegs_amffunction_rm_reginitsucc_value,fivegs_ep_n3_gtp_indatapktn3upf_value,fivegs_ep_n3_gtp_outdatapktn3upf_value,fivegs_upffunction_upf_sessionnbr_value,netdata_cgroup_cpu_percentage_average_cd2487a23764_value,netdata_net_net_kilobits_persec_average_br-02c136a167f8_received_value,netdata_net_net_kilobits_persec_average_br-02c136a167f8_sent_value,netdata_net_packets_packets_persec_average_br-02c136a167f8_received_value,netdata_net_packets_packets_persec_average_br-02c136a167f8_sent_value,ues_active_value
0,2025-03-19 20:28:04,,,,,,,2.512706,8.318827,-9.338733,2.653609,-1.649435,


In [220]:
# Keyword-based classifier for log messages
def classify_log_message(message):
    """Assign a coarse category to a log message.

    The message is lower-cased and searched for the keywords "connect",
    "request" and "reject", in that order; the first keyword found is
    returned as the category.

    Args:
        message: The log message. Anything that is not a ``str``
            (e.g. ``None`` or ``NaN``) is treated as having no category.

    Returns:
        "connect", "request", "reject", or "nothing" when no keyword is
        found or the input is not a string.
    """
    if not isinstance(message, str):
        return "nothing"

    lowered = message.lower()
    for keyword in ("connect", "request", "reject"):
        if keyword in lowered:
            return keyword
    return "nothing"

In [221]:
# Classify every log message before the logs are merged with the metrics;
# the category is stored in a new "log_type" column
logs["log_type"] = logs["log_message"].map(classify_log_message)

In [222]:
# Check that the new "log_type" column is present on the first record
logs.head(1)

Unnamed: 0,timestamp,application,log_level,log_message,log_type
0,2025-03-19 20:27:59,app,INFO,Configuration: '/open5gs/install/etc/open5gs/s...,nothing


In [223]:
# Reduce the logs to the columns needed for the merge
merge_columns = ["timestamp", "application", "log_type"]
logs_short = logs.loc[:, merge_columns]

In [224]:
# Show the last record of the reduced log frame
logs_short.tail(1)

Unnamed: 0,timestamp,application,log_type
397,2025-03-19 20:28:09,sbi,nothing


In [225]:
# Drop log records older than the cutoff below.
# NOTE(review): the records were already restricted to the query window when
# they were parsed, so with the current window this looks like a no-op — confirm.
log_cutoff = "2025-03-19 12:56:20"
logs_short = logs_short.loc[logs_short["timestamp"] >= log_cutoff]

In [226]:
# Re-display the first row of the wide metrics frame before merging
netdata_pivot.head(1)

Unnamed: 0,timestamp,fivegs_amffunction_amf_authreject_value,fivegs_amffunction_amf_authreq_value,fivegs_amffunction_rm_reginitsucc_value,fivegs_ep_n3_gtp_indatapktn3upf_value,fivegs_ep_n3_gtp_outdatapktn3upf_value,fivegs_upffunction_upf_sessionnbr_value,netdata_cgroup_cpu_percentage_average_cd2487a23764_value,netdata_net_net_kilobits_persec_average_br-02c136a167f8_received_value,netdata_net_net_kilobits_persec_average_br-02c136a167f8_sent_value,netdata_net_packets_packets_persec_average_br-02c136a167f8_received_value,netdata_net_packets_packets_persec_average_br-02c136a167f8_sent_value,ues_active_value
0,2025-03-19 20:28:04,,,,,,,2.512706,8.318827,-9.338733,2.653609,-1.649435,


In [227]:
# Outer-join the wide metrics with the log records on the timestamp, so rows
# that exist on only one side are kept (with NaN for the missing side)
merged_data = netdata_pivot.merge(logs_short, on="timestamp", how="outer")

In [228]:
# Human-readable column names for the merged frame: raw name -> display name
column_rename_mapping = {
    # Open5GS counters
    "fivegs_amffunction_amf_authreject_value": "Auth Reject Count",
    "fivegs_amffunction_amf_authreq_value": "Auth Request Count",
    "fivegs_amffunction_rm_reginitsucc_value": "Registration Success",
    "fivegs_ep_n3_gtp_outdatapktn3upf_value": "Outgoing Data Packets",
    "fivegs_ep_n3_gtp_indatapktn3upf_value": "Incoming Data Packets",
    "fivegs_upffunction_upf_sessionnbr_value": "Session Number",
    "ues_active_value": "Active UEs",

    # NetData metrics
    "netdata_cgroup_cpu_percentage_average_cd2487a23764_value": "CPU Usage (Open5GS)",
    "netdata_net_net_kilobits_persec_average_br-02c136a167f8_received_value": "Network Traffic In (kbps)",
    "netdata_net_net_kilobits_persec_average_br-02c136a167f8_sent_value": "Network Traffic Out (kbps)",
    "netdata_net_packets_packets_persec_average_br-02c136a167f8_received_value": "Packets Received (pps)",
    "netdata_net_packets_packets_persec_average_br-02c136a167f8_sent_value": "Packets Sent (pps)",

    # Log-derived columns
    "application": "Application Name",
    "log_type": "Log Type",
}

# Rename the columns; names that are not in the mapping are left unchanged
merged_data = merged_data.rename(columns=column_rename_mapping)

# Show the resulting column names for verification
print("✅ Updated Column Names:", merged_data.columns)


✅ Updated Column Names: Index(['timestamp', 'Auth Reject Count', 'Auth Request Count',
       'Registration Success', 'Incoming Data Packets',
       'Outgoing Data Packets', 'Session Number', 'CPU Usage (Open5GS)',
       'Network Traffic In (kbps)', 'Network Traffic Out (kbps)',
       'Packets Received (pps)', 'Packets Sent (pps)', 'Active UEs',
       'Application Name', 'Log Type'],
      dtype='object')


In [229]:
# Write the merged metrics + logs table to disk (without the index column)
merged_csv_path = "merged_data.csv"
merged_data.to_csv(merged_csv_path, index=False)
print("✅ Data merged successfully!")

✅ Data merged successfully!
