In [3]:
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go

log_file_path = "../2025-01-07-04-19-59--memory.log"

# Header row that marks where the CSV section of the log begins; everything
# before its first occurrence is ignored.
header_line = "Timestamp,UID,PID,minflt/s,majflt/s,VSZ,RSS,%MEM,Command"

columns = ["timestamp", "uid", "pid", "minflt/s", "majflt/s", "vsz", "rss", "%mem", "command"]
data = []


def _to_float(field):
    """Return *field* parsed as a float, or None when the field is blank."""
    field = field.strip()
    return float(field) if field else None


# Parse the log file
with open(log_file_path, 'r') as file:
    header_found = False
    for line in file:
        stripped = line.strip()

        # Ignore lines before the header
        if not header_found:
            header_found = stripped.startswith(header_line)
            continue

        # A header repeated later in the log carries no data; without this
        # guard it would reach int("UID") below and abort the whole parse.
        if stripped.startswith(header_line):
            continue

        # Split at most len(columns) - 1 times so that commas inside the
        # trailing Command field stay part of the command instead of being
        # cut off as extra columns.
        parts = stripped.split(",", len(columns) - 1)
        if len(parts) < len(columns):  # Skip lines that don't have enough columns
            continue

        data.append([
            parts[0],             # timestamp, converted to datetime below
            int(parts[1]),        # uid
            int(parts[2]),        # pid
            _to_float(parts[3]),  # minflt/s
            _to_float(parts[4]),  # majflt/s
            _to_float(parts[5]),  # vsz
            _to_float(parts[6]),  # rss
            _to_float(parts[7]),  # %mem
            parts[8].strip(),     # command
        ])

df = pd.DataFrame(data, columns=columns)
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# NOTE(review): the division by 1024 * 1024 assumes vsz/rss are logged in
# kilobytes (as pidstat reports them) -- confirm against the log producer.
df['RSS (GB)'] = df['rss'] / (1024 * 1024)
df['VSZ (GB)'] = df['vsz'] / (1024 * 1024)

print(df.head())

# One plot cell with two y-axes: memory sizes on the primary (left) axis and
# page-fault rates on the secondary (right) axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Each entry: (DataFrame column, legend label, line styling, use secondary axis).
# The order here is the order in which the traces are added to the figure.
trace_specs = [
    # Memory usage
    ("RSS (GB)", "RSS (GB)", dict(color="blue"), False),
    ("VSZ (GB)", "VSZ (GB)", dict(color="cyan"), False),
    # Page faults
    ("minflt/s", "Minor Page Faults (minflt/s)", dict(color="orange", dash="dot"), True),
    ("majflt/s", "Major Page Faults (majflt/s)", dict(color="red", dash="dot"), True),
]

for column_name, legend_label, line_style, on_secondary in trace_specs:
    series_trace = go.Scatter(
        x=df.index,
        y=df[column_name],
        name=legend_label,
        line=line_style,
    )
    fig.add_trace(series_trace, secondary_y=on_secondary)

# Figure-level titles and theme
layout_options = dict(
    title_text="Agave Validator Memory Usage and Page Faults Over Time",
    xaxis_title="Timestamp",
    legend_title="Metrics",
    template="plotly_white",
)
fig.update_layout(**layout_options)

# Label the two y-axes (primary first, then secondary)
for axis_title, on_secondary in (
    ("Memory Usage (GB)", False),
    ("Page Faults per Second", True),
):
    fig.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

fig.show()

                            uid      pid  minflt/s  majflt/s       vsz  \
timestamp                                                                
2025-01-07 04:20:01+00:00  1001  2645070    5838.0       0.0  321536.0   
2025-01-07 04:20:03+00:00  1001  2645070    2070.0       0.0  321536.0   
2025-01-07 04:20:05+00:00  1001  2645070    2754.0       0.0  321536.0   
2025-01-07 04:20:07+00:00  1001  2645070    4852.0       0.0  324608.0   
2025-01-07 04:20:09+00:00  1001  2645070    1351.0       0.0  330752.0   

                               rss  %mem          command  RSS (GB)  VSZ (GB)  
timestamp                                                                      
2025-01-07 04:20:01+00:00  27648.0   0.0  agave-validator  0.026367  0.306641  
2025-01-07 04:20:03+00:00  32256.0   0.0  agave-validator  0.030762  0.306641  
2025-01-07 04:20:05+00:00  32256.0   0.0  agave-validator  0.030762  0.306641  
2025-01-07 04:20:07+00:00  36864.0   0.0  agave-validator  0.035156  0.309570  
2