## Preliminary preparations

In [1]:
from datetime import datetime
import os
from pathlib import Path

import pandas as pd
import numpy as np
import re
import plotly.graph_objects as go
import warnings

warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)
pd.set_option('display.max_rows', 100)

In [2]:
logfile = "../logs/256G/2025-01-14-22-46-43-mainnet-beta-256GB.log"
csv = "../logs/512G/tps_512gb.csv"

## Preparing Data Source

In [3]:
data = []

with open(logfile, 'r') as file:
    for line in file:
        if "datapoint: replay-slot-stats" in line:

            timestamp_match = re.search(r"\[(.*?)Z", line)
            transactions_match = re.search(r"total_transactions=(\d+)i", line)
            execute_us_match = re.search(r"execute_us=(\d+)i", line)
            
            if timestamp_match and transactions_match and execute_us_match:
                timestamp = timestamp_match.group(1)
                total_transactions_executed = int(transactions_match.group(1))
                execute_us = int(execute_us_match.group(1))

                data.append([timestamp, total_transactions_executed, execute_us])

df = pd.DataFrame(data, columns=['timestamp', 'total_transactions_executed', 'execute_us'])
df

Unnamed: 0,timestamp,total_transactions_executed,execute_us
0,2025-01-15T00:41:01.429102226,1882,597308
1,2025-01-15T00:41:02.009025801,1838,484771
2,2025-01-15T00:41:02.506907732,1489,414585
3,2025-01-15T00:41:03.166724231,2396,435415
4,2025-01-15T00:41:03.461913043,985,484050
...,...,...,...
7128,2025-01-15T02:21:52.281631191,2430,425057
7129,2025-01-15T02:22:02.354642978,1829,411011
7130,2025-01-15T02:22:17.581129652,1681,306897
7131,2025-01-15T02:23:03.339850679,1705,455255


In [4]:
def convert_to_unixtime(timestamp):
    base_time, microseconds = timestamp.split('.')
    microseconds = microseconds[:6]
    dt = datetime.strptime(base_time, '%Y-%m-%dT%H:%M:%S')
    return int(dt.timestamp() * 1_000_000) + int(microseconds)

In [5]:
df['end'] = df['timestamp'].apply(convert_to_unixtime)
df['start'] = df['end'] - df['execute_us']
df['tps'] = df['total_transactions_executed'] * 1000000 / df['execute_us']
df

Unnamed: 0,timestamp,total_transactions_executed,execute_us,end,start,tps
0,2025-01-15T00:41:01.429102226,1882,597308,1736930461429102,1736930460831794,3150.803271
1,2025-01-15T00:41:02.009025801,1838,484771,1736930462009025,1736930461524254,3791.480926
2,2025-01-15T00:41:02.506907732,1489,414585,1736930462506907,1736930462092322,3591.543351
3,2025-01-15T00:41:03.166724231,2396,435415,1736930463166724,1736930462731309,5502.796183
4,2025-01-15T00:41:03.461913043,985,484050,1736930463461913,1736930462977863,2034.913749
...,...,...,...,...,...,...
7128,2025-01-15T02:21:52.281631191,2430,425057,1736936512281631,1736936511856574,5716.880324
7129,2025-01-15T02:22:02.354642978,1829,411011,1736936522354642,1736936521943631,4450.002555
7130,2025-01-15T02:22:17.581129652,1681,306897,1736936537581129,1736936537274232,5477.407730
7131,2025-01-15T02:23:03.339850679,1705,455255,1736936583339850,1736936582884595,3745.153815


In [6]:
start_time = df.start.min() // 1000000 * 1000000
end_time = (df.start.max() + 1000000) // 1000000 * 1000000
time_step = 100000 # 10,000 microsends = .1 second

time_series = pd.DataFrame({
    't': np.arange(start_time, end_time + time_step, time_step)
})

def aggregate_tps(row, df):
    mask = (df['start'] <= row['t']) & (df['end'] >= row['t'])
    return df.loc[mask, 'tps'].sum()

time_series['sum_tps'] = time_series.apply(aggregate_tps, axis=1, df=df)
# window=10 means creating a moving average with the past 1 second's data. If you change it to 100, it would be the past 10 seconds
time_series['moving_average'] = time_series['sum_tps'].rolling(window=50, min_periods=1).mean()
time_series.to_csv(csv, index=False)


In [7]:
# Calculate the maximum TPS
max_tps = time_series['sum_tps'].max()

# Calculate the overall average TPS
average_tps = time_series['sum_tps'].mean()

# Print the results
print(f"Maximum TPS: {max_tps:,.2f}")
print(f"Average TPS: {average_tps:,.2f}")

# Convert the max TPS x-coordinate to the corresponding time
max_tps_time = time_series.loc[time_series['sum_tps'].idxmax(), 't']

# Calculate the total duration spent with 0 TPS
time_with_zero_tps = time_series[time_series['sum_tps'] == 0]
total_time_zero_tps = len(time_with_zero_tps) * time_step / 1_000_000  # Convert microseconds to seconds
total_time_zero_tps_hours = total_time_zero_tps / 3600  # Convert seconds to hours

print(f"Total time spent with 0 TPS: {total_time_zero_tps:.2f} seconds ({total_time_zero_tps_hours:.2f} hours)")

Maximum TPS: 48,578.82
Average TPS: 2,141.11
Total time spent with 0 TPS: 3969.90 seconds (1.10 hours)


In [8]:
# Display every 10 seconds
filtered_time_series = time_series[time_series['t'] % 10000000 == 0]
# from micro sec to sec
filtered_time_series['t'] = np.floor(filtered_time_series['t'] / 1000000)

filtered_time_series

Unnamed: 0,t,sum_tps,moving_average
0,1.736930e+09,0.0,0.000000
100,1.736930e+09,0.0,4559.915205
200,1.736930e+09,0.0,4109.370421
300,1.736930e+09,0.0,390.279748
400,1.736930e+09,0.0,277.338606
...,...,...,...
61900,1.736937e+09,0.0,0.000000
62000,1.736937e+09,0.0,0.000000
62100,1.736937e+09,0.0,0.000000
62200,1.736937e+09,0.0,0.000000


In [9]:
# Calculate time elapsed in seconds from the start of the log
start_time = filtered_time_series['t'].min()  # First timestamp
filtered_time_series['elapsed_time'] = filtered_time_series['t'] - start_time
filtered_time_series['elapsed_time_hours'] = filtered_time_series['elapsed_time'] / 3600

# Convert elapsed time to HH:MM:SS format
filtered_time_series['elapsed_time_formatted'] = filtered_time_series['elapsed_time'].apply(
    lambda x: f"{int(x // 3600):02}:{int((x % 3600) // 60):02}:{int(x % 60):02}"
)

In [10]:
# Determine the range for the x-axis
x_min = filtered_time_series['elapsed_time_hours'].min() - 1  # One hour before the first data point
x_max = filtered_time_series['elapsed_time_hours'].max() + 1  # One hour after the last data point

fig = go.Figure()

fig.add_trace(go.Scatter(x=filtered_time_series['elapsed_time_hours'], y=filtered_time_series['sum_tps'], mode='markers', name='TPS', marker=dict(size=2)))
fig.add_trace(go.Scatter(x=filtered_time_series['elapsed_time_hours'], y=filtered_time_series['moving_average'], mode='lines', name='TPS Moving Average(1sec)', line=dict(width=0.5)))

# Add horizontal lines for max and average TPS
fig.add_hline(y=average_tps, line_dash="dot", line_color="green", annotation_text=f"Average TPS: {average_tps:.2f}", annotation_position="bottom right")
fig.add_hline(y=max_tps, line_dash="dash", line_color="red", annotation_text=f"Max TPS: {max_tps:.2f}", annotation_position="top right")

fig.update_layout(
    title=f"TPS Over Time | Total Time Spent with 0 TPS: {total_time_zero_tps:.2f} seconds ({total_time_zero_tps_hours:.2f} hours) | Max TPS: {max_tps:,.2f}",
    xaxis_title="Time Elapsed (hours)",
    yaxis_title="TPS",
    xaxis=dict(
        tickmode='linear',
        dtick=1,
        tickformat=".1f",
        range=[x_min, x_max]
    ),
    yaxis=dict(range=[0, 15000])
)


fig.show()


In [11]:
window_size = 50  # Adjust the window size as needed
time_series['variance'] = time_series['sum_tps'].rolling(window=window_size, min_periods=1).var()
time_series['std_dev'] = time_series['sum_tps'].rolling(window=window_size, min_periods=1).std()

# Create a new plot
fig = go.Figure()

# # Add TPS line
# fig.add_trace(
#     go.Scatter(
#         x=time_series.index,
#         y=time_series['sum_tps'],
#         mode='lines',
#         name='TPS',
#         line=dict(color='blue')
#     )
# )

# Add shaded region for standard deviation
fig.add_trace(
    go.Scatter(
        x=pd.concat([pd.Series(time_series.index), pd.Series(time_series.index[::-1])]),
        y=pd.concat([time_series['sum_tps'] + time_series['std_dev'], 
                     (time_series['sum_tps'] - time_series['std_dev'])[::-1]]),
        fill='toself',
        fillcolor='rgba(135, 206, 250, 1)',  # Light blue shade
        line=dict(color='rgba(255,255,255,0)'),
        name='Standard Deviation'
    )
)

# # Add variance as a separate line (optional)
# fig.add_trace(
#     go.Scatter(
#         x=time_series.index,
#         y=time_series['variance'],
#         mode='lines',
#         name='Variance',
#         line=dict(dash='dot', color='orange')
#     )
# )

# Update layout
fig.update_layout(
    title="TPS with Variance and Standard Deviation",
    xaxis_title="Time",
    yaxis_title="TPS",
    template="plotly_white",
    legend_title="Metrics"
)

fig.show()


## Notes
 - If all data points are plotted on the graph, the points and lines become indistinguishable, so I plot them every 10 seconds.
 - TPS values above 15000 are excluded as outliers.
