# Constants

In [1]:
from pathlib import Path
# --- Inputs -----------------------------------------------------------------
# Conduit's log is expected in the directory the notebook is started from.
CONDUIT_LOG = Path.cwd() / "conduit.log"

# Connection URI of the Postgres instance holding pg_stat_statements.
PGCONN = "postgresql://algorand:algorand@localhost:65432/performance_db"
# Number of rows each "top statements" query returns.
LIMIT = 15


def _secs_col(stat_field: str, alias: str) -> str:
    """SQL select item: ``stat_field`` divided by 1000, rounded to 3 places, named ``alias``."""
    return f"round(({stat_field}/1000)::numeric, 3) AS {alias}"


def _top_statements_sql(columns, order_by: str) -> str:
    """SQL selecting ``columns`` from pg_stat_statements, top ``LIMIT`` rows by ``order_by`` descending."""
    select_list = ", ".join(columns)
    return (
        f"SELECT {select_list}\n"
        "FROM pg_stat_statements\n"
        f"ORDER BY {order_by} DESC\n"
        f"LIMIT {LIMIT}"
    )


# --- Select-list fragments --------------------------------------------------
# Statement text with `--` line comments removed, whitespace runs collapsed to
# a single space, trimmed, and cut to the first 100 characters.
QUERY_COL = (
    "substring(trim(regexp_replace(regexp_replace(query, '--.*?$', '', 'gn'), "
    "'\\s+', ' ', 'g')), 1, 100) AS query"
)
TOTAL_SECS_COL = _secs_col("total_exec_time", "tot_s")
MEAN_SECS_COL = _secs_col("mean_exec_time", "mean_s")
MIN_SECS_COL = _secs_col("min_exec_time", "min_s")
MAX_SECS_COL = _secs_col("max_exec_time", "max_s")
# Each statement's share (percent) of the summed total_exec_time over all rows.
CPU_COL = "round((100 * total_exec_time / sum(total_exec_time::numeric) OVER ())::numeric, 2) AS cpu_pct"

# --- Queries ----------------------------------------------------------------
# Statements ranked by cumulative execution time.
QUERY_TOTAL_TIME = _top_statements_sql(
    ["dbid", QUERY_COL, TOTAL_SECS_COL, "calls", MEAN_SECS_COL, CPU_COL],
    "total_exec_time",
)

# Statements ranked by mean execution time per call.
QUERY_SLOWEST = _top_statements_sql(
    ["dbid", QUERY_COL, "calls", TOTAL_SECS_COL, MIN_SECS_COL, MAX_SECS_COL, MEAN_SECS_COL],
    "mean_exec_time",
)

# Statements ranked by shared buffer blocks hit plus blocks dirtied.
QUERY_MEMHOG = _top_statements_sql(
    ["dbid", QUERY_COL, "(shared_blks_hit+shared_blks_dirtied) as mem"],
    "(shared_blks_hit+shared_blks_dirtied)",
)

print(f"{CONDUIT_LOG=}")

CONDUIT_LOG=PosixPath('/Users/zeph/github/algorand/conduit/performance/conduit.log')


# Parse the log

In [2]:
from datetime import datetime
import json
import re

# Read the whole log once; `lines` is the single source the loop below walks.
with open(CONDUIT_LOG) as f:
    log_content = f.read()

lines = log_content.strip().split("\n")


# Regular expressions for extracting required data
# A message containing "Block 1 read time" marks the start of the export.
start_time_pattern = re.compile(r'Block 1 read time')
# A message like "round r=<N> ... exported in" marks a finished round; N is captured.
finish_time_pattern = re.compile(r'round r=(\d+) .* exported in')
# ISO-8601 timestamp with optional fractional seconds and a numeric UTC offset.
# It is searched in the raw line, so the first such timestamp on the line is used.
time_pattern = re.compile(
    r'(?P<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?[+-]\d{2}:\d{2})'
)

# Initialize variables to store the required information
start_time = None
finish_time = None
log_rounds = None

# Iterate over the log lines; each non-blank line must be one JSON object.
for line in lines:
    if not line.strip():
        # Blank lines carry no log entry and would make json.loads fail.
        continue

    log_entry = json.loads(line)
    msg = log_entry.get("msg", "")

    # Extract start_time (if the marker appears more than once, the last one wins)
    if start_time_pattern.search(msg):
        match = time_pattern.search(line)
        if match:
            start_time = datetime.fromisoformat(match.group("time"))

    # Extract finish_time and log_rounds (kept from the last matching line)
    finish_match = finish_time_pattern.search(msg)
    if finish_match:
        match = time_pattern.search(line)
        if match:
            finish_time = datetime.fromisoformat(match.group("time"))
            log_rounds = int(finish_match.group(1))

# Fail with an explicit message instead of a TypeError / ZeroDivisionError below.
if start_time is None:
    raise ValueError(f"No timestamped 'Block 1 read time' message found in {CONDUIT_LOG}")
if finish_time is None:
    raise ValueError(f"No timestamped 'round r=<N> ... exported in' message found in {CONDUIT_LOG}")
if log_rounds <= 0:
    raise ValueError(f"Last exported round is {log_rounds}; cannot compute a mean export time")

# Calculate total_export_time and mean_export_time
# log_rounds is the round number of the last export message and is used as the round count.
total_export_time = finish_time - start_time
mean_export_time = total_export_time.total_seconds() / log_rounds

# Print results
print(f"Start Time: {start_time}")
print(f"Finish Time: {finish_time}")
print(f"Log Rounds: {log_rounds}")
print(f"Total Export Time: {total_export_time}")
print(f"Mean Export Time: {mean_export_time} seconds")

Start Time: 2023-08-20 22:59:43.436898-05:00
Finish Time: 2023-08-20 23:01:32.968865-05:00
Log Rounds: 10
Total Export Time: 0:01:49.531967
Mean Export Time: 10.9531967 seconds


# Query the DB

In [3]:
import pandas as pd

def query(sql: str) -> pd.DataFrame:
    """Execute ``sql`` over the ``PGCONN`` connection URI and return the result as a DataFrame."""
    result = pd.read_sql(sql, con=PGCONN)
    return result

def get_stats() -> dict[str, pd.DataFrame]:
    """Run the three pg_stat_statements reports.

    Returns:
        A dict with keys ``"total_time"``, ``"slowest"`` and ``"memhog"``, each
        mapped to the DataFrame produced by the corresponding query.
    """
    # Reports are executed in this order, one database round-trip each.
    reports = (
        ("total_time", QUERY_TOTAL_TIME),
        ("slowest", QUERY_SLOWEST),
        ("memhog", QUERY_MEMHOG),
    )
    return {label: query(sql) for label, sql in reports}

In [4]:
dfs = get_stats()

## Memory Hogs 

In [5]:
dfs["memhog"]

Unnamed: 0,dbid,query,mem
0,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",2983769
1,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",1815364
2,16384,"INSERT INTO app (index, creator, params, delet...",1622496
3,16384,"INSERT INTO account (addr, microalgos, rewards...",1490737
4,16384,"INSERT INTO account (addr, microalgos, rewards...",889495
5,16384,"INSERT INTO account_app (addr, app, localstate...",726251
6,16384,"INSERT INTO app_box AS ab (app, name, value) V...",549362
7,16384,"INSERT INTO account (addr, microalgos, rewards...",30614
8,16384,"SELECT max(round) as max_round, count(*) as tx...",6877
9,16384,CREATE EXTENSION pg_stat_statements,2796


## Slowest

In [6]:
dfs["slowest"]

Unnamed: 0,dbid,query,calls,tot_s,min_s,max_s,mean_s
0,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",10,34.932,2.411,6.391,3.493
1,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",107,339.045,0.021,5.926,3.169
2,16384,"SELECT max(round) as max_round, count(*) as tx...",1,0.148,0.148,0.148,0.148
3,13757,create database performance_db,1,0.12,0.12,0.12,0.12
4,16384,"INSERT INTO block_header (round, realtime, rew...",11,1.28,0.0,0.584,0.116
5,16384,"SELECT round, COUNT(*) as txns FROM txn GROUP ...",1,0.102,0.102,0.102,0.102
6,16384,CREATE EXTENSION pg_stat_statements,1,0.009,0.009,0.009,0.009
7,16384,CREATE TABLE IF NOT EXISTS block_header ( roun...,1,0.007,0.007,0.007,0.007
8,16384,CREATE TABLE IF NOT EXISTS app_box ( app bigin...,1,0.007,0.007,0.007,0.007
9,16384,CREATE TABLE IF NOT EXISTS app ( index bigint ...,1,0.005,0.005,0.005,0.005


# !! Most Overall Time

In [7]:
# NOTE: `ttime` is bound to the very DataFrame stored in `dfs`, not to a copy,
# so any in-place change made through `ttime` also changes dfs["total_time"].
ttime = dfs["total_time"]
ttime

Unnamed: 0,dbid,query,tot_s,calls,mean_s,cpu_pct
0,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",339.045,107,3.169,80.58
1,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",34.932,10,3.493,8.3
2,16384,"INSERT INTO app (index, creator, params, delet...",23.537,165793,0.0,5.59
3,16384,"INSERT INTO account (addr, microalgos, rewards...",8.791,83229,0.0,2.09
4,16384,"INSERT INTO account (addr, microalgos, rewards...",8.716,183247,0.0,2.07
5,16384,"INSERT INTO account_app (addr, app, localstate...",2.271,73873,0.0,0.54
6,16384,"INSERT INTO app_box AS ab (app, name, value) V...",1.493,73869,0.0,0.35
7,16384,"INSERT INTO block_header (round, realtime, rew...",1.28,11,0.116,0.3
8,16384,"INSERT INTO account (addr, microalgos, rewards...",0.216,10001,0.0,0.05
9,16384,"SELECT max(round) as max_round, count(*) as tx...",0.148,1,0.148,0.04


## `txn` validation and stats correction for concurrency

In [8]:
# Highest round and total row count actually present in the `txn` table.
df_txn_stats = query(
    """SELECT max(round) as max_round, count(*) as txn_count
FROM txn"""
)

# The aggregate yields a single row, labelled 0 by the default index.
rounds = df_txn_stats.loc[0, "max_round"]
txn_count = df_txn_stats.loc[0, "txn_count"]

df_txn_stats

Unnamed: 0,max_round,txn_count
0,10,285789


In [9]:
# Transactions per round. GROUP BY alone leaves the row order unspecified, so
# ORDER BY is added to make the displayed table deterministic across runs.
df_round_txn = query("""SELECT round, COUNT(*) as txns FROM txn GROUP BY round ORDER BY round""")
df_round_txn

Unnamed: 0,round,txns
0,1,50001
1,2,26220
2,3,26285
3,4,26237
4,5,26122
5,6,26193
6,7,26241
7,8,25991
8,9,26218
9,10,26281


In [10]:
ttime

Unnamed: 0,dbid,query,tot_s,calls,mean_s,cpu_pct
0,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",339.045,107,3.169,80.58
1,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",34.932,10,3.493,8.3
2,16384,"INSERT INTO app (index, creator, params, delet...",23.537,165793,0.0,5.59
3,16384,"INSERT INTO account (addr, microalgos, rewards...",8.791,83229,0.0,2.09
4,16384,"INSERT INTO account (addr, microalgos, rewards...",8.716,183247,0.0,2.07
5,16384,"INSERT INTO account_app (addr, app, localstate...",2.271,73873,0.0,0.54
6,16384,"INSERT INTO app_box AS ab (app, name, value) V...",1.493,73869,0.0,0.35
7,16384,"INSERT INTO block_header (round, realtime, rew...",1.28,11,0.116,0.3
8,16384,"INSERT INTO account (addr, microalgos, rewards...",0.216,10001,0.0,0.05
9,16384,"SELECT max(round) as max_round, count(*) as tx...",0.148,1,0.148,0.04


## Massage `ttime` with rounds / total time / concurrency mindedness

In [11]:
# Start from a fresh copy of the query result so that this cell
#   * does not mutate dfs["total_time"], and
#   * can be re-run safely (DataFrame.insert raises if the column already exists).
ttime = dfs["total_time"].copy()

# Constant context columns: wall-clock export duration from the log and the
# total number of rows in `txn`.
ttime['runtime'] = total_export_time.total_seconds()
ttime['txns'] = txn_count

# extrap_s defaults to the reported total; for the `copy "txn"` statement it is
# replaced by mean call time multiplied by the number of rounds from the log.
# NOTE(review): presumably this corrects for the copy running concurrently
# (see the section title) - confirm the intended model.
ttime['extrap_s'] = ttime['tot_s']
# Plain substring match: no regex needed, and it avoids the invalid "\(" escape.
mask = ttime['query'].str.contains('copy "txn" ( "round"', regex=False)
ttime.loc[mask, 'extrap_s'] = ttime.loc[mask, 'mean_s'] * log_rounds

# Share of the wall-clock export time attributed to each statement, in percent.
ttime['load_pct'] = ttime['extrap_s'] / ttime['runtime'] * 100

# Final column order; the starred names flag the derived (extrapolated) columns.
ttime = ttime[['extrap_s', 'cpu_pct', 'load_pct', 'tot_s', 'calls', 'mean_s', 'query', 'dbid', 'runtime', 'txns']]
ttime = ttime.rename(columns={
    'extrap_s': 'extrap_s*',
    'load_pct': 'load_pct**'
})

In [12]:
ttime

Unnamed: 0,extrap_s*,cpu_pct,load_pct**,tot_s,calls,mean_s,query,dbid,runtime,txns
0,31.69,80.58,28.932193,339.045,107,3.169,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",16384,109.531967,285789
1,34.932,8.3,31.892059,34.932,10,3.493,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",16384,109.531967,285789
2,23.537,5.59,21.488704,23.537,165793,0.0,"INSERT INTO app (index, creator, params, delet...",16384,109.531967,285789
3,8.791,2.09,8.025967,8.791,83229,0.0,"INSERT INTO account (addr, microalgos, rewards...",16384,109.531967,285789
4,8.716,2.07,7.957494,8.716,183247,0.0,"INSERT INTO account (addr, microalgos, rewards...",16384,109.531967,285789
5,2.271,0.54,2.073367,2.271,73873,0.0,"INSERT INTO account_app (addr, app, localstate...",16384,109.531967,285789
6,1.493,0.35,1.363072,1.493,73869,0.0,"INSERT INTO app_box AS ab (app, name, value) V...",16384,109.531967,285789
7,1.28,0.3,1.168609,1.28,11,0.116,"INSERT INTO block_header (round, realtime, rew...",16384,109.531967,285789
8,0.216,0.05,0.197203,0.216,10001,0.0,"INSERT INTO account (addr, microalgos, rewards...",16384,109.531967,285789
9,0.148,0.04,0.13512,0.148,1,0.148,"SELECT max(round) as max_round, count(*) as tx...",16384,109.531967,285789


# `to_clipboard()`

In [13]:
ttime.to_clipboard()