# Constants

In [1]:
from pathlib import Path
# --- Locations ---------------------------------------------------------------
# The conduit log is looked up in the directory the notebook is launched from.
CONDUIT_LOG = Path.cwd().joinpath("conduit.log")
PGCONN = "postgresql://algorand:algorand@localhost:65432/performance_db"

# --- Report sizing -----------------------------------------------------------
QUERY_CHARS = 1000  # maximum characters of normalized query text kept per row
LIMIT = 15          # row cap applied to every report query below


def _secs_col(source: str, alias: str) -> str:
    """Return a SELECT expression: `source` / 1000, rounded to 3 places, named `alias`."""
    return f"round(({source}/1000)::numeric, 3) AS {alias}"


def _top_statements(columns: list, order_by: str) -> str:
    """Return a pg_stat_statements report selecting `columns`, sorted by `order_by` descending."""
    return "\n".join((
        "SELECT dbid, " + ", ".join(columns),
        "FROM pg_stat_statements",
        f"ORDER BY {order_by} DESC",
        f"LIMIT {LIMIT}",
    ))


# Query columns
# Query text is normalized in SQL: text from `--` to end of line is removed,
# whitespace runs become one space, the result is trimmed and cut to QUERY_CHARS.
_no_comments = "regexp_replace(query, '--.*?$', '', 'gn')"
_one_spaced = f"regexp_replace({_no_comments}, '\\s+', ' ', 'g')"
QUERY_COL = f"substring(trim({_one_spaced}), 1, {QUERY_CHARS}) AS query"

TOTAL_SECS_COL = _secs_col("total_exec_time", "tot_s")
MEAN_SECS_COL = _secs_col("mean_exec_time", "mean_s")
MIN_SECS_COL = _secs_col("min_exec_time", "min_s")
MAX_SECS_COL = _secs_col("max_exec_time", "max_s")

# Each statement's total_exec_time as a percentage of the sum over all rows.
CPU_COL = "round((100 * total_exec_time / sum(total_exec_time::numeric) OVER ())::numeric, 2) AS cpu_pct"

# Queries
_MEM_EXPR = "(shared_blks_hit+shared_blks_dirtied)"

QUERY_TOTAL_TIME = _top_statements(
    [QUERY_COL, TOTAL_SECS_COL, "calls", MEAN_SECS_COL, CPU_COL],
    order_by="total_exec_time",
)

QUERY_SLOWEST = _top_statements(
    [QUERY_COL, "calls", TOTAL_SECS_COL, MIN_SECS_COL, MAX_SECS_COL, MEAN_SECS_COL],
    order_by="mean_exec_time",
)

QUERY_MEMHOG = _top_statements(
    [QUERY_COL, f"{_MEM_EXPR} as mem"],
    order_by=_MEM_EXPR,
)

print(f"CONDUIT_LOG={CONDUIT_LOG!r}")

CONDUIT_LOG=PosixPath('/Users/zeph/github/algorand/conduit/performance/conduit.log')


# Parse the log

In [2]:
from datetime import datetime
import json
import re

# Read the whole conduit log; every non-blank line is parsed below as one JSON object.
with open(CONDUIT_LOG) as f:
    log_content = f.read()

lines = log_content.strip().split("\n")


# Regular expressions for extracting required data
# - start marker:  the message announcing the read time of block 1
# - finish marker: a "round r=<N> ... exported in" message; group 1 captures N
# - timestamp:     ISO-8601 date-time with a numeric UTC offset (a bare "Z" suffix does not match)
start_time_pattern = re.compile(r'Block 1 read time')
finish_time_pattern = re.compile(r'round r=(\d+) .* exported in')
time_pattern = re.compile(
    r'(?P<time>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?[+-]\d{2}:\d{2})'
)

# Initialize variables to store the required information
start_time = None
finish_time = None
log_rounds = None

# Iterate over the log lines (reusing `lines` rather than splitting the text a second time)
for line in lines:
    if not line.strip():
        # Blank lines (including the single "" produced by an empty file) are not JSON.
        continue

    log_entry = json.loads(line)
    msg = log_entry.get("msg", "")

    # Extract start_time; the timestamp is the first match anywhere in the raw line.
    if start_time_pattern.search(msg):
        match = time_pattern.search(line)
        if match:
            start_time = datetime.fromisoformat(match.group("time"))

    # Extract finish_time and log_rounds; later matches overwrite earlier ones,
    # so the values left after the loop come from the last exported round in the log.
    finish_match = finish_time_pattern.search(msg)
    if finish_match:
        match = time_pattern.search(line)
        if match:
            finish_time = datetime.fromisoformat(match.group("time"))
            log_rounds = int(finish_match.group(1))


# Fail with an explicit message instead of an opaque TypeError / ZeroDivisionError
# when a marker was never seen or the last exported round is 0.
if start_time is None or finish_time is None or not log_rounds:
    raise ValueError(
        f"Could not derive export timings from {CONDUIT_LOG}: "
        f"start_time={start_time}, finish_time={finish_time}, log_rounds={log_rounds}"
    )

# Calculate total_export_time and mean_export_time
total_export_time = finish_time - start_time
mean_export_time = total_export_time.total_seconds() / log_rounds

# Print results
print(f"Start Time: {start_time}")
print(f"Finish Time: {finish_time}")
print(f"Log Rounds: {log_rounds}")
print(f"Total Export Time: {total_export_time}")
print(f"Mean Export Time: {mean_export_time} seconds")

Start Time: 2023-08-21 21:53:40.559895-05:00
Finish Time: 2023-08-21 21:55:06.472253-05:00
Log Rounds: 10
Total Export Time: 0:01:25.912358
Mean Export Time: 8.5912358 seconds


# Query the DB

In [3]:
import pandas as pd

def query(sql: str) -> pd.DataFrame:
    """Execute `sql` over the `PGCONN` connection string and return the result as a DataFrame."""
    frame = pd.read_sql(sql, con=PGCONN)
    return frame

def get_stats() -> dict[str, pd.DataFrame]:
    """Run every report query and return the resulting frames keyed by report name.

    The keys are "total_time", "slowest" and "memhog"; the queries are
    executed in that order.
    """
    reports = (
        ("total_time", QUERY_TOTAL_TIME),
        ("slowest", QUERY_SLOWEST),
        ("memhog", QUERY_MEMHOG),
    )
    return {name: query(sql) for name, sql in reports}

In [4]:
# Run the three report queries once; the cells below read their results from `dfs`.
dfs = get_stats()

## Memory Hogs 

In [5]:
# Statements ranked by shared_blks_hit + shared_blks_dirtied (see QUERY_MEMHOG).
dfs["memhog"]

Unnamed: 0,dbid,query,mem
0,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",3114960
1,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",2984581
2,16384,"INSERT INTO app (index, creator, params, delet...",1622496
3,16384,"INSERT INTO account (addr, microalgos, rewards...",1490860
4,16384,"INSERT INTO account (addr, microalgos, rewards...",889598
5,16384,"INSERT INTO account_app (addr, app, localstate...",726251
6,16384,"INSERT INTO app_box AS ab (app, name, value) V...",546805
7,16384,"INSERT INTO account (addr, microalgos, rewards...",30614
8,16384,CREATE EXTENSION pg_stat_statements,2796
9,16384,CREATE TABLE IF NOT EXISTS block_header ( roun...,630


## Slowest

In [6]:
# Statements ranked by mean_exec_time (see QUERY_SLOWEST).
dfs["slowest"]

Unnamed: 0,dbid,query,calls,tot_s,min_s,max_s,mean_s
0,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",10,32.761,2.466,4.274,3.276
1,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",429,547.225,0.045,2.569,1.276
2,13757,create database performance_db,1,0.083,0.083,0.083,0.083
3,16384,CREATE EXTENSION pg_stat_statements,1,0.009,0.009,0.009,0.009
4,16384,CREATE TABLE IF NOT EXISTS block_header ( roun...,1,0.005,0.005,0.005,0.005
5,16384,CREATE TABLE IF NOT EXISTS txn ( round bigint ...,1,0.003,0.003,0.003,0.003
6,16384,CREATE TABLE IF NOT EXISTS account_app ( addr ...,1,0.003,0.003,0.003,0.003
7,16384,CREATE TABLE IF NOT EXISTS app_box ( app bigin...,1,0.003,0.003,0.003,0.003
8,16384,CREATE TABLE IF NOT EXISTS app ( index bigint ...,1,0.003,0.003,0.003,0.003
9,16384,CREATE TABLE IF NOT EXISTS account ( addr byte...,1,0.003,0.003,0.003,0.003


# !! Most Overall Time

In [7]:
# Statements ranked by total_exec_time (see QUERY_TOTAL_TIME).
# NOTE: `ttime` is the same DataFrame object as dfs["total_time"]; no copy is made here.
ttime = dfs["total_time"]
ttime

Unnamed: 0,dbid,query,tot_s,calls,mean_s,cpu_pct
0,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",547.225,429,1.276,88.48
1,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",32.761,10,3.276,5.3
2,16384,"INSERT INTO app (index, creator, params, delet...",19.993,165793,0.0,3.23
3,16384,"INSERT INTO account (addr, microalgos, rewards...",7.12,83229,0.0,1.15
4,16384,"INSERT INTO account (addr, microalgos, rewards...",7.062,183247,0.0,1.14
5,16384,"INSERT INTO account_app (addr, app, localstate...",2.215,73873,0.0,0.36
6,16384,"INSERT INTO app_box AS ab (app, name, value) V...",1.691,73869,0.0,0.27
7,16384,"INSERT INTO account (addr, microalgos, rewards...",0.234,10001,0.0,0.04
8,13757,create database performance_db,0.083,1,0.083,0.01
9,16384,begin isolation level read uncommitted,0.02,439,0.0,0.0


## `txn` validation and stats correction for concurrency

In [8]:
# Single-row summary of the txn table: highest round present and total row count.
df_txn_stats = query(
    "SELECT max(round) as max_round, count(*) as txn_count\n"
    "FROM txn"
)

# Pull both scalars out of the one result row (index label 0).
rounds = df_txn_stats.at[0, "max_round"]
txn_count = df_txn_stats.at[0, "txn_count"]

df_txn_stats

Unnamed: 0,max_round,txn_count
0,10,500011


In [9]:
# Number of transactions stored per round.
# ORDER BY keeps the row order deterministic; with GROUP BY alone PostgreSQL is
# free to return the groups in any order.
df_round_txn = query("""SELECT round, COUNT(*) as txns FROM txn GROUP BY round ORDER BY round""")
df_round_txn

Unnamed: 0,round,txns
0,1,50001
1,2,50000
2,3,50003
3,4,50001
4,5,50000
5,6,50001
6,7,50003
7,8,50001
8,9,50000
9,10,50001


In [10]:
# Show the total-time table once more, as it looks before the next cell adds columns to it.
ttime

Unnamed: 0,dbid,query,tot_s,calls,mean_s,cpu_pct
0,16384,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",547.225,429,1.276,88.48
1,16384,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",32.761,10,3.276,5.3
2,16384,"INSERT INTO app (index, creator, params, delet...",19.993,165793,0.0,3.23
3,16384,"INSERT INTO account (addr, microalgos, rewards...",7.12,83229,0.0,1.15
4,16384,"INSERT INTO account (addr, microalgos, rewards...",7.062,183247,0.0,1.14
5,16384,"INSERT INTO account_app (addr, app, localstate...",2.215,73873,0.0,0.36
6,16384,"INSERT INTO app_box AS ab (app, name, value) V...",1.691,73869,0.0,0.27
7,16384,"INSERT INTO account (addr, microalgos, rewards...",0.234,10001,0.0,0.04
8,13757,create database performance_db,0.083,1,0.083,0.01
9,16384,begin isolation level read uncommitted,0.02,439,0.0,0.0


## Massage `ttime` with rounds / total time / concurrency mindedness

In [11]:
# Start from a fresh copy of the query result so that
#   * dfs["total_time"] is not modified behind the reader's back, and
#   * this cell can be re-run without failing on already-existing columns.
ttime_work = dfs["total_time"].copy()

# Run-wide context repeated on every row: wall-clock export time (seconds) from the
# log, and the number of rows in the txn table.
ttime_work['runtime'] = total_export_time.total_seconds()
ttime_work['txns'] = txn_count

# extrap_s defaults to the measured total; for the `copy "txn"` statement it is replaced
# by mean_s * log_rounds (presumably because those COPY calls overlap in time, so their
# summed tot_s can exceed the wall-clock runtime -- TODO confirm).
ttime_work['extrap_s'] = ttime_work['tot_s']
# Raw string: `\(` is a regex escape for a literal parenthesis, not a Python escape.
mask = ttime_work['query'].str.contains(r'copy "txn" \( "round"')
ttime_work.loc[mask, 'extrap_s'] = ttime_work.loc[mask, 'mean_s'] * log_rounds

# Share of the wall-clock export time attributed to each statement.
ttime_work['load_pct'] = ttime_work['extrap_s'] / ttime_work['runtime'] * 100

# Final column order; the starred names flag the two derived columns.
ttime = ttime_work[
    ['extrap_s', 'cpu_pct', 'load_pct', 'tot_s', 'calls', 'mean_s', 'query', 'dbid', 'runtime', 'txns']
].rename(columns={
    'extrap_s': 'extrap_s*',
    'load_pct': 'load_pct**'
})

In [12]:
# extrap_s*  : tot_s, except for the `copy "txn"` row, where it is mean_s * log_rounds.
# load_pct** : extrap_s as a percentage of runtime (total export time in seconds).
ttime

Unnamed: 0,extrap_s*,cpu_pct,load_pct**,tot_s,calls,mean_s,query,dbid,runtime,txns
0,12.76,88.48,14.852345,547.225,429,1.276,"copy ""txn"" ( ""round"", ""intra"", ""typeenum"", ""as...",16384,85.912358,500011
1,32.761,5.3,38.133047,32.761,10,3.276,"copy ""txn_participation"" ( ""addr"", ""round"", ""i...",16384,85.912358,500011
2,19.993,3.23,23.27139,19.993,165793,0.0,"INSERT INTO app (index, creator, params, delet...",16384,85.912358,500011
3,7.12,1.15,8.287516,7.12,83229,0.0,"INSERT INTO account (addr, microalgos, rewards...",16384,85.912358,500011
4,7.062,1.14,8.220005,7.062,183247,0.0,"INSERT INTO account (addr, microalgos, rewards...",16384,85.912358,500011
5,2.215,0.36,2.578209,2.215,73873,0.0,"INSERT INTO account_app (addr, app, localstate...",16384,85.912358,500011
6,1.691,0.27,1.968285,1.691,73869,0.0,"INSERT INTO app_box AS ab (app, name, value) V...",16384,85.912358,500011
7,0.234,0.04,0.272371,0.234,10001,0.0,"INSERT INTO account (addr, microalgos, rewards...",16384,85.912358,500011
8,0.083,0.01,0.09661,0.083,1,0.083,create database performance_db,13757,85.912358,500011
9,0.02,0.0,0.02328,0.02,439,0.0,begin isolation level read uncommitted,16384,85.912358,500011


# `to_clipboard()`

In [13]:
# Copy the final table to the system clipboard.
ttime.to_clipboard()