In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Location of the CSV dump, kept in a named constant so the path is easy to
# spot and change; pandas expands the leading "~" to the user's home directory.
DUMP_CSV_PATH = "~/troubles/YQ-2614/dump.csv"

# Read the whole dump into `data`.
data = pd.read_csv(DUMP_CSV_PATH)

In [None]:
# Build `df`: one row per query from the dump that was issued with the
# query_args value below, reduced to the columns the plot needs.
query_args_value = "[2025-05-24 16:00:00 +0000 UTC 2025-05-25 16:00:00 +0000 UTC]"

# Select rows and columns in a single .loc call and take an explicit copy.
# The previous form (data[mask][[cols]] followed by column assignment) wrote
# into a chained-indexing slice of `data`, which is what makes pandas emit
# SettingWithCopyWarning.
df = data.loc[
    data["query_args"] == query_args_value,
    ["database_name", "query_text", "state", "elapsed_time_ms", "created_at"],
].copy()

# Convert elapsed milliseconds to float seconds.
df['duration_seconds'] = pd.to_timedelta(df['elapsed_time_ms'], unit='ms').dt.total_seconds()

# Pull the quoted value of `TabletId = '...'` out of the query text.
# expand=False returns a Series for a single capture group; rows whose
# query_text does not match get NaN.
df['tablet_id'] = df['query_text'].str.extract(r"TabletId\s*=\s*'([^']+)'", expand=False)

# The source columns are no longer needed once the derived ones exist.
df = df.drop(columns=["elapsed_time_ms", "query_text"])
df = df.sort_values('tablet_id')

# Parse created_at as timezone-aware UTC and keep only rows created at or
# after the cutoff.
df['created_at'] = pd.to_datetime(df['created_at'], utc=True)
start = pd.Timestamp('2025-05-26T00:00:00Z')
df = df[df['created_at'] >= start]
df

In [None]:
import matplotlib.ticker as mticker

def format_seconds(x, pos=None):
    """Format a duration in seconds as a compact human-readable string.

    The (x, pos) signature lets the function be used as the callback of
    matplotlib's FuncFormatter; `pos` is accepted but not used.

    Args:
        x: Duration in seconds. It is truncated toward zero to whole seconds.
        pos: Tick position supplied by the formatter machinery; ignored.

    Returns:
        "{h}h {mm}m {ss}s" when the magnitude is at least one hour,
        "{m}m {ss}s" when it is at least one minute, otherwise "{s}s".
        Negative durations are prefixed with "-".
    """
    total = int(x)
    # Format the magnitude and re-attach the sign. Applying // and % directly
    # to a negative int floors toward -infinity, which previously turned -5
    # into "59m 55s".
    sign = "-" if total < 0 else ""
    hours, remainder = divmod(abs(total), 3600)
    minutes, secs = divmod(remainder, 60)
    if hours > 0:
        return f"{sign}{hours}h {minutes:02d}m {secs:02d}s"
    if minutes > 0:
        return f"{sign}{minutes}m {secs:02d}s"
    return f"{sign}{secs}s"

# Draw one panel per database, side by side with a shared latency axis:
# every query as a scatter point plus a line through the per-tablet medians.
unique_dbs = sorted(df['database_name'].unique())
n = len(unique_dbs)
if n == 0:
    # plt.subplots cannot build a grid with zero columns; fail with a message
    # that names the actual cause.
    raise ValueError("df has no rows: nothing to plot")

# squeeze=False always returns a 2-D array of axes, so the single-database
# case needs no special handling; ravel() flattens it for iteration.
fig, axes = plt.subplots(1, n, figsize=(7*n, 6), sharey=True, squeeze=False)
axes = axes.ravel()
fig.suptitle("Retrieving data from YDB Cloud Logging (GH issue #18802)")

for ax, dbname in zip(axes, unique_dbs):
    group = df[df['database_name'] == dbname].sort_values('tablet_id')

    # Scatter plot of all points
    group.plot.scatter(x="tablet_id", y="duration_seconds", ax=ax, label='Query latency (sample)')
    ax.set_title(f"Database: {dbname}")
    ax.set_xlabel("Tablet ID")
    ax.set_ylabel("Latency (seconds)")
    # Rotate via tick_params rather than set_xticklabels(get_xticklabels()):
    # the latter freezes the label text without fixing the tick locations
    # (matplotlib warns about this) and ran before the median line was drawn.
    ax.tick_params(axis='x', labelrotation=90)

    # Median latency per tablet_id. groupby returns its keys in sorted order
    # by default, so no extra reindex is needed to line the points up.
    medians = group.groupby("tablet_id")["duration_seconds"].median()
    ax.plot(
        medians.index, medians.values, 'r-', linewidth=1, marker='o', label='Query latency (median)'
    )

    # Show y ticks as h/m/s instead of raw seconds.
    ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_seconds))

    ax.legend()

plt.tight_layout()
plt.show()