In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from typing import Any

# Rerank latency vs. offered load: one JSON log per QPS level is summarised as
# p50 / p90 / p99 and drawn against QPS on a log-scaled latency axis.
qps_values = [0.5, 1, 2, 3, 4, 6, 8, 12]
# The tail series is the 99th percentile, so it is stored under a matching name.
p50, p90, p99 = [], [], []

for qps in qps_values:
    with open(f'logs/rerank-qps-{qps}.json') as f:
        # Scale by 1000 to match the "Latency (ms)" axis label
        # (assumes the logged values are in seconds — TODO confirm).
        latencies = 1000 * np.array(json.load(f))
    p50.append(np.percentile(latencies, 50))
    p90.append(np.percentile(latencies, 90))
    p99.append(np.percentile(latencies, 99))

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(qps_values, p50, 'o-', label='p50')
ax.plot(qps_values, p90, 's-', label='p90')
ax.plot(qps_values, p99, '^-', label='p99')
ax.set_yscale('log')
ax.set_xlabel('QPS')
ax.set_ylabel('Latency (ms)')
# Replace the log axis' default tick labels with ScalarFormatter output on
# both major and minor ticks.
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
ax.yaxis.set_minor_formatter(ticker.ScalarFormatter())
ax.set_title('Rerank Latency vs QPS')
ax.legend()
ax.grid(True, which='major', alpha=0.5)
ax.grid(True, which='minor', alpha=0.2)
# Put an x tick at every measured QPS level.
ax.set_xticks(qps_values)
fig.tight_layout()
plt.show()

In [None]:
import json
from typing import Any

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import LogLocator, FuncFormatter

# QPS levels for which an embed-queries log file is read below.
qps_values = [10, 50, 100, 200, 300, 500]
p50, p90, p95 = [], [], []

for qps in qps_values:
    with open(f'logs/embed-queries-qps-{qps}.json') as f:
        # Every logged value is multiplied by 1000 before the percentiles
        # are taken; the y axis below is labelled in milliseconds.
        latencies = list(1000 * np.array(json.load(f)))
    for series, q in ((p50, 50), (p90, 90), (p95, 95)):
        series.append(np.percentile(latencies, q))

# One figure with a single axes; the three percentile curves share it.
ax = plt.figure(figsize=(8, 5)).add_subplot(1, 1, 1)
ax.plot(qps_values, p50, 'o-', label='p50')
ax.plot(qps_values, p90, 's-', label='p90')
ax.plot(qps_values, p95, '^-', label='p95')
ax.set_yscale('log')
ax.set_xlabel('QPS')
ax.set_ylabel('Latency (ms)')
# Major ticks sit on powers of ten; minor ticks use the listed multiples
# within each decade.
ax.yaxis.set_major_locator(LogLocator(base=10, subs=[1.0]))
ax.yaxis.set_minor_locator(LogLocator(base=10, subs=[1.4, 2, 3, 4, 5, 6.5, 8]))

def fmt(x: float, _: Any) -> str:
    """Format a latency tick value as a plain number for the y axis.

    Used as a ``FuncFormatter`` callback, so it takes the tick value and a
    second positional argument that is ignored.

    Args:
        x: Tick value to label.
        _: Unused second callback argument.

    Returns:
        The value without exponent notation for ``x >= 1000``; below that,
        a compact representation that keeps fractional ticks distinct
        (``1.4`` -> ``'1.4'``, ``14.0`` -> ``'14'``).
    """
    if x >= 1000:
        # Fixed-point keeps large values out of scientific notation.
        return f'{x:.0f}'
    # '.0f' would collapse fractional minor ticks (1.4 -> '1', 0.2 -> '0');
    # 'g' drops trailing zeros but keeps the significant fraction.
    return f'{x:g}'

# Label major and minor y ticks through fmt; each tick level gets its own
# FuncFormatter instance.
ax.yaxis.set_major_formatter(FuncFormatter(fmt))
ax.yaxis.set_minor_formatter(FuncFormatter(fmt))
ax.set_title('Embed (Query) Latency vs QPS')
ax.legend()
# Minor grid lines are drawn fainter than the major ones.
ax.grid(True, which='major', alpha=0.5)
ax.grid(True, which='minor', alpha=0.2)
# Put an x tick at every QPS level in qps_values.
ax.set_xticks(qps_values)
ax.figure.tight_layout()
plt.show()