diff --git a/CHANGES.rst b/CHANGES.rst
index f142a95..2e2c331 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -48,6 +48,12 @@
 - Allow excluding particular benchmarks on the command line. For
   example, ``-cold``.
 
+- When benchmarking multiple ZODB configurations, run a particular
+  benchmark for all databases before moving on to the next benchmark.
+  Previously, all benchmarks for a database were run before moving on
+  to the next database. This makes it a bit easier to eyeball results
+  as the process is running.
+
 0.7.0 (2019-05-31)
 ==================
 
diff --git a/scripts/zs_matrix_graph.py b/scripts/zs_matrix_graph.py
index d230d31..4cb64b5 100755
--- a/scripts/zs_matrix_graph.py
+++ b/scripts/zs_matrix_graph.py
@@ -13,7 +13,6 @@
 from __future__ import print_function
 
 # pylint:disable=too-many-locals
-
 import argparse
 import json
 import os
@@ -36,6 +35,8 @@ def _fix_database(n, version=''):
         result = 'ZEO'
     elif 'fs' in n.lower() or 'filestorage' in n.lower():
         result = 'FileStorage'
+    if 'sqlite' in n.lower():
+        result = 'SQLite'
     if version:
         result = f'{result} ({version})'
     return result
@@ -54,7 +55,14 @@ def suite_to_benchmark_data(_args, benchmark_suite, version=''):
         # '{c=1 processes, o=100', ' mysqlclient_hf: read 100 hot objects'
         prefix, suffix = name.rsplit('}', 1)
 
-        ConcurrencyKind = 'processes' if 'processes' in prefix else 'threads'
+        if 'processes' in prefix:
+            ConcurrencyKind = 'processes'
+        elif 'greenlets' in prefix:
+            ConcurrencyKind = 'greenlets'
+        else:
+            assert 'threads' in prefix
+            ConcurrencyKind = 'threads'
+
         prefix = prefix.replace(' processes', '').replace(' threads', '')
         prefix = prefix.replace(' greenlets', '')
 
@@ -87,34 +95,134 @@ def suite_to_benchmark_data(_args, benchmark_suite, version=''):
     return df
 
 
-def save_one(df, benchmark_name, outdir, palette=None):
-    df = df.query('action=="%s"' % benchmark_name)
-    fname = benchmark_name.replace(' ', '_').replace('/', '_') + '.png'
+def save_one(df, benchmark_name, outdir, palette=None,
+             # The x and y are for an individual graph in the matrix
+             x="concurrency", y="duration",
+             # The col and row define the columns and rows of the matrix
+             col="objects", row="concurrency_kind",
+             # while hue defines the category within an individual graph.
+             hue="database", hue_order=None,
+             show_y_ticks=False, kind="bar",
+             **kwargs):
+
+    fname = benchmark_name.replace(' ', '_').replace('/', '_').replace(':', '_')
+    fname = f'{fname}_{x}_{y}_{col}_{row}_{hue}_{kind}'
 
     fig = seaborn.catplot(
-        "concurrency", "duration",
+        x, y,
         data=df,
-        #kind="swarm", # The swarm plots is also interesting
-        kind="bar",
-        hue="database",
-        hue_order=sorted(df['database'].unique()),
-        col="objects",
-        row="concurrency_kind",
+        #kind="swarm", # The swarm plot is also interesting, as is point
+        kind=kind,
+        hue=hue,
+        hue_order=sorted(df[hue].unique()) if hue_order is None else hue_order,
+        col=col,
+        row=row,
         palette=palette,
         sharey=False,
         legend=False,
+        **kwargs
     )
-    fig.set(ylabel="ms")
+    if not show_y_ticks:
+        fig.set(yticks=[])
+    else:
+        fig.set(ylabel="ms")
     fig.add_legend(title=benchmark_name)
-    fig.savefig(os.path.join(outdir, fname), transparent=True)
+    for ext in ('.png',):
+        fig.savefig(os.path.join(outdir, fname) + ext, transparent=True)
     fig.despine()
     plt.close(fig.fig)
 
 
-def save_all(df, outdir, versions=None):
+def save_all(df, outdir, versions=None, pref_db_order=None):
     all_bmarks = df['action'].unique()
 
+    # The drawing functions use Cocoa and don't work from either
+    # background threads or child processes.
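+    # Consequently each figure is rendered serially in this process;
+    # the ProcessPoolExecutor variant below is kept for reference.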
+    # pool = ProcessPoolExecutor()
+    # def _save_one(*args, **kwargs):
+    #     pool.submit(save_one, *args, **kwargs)
+    _save_one = save_one
+
     for bmark in all_bmarks:
-        save_one(df, bmark, outdir, palette='Paired' if versions else None)
+        action_data = df[df.action == bmark]
+        action_data = action_data[action_data.concurrency_kind != "greenlets"]
+        _save_one(
+            action_data, bmark, outdir,
+            palette='Paired' if versions or pref_db_order else None,
+            hue_order=pref_db_order,
+        )
+
+
+    if versions:
+        all_dbs_including_versions = df['database'].unique()
+        all_dbs = {
+            db.replace('(' + versions[0] + ')', '').replace(
+                '(' + versions[1] + ')', ''
+            ).strip()
+            for db in all_dbs_including_versions
+        }
+
+        parent_out_dir = outdir
+        for root_db in all_dbs:
+            outdir = os.path.join(parent_out_dir, root_db)
+            os.makedirs(outdir, exist_ok=True)
+            db_v1 = f"{root_db} ({versions[0]})"
+            db_v2 = f"{root_db} ({versions[1]})"
+            db_df = df[df.database == db_v1]
+            db_df2 = df[df.database == db_v2]
+            db_df = db_df.append(db_df2)
+
+            for bmark in all_bmarks:
+                # adf: By database, by action
+                adf = db_df[db_df.action == bmark]
+                _save_one(
+                    adf.query('concurrency > 1'),
+                    f"{root_db}: {bmark}",
+                    outdir,
+                    x="concurrency_kind",
+                    hue="database",
+                    row="concurrency",
+                    palette='Paired',
+                )
+                # This puts all three concurrencies together
+                # and emphasizes the differences between them.
+                _save_one(
+                    adf.query('concurrency > 1'),
+                    f"{root_db}: {bmark}",
+                    outdir,
+                    x="database",
+                    hue="concurrency_kind",
+                    row="concurrency",
+                    palette='Accent',
+                    order=sorted((db_v1, db_v2)),
+                )
+
+                cdf = adf[adf.objects == 20]
+                try:
+                    _save_one(
+                        cdf,
+                        f"{root_db}: {bmark} | objects = 20",
+                        outdir,
+                        palette='Paired',
+                        col="concurrency_kind", row="objects",
+                    )
+                except ValueError:
+                    continue
+
+                for ck in adf['concurrency_kind'].unique():
+                    ckf = adf[adf.concurrency_kind == ck]
+                    # ckf: drilldown by database, by action, by concurrency kind.
+                    for oc in ckf['objects'].unique():
+                        ocf = ckf[ckf.objects == oc]
+                        _save_one(
+                            ocf,
+                            f"{root_db}: {bmark} ck={ck} o={oc}",
+                            outdir,
+                            palette='Paired',
+                            col="concurrency_kind", row="objects",
+                            show_y_ticks=True,
+                        )
 
 
 def main():
@@ -162,7 +270,7 @@ def main():
 
     print("Saving images to", outdir)
 
-    matplotlib.rcParams["figure.figsize"] = 10, 5
+    matplotlib.rcParams["figure.figsize"] = 20, 10
 
     seaborn.set(style="white")
     save_all(df, outdir, args.versions)
diff --git a/scripts/zs_matrix_runner.py b/scripts/zs_matrix_runner.py
index 895bfb5..253d783 100755
--- a/scripts/zs_matrix_runner.py
+++ b/scripts/zs_matrix_runner.py
@@ -25,10 +25,11 @@
 # The type of runner to enable, and the arguments needed
 # to use it.
+# TODO: Be intelligent about picking gevent based on the drivers
 procs = {
-    'gevent': ('--threads', 'shared', '--gevent'),
-    # 'process': (),
-    # 'threads': ('--threads', 'shared'),
+    #'gevent': ('--threads', 'shared', '--gevent'),
+    'process': (),
+    'threads': ('--threads', 'shared'),
 }
 
 # The concurrency levels.
@@ -41,8 +42,8 @@
 # How many objects
 counts = [
     1,
-    10,
-    100
+    5,
+    20
 ]
 
 # The virtual environment to use.
@@ -50,7 +51,7 @@
 envs = [
     #'relstorage38',
     'relstorage27',
-    #'relstorage27-rs2',
+    'relstorage27-rs2',
 ]
 
 if 'ZS_MATRIX_ENV' in os.environ:
@@ -90,13 +91,13 @@
 def run_one(
         env, proc, conc, count, conf,
         excluded=(),
-        processes=1, # How many times the whole thing is repeated.
+        processes=2, # How many times the whole thing is repeated.
         # How many times does the function get to run its loops. If
        # processes * values = 1, then it can't report a standard deviation
         # or print stability warnings.
-        values=3,
+        values=4,
         warmups=0,
-        min_time_ms=20.0, # Default is 100ms
+        min_time_ms=50.0, # Default is 100ms
         loops=3 # How many loops (* its inner loops)
     ): # pylint:disable=too-many-locals
     if 'pypy' in env:
@@ -158,6 +159,12 @@
     # 'new_oid',
     # ])
 
+    cmd.extend([
+        'add',
+        'warm',
+        'cold',
+    ])
+
     if excluded:
         cmd.append('--')
         for exc in excluded:
diff --git a/src/zodbshootout/_runner.py b/src/zodbshootout/_runner.py
index b3734c9..b5e7826 100644
--- a/src/zodbshootout/_runner.py
+++ b/src/zodbshootout/_runner.py
@@ -22,6 +22,7 @@
 import os
 import functools
 
+from collections import defaultdict
 
 from pyperf import Benchmark
 from pyperf import BenchmarkSuite
@@ -190,10 +191,24 @@
         # If we're not going to run the add benchmark,
         # put data in if it's not there already.
         include_data='add' not in options.benchmarks)
+    _run_with_data(runner, options, data, contenders)
 
-    for db_factory in contenders:
-        _run_benchmarks_for_contender(runner, options, data, db_factory)
+
+def _run_with_data(runner, options, data, contenders):
+    db_benchmarks = defaultdict(dict)
+    speedtest = _create_speedtest(options, data)
+    for bench in BENCHMARKS:
+        bench_opt_name = bench[2]
+        if bench_opt_name not in options.benchmarks:
+            continue
+
+        for db_factory in contenders:
+            result = _run_benchmark_for_contender(runner, options,
+                                                  speedtest, bench, db_factory)
+            db_benchmarks[db_factory.name][bench_opt_name] = result
+
+    for db_factory in contenders:
+        _combine_benchmark_results(options, db_factory, db_benchmarks[db_factory.name])
 
 
 def _create_speedtest(options, data):
     # For concurrency of 1, or if we're using forked concurrency, we
@@ -261,6 +276,9 @@
             # b'\x03\xd3Pv\xabK\xfc\xdd', Protocol(('localhost',
             # 24003), '1', False))
             'readCurrent',
+
+            # This tends to produce errors: exceptions.AssertionError('finished called wo lock')
+            'conflicts',
         )
     return False
 
@@ -285,7 +303,26 @@ def __call__(self, *args, **kwargs):
             logger.exception("When running %s", self.wrapped)
             return _MAGIC_NUMBER
 
-def _run_benchmarks_for_contender(runner, options, data, db_factory):
+BENCHMARKS = (
+    # (human name format, method name, benchmark option name)
+    # Order matters.
+    ('%s: add %d objects', "bench_add", 'add'),
+    ('%s: store %d raw pickles', "bench_store", 'store'),
+    ('%s: update %d objects', "bench_update", 'update'),
+    ('%s: read %d cold objects', "bench_cold_read", 'cold'),
+    ('%s: read %d cold prefetched objects', "bench_cold_read_prefetch", 'prefetch_cold'),
+    ('%s: readCurrent %d objects', "bench_readCurrent", 'readCurrent'),
+    ('%s: write/read %d objects', "bench_read_after_write", 'warm'),
+    ('%s: read %d hot objects', "bench_hot_read", 'hot'),
+    ('%s: read %d steamin objects', "bench_steamin_read", 'steamin'),
+    ('%s: empty explicit commit', "bench_empty_transaction_commit_explicit", 'ex_commit'),
+    ('%s: empty implicit commit', "bench_empty_transaction_commit_implicit", 'im_commit'),
+    ('%s: tpc', "bench_tpc", "tpc"),
+    ('%s: allocate %d OIDs', "bench_new_oid", "new_oid"),
+    ('%s: update %d conflicting objects', "bench_conflicting_updates", "conflicts"),
+)
+
+def _run_benchmark_for_contender(runner, options, speedtest, bench, db_factory):
     metadata = {
         'gevent': options.gevent,
         'threads': options.threads,
@@ -294,7 +331,7 @@
         'objects_per_txn': options.objects_per_txn,
    }
     # TODO: Include the gevent loop implementation in the metadata.
-    speedtest = _create_speedtest(options, data)
+
 
     if options.gevent:
         conc_name = 'greenlets'
@@ -309,57 +346,34 @@
         options.objects_per_txn,
         db_factory.name
     )
-    db_benchmarks = {}
-    # TODO: Where to include leak prints?
-    for bench_descr, bench_func, bench_opt_name in (
-            # order matters
-            ('%s: add %d objects', speedtest.bench_add, 'add'),
-            ('%s: store %d raw pickles', speedtest.bench_store, 'store'),
-            ('%s: update %d objects', speedtest.bench_update, 'update',),
-            ('%s: read %d cold objects', speedtest.bench_cold_read, 'cold',),
-            ('%s: read %d cold prefeteched objects', speedtest.bench_cold_read_prefetch,
-             'prefetch_cold',),
-            ('%s: readCurrent %d objects', speedtest.bench_readCurrent, 'readCurrent',),
-            ('%s: write/read %d objects', speedtest.bench_read_after_write, 'warm',),
-            ('%s: read %d hot objects', speedtest.bench_hot_read, 'hot',),
-            ('%s: read %d steamin objects', speedtest.bench_steamin_read, 'steamin',),
-            ('%s: empty explicit commit', speedtest.bench_empty_transaction_commit_explicit,
-             'ex_commit',),
-            ('%s: empty implicit commit', speedtest.bench_empty_transaction_commit_implicit,
-             'im_commit',),
-            ('%s: tpc', speedtest.bench_tpc, "tpc", ),
-            ('%s: allocate %d OIDs', speedtest.bench_new_oid, "new_oid",),
-            ('%s: update %d conflicting objects', speedtest.bench_conflicting_updates, "conflicts",),
-    ):
-        if bench_opt_name not in options.benchmarks:
-            continue
-        if _is_known_bad(options, bench_opt_name, db_factory):
-            # TODO: Add option to disable this.
-            bench_func = _disabled_benchmark
-            bench_descr += ' (disabled)'
-
-        if options.keep_going:
-            bench_func = _SafeFunction(bench_func)
-
-        name_args = (benchmark_descriptor, ) if '%d' not in bench_descr else (
-            benchmark_descriptor, options.objects_per_txn)
-        bench_name = bench_descr % name_args
-
-        # The decision on how to account for concurrency (whether to treat
-        # that as part of the inner loop and thus divide total times by it)
-        # depends on the runtime behaviour. See DistributedFunction for details.
-        benchmark = runner.bench_time_func(
-            bench_name,
-            bench_func,
-            db_factory,
-            inner_loops=speedtest.inner_loops if bench_func.inner_loops else 1,
-            metadata=metadata,
-        )
-
-        db_benchmarks[bench_opt_name] = benchmark
-
-    _combine_benchmark_results(options, db_factory, db_benchmarks)
+    # TODO: Where to include leak prints?
+    bench_descr, bench_func, bench_opt_name = bench
+    bench_func = getattr(speedtest, bench_func)
+
+    if _is_known_bad(options, bench_opt_name, db_factory):
+        # TODO: Add option to disable this.
+        bench_func = _disabled_benchmark
+        bench_descr += ' (disabled)'
+
+    if options.keep_going:
+        bench_func = _SafeFunction(bench_func)
+
+    name_args = (benchmark_descriptor, ) if '%d' not in bench_descr else (
+        benchmark_descriptor, options.objects_per_txn)
+    bench_name = bench_descr % name_args
+
+    # The decision on how to account for concurrency (whether to treat
+    # that as part of the inner loop and thus divide total times by it)
+    # depends on the runtime behaviour. See DistributedFunction for details.
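+    # Note: pyperf computes the reported per-operation time by dividing
+    # raw timings by (loops * inner_loops), so inner_loops must match
+    # the number of operations the benchmark function performs per call.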
+    benchmark = runner.bench_time_func(
+        bench_name,
+        bench_func,
+        db_factory,
+        inner_loops=speedtest.inner_loops if bench_func.inner_loops else 1,
+        metadata=metadata,
+    )
+    return benchmark
 
 
 def _combine_benchmark_results(options, db_factory, db_benchmarks):
     # Do this in the master only, after running all the benchmarks
diff --git a/src/zodbshootout/speedtest.py b/src/zodbshootout/speedtest.py
index 6a63899..34c23cc 100644
--- a/src/zodbshootout/speedtest.py
+++ b/src/zodbshootout/speedtest.py
@@ -711,6 +711,7 @@ def __do_bench_cold_read(self, loops, db_factory, prefetch):
         for i in range(total_loops):
             db = db_factory()
+            begin = perf_counter()
             conn = db.open()
             root = conn.root()