From 3460fd5948b561ea772f21e5582d2d3c50ea9ee6 Mon Sep 17 00:00:00 2001 From: Aditya Gurajada Date: Thu, 13 Jun 2024 19:54:02 -0700 Subject: [PATCH] [Perf] Enhance test.sh and perf_report.py to support multiple iterations. In previous performance test-runs, we were able to only run one iteration of the client-server program for each run-type, test-parameter. We were seeing some fluctuations and unexpected results in the comparison report. This commit enhances the testing framework to: a) Run 1 or more iterations of the client/server program b) Extend the Python perf-comparison report generator to work with another source of input metrics-line(s). c) Comparison report generated using the median value of the relevant metric across the n-iterations. Note: (b) was needed as runs on AWS were aborted midway and had to be partially re-run. We add Python parsing logic to extract out and massage metrics from "Summary: " output lines, that the server program emits to `stdout`. This expedited the report generation task w/o having to re-run all perf-tests on AWS instances. This approach re-casts the new summary data into an input format needed by the existing report generation logic. - To run multiple iterations, use env-var L3_PERF_TEST_NUM_ITERS= Workflow to run the enhanced perf_report.py: - Run test.sh with diff server-threads and #iterations. 
Example: L3_PERF_TEST_NUM_ITERS=5 L3_PERF_SERVER_NUM_THREADS="1 2 4 8" ./test.sh --clock-default $((1000 * 1000)) - Redirect output to a /tmp/file: > /tmp/perf.out 2>&1 - grep 'Summary:' /tmp/perf.out > /tmp/perf.summary.out - Feed the summary file to the Python tool using: ./scripts/perf_report.py --summary-file /tmp/perf.summary.out - tests/pytests/perf_report_test.py: - Add unit-test cases for new parsing methods added --- scripts/perf_report.py | 479 +++++++++++++++++++++++++++++- test.sh | 33 +- tests/pytests/perf_report_test.py | 178 +++++++++++ 3 files changed, 682 insertions(+), 8 deletions(-) diff --git a/scripts/perf_report.py b/scripts/perf_report.py index 7319649..257cb82 100755 --- a/scripts/perf_report.py +++ b/scripts/perf_report.py @@ -9,10 +9,42 @@ """ import sys import argparse +import statistics from collections import OrderedDict from collections import namedtuple from prettytable import PrettyTable +# ########################################################## +# Layout of a summary row from the output file: +# ########################################################## +# pylint: disable-next=line-too-long +# Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread + +# Define indexes for each field in above row-format +FLD_NUM_CLIENTS_IDX = 0 +FLD_RUN_TYPE_IDX = 1 +FLD_NUM_THREADS_IDX = 2 +FLD_NUM_OPS_IDX = 3 +FLD_ELAPSED_TIME_IDX = 4 +FLD_AVG_ELAPSED_TIME_IDX = 5 +FLD_SERVER_THROUGHPUT_IDX = 6 +FLD_CLIENT_THROUGHPUT_IDX = 7 +FLD_AVG_OPS_PER_THREAD_IDX = 8 + +# ----------------------------------------------------------------------------- +# Define a named tuple class for metrics extracted out from a summary line. 
+# pylint: disable-next=line-too-long +MetricsTuple = namedtuple('MetricsTuple', 'svr_value cli_value svr_units_val svr_units_str cli_units_val cli_units_str thread_str') + +# Define a named tuple class for aggregated metrics for a given metric value. +# niters is the # of raw-data-rows that went into computing the aggregates. +AggMetrics = namedtuple('AggMetrics', 'niters, min_val, avg_val, med_val, max_val') + +# Define a named tuple class for the set-of-aggregated metrics from a set of +# summary line-metrics that are squashed into a set of AggMetrics named tuples. +# pylint: disable-next=line-too-long +AggMetricsTuple = namedtuple('AggMetrics', 'svr_value_agg cli_value_agg svr_units_val_agg svr_units_str cli_units_val_agg cli_units_str thread_str') + ############################################################################### # main() driver ############################################################################### @@ -41,6 +73,11 @@ def do_main(args:list): Parse each comma-separated fields and extract values. """ parsed_args = perf_parse_args(args) + perf_summ_file = parsed_args.summ_file + if perf_summ_file is not None: + gen_perf_report_from_summary_file(perf_summ_file) + sys.exit(0) + perf_file = parsed_args.file lctr = 0 @@ -52,7 +89,6 @@ def do_main(args:list): baseline_by_threads = OrderedDict() # Key: Run-type 'Baseline - No logging'; Value: [ ] - metrics_by_run = OrderedDict() heading_row = [] # Setup some defaults to avoid pylint errors. 
@@ -108,9 +144,11 @@ def do_main(args:list): # ------------------------------------------------------------------ by_nthreads_dict = OrderedDict() if nthreads_field not in metrics_by_threads: + # pylint: disable-next=line-too-long metrics_by_threads[nthreads_field] = { run_type: [svr_value, cli_value, svr_str, cli_str, thread_str ] } else: by_nthreads_dict = metrics_by_threads[nthreads_field] + # pylint: disable-next=line-too-long by_nthreads_dict.update( {run_type: [svr_value, cli_value, svr_str, cli_str, thread_str ] } ) metrics_by_threads[nthreads_field] = by_nthreads_dict @@ -277,6 +315,412 @@ def gen_perf_report(heading:list, metrics:dict, base_svr_value:int, base_cli_val print(perf_table) +# ############################################################################# +# pylint: disable=line-too-long +def gen_perf_report_from_summary_file(perf_summ_file:str): + """ + Driver method to parse `Summary:` lines from perf_summ_file to extract out + the metrics gathered at run-time. Generates a summary performance + comparison report. + + The input file is expected to have multiple lines of this format: + +Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread + +Summary: For 32 clients, Baseline - No logging, num_threads=2, num_ops=32000000 (32 Million) ops, Elapsed time=69448583392 (~69.44 Billion) ns, Avg. Elapsed real time=2170 ns/msg, Server throughput=460772 (~460.77 K) ops/sec, Client throughput=15524 (~15.52 K) ops/sec, Avg. 
ops per thread=16000000 (16 Million) ops/thread + + The layout of this file is as follows: + For a given # of clients: e.g., 'For 32 clients': + + - One summary-line: + - for each logging-type + - for one server-thread-count + - for 1 or more iterations of server execution + + - For one run-parameter: (logging-type, num-server-threads), we could have + 1 or more lines, for each iteration of the test. (Iteration lines are not + tagged.) + + - After n-iterations, we could move to the next num-server-threads count. + + - If the experiment was run with just one thread-count; i.e., env-var + L3_PERF_SERVER_NUM_THREADS=4, we will have just one set of these rows + for a single thread-count. + + We have to rearrange the metrics rows so that the performance comparison + report for each logging-type as compared to baseline performance is + generated for different server-thread counts. + + For runs with multiple iterations, this method computes the median metric + value and re-casts the metrics to drive-off of that. We massage the computed + median metric(s) to invoke existing report-generation code. + + """ + # pylint: enable=line-too-long + + # Create an in-memory list of lines, which we'll scan multiple times. + flines = read_summary_lines(perf_summ_file) + + if len(flines) == 0: + return + + (num_clients_msg, num_ops_msg) = extract_nclients_ops_fields(flines[0]) + + (svr_metric_name, cli_metric_name, thread_metric_name) = extract_metric_names(flines[0]) + heading_row = [ 'Run-Type', svr_metric_name, cli_metric_name, thread_metric_name ] + + # Returns 'Baseline' run-type as 0'th row. + list_of_nthreads = gen_nthreads_list(flines) + # DEBUG: print(list_of_nthreads) + + list_of_runtypes = gen_runtypes_list(flines) + # DEBUG: print(list_of_runtypes) + + base_svr_value = 0 + base_cli_value = 0 + + # Run through diff server-thread parameters, and generate report + # We make multiple passes extracting metrics for each combination + # of #-of-server-threads and run-types. 
+ for threads in list_of_nthreads: + + metrics_by_run_type = OrderedDict() + + for run_type in list_of_runtypes: + + list_of_metrics = extract_metrics_for(flines, threads, run_type) + + # No. of iterations done for each client/server combo run-type + num_iters = len(list_of_metrics) + + agg_metrics = compute_aggregates(list_of_metrics) + + run_type_metrics_list = extract_med_from_agg_metrics(agg_metrics) + # DEBUG: print(run_type_metrics_list) + + if run_type.startswith('Baseline'): + base_svr_value = run_type_metrics_list[0] + base_cli_value = run_type_metrics_list[1] + + metrics_by_run_type[run_type] = run_type_metrics_list + + # sign = 'positive' if x > 0 else 'non-positive' + one_iters_msg = "(1 iteration)" if num_iters == 1 else "" + + # pylint: disable-next=line-too-long + print(f"\n **** Performance comparison for {num_clients_msg}, {num_ops_msg}, {threads} {one_iters_msg} ****") + if num_iters > 1: + print(f" **** (Using median value of metric across {num_iters} iterations) ****") + gen_perf_report(heading_row, metrics_by_run_type, base_svr_value, base_cli_value) + + +# ############################################################################# +def read_summary_lines(perf_summ_file:str) -> list: + """ + Read the summary lines, filtering out comment lines. + + Returns: A list of lines. 
+ """ + flines = [] + with open(perf_summ_file, 'rt', encoding="utf-8") as file: + flines = [line for line in file.readlines() if not line.startswith('#')] + return flines + +# ############################################################################# +def extract_nclients_ops_fields(line:str) -> tuple: + """ + Parse an input line to extract out the number of clients and number of + msgs (ops) fields and return a tuple to the caller + Returns: Tuple: ('For 32 clients', 'num_ops=32000000 (32 Million) msgs') + """ + nclients_msg = extract_nth_field(line, FLD_NUM_CLIENTS_IDX) + nops_msg = extract_nth_field(line, FLD_NUM_OPS_IDX) + + # Further massage fields to get output data as described above. + nclients_msg = nclients_msg.split(':')[1] + nclients_msg = nclients_msg.lstrip().rstrip() + + nops_msg = nops_msg.split('=')[1] + nops_msg = nops_msg.split(')')[0] + ') msgs' + nops_msg = nops_msg.lstrip().rstrip() + return (nclients_msg, nops_msg) + +# ############################################################################# +def extract_metric_names(line:str) -> tuple: + """ + Parse the input line and extract the names of server / client metric. + Synthesize the server-thread metric's name. + Returns: (svr_metric_name, cli_metric_name, thread_metric_name) + """ + svr_throughput_str = extract_nth_field(line, FLD_SERVER_THROUGHPUT_IDX) + svr_throughput_str = svr_throughput_str.split('=')[0] + + cli_throughput_str = extract_nth_field(line, FLD_CLIENT_THROUGHPUT_IDX) + cli_throughput_str = cli_throughput_str.split('=')[0] + + thread_throughput_str = 'NumOps/thread' + + return (svr_throughput_str, cli_throughput_str, thread_throughput_str) + + +# ############################################################################# +def extract_nth_field(line:str, field_num:int) -> tuple: + """ + Extract out the n'th field from a comma-separated set of fields from line. + + Returns: + - The field, if found, with leading / trailing spaces trimmed off. 
+ - None otherwise + """ + fields = line.split(",") + if field_num < 0 or len(fields) <= field_num: + return None + + field_str = fields[field_num] + field_str = field_str.lstrip().rstrip() + return field_str + +# ############################################################################# +def gen_nthreads_list(flines:list) -> list: + """ + Process the list of input lines and identify the list of server-threads for + which the tests were run. + Returns: List of 'num_threads=' items. + """ + # Generate list of threads using list comprehension. + seen_threads = set() + list_of_nthreads = [ nthreads_term + for line in flines + if (nthreads_term := extract_nth_field(line, FLD_NUM_THREADS_IDX)) + not in seen_threads and not seen_threads.add(nthreads_term) + ] + + return list_of_nthreads + +# ############################################################################# +def gen_runtypes_list(flines:list) -> list: + """ + Process the list of input lines and identify the list of logging run-types + for which the tests were run. + Returns: List of run-types, keeping the Baseline list at [0]'th slot. + """ + # Generate list of run-types using list comprehension. 
+ seen_run_types = set() + list_of_run_types = [ run_type + for line in flines + if (run_type := extract_nth_field(line, FLD_RUN_TYPE_IDX)) + not in seen_run_types and not seen_run_types.add(run_type) + ] + + # Re-arrange so that baseline row is the 1st item in the list + if list_of_run_types[0].startswith('Baseline') is False: + baseline_off = -1 + rctr = 0 + for runtype in list_of_run_types: + if runtype.startswith('Baseline'): + baseline_off = rctr + break + + rctr += 1 + + # Flip rows, if baseline record is found + if baseline_off != -1: + baseline_row = list_of_run_types[baseline_off] + list_of_run_types[baseline_off] = list_of_run_types[0] + list_of_run_types[0] = baseline_row + + return list_of_run_types + + +# ############################################################################# +def extract_metrics_for(flines:list, nthreads:str, run_type:str) -> list: + """ + Given a list of summary lines, #-of-threads parameter and run-type, + extract out the relevants metrics for matching summary-lines. + + Returns: List of MetricsTuple() named tuples for metrics. + """ + metrics = [] + for line in flines: + if line_matches(line, nthreads, run_type) is False: + continue + + (svr_value, cli_value, svr_units_val, svr_units_str, cli_units_val, cli_units_str, thread_str) = extract_metrics(line) + metrics.append(MetricsTuple(svr_value, cli_value, svr_units_val, svr_units_str, cli_units_val, cli_units_str, thread_str)) + + # print(f"\nList of metrics tuple(s) for {nthreads}, {run_type}") + # print_list_of_metrics_tuples(metrics) + return metrics + +# ############################################################################# +def compute_aggregates(list_of_metrics:list) -> AggMetricsTuple: + """ + Given a list of MetricsTuple() tuples, compute the required aggregates for + each interesting metric. 
+ + Returns: Tuple of type AggMetrics() + """ + # DEBUG: print("\nList of metrics tuple(s)") + # DEBUG: print_list_of_metrics_tuples(list_of_metrics) + + num_list = [ mtuple.svr_value for mtuple in list_of_metrics ] + # DEBUG: print(sorted(num_list)) + + svr_value_agg = aggregate_num_list(num_list) + # DEBUG: print(f"{svr_value_agg}\n") + + num_list = [ mtuple.cli_value for mtuple in list_of_metrics ] + # DEBUG: print(sorted(num_list)) + + cli_value_agg = aggregate_num_list(num_list) + # DEBUG: print(f"{cli_value_agg}\n") + + num_list = [ mtuple.svr_units_val for mtuple in list_of_metrics ] + # DEBUG: print(sorted(num_list)) + + svr_units_val_agg = aggregate_num_list(num_list) + # DEBUG: print(f"{svr_units_val_agg}\n") + + num_list = [ mtuple.cli_units_val for mtuple in list_of_metrics ] + # DEBUG: print(sorted(num_list)) + + cli_units_val_agg = aggregate_num_list(num_list) + # DEBUG: print(f"{cli_units_val_agg}\n") + + # All server/client units-value have the same units. Grab the 1st one. + for mtuple in list_of_metrics: + svr_units_str = mtuple.svr_units_str + cli_units_str = mtuple.cli_units_str + + # We expect that the distribution of #-ops processed / thread to be uniform. + # So, do a loose check that we got a unique value. And warn the user if + # that's not the case. + ops_per_thread_set = set() + ops_per_thread_list = [ mtuple.thread_str + for mtuple in list_of_metrics + if mtuple.thread_str not in ops_per_thread_set + and not ops_per_thread_set.add(mtuple.thread_str) + ] + + nunique_ops_per_thread = len(ops_per_thread_list) + if nunique_ops_per_thread > 1: + print(f"Warning! 
Found {nunique_ops_per_thread} ops/thread metric.") + + thread_str = ops_per_thread_list[0] + # DEBUG: print(thread_str) + + return AggMetricsTuple(svr_value_agg, cli_value_agg, svr_units_val_agg, svr_units_str, cli_units_val_agg, cli_units_str, thread_str) + +# ############################################################################# +def aggregate_num_list(list_of_nums:list) -> AggMetrics: + """ + Given a list of numbers (ints/floats), compute their aggregates. + Returns: AggMetrics() tuple + """ + nitems = len(list_of_nums) + if nitems == 0: + return None + + min_num = sys.maxsize + max_num = 0 + avg_num = 0 + med_num = 0 + sum_num = 0 + for num_val in list_of_nums: + min_num = min(min_num, num_val) + max_num = max(max_num, num_val) + sum_num += num_val + + med_num = statistics.median(list_of_nums) + avg_num = round(((sum_num * 1.0) / nitems), 2) + + return AggMetrics(nitems, min_num, avg_num, med_num, max_num) + +# ############################################################################# +def extract_metrics(line:str) -> tuple: + """ + Given a summary line, extract key-metrics fields that will be needed to + compute aggregates and also for reporting. + + Example: For a line like this: + +# pylint: disable-next=line-too-long +# Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. 
ops per thread=32000000 (32 Million) ops/thread + + Returns: Tuple: + - svr_value : int : 302990 + - cli_value : int : 10035 + - svr_units_val : float : 302.99 + - svr_units_str : str : 'K ops/sec' + - cli_units_val : float : 10.03 + - cli_units_str : str : 'K ops/sec' + - thread_str : str : '32 Million ops' + """ + svr_throughput_str = extract_nth_field(line, FLD_SERVER_THROUGHPUT_IDX) + svr_throughput_str = svr_throughput_str.split('=')[1] + + (svr_value, svr_units_str) = svr_throughput_str.split('(') + svr_value = int(svr_value.lstrip().rstrip()) + svr_units_str = strip_parens_etc(svr_units_str) + (svr_units_val, svr_units_str) = svr_units_str.split(sep=' ', maxsplit=1) + svr_units_val = float(svr_units_val) + + cli_throughput_str = extract_nth_field(line, FLD_CLIENT_THROUGHPUT_IDX) + cli_throughput_str = cli_throughput_str.split('=')[1] + + (cli_value, cli_units_str) = cli_throughput_str.split('(') + cli_value = int(cli_value.lstrip().rstrip()) + cli_units_str = strip_parens_etc(cli_units_str) + (cli_units_val, cli_units_str) = cli_units_str.split(sep=' ', maxsplit=1) + cli_units_val = float(cli_units_val) + + avg_ops_per_thread_str = extract_nth_field(line, FLD_AVG_OPS_PER_THREAD_IDX) + thread_str = avg_ops_per_thread_str.split('=')[1] + + # pylint: disable-next=unused-variable + (thread_val_unused, thread_str) = thread_str.split(sep=' ', maxsplit=1) + thread_str = strip_parens_etc(thread_str) + thread_str = thread_str.split('/')[0] + + return (svr_value, cli_value, svr_units_val, svr_units_str, cli_units_val, cli_units_str, thread_str) + +# ############################################################################# +def extract_med_from_agg_metrics(agg_metrics:AggMetricsTuple) -> list: + """ + Given a named-tuple, AggMetricsTuple, of aggregated metrics, extract the + key metrics fields that are needed for report generation. We use the + median value of the metric to generate performance gain/drop %age. 
+ + Returns: List of metric values in a form as needed by downstream code. + """ + svr_value = agg_metrics.svr_value_agg.med_val + cli_value = agg_metrics.cli_value_agg.med_val + svr_str = str(agg_metrics.svr_units_val_agg.med_val) + ' ' + agg_metrics.svr_units_str + cli_str = str(agg_metrics.cli_units_val_agg.med_val) + ' ' + agg_metrics.cli_units_str + thread_str = agg_metrics.thread_str + + return (svr_value, cli_value, svr_str, cli_str, thread_str) + +# ############################################################################# +def strip_parens_etc(field_str:str) -> str: + """ + Given an input like: '(~10.03 K) ops/sec', strips away stuff and + Returns: '10.03 K ops/sec' + """ + field_str = field_str.replace('(', '') + field_str = field_str.replace(')', '') + field_str = field_str.replace('~', '') + return field_str + +# ############################################################################# +def line_matches(line:str, nthreads:str, run_type:str) -> bool: + """ + See if input line matches the desired #-of-threads and run-type. 
+ """ + nthreads_field = extract_nth_field(line, FLD_NUM_THREADS_IDX) + run_type_field = extract_nth_field(line, FLD_RUN_TYPE_IDX) + return (nthreads == nthreads_field) and (run_type == run_type_field) + # ############################################################################# def compute_pct_drop(baseval:int, metric:int) -> float: """ @@ -308,13 +752,31 @@ def perf_parse_args(args:list): # Define required arguments supported by this script parser.add_argument('--file', dest='file' , metavar='' - , required=True + , required=False + , default=None , help='Perf test-run output file name') + parser.add_argument('--summary-file', dest='summ_file' + , metavar='' + , required=False + , default=None + , help='Perf output file name with "Summary:" lines') + parsed_args = parser.parse_args(args) if parsed_args is False: parser.print_help() + # Either one of the file-specifiers should be provided + if parsed_args.file is None and parsed_args.summ_file is None: + # pylint: disable-next=line-too-long + print(f"{sys.argv[0]} Error: Either one of --file or --summary-file arguments must be provided.") + sys.exit(1) + + if parsed_args.file is not None and parsed_args.summ_file is not None: + # pylint: disable-next=line-too-long + print(f"{sys.argv[0]} Error: Only one of --file or --summary-file arguments must be provided.") + sys.exit(1) + return parsed_args # ############################################################################# @@ -337,6 +799,19 @@ def pr_metrics(tdict:dict, indent:str = ""): for key in tdict.keys(): print(f"{indent}{key=}, value = {tdict[key]}") +# ############################################################################# +def print_list_of_metrics_tuples(metrics:list, indent:str = " "): + """ Print contents of a list of MetricsTuple()s""" + print("[") + for mtuple in metrics: + # This will print using Python's native print method. 
+ print(f"{indent}{mtuple}") + + # Print by unpacking each field + # pylint: disable-next=line-too-long + # print(f"{indent}svr_value={mtuple.svr_value}, cli_value={mtuple.cli_value}, svr_units_val={mtuple.svr_units_val}, cli_units_val={mtuple.cli_units_val}, units_str={mtuple.cli_units_str}, thread_str={mtuple.thread_str}") + print("]") + ############################################################################### # Start of the script: Execute only if run as a script ############################################################################### diff --git a/test.sh b/test.sh index 368d438..eda9bed 100755 --- a/test.sh +++ b/test.sh @@ -63,9 +63,12 @@ SvrPerfOutfileArg= # execution, we can now specify a list of server-threads to execute, # using env-var as: # -# SERVER_NUM_THREADS="1 2 4 8" ./test.sh run-all-client-server-perf-tests +# L3_PERF_SERVER_NUM_THREADS="1 2 4 8" ./test.sh run-all-client-server-perf-tests # -SrvNumThreadsList="${SERVER_NUM_THREADS:-1}" +SrvNumThreadsList="${L3_PERF_SERVER_NUM_THREADS:-1}" + +# Number of iterations to perform each test. +PerfTestNumIters="${L3_PERF_TEST_NUM_ITERS:-1}" # ################################################################## # Array of test function names. If you add a new test_, @@ -157,7 +160,8 @@ function usage() echo " ./${Me} test-build-and-run-csperf-l3_loc_eq_2 [ server-clock-ID [ num-msgs ] ]" echo " " echo "Environment Variables:" - echo " SERVER_NUM_THREADS: List of server-threads to exercise; e.g. \"1 2 4\"" + echo " L3_PERF_SERVER_NUM_THREADS: List of server-threads to exercise; e.g. \"1 2 4\"" + echo " L3_PERF_TEST_NUM_ITERS: Number of iterations to perform test." echo " " echo "Examples:" @@ -168,10 +172,12 @@ function usage() echo " " echo "Run client-server performance tests with different server-thread settings." 
- echo " SERVER_NUM_THREADS=\"1 2 4 8\" ./${Me} run-all-client-server-perf-tests --clock-default ${nmsgs}" + echo " L3_PERF_SERVER_NUM_THREADS=\"1 2 4 8\" ./${Me} run-all-client-server-perf-tests --clock-default ${nmsgs}" echo " " echo " Client-Server performance u-benchmarking defaults:" + echo " L3_PERF_SERVER_NUM_THREADS = 1" + echo " L3_PERF_TEST_NUM_ITERS = 1" echo " Number of clients = ${NumClients}" echo " Number of messages sent by each client = ${NumMsgsPerClient}" echo " Default clock-ID used (client) = CLOCK_REALTIME" @@ -994,15 +1000,30 @@ function run-client-server-tests_vary_threads() set +x + echo " " + echo "${Me}: $(TZ="America/Los_Angeles" date) Started run-client-server-tests_vary_threads() ... " + echo " " + for threads in ${SrvNumThreadsList}; do echo " " echo "${Me}:${LINENO}: ***** Run run-client-server-test with l3_enabled=${l3_enabled}, num_msgs_per_client=${num_msgs_per_client}, ${threads} threads ..." - echo " " - run-client-server-test "${num_msgs_per_client}" "${l3_enabled}" "${threads}" + local ictr=1 + # shellcheck disable=SC2086 + while [ ${ictr} -le ${PerfTestNumIters} ]; do + + echo " " + echo "${Me}:${LINENO}: Iteration: ${ictr} Run run-client-server-test ..." + run-client-server-test "${num_msgs_per_client}" "${l3_enabled}" "${threads}" + ictr=$((ictr + 1)) + done done + echo " " + echo "${Me}: $(TZ="America/Los_Angeles" date) Completed run-client-server-tests_vary_threads() ... " + echo " " + set -x } diff --git a/tests/pytests/perf_report_test.py b/tests/pytests/perf_report_test.py index 133e569..575421c 100755 --- a/tests/pytests/perf_report_test.py +++ b/tests/pytests/perf_report_test.py @@ -36,6 +36,11 @@ FIELD1 = 'Server throughput=53856 (~53.85 K) ops/sec' FIELD2 = 'Server throughput=989 () ops/sec' +# pylint: disable=line-too-long +SAMPLE_SUMM_LINE = 'Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. 
Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + +# pylint: enable=line-too-long + # ############################################################################# # To see output from test-cases run: # $ pytest --capture=tee-sys perf_report_test.py -k test_compute_pct_drop @@ -45,6 +50,7 @@ def test_parse_perf_line_names(): """Verify parsing and output from parse_perf_line_names()""" + # pylint: disable-next=line-too-long (nclients_field, nops_field, nthreads_field, svr_metric, cli_metric, thread_metric) = perf_report.parse_perf_line_names(MSG1) assert nclients_field == 'NumClients=5' @@ -54,6 +60,7 @@ def test_parse_perf_line_names(): assert cli_metric == 'Client throughput' assert thread_metric == 'NumOps/thread' + # pylint: disable-next=line-too-long (nclients_field, nops_field, nthreads_field, svr_metric, cli_metric, thread_metric) = perf_report.parse_perf_line_names(MSG2) assert nclients_field == 'Number-of-Xacts=5' @@ -67,6 +74,7 @@ def test_parse_perf_line_names(): def test_parse_perf_line_values(): """Verify parsing and output from parse_perf_line_values()""" + # pylint: disable-next=line-too-long (run_type, nthreads_field, svr_value, svr_str, cli_value, cli_str, thread_str) = perf_report.parse_perf_line_values(MSG1) assert run_type == 'Baseline - No logging' @@ -77,6 +85,7 @@ def test_parse_perf_line_values(): assert cli_str == '~20.93 K ops/sec' assert thread_str == '5 K' + # pylint: disable-next=line-too-long (run_type, nthreads_field, svr_value, svr_str, cli_value, cli_str, thread_str) = perf_report.parse_perf_line_values(MSG2) assert run_type == 'Field is unrelated to parsing' @@ -121,3 +130,172 @@ def test_compute_pct_drop(): newval = 101.81 * one_k pct_float = perf_report.compute_pct_drop(baseval, newval) assert pct_float == 10.92 + +# ############################################################################# +def 
test_extract_nth_field(): + """ + Verify extraction logic of extract_nth_field(), using #defines. + """ + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_NUM_CLIENTS_IDX) + assert field == 'Summary: For 32 clients' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_RUN_TYPE_IDX) + assert field == 'Baseline - No logging' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_NUM_THREADS_IDX) + assert field == 'num_threads=1' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_NUM_OPS_IDX) + assert field == 'num_ops=32000000 (32 Million) ops' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_ELAPSED_TIME_IDX) + assert field == 'Elapsed time=105613818950 (~105.61 Billion) ns' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_AVG_ELAPSED_TIME_IDX) + assert field == 'Avg. Elapsed real time=3300 ns/msg' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_SERVER_THROUGHPUT_IDX) + assert field == 'Server throughput=302990 (~302.99 K) ops/sec' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_CLIENT_THROUGHPUT_IDX) + assert field == 'Client throughput=10035 (~10.03 K) ops/sec' + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, + perf_report.FLD_AVG_OPS_PER_THREAD_IDX) + assert field == 'Avg. 
ops per thread=32000000 (32 Million) ops/thread' + + # Negative tests: Extraction should return None for out-of-band field indices + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, -1) + assert field is None + + field = perf_report.extract_nth_field(SAMPLE_SUMM_LINE, 100) + assert field is None + + +# ############################################################################# +def test_extract_nclients_ops_fields(): + """ + Verify the extraction logic of extract_nclients_ops_fields() + """ + (nclients_msg, nops_msg) = perf_report.extract_nclients_ops_fields(SAMPLE_SUMM_LINE) + assert nclients_msg == 'For 32 clients' + assert nops_msg == '32000000 (32 Million) msgs' + +# ############################################################################# +def test_gen_nthreads_list(): + """ + Verify the extraction logic of gen_nthreads_list(), which returns a list + of server-thread parameters + """ + # Create an input list of lines, with some duplicate rows for num_threads= field + # and some unique rows. + # pylint: disable=line-too-long + flines = [ 'Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + , 'Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + + , 'Summary: For 32 clients, Baseline - No logging, num_threads=2, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. 
Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + , 'Summary: For 32 clients, Baseline - No logging, num_threads=2, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + + , 'Summary: For 32 clients, Baseline - No logging, num_threads=8, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + ] + # pylint: enable=line-too-long + + # Verify that the parsing routine returns this expected result. + exp_list_of_nthreads = [ 'num_threads=1', 'num_threads=2', 'num_threads=8' ] + + list_of_nthreads = perf_report.gen_nthreads_list(flines) + assert list_of_nthreads == exp_list_of_nthreads + +# ############################################################################# +def test_gen_runtypes_list(): + """ + Verify the extraction logic of gen_runtypes_list(), which returns a list + of logging run-types + """ + # Create an input list of the same lines, with some duplicate rows for run-type + # fields and some unique rows. + # pylint: disable=line-too-long + flines = [ + 'Summary: For 32 clients, spdlog-backtrace-logging, num_threads=8, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. 
ops per thread=32000000 (32 Million) ops/thread' + + + , 'Summary: For 32 clients, L3-logging (no LOC), num_threads=2, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + + , 'Summary: For 32 clients, L3-logging (no LOC), num_threads=2, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + + , 'Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + + , 'Summary: For 32 clients, Baseline - No logging, num_threads=1, num_ops=32000000 (32 Million) ops, Elapsed time=105613818950 (~105.61 Billion) ns, Avg. Elapsed real time=3300 ns/msg, Server throughput=302990 (~302.99 K) ops/sec, Client throughput=10035 (~10.03 K) ops/sec, Avg. ops per thread=32000000 (32 Million) ops/thread' + + ] + # pylint: enable=line-too-long + + # Verify that the parsing routine returns this expected result, reordering + # the run-types so that the Baseline row is the 1st one. 
+ exp_list_of_run_types = [ 'Baseline - No logging' + , 'L3-logging (no LOC)' + , 'spdlog-backtrace-logging' + ] + + list_of_run_types = perf_report.gen_runtypes_list(flines) + assert list_of_run_types == exp_list_of_run_types + + # 0th row is always expected to be the baseline (no-logging) run-type + assert list_of_run_types[0].startswith('Baseline') + +# ############################################################################# +def test_strip_parens_etc(): + """ + Verify stripped string returned by strip_parens_etc() + """ + + item = '(~10.03 K) ops/sec' + assert perf_report.strip_parens_etc(item) == '10.03 K ops/sec' + + item = '(10.03 K) ops/sec' + assert perf_report.strip_parens_etc(item) == '10.03 K ops/sec' + + item = '10.03 K ops/sec' + assert perf_report.strip_parens_etc(item) == '10.03 K ops/sec' + +# ############################################################################# +def test_extract_metrics(): + """ + Verify tuple returned by extract_metrics() on a summary output line. + """ + # pylint: disable=line-too-long + (svr_value, cli_value, svr_units_val, svr_units_str, cli_units_val, cli_units_str, thread_str) = perf_report.extract_metrics(SAMPLE_SUMM_LINE) + + print(f"'{svr_value}' '{cli_value}' '{svr_units_val}' '{svr_units_str} '{cli_units_val}' '{cli_units_str}' '{thread_str}'") + + # pylint: enable=line-too-long + + assert svr_value == 302990 + assert cli_value == 10035 + assert svr_units_val == 302.99 + assert svr_units_str == 'K ops/sec' + assert cli_units_val == 10.03 + assert cli_units_str == 'K ops/sec' + assert thread_str == '32 Million ops' + +# ############################################################################# +def test_line_matches(): + """ + Verify boolean returned by line_matches() on a summary output line. 
+ """ + assert perf_report.line_matches(SAMPLE_SUMM_LINE, + 'num_threads=1', 'Baseline - No logging') is True + + assert perf_report.line_matches(SAMPLE_SUMM_LINE, + 'num_threads=2', 'Baseline - No logging') is False + + assert perf_report.line_matches(SAMPLE_SUMM_LINE, + 'num_threads=1', 'L3-logging (no LOC)') is False