In [1]:
# Base URL used when building the per-case report links.
FLASK_ADDR = "http://10.73.196.185:5000"

# Directory that holds every input file and every generated artifact.
WORKSPACE = "/workspace"

# Inputs: benchmark configuration plus the datastore/metadata of both testruns.
YAML = f"{WORKSPACE}/benchmark_config.yaml"
BASE_DATASTORE = f"{WORKSPACE}/base.datastore.json"
TEST_DATASTORE = f"{WORKSPACE}/test.datastore.json"
BASE_METADATA = f"{WORKSPACE}/base.testrun_metadata.json"
TEST_METADATA = f"{WORKSPACE}/test.testrun_metadata.json"

# Outputs written by the data_process scripts and read back by this notebook.
BASE_TESTRUN_RESULT = f"{WORKSPACE}/base.testrun_result.csv"
TEST_TESTRUN_RESULT = f"{WORKSPACE}/test.testrun_result.csv"
METADATA = f"{WORKSPACE}/2way_metadata.csv"
BENCHMARK = f"{WORKSPACE}/2way_benchmark.csv"

import os
import sys
import yaml
import json
import pandas as pd
from IPython.display import display, HTML, Markdown
from datetime import datetime

# Absolute path of the current working directory, and the data_process
# script directory addressed relative to it.
BASEPATH = os.path.abspath(os.curdir)
SCRIPTPATH = f"{BASEPATH}/../data_process"

# Performance Test Report Portal

In [2]:
# Stamp the report with the moment it was generated.
now = datetime.now()
dt_string = "Generate time: *{:%Y-%m-%d %H:%M:%S}*".format(now)
display(Markdown(dt_string))

Generate time: *2020-12-28 16:15:58*

## Introduction

In [28]:
def read_json(json_file):
    """Parse a JSON file and return the decoded object.

    Args:
        json_file: Path of the file to read.

    Returns:
        Whatever ``json.load`` decodes from the file.

    Raises:
        Any exception raised by ``json.load`` is re-raised after a
        "Fail to load ..." message has been printed. Errors from ``open``
        itself propagate without that message.
    """
    with open(json_file, 'r') as handle:
        try:
            return json.load(handle)
        except Exception:
            print("Fail to load {}".format(json_file))
            raise

# Load the metadata of both testruns; they can only be compared when they
# share the same testrun type.
base_metadata = read_json(BASE_METADATA)
test_metadata = read_json(TEST_METADATA)
assert base_metadata.get("testrun.type") == test_metadata.get("testrun.type"), "Base and Test type must be the same! Exit."

run_type = base_metadata.get("testrun.type")
base_platform, test_platform = (
    metadata.get("testrun.platform") for metadata in (base_metadata, test_metadata))
base_id, test_id = (
    metadata.get("testrun.id") for metadata in (base_metadata, test_metadata))

# The introduction template is chosen by platform and type, so neither may be None.
assert run_type is not None, "Type is None! Exit."
assert base_platform is not None, "Base platform is None! Exit."
assert test_platform is not None, "Test platform is None! Exit."

# Show the Base platform introduction first; the Test platform introduction
# follows (separated by a newline) only when the platforms differ.
intro_platforms = [base_platform]
if base_platform != test_platform:
    intro_platforms.append(test_platform)

for position, platform in enumerate(intro_platforms):
    with open('{}/templates/introduction_{}_{}.md'.format(BASEPATH, platform.lower(), run_type), 'r') as f:
        intro_text = f.read()
    display(Markdown(intro_text if position == 0 else '\n' + intro_text))

#### ESXi Storage test
VMware ESXi (formerly ESX) is an enterprise-class, type-1 hypervisor developed by VMware for deploying and serving virtual computers. 



In [4]:
%%html
<style>
    /* Applies to every <table> rendered in the notebook: lay tables out as
       inline-block elements instead of the default table display. */
    table {
        display: inline-block
    }
</style>

In [6]:
# Command line that generates the Base testrun result CSV.
base_gen_result_script = "{}/generate_testrun_results.py --config {} --datastore {} --metadata {} --output {}".format(
    SCRIPTPATH, YAML, BASE_DATASTORE, BASE_METADATA, BASE_TESTRUN_RESULT)

# Command line that generates the Test testrun result CSV.
test_gen_result_script = "{}/generate_testrun_results.py --config {} --datastore {} --metadata {} --output {}".format(
    SCRIPTPATH, YAML, TEST_DATASTORE, TEST_METADATA, TEST_TESTRUN_RESULT)

# Command line that generates the 2way metadata CSV.
gen_metadata_script = "{}/generate_2way_metadata.py --test {} --base {} --output {}".format(
    SCRIPTPATH, TEST_METADATA, BASE_METADATA, METADATA)

# Command line that generates the 2way benchmark CSV; it consumes both
# testrun result CSVs, so it has to run after the commands above.
gen_benchmark_script = "{}/generate_2way_benchmark.py --config {} --test {} --base {} --output {}".format(
    SCRIPTPATH, YAML, TEST_TESTRUN_RESULT, BASE_TESTRUN_RESULT, BENCHMARK)

# The first three commands do not depend on each other: run them in parallel.
import multiprocessing
all_processes = (base_gen_result_script, test_gen_result_script, gen_metadata_script)


def execute(process):
    """Run one generator command line with python3.

    Args:
        process: Script path followed by its command-line arguments.

    Returns:
        The status value returned by ``os.system`` (0 means success).
    """
    return os.system(f'python3 {process}')


# The context manager shuts the worker processes down once map() has
# returned, instead of leaving the pool open for the kernel's lifetime.
with multiprocessing.Pool(processes=len(all_processes)) as process_pool:
    exit_statuses = process_pool.map(execute, all_processes)

# Surface failures that were previously discarded silently.
for command, status in zip(all_processes, exit_statuses):
    if status != 0:
        print("Warning: 'python3 {}' exited with status {}".format(command, status))

# NOTE(review): an output file left over from an earlier run also satisfies
# these existence checks, so read them together with the warnings above.
for result in [BASE_TESTRUN_RESULT, TEST_TESTRUN_RESULT, METADATA]:
    assert os.path.exists(result), "Fail to generate {}! Exit.".format(result)

# Generate 2way benchmark
benchmark_status = os.system('python3 {}'.format(gen_benchmark_script))
if benchmark_status != 0:
    print("Warning: 'python3 {}' exited with status {}".format(gen_benchmark_script, benchmark_status))
assert os.path.exists(BENCHMARK), "Fail to generate {}! Exit.".format(BENCHMARK)

In [29]:
# CSS applied to each conclusion code found in the "-CON" columns.
_CONCLUSION_STYLES = {
    "DR": 'color: red; background-color: #FFB6C1',
    "MR": 'color: black',
    "DI": 'color: green; background-color: #F0FFF0',
    "MI": 'color: black',
    "HV": 'color: orange; background-color: #FAFAD2',
    "NS": 'color: gray',
    "NC": 'color: gray',
    "ID": 'color: red',
}

# Config path -> configured "fillna" placeholder. Re-running this cell
# resets the cache, which picks up edits made to the config file.
_FILLNA_CACHE = {}


def _load_fillna(config_path):
    """Return the ``fillna`` placeholder from the benchmark config, cached per path."""
    if config_path not in _FILLNA_CACHE:
        with open(config_path) as f:
            config = yaml.safe_load(f) or {}
        # Tolerate missing sections instead of failing with AttributeError.
        section = config.get('benchmark_comparison_generator') or {}
        defaults = section.get('defaults') or {}
        _FILLNA_CACHE[config_path] = defaults.get('fillna', "NaN")
    return _FILLNA_CACHE[config_path]


def color_delta(val):
    """Return the CSS declaration used to render one conclusion cell.

    The function is applied cell by cell, so the config file is parsed only
    once (see ``_load_fillna``) rather than on every call.

    Args:
        val: Cell value, normally a conclusion code such as "DR" or "HV",
            or the configured ``fillna`` placeholder.

    Returns:
        A CSS string; 'color: black' for any unrecognised value.
    """
    styles = dict(_CONCLUSION_STYLES)
    # The placeholder entry goes in last so it wins over a same-named code,
    # matching the precedence of the original literal dict.
    styles[_load_fillna(YAML)] = 'color: #D3D3D3'
    return styles.get(val, 'color: black')

def highlight_cols(s):
    """Return a fixed light-cyan background CSS declaration.

    Args:
        s: Value being styled; it does not influence the result.
    """
    background_css = 'background-color: #eeffff'
    return background_css

def bold_font(s):
    """Return the CSS declaration for bold text.

    Args:
        s: Value being styled; it does not influence the result.
    """
    bold_css = 'font-weight: bold'
    return bold_css

def displayComparison(df):
    """Display the benchmark comparison table with styling applied.

    Columns whose name ends with "-CON" are coloured by ``color_delta`` and
    rendered bold. The 'Test' and 'Base' columns, when present, are rendered
    as HTML links whose target is the cell value.

    Args:
        df: DataFrame holding the benchmark comparison.
    """
    def as_link(url):
        return '<a target="_blank" href="{}">link</a>'.format(url)

    # These are the columns which need special formatting.
    conclusion_cols = [col for col in df.columns if str(col).endswith("-CON")]

    # Both link columns go into ONE formatter dict. With two chained
    # Styler.format() calls, recent pandas resets every column missing from
    # the second dict to the default formatter, dropping the first link.
    link_formatters = {col: as_link for col in ('Test', 'Base') if col in df.columns}

    styled = df.style\
        .applymap(color_delta, subset=conclusion_cols)\
        .applymap(bold_font, subset=conclusion_cols)
    if link_formatters:
        styled = styled.format(link_formatters)
    display(styled)

## Metadata

In [8]:
%%HTML
* The differences between Test and Base are <b style='color:orange'>highlighted</b>.

In [9]:
def highlight_diff(row, cell_format):
    """Build the per-cell CSS list for one metadata row.

    The TEST and BASE cells receive ``cell_format`` when their values differ;
    every other cell, and every cell of a row whose TEST and BASE match,
    receives an empty string. One entry is produced per column in the row,
    matched by label, so the result stays valid if columns are reordered or
    extra columns are added.

    Args:
        row: Mapping-like row (e.g. a pandas Series) with 'TEST' and 'BASE' keys.
        cell_format: CSS declaration applied to differing TEST/BASE cells.

    Returns:
        A list of CSS strings, one per key of ``row``, in key order.
    """
    cell_format = cell_format if row['TEST'] != row['BASE'] else ''
    return [cell_format if label in ('TEST', 'BASE') else '' for label in row.keys()]

def color_diff(row):
    """Return the per-cell CSS that colours differing TEST/BASE cells orange."""
    return highlight_diff(row, cell_format='color: orange')

def bold_diff(row):
    """Return the per-cell CSS that renders differing TEST/BASE cells in bold."""
    return highlight_diff(row, cell_format='font-weight: bold')
    
# Load the 2way metadata table; missing values are shown as empty cells.
conf_df = pd.read_csv(METADATA, index_col=0).fillna('')

# Bold the KEY column, then colour and bold the TEST/BASE cells that differ.
conf_styler = conf_df.style\
    .applymap(bold_font, subset=['KEY'])\
    .apply(color_diff, axis=1)\
    .apply(bold_diff, axis=1)

# Styler.hide_index() no longer exists in pandas 2.0+; use Styler.hide()
# where it is available and fall back to hide_index() on older releases.
if hasattr(conf_styler, 'hide'):
    conf_styler = conf_styler.hide(axis='index')
else:
    conf_styler = conf_styler.hide_index()

display(conf_styler)

KEY,TEST,BASE
disk.backend,NVMe,NVMe
disk.driver,scsi,scsi
disk.format,raw,raw
host.cpu_model,AMD Opteron(tm) Processor 4284,AMD Opteron(tm) Processor 4284
only.base,,8.3
only.test,8.4,
os.branch,RHEL-8.4,RHEL-8.3
os.compose,RHEL-8.4.0-20201130.n.1,RHEL-8.3.0-2020111009.2
os.kernel,4.18.0-259.el8.x86_64,4.18.0-240.1.1.el8_3.x86_64
testrun.comments,Some comments here.,Some comments here.


## Summary

In [10]:
# Render the static description of the benchmark table from its template.
description_path = '{}/templates/benchmark_description.md'.format(BASEPATH)
with open(description_path, 'r') as template:
    display(Markdown(template.read()))

**Description**

This benchmark report compares the TEST results against the BASE results. The column names of the table consist of two parts. The first part consists of the KEYs that identify the specific test case, and the second part consists of the KPIs that hold the test results.

KPI is the abbreviation of "Key Performance Indicator". It measures the performance that a test can achieve and usually reflects the effectiveness of the system. In this table, each KPI is divided into 7 columns. Take "IOPS" (input/output operations per second), a typical KPI for storage performance testing, as an example. It is divided into the following seven columns:
1. IOPS-BASE-AVG: The **mean value** of the BASE samples.
2. IOPS-BASE-%SD: The **standard deviation in percentage** of the BASE samples.
3. IOPS-TEST-AVG: Same as IOPS-BASE-AVG, but for the TEST samples.
4. IOPS-TEST-%SD: Same as IOPS-BASE-%SD, but for the TEST samples.
5. IOPS-%DF: The **difference in percentage** of the TEST samples over the BASE samples.
6. IOPS-SGN: The **significance** calculated by the t-test of the two samples.
7. IOPS-CON: The preliminary **conclusion** determined by the algorithm.

**About calculations**

- AVG = SUM(the values of all the samples) / COUNT(samples)
- %SD = (The Standard Deviation of all the samples) / AVG * 100%
- %DF = (TEST-AVG - BASE-AVG) / BASE-AVG * 100%
- SGN = 1 - TTEST(all the BASE samples, all the TEST samples)

**About conclusion**

An algorithm helps reach a preliminary conclusion for each KPI.

| Abbr | Conclusion             | Reaching Condition (priority decreasing)                 |
| :--- | :--------------------- | :------------------------------------------------------- |
| ID   | Invalid Data           | Any of the input data is invalid.                        |
| HV   | High Variance          | %SD > MAX_PCTDEV_THRESHOLD                               |
| NS   | No Significance        | SGN < CONFIDENCE_THRESHOLD                               |
| NC   | Negligible Changes     | abs(%DF) <= NEGLIGIBLE_THRESHOLD                         |
| MI   | Moderately Improved    | NEGLIGIBLE_THRESHOLD < abs(%DF) <= REGRESSION_THRESHOLD. |
| MR   | Moderately Regressed   | Same as above, but in the negative direction.            |
| DI   | Dramatically Improved  | abs(%DF) > REGRESSION_THRESHOLD                          |
| DR   | Dramatically Regressed | Same as above, but in the negative direction.            |

The following parameters can be specified by users.

| Parameters           | Description                           | Default |
| -------------------- | ------------------------------------- | :------ |
| HIGHER_IS_BETTER     | Describe the direction of improvement | True    |
| MAX_PCTDEV_THRESHOLD | Max %SD can be accepted (0 = disable) | 0.10    |
| CONFIDENCE_THRESHOLD | Min SGN to be considered reproducible | 0.95    |
| NEGLIGIBLE_THRESHOLD | Max %DF can be ignored (0 = disable)  | 0.05    |
| REGRESSION_THRESHOLD | Min %DF to be considered dramatic     | 0.10    |


In [30]:
def get_report_link(row, metadata_file):
    """Build the URL of the result list view for one benchmark row.

    The query string filters by the testrun attributes read from
    ``metadata_file`` and by the case parameters taken from ``row``.
    ``urlencode`` builds it, so every filter is '&'-separated and every
    value is percent-encoded. The original hand-written template lacked the
    separators after the ``bs`` and ``rw`` filters.

    Args:
        row: Mapping-like row (e.g. a pandas Series) providing 'IOdepth',
            'Numjobs', 'BS' and 'RW'.
        metadata_file: Path of the testrun metadata JSON file.

    Returns:
        The URL as a string, rooted at ``FLASK_ADDR``.
    """
    from urllib.parse import urlencode

    with open(metadata_file) as f:
        m = json.load(f)

    # The rest of the notebook reads the dotted key 'testrun.id'; the old
    # underscore spelling is kept only as a fallback.
    testrun_id = m.get('testrun.id', m.get('testrun_id'))

    filters = [
        ('_flt_3_testrun', testrun_id),
        ('_flt_3_platform', m.get('testrun.platform')),
        ('_flt_3_backend', m.get('disk.backend')),
        ('_flt_3_driver', m.get('disk.driver')),
        ('_flt_3_format', m.get('disk.format')),
        ('_flt_3_bs', row['BS']),
        ('_flt_3_rw', row['RW']),
        ('_flt_0_iodepth', row['IOdepth']),
        ('_flt_0_numjobs', row['Numjobs']),
    ]
    return "{}/storageresultpubview/list/?{}".format(FLASK_ADDR, urlencode(filters))

# Read every value as a string and keep empty cells as '' rather than NaN.
benchmark_df = pd.read_csv(BENCHMARK, index_col=0, dtype=str, keep_default_na=False)

# Prepend the report-link columns. Each one is inserted at position 0, so
# inserting 'Test' first and 'Base' second yields the order Base, Test.
for link_column, link_metadata in (('Test', TEST_METADATA), ('Base', BASE_METADATA)):
    report_links = benchmark_df.apply(get_report_link, axis=1, args=(link_metadata,))
    benchmark_df.insert(0, link_column, report_links)

displayComparison(benchmark_df)

Unnamed: 0,Base,Test,RW,BS,IOdepth,Numjobs,IOPS-BASE-AVG,IOPS-BASE-%SD,IOPS-TEST-AVG,IOPS-TEST-%SD,IOPS-%DF,IOPS-SGN,IOPS-CON,LAT-BASE-AVG(ms),LAT-BASE-%SD,LAT-TEST-AVG,LAT-TEST-%SD,LAT-%DF,LAT-SGN,LAT-CON,CLAT-BASE-AVG(ms),CLAT-BASE-%SD,CLAT-TEST-AVG,CLAT-TEST-%SD,CLAT-%DF,CLAT-SGN,CLAT-CON
0,link,link,randread,1024k,1,1,,,99.2,1.31,,,,,,10.047,1.32,,,,,,9.804,1.34,,,
1,link,link,randread,1024k,64,1,,,101.2,1.29,,,,,,642.944,2.01,,,,,,642.412,2.0,,,
2,link,link,randread,4k,1,1,,,287.8,1.38,,,,,,3.512,1.89,,,,,,3.413,1.95,,,
3,link,link,randread,4k,64,1,,,787.0,1.49,,,,,,83.679,1.56,,,,,,83.606,1.55,,,
4,link,link,randrw,1024k,1,1,50.6,11.18,97.0,1.46,91.74,1.0,HV,19.378,11.32,10.2,1.64,-47.37,1.0,HV,14.359,7.98,9.929,1.57,-30.85,1.0,DR
5,link,link,randrw,1024k,64,1,58.6,3.34,103.6,1.75,76.85,1.0,DI,1116.524,3.24,611.18,1.02,-45.26,1.0,DR,1099.648,3.38,610.486,1.03,-44.48,1.0,DR
6,link,link,randrw,4k,1,1,127.0,19.25,271.2,1.78,113.59,1.0,HV,8.212,23.79,3.654,2.3,-55.5,1.0,HV,4.892,21.43,3.547,2.31,-27.49,0.98,HV
7,link,link,randrw,4k,64,1,223.3,3.25,674.4,0.75,202.06,1.0,DI,295.64,3.97,96.506,0.72,-67.36,1.0,DR,290.955,3.85,96.416,0.72,-66.86,1.0,DR
8,link,link,randwrite,1024k,1,1,,,97.6,0.92,,,,,,10.205,0.98,,,,,,9.917,0.91,,,
9,link,link,randwrite,1024k,64,1,,,107.4,0.83,,,,,,606.369,0.95,,,,,,605.748,0.96,,,
