#  Compare Model Zoo Benchmark performance among different data types on Intel optimized  Tensorflow

This jupyter notebook will help you evaluate performance benefits among different data types like int8/bf16 on Intel-optimized Tensorflow via several models from Intel Model Zoo. 
The notebook will show users some bar charts like below for performance comparison among different data types.

<img src="images/perf_comparison_types.png"  />

# Section 1: Display Platform Information 

In [None]:
# Silence every Python warning for the rest of the session so that the
# notebook output only shows the benchmark information.
import warnings
warnings.filterwarnings('ignore')

#### ACTION: Users should change the value of os.environ['ModelZooRoot'] according to their environment

In [None]:
import os

# If default path does not work, change ModelZooRoot path according to your environment
## USER INPUT
accuracy_only = False
current_path = os.getcwd()

# Both roots are derived from the current working directory; the profiling
# utilities live in a fixed sub-folder below the Model Zoo root.
model_zoo_root = current_path + "/../../../"
profile_utils_root = model_zoo_root + "docs/notebooks/perf_analysis/profiling/"
os.environ['ModelZooRoot'] = model_zoo_root
os.environ['ProfileUtilsRoot'] = profile_utils_root

# Echo the exported values so that the user can verify them.
for root_name in ('ModelZooRoot', 'ProfileUtilsRoot'):
    print(os.environ[root_name])

####  Check for mandatory python scripts after ModelZooRoot and ProfileUtilsRoot are assigned

In [None]:
import os


def _report_script(script_path, missing_message):
    """Print *script_path* when it exists on disk, otherwise print *missing_message*."""
    print(script_path if os.path.exists(script_path) else missing_message)


current_path = os.getcwd()

# launch_benchmark.py is looked up under the Model Zoo root.
benchmark_path = os.environ['ModelZooRoot'] + "benchmarks/launch_benchmark.py"
_report_script(benchmark_path, "ERROR! Can't find benchmark script!")

# profile_utils.py is looked up under the profiling utilities root.
profile_utils_path = os.environ['ProfileUtilsRoot'] + "profile_utils.py"
_report_script(profile_utils_path, "ERROR! Can't find profile_utils script!")

In [None]:
# Print the platform information through the notebook's profiling helper.
from profiling.profile_utils import PlatformUtils
plat_utils = PlatformUtils()
plat_utils.dump_platform_info()

# Section 2: Run the benchmark on the Intel TensorFlow 

## Step 1: Check TensorFlow version and oneDNN enablement

In [None]:
import tensorflow as tf

print("We are using Tensorflow version", tf.__version__)
major_version = int(tf.__version__.split(".")[0])

# The oneDNN (MKL) query is reached through a different module depending on
# the TensorFlow major version.
if major_version < 2:
    mkl_enabled = tf.pywrap_tensorflow.IsMklEnabled()
else:
    from tensorflow.python.util import _pywrap_util_port
    mkl_enabled = _pywrap_util_port.IsMklEnabled()
print("oneDNN enabled:", mkl_enabled)

<a id='step_2'></a>
## Step 2 : Select a supported Topology/Benchmark with different data types support



<a id='step_2_1'></a>
### Step 2.1 : List out the Topologies/Benchmarks with multiple data types support

####  Set accuracy_only to True if needed
By default, we benchmark the topology for throughput only. Please set accuracy_only to True if you also want to get accuracy number. 

In [None]:
import os


def env_flag(name, default=False):
    """Return the boolean value stored in environment variable *name*.

    The value is parsed as text instead of being passed to ``eval()``, so the
    content of the environment can never execute code and the result is always
    a real ``bool`` (later cells compare the flag with ``is True``).

    Args:
        name: name of the environment variable to read.
        default: value returned when the variable is unset or empty.

    Returns:
        True for "1/true/yes/on", False for "0/false/no/off" (case-insensitive),
        *default* when the variable is unset or empty.

    Raises:
        ValueError: if the variable holds any other text.
    """
    raw = os.environ.get(name, '').strip().lower()
    if raw == '':
        return default
    if raw in ('1', 'true', 'yes', 'on'):
        return True
    if raw in ('0', 'false', 'no', 'off'):
        return False
    raise ValueError("%s must be True or False, got %r" % (name, os.environ[name]))


accuracy_only = False
# Overwrite the accuracy_only value with the "ACCURACY_ENABLE" environment
# variable when that variable is set.
accuracy_only = env_flag('ACCURACY_ENABLE', accuracy_only)

List out all supported benchmarks.

In [None]:
import sys
from profiling.profile_utils import ConfigFile, CSVCommonUtils

# Load the benchmark configurations; the third return value is iterated below
# as (group key, group) pairs.
config = ConfigFile()
df, df_types, df_types_obj = config.convert_configs_to_pd_dataframe(accuracy_only=accuracy_only)

# Keep only the group keys; they are what the user picks from.
name_list = [group_key for group_key, _ in df_types_obj]
precision_list = []

# cache.csv carries the CSV file names and the precisions of the previous runs,
# so the lists start empty only for the first run among data types.
cache_csv_file = "cache.csv"
fnames = ['csv_fname_list', 'selected_precision_list']
cache_exists = os.path.isfile(cache_csv_file)  # checked before the helper is created
cachecsv = CSVCommonUtils(fnames, cache_csv_file)
if cache_exists:
    csv_fname_list, selected_precision_list = cachecsv.read_from_csv()
else:
    # Initial CSV files list
    csv_fname_list, selected_precision_list = [], []

print(csv_fname_list)
print(selected_precision_list)

print(" Benchmark that supports multiple data types :")
for index, name in enumerate(name_list):
    print(' Index %d : %s' %(index, name))
# Later cells expect `index` to hold the number of listed benchmarks.
index = len(name_list)

<a id='step_2_2'></a>
### Step 2.2 : Pick a Topology/Benchmark

The User needs to pick a topology for the first run.

In [None]:
# User picks a benchmark type.
# The "TOPO_INDEX" environment variable takes precedence; the user is only
# prompted when it is unset or empty.
import os

env_topo_index = os.environ.get('TOPO_INDEX', '')
if env_topo_index == '':
    ## USER INPUT
    selected_index = int(input('Input a index number of a topology: '))
else:
    selected_index = int(env_topo_index)

# Remember the choice for later runs of the notebook.
os.environ['TOPO_INDEX'] = str(selected_index)

show the selected topology.

In [None]:
# Valid choices are exactly the indexes printed in Step 2.1, i.e.
# 0 .. len(name_list) - 1.  The bound is taken from name_list itself rather
# than from the loop counter `index`, which other cells overwrite.
if not 0 <= selected_index < len(name_list):
    print("Please select a valid index number.")
else:
    topology_key = name_list[selected_index]
    selected_df = df_types_obj.get_group(topology_key)
    # The displayed topology name is built from the first two fields of the group key.
    selected_topology = topology_key[0] + '-' + topology_key[1]
    print(selected_topology)

<a id='step_3'></a>
## Step 3 : Select a supported data type for the Topology/Benchmark



<a id='step_3_1'></a>
### Step 3.1 : List out all data types supported by the selected Topology/Benchmark

In [None]:
selected_df = df_types_obj.get_group(name_list[selected_index])
print(name_list[selected_index])
print(" supported data types :")
precision_df = selected_df["precision"]

# Rebuild the list from scratch instead of appending to it: re-running this
# cell must not duplicate entries, because later cells rely on
# len(precision_list) being the number of supported data types.
precision_list = precision_df.tolist()
for i, precision in enumerate(precision_list):
    print(' Index %d : %s' % (i, precision))

<a id='step_3_2'></a>
### Step 3.2 : Pick a Data Type

#### ACTION : Please select one supported data type and change data_type_index accordingly
By default, users can start with 0 as data_type_index.

In [None]:
# User picks a data type for the selected benchmark type.
# The "DATA_TYPE_INDEX" environment variable wins when it is set; otherwise the
# index starts at 0, but only for the first run among data types (no CSV file
# recorded yet).
env_data_type_index = os.environ.get('DATA_TYPE_INDEX', '')
if env_data_type_index != '':
    data_type_index = int(env_data_type_index)
elif len(csv_fname_list) == 0:
    data_type_index = 0

<a id='step_3_3'></a>
### Step 3.3 : List out the selected topology/benchmark name

In [None]:
# Valid choices are the indexes printed in Step 3.1.
if not 0 <= data_type_index < len(precision_df):
    print("Please select a valid index number.")
else:
    topology_name = selected_df.iloc[data_type_index]['benchmark']
    print("selected topology/benchmark for this run : ", topology_name)

    # Record the precision only for a valid selection, and only once: the CSV
    # file list is de-duplicated in Step 4.5, so the two lists must stay in step
    # even when this cell is executed again.
    chosen_precision = precision_list[data_type_index]
    if chosen_precision not in selected_precision_list:
        selected_precision_list.append(chosen_precision)

Export the selected topology and data type as environment variables.

In [None]:
import os

# Publish the current selection through the process environment.
os.environ['SELECTED_TOPO'] = selected_topology
os.environ['SELECTED_TYPE'] = selected_df["precision"].iloc[data_type_index]
for exported_name in ('SELECTED_TOPO', 'SELECTED_TYPE'):
    print(os.environ[exported_name])

<a id='step_4'></a>
## Step 4: Configure parameters for launch_benchmark.py according to the selected Topology

### Step 4.1: Import Model Zoo CPU info

In [None]:
# Put the Model Zoo's benchmarks/common folder on the import path so that its
# platform_util module can be imported.
common_dir = os.environ['ModelZooRoot']+os.sep+'benchmarks/common/'
sys.path.append(common_dir)
from platform_util import PlatformUtil
cpu_info = PlatformUtil("")

### Step 4.2: User can also manually set batch size and number of threads

In [None]:
import os

numa_nodes = cpu_info.numa_nodes
print("CPU count per socket:" , cpu_info.cores_per_socket ," \nSocket count:", cpu_info.sockets, " \nNuma nodes:",numa_nodes)

if numa_nodes > 0:
    # NUMA machine: default to one socket and its cores.
    socket_number, cpu_count, inter_thread = 1, cpu_info.cores_per_socket, 1
else:
    # on non-numa machine, we should use all the cores and don't use numactl
    socket_number = -1
    cpu_count = cpu_info.cores_per_socket * cpu_info.sockets
    inter_thread = cpu_info.sockets

#### ACTION: Users can change the value of thread_number and batch_size to see different performance
1. thread_number: the value will apply to the num_cores parameter in launch_benchmark.py  
2. utilized_socket_number:  the value will apply to the socket-id parameter in launch_benchmark.py 
3. num_inter_threads: the value will  apply to the num-inter-threads parameter in launch_benchmark.py 
4. num_intra_threads: the value will  apply to the num-intra-threads parameter in launch_benchmark.py 
5. batch_size: the value will apply to the batch_size parameter in launch_benchmark.py 
6. log_folder: the folder where the logs are stored.


##### Here are some performance optimization BKMs for your reference.

In [None]:
# Read the configuration of the selected topology and show its BKM text,
# or a fallback message when none is recorded.
config = ConfigFile()
config.read_config(topology_name)
bkm_text = config.perf_bkm if config.perf_bkm != '' else "No BKM for this topology"
print(bkm_text)

In [None]:
## USER INPUT
# The defaults below come from the CPU information gathered in Step 4.2 and
# from the topology configuration; edit them to try other settings.
thread_number=cpu_count 
utilized_socket_number=socket_number
num_inter_threads = inter_thread
num_intra_threads = thread_number
batch_size=config.batch_size # Used recommended batch_size if any. Users can overwrite the value of batch_size
# Logs are written to a "logs" folder below the current working directory.
log_folder=os.getcwd() + os.sep + "logs"

### Step 4.3: get required data and files if needed.
No action is needed if the cell below produces no output.

#### Optional ACTION: Users input the folder paths for those required data and files
1. data_download_path: the value will be set as data-location parameters in topo.ini for the related topology.
2. model_source_dir: the value will be set as the model-source-dir parameter in topo.ini for the related topology such as Wide and Deep. Those models use tensorflow-models.

In [None]:
import os


def _env_or_prompt(env_name, prompt_text):
    """Return the non-empty value of *env_name*, otherwise ask the user for it."""
    preset = os.environ.get(env_name, '')
    if preset != '':
        return preset
    ## USER INPUT
    return input(prompt_text)


# Get the parameters from config
config = ConfigFile()
config.read_config(topology_name)
data_download_path = ''
model_source_dir = ''

# Data is requested when the topology has a download recipe (the accuracy
# recipe counts only in accuracy mode) and no data location is configured yet.
has_download_recipe = config.data_download != '' or (config.data_download_accuracy != '' and accuracy_only is True)
if has_download_recipe and config.data_location == '':
    print("\nPlease follow below command to get the data : ")
    recipe_key = 'data-download' if config.data_download != '' else 'data-download-accuracy'
    val = config.read_value_from_section(topology_name, recipe_key)
    print(val)
    # use the "DATA_DOWNLOAD_PATH" environment variable value if it exists.
    data_download_path = _env_or_prompt('DATA_DOWNLOAD_PATH', 'Input a data download path: ')
    print(" data download path : " , data_download_path)

if config.preprocessing != '':
    print("\nPlease follow below command to get required files and installation : ")
    val = config.read_value_from_section(topology_name, 'preprocessing')
    print(val)
    # use the "MODEL_SOURCE_DIR" environment variable value if it exists.
    model_source_dir = _env_or_prompt('MODEL_SOURCE_DIR', 'Input a model source dir: ')
    print(" model_source_dir : " , model_source_dir)

>NOTE: If no action required from above cell, please skip below cell and go to [Step 4.4](#step_4_4).

#### Users set the configurations in topo.ini for those required data and files


In [None]:

# Overwrite configurations in topo.ini
config = ConfigFile()
config.read_config(topology_name)

# Store the data location only when the topology has a download recipe (the
# accuracy recipe counts only in accuracy mode) and a path was provided.
has_download_recipe = config.data_download != '' or (config.data_download_accuracy != '' and accuracy_only is True)
if has_download_recipe and data_download_path != '':
    config.write_value_from_section(topology_name, 'data-location', data_download_path)
    config.data_location = data_download_path

# Accuracy evaluation cannot run without a data location.
missing_accuracy_data = accuracy_only == True and config.data_location == ''
if missing_accuracy_data:
    print("ERROR! STOP! need data for accuacy evaluatoin!")

needs_model_source = config.preprocessing != ''
if needs_model_source:
    config.write_value_from_section(topology_name, 'model-source-dir', model_source_dir)


<a id='step_4_4'></a>
### Step 4.4: Prepare the pre-trained model and model parameters for running the benchmark
1. Get related parameters according to the selected topology
2. Get the pretrained model if needed

In [None]:
# Build the sys.argv list that launch_benchmark.py will see in Step 5:
# common parameters + topology parameters + model path + output dir + custom args.
config = ConfigFile()
configvals = []
# Get common parameters according to users' inputs  
params = config.get_parameters(topology_name, configvals,
                   batch_size=batch_size, thread_number=thread_number, socket_number=utilized_socket_number,
                   num_inter_threads=num_inter_threads, num_intra_threads=num_intra_threads, accuracy_only=accuracy_only)

# Get the parameters from config
# (configvals is rebound here to whatever read_config returns; it is extended below)
configvals=config.read_config(topology_name)

# Get the pre-trained model file
# NOTE(review): this condition tests in_graph/checkpoint against '' while the
# branches below test against 'NA' — confirm which marker ConfigFile uses.
if config.wget != '' and ( config.in_graph == '' or config.checkpoint == ''  ):
    pretrain_model_path = config.download_pretrained_model(current_path=current_path)
    pretrain_model_path = config.uncompress_file(pretrain_model_path, current_path=current_path)
    # 'NA' in one of the two fields selects the other one as the model source.
    if config.in_graph == 'NA':
        config.checkpoint = pretrain_model_path        
    if config.checkpoint == 'NA':
        pretrain_model_path = config.find_pretrained_model_in_folder(pretrain_model_path)
        config.in_graph = pretrain_model_path 
# set pre-trained model path        
if config.checkpoint == 'NA':
    configvals.append("--in-graph")
    configvals.append(config.in_graph)
if config.in_graph == 'NA':
    configvals.append("--checkpoint")
    configvals.append(config.checkpoint)
    
#Set output-dir folder
if log_folder !='':
    configvals.append("--output-dir")
    configvals.append(log_folder)

# Add custom arguments
# ("--" separates the launcher arguments from the custom ones)
if config.custom_args != '':
    configvals.append("--")
    custom_config = config.parsing_custom_args(topology_name, config.custom_args)
    configvals = configvals + custom_config

# Combine common parameters and config parameters
params = params + configvals    
    
# Overwrite sys.argv so that it carries the script path followed by the parameters.
sys.argv=[benchmark_path]+params
print(sys.argv)

### Step 4.5: Create a CSV file to log the performance numbers

In [None]:
from profiling.profile_utils import PerfPresenter

# The second space-separated word of the benchmark name is used as the job type
# and prefixes the CSV file name.
job_type = topology_name.split(' ')[1]
compact_name = topology_name.replace(' ', '')
csv_fname = job_type + '_' + compact_name + '.csv'
print(csv_fname)

perfp = PerfPresenter()
perfp.create_csv_logfile(job_type, csv_fname)

# Register the CSV file once, even when this cell is executed again.
if csv_fname not in csv_fname_list:
    csv_fname_list.append(csv_fname)


### Step 4.6: Retrieve oneDNN Runtime Information
> NOTE : performance may be impacted if users enable those oneDNN debug features.

Users can enable oneDNN VERBOSE Mode to have more information from oneDNN library.  
Users can export the DNNL_VERBOSE environment variable to turn verbose mode on and control the level of verbosity.

|Environment variable|Value|Description|
|:-----|:----|:-----|
|DNNL_VERBOSE| 0 |no verbose output (default)|
||1|primitive information at execution|
||2|primitive information at creation and execution|  

Refer to the [link](https://oneapi-src.github.io/oneDNN/dev_guide_verbose.html) for detailed verbose mode information

In [None]:
# '1' selects "primitive information at execution" (see the table above);
# set it to '0' to turn verbose output off.
os.environ['DNNL_VERBOSE'] = '1'

Users can use the DNNL_JIT_DUMP environment variable to inspect oneDNN JIT code,  
and then check instruction usage by disassembling the JIT kernel.

|Environment variable|Value|Description|
|:-----|:----|:-----|
|DNNL_JIT_DUMP | 0 |JIT dump is disabled (default)|
||any other value|JIT dump is enabled|

Refer to the [link](https://oneapi-src.github.io/oneDNN/dev_guide_inspecting_jit.html) for detailed JIT Dump information

> NOTE: It is recommended to enable JIT DUMP only for inference. For training, the number of JIT DUMP files would be huge!

In [None]:
# '0' keeps the JIT dump disabled (see the table above); any other value enables it.
os.environ['DNNL_JIT_DUMP'] = '0'

<a id='step_5'></a>
## Step 5:  Run the benchmark 

> NOTE: Below section will enable Tensorflow timeline for the model by patching it, and then unpatch it after the model completes its training or inference.

Set enable_tf_timeline to False if users don't want to get TF timeline information.

In [None]:
import os


def env_flag(name, default=False):
    """Return the boolean value stored in environment variable *name*.

    The value is parsed as text instead of being passed to ``eval()``, so the
    content of the environment can never execute code and the result is always
    a real ``bool``.

    Args:
        name: name of the environment variable to read.
        default: value returned when the variable is unset or empty.

    Returns:
        True for "1/true/yes/on", False for "0/false/no/off" (case-insensitive),
        *default* when the variable is unset or empty.

    Raises:
        ValueError: if the variable holds any other text.
    """
    raw = os.environ.get(name, '').strip().lower()
    if raw == '':
        return default
    if raw in ('1', 'true', 'yes', 'on'):
        return True
    if raw in ('0', 'false', 'no', 'off'):
        return False
    raise ValueError("%s must be True or False, got %r" % (name, os.environ[name]))


enable_tf_timeline = True
# Overwrite the enable_tf_timeline value with the "ENABLE_TIMELINE" environment
# variable when that variable is set.
enable_tf_timeline = env_flag('ENABLE_TIMELINE', enable_tf_timeline)

Run the benchmark
> NOTE: Users don't need to finish training if whole training takes a long time.  
Users can stop below cell in the middle of the training, and still get the related performance data.

In [None]:
import sys

# Resolve the repository root once so that the patch and the unpatch step
# always work on the same location.
repo_path = os.environ['ModelZooRoot'] #current_path + os.sep + "../../"

if enable_tf_timeline:
    # patch related model script
    config.patch_model_to_enable_timeline(repopath=repo_path)

try:
    # run the benchmark with the patch
    benchmark_path = os.environ['ModelZooRoot']+os.sep+"benchmarks/"
    if benchmark_path not in sys.path:  # do not grow sys.path on every re-run
        sys.path.append(benchmark_path)
    from launch_benchmark import LaunchBenchmark

    util = LaunchBenchmark()
    util.main()
finally:
    # Always restore the model script, also when the benchmark fails or the
    # user interrupts the cell in the middle of a long training run; otherwise
    # the patched script would be left behind in the repository.
    if enable_tf_timeline:
        # unpatch related model script
        config.unpatch_model_to_enable_timeline(model_path=repo_path+'/models/')

<a id='step_6'></a>
## Step 6: Parse output for performance number

#### Find the file path of the related runtime log.

In [None]:
# identify the path of the latest log file
configvals=config.read_config(topology_name)
import os
for file in os.listdir(log_folder):
    if file.endswith(".log"):
        logpath = os.path.join(log_folder, file)
        used_logpath = logpath + ".old"
        os.rename(logpath, used_logpath)
        print(used_logpath)
        break
os.environ["TF_LOGPATH"] = used_logpath

#### Parse the logfile for performance number.

In [None]:
# Extract the performance numbers from the renamed log and append them to the
# CSV file created in Step 4.5.
if not accuracy_only:
    print("get throughput")
    # Dedicated local names are used on purpose: the notebook-global `index`
    # holds the benchmark counter of Step 2.1 and must not be overwritten here.
    throughput_line = perfp.read_throughput(
        used_logpath,
        keyword=config.throughput_keyword,
        index=int(config.throughput_index),
        splitter=config.throughput_splitter,
    )
    if throughput_line is not None:
        throughput = throughput_line
        print(throughput)
        # log the perf number
        perfp.log_infer_perfcsv(0, throughput, 0, csv_fname)
    else:
        print("ERROR! can't find correct performance number from log. please check log for runtime issues")
else:
    # get accuracy number and calculate throughput
    print("get accuracy and throughput")
    accuracy = perfp.read_accuracy(used_logpath)
    iteration_times = perfp.read_iteration_time(used_logpath)
    if accuracy != [] and iteration_times != []:
        # The last entries of both series are used.
        final_accuracy = accuracy[-1]
        iteration_time = iteration_times[-1]
        throughput = float(batch_size) / iteration_time
        print(final_accuracy,throughput)
        # log the perf number
        perfp.log_infer_perfcsv(0, throughput, final_accuracy, csv_fname)
    else:
        print("ERROR! can't find correct performance number from log. please check log for runtime issues")

#### Optional : print out the log file 

In [None]:
# Set print_out_log to True to dump the whole runtime log into the notebook.
print_out_log = False
if print_out_log is True:
    # The context manager closes the log file even if reading fails.
    with open(used_logpath) as logfile:
        logout = logfile.read()
    print(logout)

#### Users should be able to see a new Timeline json file after running the benchmark
If users don't see a new timeline json file, they need to make sure that they patch the model script correctly.

In [None]:
from profiling.profile_utils import CommonUtils

utils = CommonUtils()
# Folders that are searched for timeline files.
paths = [
    os.environ['ModelZooRoot']+os.sep + subfolder
    for subfolder in ("benchmarks", "docs/notebooks/perf_analysis")
]
pattern = "*.json"
timeline_files, timeline_paths = utils.found_files_in_folders(pattern, paths)
if timeline_paths != []:
    print(timeline_paths)
else:
    print("No %s files found" %(pattern))

#### Users should be able to see new JIT DUMP files after running the benchmark if they enable the oneDNN JIT DUMP feature.


In [None]:
from profiling.profile_utils import CommonUtils

utils = CommonUtils()
# Folders that are searched for JIT dump files.
paths = [
    os.environ['ModelZooRoot']+os.sep + subfolder
    for subfolder in ("benchmarks", "docs/notebooks/perf_analysis")
]
pattern = "*.bin"
jitdump_files, jitdump_paths = utils.found_files_in_folders(pattern, paths)
if jitdump_paths != []:
    print(jitdump_paths)
else:
    print("No %s files found" %(pattern) ,"Need to set DNNL_JIT_DUMP as 1 first")

####  Gather all generated JIT DUMP files
Copy the jit dump files from benchmark folder to the JITDUMP folder.  
Those jit dump files will be analyzed in another Jupyter notebook.

In [None]:
import shutil

# Collect the runtime log and the JIT dump files of this run in a JITDUMP
# folder, then rename that folder to JITDUMP_<data type>.
jitdump_dir_path = os.environ['ModelZooRoot']+os.sep + "docs/notebooks/perf_analysis" + os.sep + "JITDUMP"
# The path is a directory: create it if needed and tolerate a leftover folder
# from an earlier, interrupted run (os.mkdir would raise FileExistsError).
os.makedirs(jitdump_dir_path, exist_ok=True)
shutil.move(os.environ['TF_LOGPATH'], jitdump_dir_path)
for path in jitdump_paths:
    shutil.move(path, jitdump_dir_path)

target_path = jitdump_dir_path+'_'+os.environ['SELECTED_TYPE']
# Replace the results of a previous run for the same data type.  Without this
# removal shutil.move would place JITDUMP *inside* the existing target folder.
if os.path.isdir(target_path):
    shutil.rmtree(target_path)
elif os.path.exists(target_path):
    os.remove(target_path)
shutil.move(jitdump_dir_path, target_path)
print(target_path)
    

#### Repeat Step 3 to Step 6 among different data types for selected topology/benchmark
> NOTE : Please iterate over different data types before you start Step 7 for performance comparison.
Users can pick one of below options.

In [None]:
# Persist the latest CSV file name together with its precision in the cache
# CSV file, so that the next run can read them back.
fname = [csv_fname_list[-1], selected_precision_list[-1]]
cachecsv.write_to_csv(fname)

##### Option 1: Automatically pick next data type

In [None]:
# Move on to the next data type of the selected topology.
data_type_index += 1
if data_type_index >= len(precision_list):
    # Every data type has been covered: clear the selection and drop the cache.
    for finished_name in ('DATA_TYPE_INDEX', 'TOPO_INDEX'):
        os.environ[finished_name] = ''
    cachecsv.delete_csv()
else:
    os.environ['DATA_TYPE_INDEX'] = str(data_type_index)

> Go back to Step 3.3 by clicking the link : [Step 3.3](#step_3_3) or run the cell below to automatically go over all steps for the next data type

##### Option 2: Manually pick next data type

> Go back to Step 3.2 by clicking the link : [Step 3.2](#step_3_2).

<a id='step_7'></a>
## Step 7: Draw the performance comparison diagram
> NOTE: Please iterate over different data types before the Step 7 and the Step 8


In [None]:
# Show what has been collected so far: CSV files first, then precisions,
# then the selected topology.
for entry in list(csv_fname_list) + list(selected_precision_list):
    print(entry)
print(selected_topology)

In [None]:
%matplotlib inline
from profiling.profile_utils import PerfPresenter

perfp=PerfPresenter(True)
# inference  throughput
title = 'Perf comparison among data types'
perfp.draw_perf_diag_from_csvs(csv_fname_list, selected_precision_list, 'throughput','throughput (image/sec)', selected_topology, title)
perfp.draw_perf_ratio_diag_from_csvs(csv_fname_list, selected_precision_list, 'throughput','speedup', selected_topology, title)
if accuracy_only == True:
    perfp.draw_perf_diag_from_csvs(csv_fname_list, selected_precision_list,'accuracy','accuracy', selected_topology, title)
    perfp.draw_perf_ratio_diag_from_csvs(csv_fname_list, selected_precision_list,'accuracy','accuracy loss', selected_topology, title)

<a id='step_8'></a>
## Step 8: Gather all generated Tensorflow Timeline Json files and JITDUMP files
Copy the timeline json files from benchmark folder to the Timeline folder.
Those Timeline files will be analyzed in another Jupyter notebook.

In [None]:
import shutil
import datetime
from profiling.profile_utils import CommonUtils

# Collect the timeline json files and the JITDUMP_* folders in one Timeline
# folder, then rename that folder with the topology name and the current time.
utils = CommonUtils()
print(len(precision_list))
# Only gather once there is at least one timeline file per supported data type.
if timeline_paths != [] and len(timeline_paths) >= len(precision_list):
    timeline_dir_path = os.environ['ModelZooRoot']+os.sep + "docs/notebooks/perf_analysis" + os.sep + "Timeline"
    # The path is a directory: create it if needed and tolerate a leftover
    # folder from an earlier run (os.mkdir would raise FileExistsError).
    os.makedirs(timeline_dir_path, exist_ok=True)
    for path in timeline_paths:
        shutil.move(path, timeline_dir_path)
    # move JITDUMP results into Timeline folder
    # NOTE(review): this calls found_files_in_folder (singular) while the other
    # cells call found_files_in_folders — confirm both helpers exist.
    pattern = "JITDUMP_*"
    jitdump_fds, jitdump_fd_paths = utils.found_files_in_folder(pattern, os.environ['ModelZooRoot']+os.sep + "docs/notebooks/perf_analysis")
    for fd_path in jitdump_fd_paths:
        print(fd_path)
        shutil.move(fd_path, timeline_dir_path)
    # rename Timeline folder with topo and time info
    timeinfo = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M")
    target_path = timeline_dir_path+'_'+os.environ['SELECTED_TOPO']+'_'+timeinfo
    shutil.move(timeline_dir_path, target_path)
    # All results are gathered, so the cache of the previous runs is dropped.
    cachecsv.delete_csv()
    print("Successfully gather all results in ",target_path)