#  Compare Model Zoo Benchmark performance between Intel optimized and stock Tensorflow

This Jupyter notebook helps you evaluate the performance benefits of Intel-optimized TensorFlow using several pre-trained models from the Intel Model Zoo. 
The notebook shows a bar chart like the one below, comparing the performance of stock and Intel-optimized TensorFlow.

<img src="images/perf_comparison.png"  />

<a id='section_1'></a>
# Section 1: Display Platform Information 

In [None]:
# Ignore all warning messages so they do not clutter the notebook output.
import warnings
# No message/category/module restriction is given, so the 'ignore' action
# applies to every warning raised after this point.
warnings.filterwarnings('ignore')

#### ACTION: Users should change the value of os.environ['ModelZooRoot'] according to their environment

In [None]:
import os

# If the default path does not work, change the ModelZooRoot path according to
# your environment.
## USER INPUT
current_path = os.getcwd()

# ModelZooRoot defaults to three directory levels above the working directory.
# Both roots keep their trailing "/" because later cells append relative paths
# to them by plain string concatenation.
os.environ['ModelZooRoot'] = "".join((current_path, "/../../../"))
os.environ['ProfileUtilsRoot'] = "".join(
    (os.environ['ModelZooRoot'], "docs/notebooks/perf_analysis/profiling/"))

# Echo both settings so the user can verify them.
for _root_name in ('ModelZooRoot', 'ProfileUtilsRoot'):
    print(os.environ[_root_name])

#### Check for mandatory python scripts after ModelZooRoot and ProfileUtilsRoot are assigned

In [None]:
import os

current_path = os.getcwd()

# Benchmark launcher: echo its path when present, otherwise report the problem.
benchmark_path = os.environ['ModelZooRoot'] + "benchmarks/launch_benchmark.py"
print(benchmark_path if os.path.exists(benchmark_path)
      else "ERROR! Can't find benchmark script!")

# Profiling helper module used by the remaining cells of this notebook.
profile_utils_path = os.environ['ProfileUtilsRoot'] + "profile_utils.py"
print(profile_utils_path if os.path.exists(profile_utils_path)
      else "ERROR! Can't find profile_utils script!")

In [None]:
# Dump the platform information through the notebook's profiling helper.
# NOTE(review): this import only resolves if the folder containing the
# "profiling" package is on the import path (presumably the notebook's own
# directory) — confirm for your setup.
from profiling.profile_utils import PlatformUtils
plat_utils = PlatformUtils()
plat_utils.dump_platform_info()

# Section 2: Run the benchmark on the selected Jupyter Kernels

## Step 1: Check TensorFlow version and MKL enablement

In [None]:
import tensorflow as tf

print("We are using Tensorflow version", tf.__version__)

# Only the leading component of the version string decides which module
# exposes IsMklEnabled().
major_version = int(tf.__version__.split(".", 1)[0])
if major_version < 2:
    # TensorFlow 1.x: the flag is reachable through tf.pywrap_tensorflow.
    on_mkl = tf.pywrap_tensorflow.IsMklEnabled()
else:
    # TensorFlow 2.x and later: the flag lives in a private wrapper module.
    from tensorflow.python import _pywrap_util_port
    on_mkl = _pywrap_util_port.IsMklEnabled()
print("MKL enabled:", on_mkl)

## Step 2: Configure parameters for launch_benchmark.py according to the selected Topology

### Step 2.1: List out the supported topologies

In [None]:
import sys
from profiling.profile_utils import ConfigFile

# This flag is passed to the config reader here and, in later cells, decides
# whether accuracy is parsed and plotted in addition to throughput.
accuracy_only = False

config = ConfigFile()
sections = config.read_supported_section(accuracy_only=accuracy_only)

# Show every supported topology next to the index the user has to enter in
# the following step.
print("Supported topologies: ")
for index, section in enumerate(sections):
    print(" %d: %s " % (index, section))

### Step 2.2: Pick a topology. 
#### ACTION : Please select one supported topology and change topo_index accordingly

In [None]:
# User picks a topology by its index in the "Supported topologies" list.
# Batch size and thread counts are configured in later cells.
## USER INPUT
topo_index=0

#### List out the selected topology name

In [None]:
# Resolve the user's index to a topology name; an index past the end of the
# list only produces a message and leaves topology_name unset.
if topo_index < len(sections):
    topology_name = sections[topo_index]
    print(topology_name)
else:
    print("ERROR! please input a topo_index within range")

### Step 2.3 Import Model Zoo CPU info

In [None]:
# Make the Model Zoo's benchmarks/common/ folder importable so that
# platform_util can be loaded from it.
sys.path.append(os.environ['ModelZooRoot']+os.sep+'benchmarks/common/')
from platform_util import PlatformUtil 
# cpu_info provides the socket / core / NUMA attributes read by the next cell.
# NOTE(review): the meaning of the empty-string constructor argument is not
# visible in this notebook — check PlatformUtil.
cpu_info = PlatformUtil("")

### Step 2.4: User can also manually set batch size and number of threads

In [None]:
import os

# Derive default socket / core / inter-op thread settings from the CPU layout.
numa_nodes = cpu_info.numa_nodes
print("CPU count per socket:" , cpu_info.cores_per_socket ," \nSocket count:", cpu_info.sockets, " \nNuma nodes:",numa_nodes)

if numa_nodes > 0:
    # NUMA machine: default to the cores of one socket and a single
    # inter-op thread.
    socket_number, inter_thread = 1, 1
    cpu_count = cpu_info.cores_per_socket
else:
    # on non-numa machine, we should use all the cores and don't use numactl
    socket_number, inter_thread = -1, cpu_info.sockets
    cpu_count = cpu_info.cores_per_socket * cpu_info.sockets

#### ACTION: Users can change the value of thread_number and batch_size to see different performance
1. thread_number: the value will apply to the num_cores parameter in launch_benchmark.py  
2. utilized_socket_number:  the value will apply to the socket-id parameter in launch_benchmark.py 
3. num_inter_threads: the value will  apply to the num-inter-threads parameter in launch_benchmark.py 
4. num_intra_threads: the value will  apply to the num-intra-threads parameter in launch_benchmark.py 
5. batch_size: the value will apply to the batch_size parameter in launch_benchmark.py 
6. log_folder: the folder where the logs are stored.

In [None]:
## USER INPUT
# The defaults below come from the CPU layout detected in the previous cell;
# override any of them to experiment with different settings.
thread_number=cpu_count 
utilized_socket_number=socket_number
num_inter_threads = inter_thread
# Intra-op threads follow thread_number unless changed explicitly.
num_intra_threads = thread_number
batch_size=32
# Benchmark logs are written to (and later read back from) this folder.
log_folder=os.getcwd() + os.sep + "logs"

### Step 2.5: get required data and files if needed.
No action is needed if the cell below produces no output.

In [None]:
# Load the settings of the selected topology and show any manual preparation
# commands it declares.
config = ConfigFile()
config.read_config(topology_name)

# Dataset instructions are only shown while no data location is configured.
if config.data_download != '':
    if config.data_location == '':
        print("\nPlease follow below command to get the data : ")
        val = config.read_value_from_section(topology_name, 'data-download')
        print(val)

# Additional files / installation steps declared by the topology, if any.
if not config.preprocessing == '':
    print("\nPlease follow below command to get required files and installation : ")
    val = config.read_value_from_section(topology_name, 'preprocessing')
    print(val)

#### ACTION: Users set the configuration in topo.ini for those required data and files
1. data_download_path: the value will be set as data-location parameters in topo.ini for the related topology.
2. model_source_dir: the value will be set as the model-source-dir parameter in topo.ini for the related topology such as Wide and Deep. Those models use tensorflow-models.

In [None]:
## USER INPUT
# data_download_path: stored as 'data-location' for the selected topology.
# model_source_dir:   stored as 'model-source-dir' for the selected topology.
data_download_path = ''
model_source_dir = ''

# Overwrite configurations in topo.ini
config = ConfigFile()
config.read_config(topology_name)

# Record the data location only when the topology declares a download step
# and the user actually supplied a path.
if config.data_download != '' and data_download_path != '':
    config.write_value_from_section(topology_name, 'data-location', data_download_path)
    config.data_location = data_download_path

# Accuracy evaluation cannot run without a dataset.
if accuracy_only and config.data_location == '':
    print("ERROR! STOP! need data for accuracy evaluation!")

# NOTE(review): unlike the data location above, 'model-source-dir' is written
# even when model_source_dir is left empty, which replaces any value already
# stored for this topology — confirm this is intended.
if config.preprocessing != '':
    config.write_value_from_section(topology_name, 'model-source-dir', model_source_dir)


### Step 2.6: Prepare pre-trained model and model parameters for running the benchmark
1. Get related parameters according to selected topology
2. Get pretrained model if needed

In [None]:
# Assemble the argument list for launch_benchmark.py and install it as
# sys.argv, so the launcher invoked in Step 3 sees it as its command line
# (presumably parsed from sys.argv — confirm in launch_benchmark.py).
config = ConfigFile()
configvals = []
# Get common parameters according to users' inputs.
# configvals is still an empty list at this point.
params = config.get_parameters(topology_name, configvals,
                   batch_size=batch_size, thread_number=thread_number, socket_number=utilized_socket_number,
                   num_inter_threads=num_inter_threads, num_intra_threads=num_intra_threads, accuracy_only=accuracy_only)

# Get the topology-specific parameters from config; this rebinds configvals
# to the value returned by read_config and populates the config attributes
# (wget, in_graph, checkpoint) read below.
configvals=config.read_config(topology_name)

# Get the pre-trained model file when a download URL is configured and either
# model path is still empty.
# NOTE(review): the download condition tests for '' while the assignments
# below test for 'NA' — confirm which sentinel values ConfigFile uses.
if config.wget != '' and ( config.in_graph == '' or config.checkpoint == ''  ):
    pretrain_model_path = config.download_pretrained_model(current_path=current_path)
    pretrain_model_path = config.uncompress_file(pretrain_model_path, current_path=current_path)
    # Whichever of the two model settings is not marked 'NA' receives the
    # downloaded path.
    if config.in_graph == 'NA':
        config.checkpoint = pretrain_model_path
    if config.checkpoint == 'NA':
        config.in_graph = pretrain_model_path
# Set the pre-trained model path: pass --in-graph when checkpoint is 'NA',
# and --checkpoint when in_graph is 'NA'.
if config.checkpoint == 'NA':
    configvals.append("--in-graph")
    configvals.append(config.in_graph)
if config.in_graph == 'NA':
    configvals.append("--checkpoint")
    configvals.append(config.checkpoint)

# Set output-dir folder so the log can be found again in Step 4.
if log_folder !='':
    configvals.append("--output-dir")
    configvals.append(log_folder)

# Combine common parameters and config parameters
params = params + configvals

# argv[0] is the launcher script path computed in Section 1.
sys.argv=[benchmark_path]+params
print(sys.argv)

### Step 2.7: Create a CSV file to log the performance numbers

In [None]:
from profiling.profile_utils import PerfPresenter

# The second space-separated token of the topology name is used as the job
# type, e.g. 'inference'.
job_type = topology_name.split(' ')[1]
# CSV name: "<job type>_<topology name without spaces>.csv"
csv_fname = '_'.join([job_type, topology_name.replace(' ', '')]) + '.csv'

perfp = PerfPresenter()
perfp.create_csv_logfile(job_type, csv_fname)

## Step 3:  Run the benchmark 

> NOTE: The cell below enables the TensorFlow timeline for the model by patching the model script, and removes the patch after the model completes its training or inference.

Set enable_tf_timeline to False if users don't want to get TF timeline information.

In [None]:
# True: the model script is patched before the run to enable the TensorFlow
# timeline and unpatched afterwards. False: the benchmark runs unpatched.
enable_tf_timeline = True

Run the benchmark

In [None]:
import sys

if enable_tf_timeline:
    # patch related model script
    repo_path = os.environ['ModelZooRoot'] #current_path + os.sep + "../../"
    config.patch_model_to_enable_timeline(repopath=repo_path)

try:
    # run the benchmark with the patch
    # A separate name is used for the folder so that benchmark_path keeps
    # pointing at launch_benchmark.py and Step 2.6 can be re-run safely.
    benchmark_dir = os.environ['ModelZooRoot']+os.sep+"benchmarks/"
    # Avoid piling up duplicate entries when this cell is executed repeatedly.
    if benchmark_dir not in sys.path:
        sys.path.append(benchmark_dir)
    from launch_benchmark import LaunchBenchmark

    util = LaunchBenchmark()
    util.main()
finally:
    # Always restore the model script, even when the benchmark raises or
    # exits, so that a failed run does not leave the repository patched.
    if enable_tf_timeline:
        # unpatch related model script
        config.unpatch_model_to_enable_timeline(model_path=repo_path+'/models/')

## Step 4: Parse output for performance number

#### Find the file path of the related runtime log.

In [None]:
# identify the path of the latest log file
configvals=config.read_config(topology_name)
import os

# os.listdir() returns entries in arbitrary order, so the newest log is
# selected explicitly by modification time.
log_candidates = [os.path.join(log_folder, log_name)
                  for log_name in os.listdir(log_folder)
                  if log_name.endswith(".log")]
if log_candidates:
    logpath = max(log_candidates, key=os.path.getmtime)
    # Rename the log once it is picked up so that the next run does not
    # select it again.
    used_logpath = logpath + ".old"
    os.rename(logpath, used_logpath)
    print(used_logpath)
else:
    # used_logpath is left untouched so that re-running this cell after the
    # log was already renamed keeps the previously found path.
    print("ERROR! Can't find any .log file in " + log_folder)


#### Parse the logfile for performance number.

In [None]:
if not accuracy_only:
    # Throughput-only run: read the throughput value from the log using the
    # keyword and index configured for the topology.
    print("get throughput")
    val = config.throughput_keyword
    index = int(config.throughput_index)
    line = perfp.read_throughput(used_logpath, keyword=val, index=index)
    if line is not None:
        throughput=line
        print(throughput)
        # log the perf number
        perfp.log_infer_perfcsv(0, throughput, 0, csv_fname)
    else:
        print("ERROR! can't find correct performance number from log. please check log for runtime issues")
else:
    # get accuracy number and calculate throughput
    print("get accuracy and throughput")
    accuracy = perfp.read_accuracy(used_logpath)
    iternation = perfp.read_iteration_time(used_logpath)
    if accuracy != [] and iternation != []:
        # Only the last reported accuracy and iteration time are used.
        final_accuracy=accuracy[-1]
        iternation_time = iternation[-1]
        # Throughput is derived as batch size divided by the iteration time.
        throughput = float(batch_size)/iternation_time
        print(final_accuracy,throughput)
        # log the perf number
        perfp.log_infer_perfcsv(0, throughput, final_accuracy, csv_fname)
    else:
        print("ERROR! can't find correct performance number from log. please check log for runtime issues")

#### Optional : print out the log file for runtime issues

In [None]:
# Print the whole runtime log; the context manager closes the file handle
# as soon as its content has been read.
with open(used_logpath) as logfile:
    logout = logfile.read()
print(logout)

#### Users should be able to see a new Timeline json file after running the benchmark
If users don't see a new timeline json file, they need to make sure that they patch the model script correctly.

In [None]:
from profiling.profile_utils import CommonUtils

utils = CommonUtils()

# Folders searched for timeline json files.
paths = [os.environ['ModelZooRoot']+os.sep + subdir
         for subdir in ("benchmarks", "docs/notebooks/perf_analysis")]
pattern = "*.json"
timeline_files, timeline_paths = utils.found_files_in_folders(pattern, paths)

# Show what was found, or say that nothing matched the pattern.
if timeline_paths != []:
    print(timeline_paths)
else:
    print("No %s files found" %(pattern))

## Repeat Sections 1 and 2 on a different Jupyter kernel
Users can find information in docs/notebooks/perf_analysis/README.md on switching among different Jupyter kernels.  
> Go back to Section 1 by clicking the link: [Section 1](#section_1).

# Section 3: Results
## Draw the performance comparison diagram
>NOTE: Please go through Sections 1 and 2 on a different Jupyter kernel before comparing.


In [None]:
%matplotlib inline
from profiling.profile_utils import PerfPresenter

perfp = PerfPresenter(True)

# Each entry is (drawing method, CSV column, axis label). The throughput
# charts are always drawn; the accuracy chart only for accuracy runs.
charts = [
    (perfp.draw_perf_diag_from_csv, 'throughput', 'throughput (image/sec)'),
    (perfp.draw_perf_ratio_diag_from_csv, 'throughput', 'speedup'),
]
if accuracy_only == True:
    charts.append((perfp.draw_perf_diag_from_csv, 'accuracy', 'accuracy'))

for draw_chart, column, label in charts:
    draw_chart(csv_fname, column, label, topology_name)

## Gather all generated Tensorflow Timeline Json files
Copy the timeline json file from benchmark folder to the Timeline folder with time information.
Those Timeline files will be analyzed in another Jupyter notebook.

In [None]:
import shutil
import datetime
from profiling.profile_utils import CommonUtils
utils = CommonUtils()

if timeline_paths != []:
    timeline_dir_path = os.environ['ModelZooRoot']+os.sep + "docs/notebooks/perf_analysis" + os.sep + "Timeline"
    # Create the collection folder only when it is missing. Testing with
    # isfile() is always False for a directory, so a Timeline folder left
    # over from an interrupted run made os.mkdir() fail.
    if not os.path.isdir(timeline_dir_path):
        os.mkdir(timeline_dir_path)
    for path in timeline_paths:
        shutil.move(path,timeline_dir_path)
    # rename Timeline folder with topo and time info
    # NOTE(review): the ':' in the timestamp is not a valid file-name
    # character on Windows — confirm the supported platforms.
    timeinfo = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M")
    target_path = timeline_dir_path+'_'+timeinfo
    shutil.move(timeline_dir_path, target_path)
    print(target_path)