In [1]:
import os
import time

# python -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())
print(tf.__version__)
assert tf.test.is_gpu_available(), "¯\_(ツ)_/¯"

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

1.14.0


In [2]:
def get_op(inp, func=lambda x: tf.identity(x)):
  inp = func(inp)
  y   = tf.nn.l2_loss(inp)
  return y

In [3]:
sess = tf.Session()

In [4]:
def _vars_init():
  uninitialized_vars = []
  for var in tf.all_variables():
    try:
      sess.run(var)
    except tf.errors.FailedPreconditionError:
      uninitialized_vars.append(var)
  init_new_vars_op = tf.initialize_variables(uninitialized_vars)
  sess.run(init_new_vars_op)

def get_ops(filters=64, kernel=[3, 3], shape=[128, 256, 256, 3], format="channels_last"):
  with tf.device("gpu"):
    inp = tf.random.normal(shape)
    id_op     = get_op(inp)
    dwconv_op = get_op(inp, func=lambda x: tf.layers.separable_conv2d(inp, filters, kernel, padding="SAME", data_format=format))
    conv_op   = get_op(inp, func=lambda x: tf.layers.conv2d(inp, filters, kernel, data_format=format))
    _vars_init()
    return id_op, dwconv_op, conv_op
  print('¯\_(ツ)_/¯')

In [12]:
def run_avg(op, n=10, name=None):
  times = []
  res   = []
  for i in range(n):
    start = time.time()
    res += sess.run(op)
    times.append(time.time() - start)  
#   if name: print('%10s: %f (+/-%f) [%f, %f]' % (name, np.mean(times), np.std(times), min(times), max(times)))
  return np.mean(times)


op_cache = {}
def run_single_comparison(**kwargs):
  # caching
  key = str(kwargs)
  if key not in op_cache:
    op_cache[key] = get_ops(**kwargs)
  id_op, conv_op, dwconv_op = op_cache[key]
  
  id_t    = run_avg(id_op, name='id')
  conv_t  = run_avg(conv_op, name='conv')    - id_t
  dconv_t = run_avg(dwconv_op, name='dconv') - id_t
  print('%.4f -> %.4f (%.2f%%)' % (conv_t, dconv_t, ((conv_t/dconv_t-1)*100)), kwargs)

In [6]:
print("Stability test for given N:")
run_single_comparison(shape=[128, 256, 256, 3])
run_single_comparison(shape=[128, 256, 256, 3])
run_single_comparison(shape=[128, 256, 256, 3])

run_single_comparison(shape=[128, 256, 256, 128])
run_single_comparison(shape=[128, 256, 256, 128])
run_single_comparison(shape=[128, 256, 256, 128])

Stability test for given N:
0.1116 -> 0.0556 (100.82%) {'shape': [128, 256, 256, 3]}
0.0465 -> 0.0445 (4.55%) {'shape': [128, 256, 256, 3]}
0.0464 -> 0.0444 (4.48%) {'shape': [128, 256, 256, 3]}
0.1774 -> 0.2146 (-17.30%) {'shape': [128, 256, 256, 128]}
0.1701 -> 0.1491 (14.03%) {'shape': [128, 256, 256, 128]}
0.1710 -> 0.1498 (14.18%) {'shape': [128, 256, 256, 128]}


First run is not like the others

### Channel_first vs channel_last forward path

In [7]:
# what about bigger volume
run_single_comparison(shape=[128, 256, 256, 128], format="channels_last")
run_single_comparison(shape=[128, 128, 256, 256], format="channels_first")
run_single_comparison(shape=[128, 256, 256, 128], format="channels_last")
run_single_comparison(shape=[128, 128, 256, 256], format="channels_first")

0.1702 -> 0.1494 (13.92%) {'shape': [128, 256, 256, 128], 'format': 'channels_last'}
0.1345 -> 0.1126 (19.52%) {'shape': [128, 128, 256, 256], 'format': 'channels_first'}
0.1711 -> 0.1501 (14.00%) {'shape': [128, 256, 256, 128], 'format': 'channels_last'}
0.1348 -> 0.1127 (19.59%) {'shape': [128, 128, 256, 256], 'format': 'channels_first'}


In [11]:
run_single_comparison(shape=[128, 64, 64, 512], format="channels_last")
run_single_comparison(shape=[128, 64, 64, 512], format="channels_last")
run_single_comparison(shape=[128, 64, 64, 512], format="channels_last")
run_single_comparison(shape=[128, 512, 64, 64], format="channels_first")
run_single_comparison(shape=[128, 512, 64, 64], format="channels_first")
run_single_comparison(shape=[128, 512, 64, 64], format="channels_first")

0.0304 -> 0.0208 (46.23%) {'shape': [128, 64, 64, 512], 'format': 'channels_last'}
0.0302 -> 0.0208 (44.82%) {'shape': [128, 64, 64, 512], 'format': 'channels_last'}
0.0302 -> 0.0210 (43.75%) {'shape': [128, 64, 64, 512], 'format': 'channels_last'}
0.0231 -> 0.0150 (53.92%) {'shape': [128, 512, 64, 64], 'format': 'channels_first'}
0.0238 -> 0.0146 (62.80%) {'shape': [128, 512, 64, 64], 'format': 'channels_first'}
0.0237 -> 0.0147 (61.37%) {'shape': [128, 512, 64, 64], 'format': 'channels_first'}


In [16]:
s, c = 64, 512
run_single_comparison(shape=[128, s, s, c], format="channels_last")
run_single_comparison(shape=[128, s, s, c], format="channels_last")
run_single_comparison(shape=[128, s, s, c], format="channels_last")
run_single_comparison(shape=[128, c, s, s], format="channels_first")
run_single_comparison(shape=[128, c, s, s], format="channels_first")
run_single_comparison(shape=[128, c, s, s], format="channels_first")

0.0287 -> 0.0197 (45.60%) {'shape': [128, 64, 64, 512], 'format': 'channels_last'}
0.0296 -> 0.0203 (45.36%) {'shape': [128, 64, 64, 512], 'format': 'channels_last'}
0.0296 -> 0.0211 (40.34%) {'shape': [128, 64, 64, 512], 'format': 'channels_last'}
0.0237 -> 0.0145 (62.88%) {'shape': [128, 512, 64, 64], 'format': 'channels_first'}
0.0240 -> 0.0148 (62.40%) {'shape': [128, 512, 64, 64], 'format': 'channels_first'}
0.0240 -> 0.0145 (65.60%) {'shape': [128, 512, 64, 64], 'format': 'channels_first'}


In [17]:
s, c = 32, 4096
run_single_comparison(shape=[128, s, s, c], format="channels_last")
run_single_comparison(shape=[128, s, s, c], format="channels_last")
run_single_comparison(shape=[128, s, s, c], format="channels_last")
run_single_comparison(shape=[128, c, s, s], format="channels_first")
run_single_comparison(shape=[128, c, s, s], format="channels_first")
run_single_comparison(shape=[128, c, s, s], format="channels_first")

0.0327 -> 0.0596 (-45.18%) {'shape': [128, 32, 32, 4096], 'format': 'channels_last'}
0.0251 -> 0.0348 (-27.98%) {'shape': [128, 32, 32, 4096], 'format': 'channels_last'}
0.0252 -> 0.0346 (-27.24%) {'shape': [128, 32, 32, 4096], 'format': 'channels_last'}
0.0132 -> 0.0228 (-42.21%) {'shape': [128, 4096, 32, 32], 'format': 'channels_first'}
0.0130 -> 0.0225 (-42.06%) {'shape': [128, 4096, 32, 32], 'format': 'channels_first'}
0.0132 -> 0.0228 (-41.99%) {'shape': [128, 4096, 32, 32], 'format': 'channels_first'}
