# Testing Tensorflow-GPU Installation


## Installation


### Installation Guides
https://www.tensorflow.org/install/gpu
https://towardsdatascience.com/installing-tensorflow-with-cuda-cudnn-and-gpu-support-on-windows-10-60693e46e781
https://tensorflow2objectdetectioninstallation.readthedocs.io/en/latest/install.html

### Current Setup

CUDA: 11.2
cuDNN: 8.1
tensorflow: 2.4.1


## Testing

### cusolver64_10.dll

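**Error:** `Could not load dynamic library 'cusolver64_10.dll'; dlerror: cusolver64_10.dll not found`

**Cause:** TensorFlow 2.4 is built against CUDA 11.0 and looks for `cusolver64_10.dll`, whereas CUDA 11.1 and later ship this library as `cusolver64_11.dll`.

**Solution:** in the CUDA `bin` directory, rename (or copy) `cusolver64_11.dll` to `cusolver64_10.dll`.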

### XLA
The message `Not creating XLA devices, tf_xla_enable_xla_devices not set` is an informational log that you can safely ignore.

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import time
import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving all of it
# up front. Only the first GPU is configured, as in the original cell.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        # (e.g. this cell was re-run after other TensorFlow work).
        print(e)
else:
    # Without this guard, physical_devices[0] raises IndexError on a machine
    # where TensorFlow cannot see any GPU.
    print('No GPU detected by TensorFlow; skipping memory-growth setup.')


In [2]:
# Report how many physical GPUs TensorFlow can see.
visible_gpu_count = len(tf.config.list_physical_devices('GPU'))
print('Num GPUs Available: ', visible_gpu_count)

Num GPUs Available:  1


In [3]:
# Restrict TensorFlow to the first physical GPU, then report how many physical
# and logical GPUs it sees.
# reference : https://www.tensorflow.org/guide/gpu
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    first_gpu = gpus[0]
    try:
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    except RuntimeError as e:
        # Visible devices must be set before GPUs have been initialized
        print(e)
    else:
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")

1 Physical GPUs, 1 Logical GPU


In [4]:
# Build a DeviceSpec with every field left unset. It is kept as a bare
# expression so the notebook displays the resulting object's repr.
tf.DeviceSpec(job=None, replica=None, task=None, device_type=None, device_index=None)

<tensorflow.python.framework.device_spec.DeviceSpecV2 at 0x1a2d481bee0>

In [None]:
# Micro-benchmark: time a single k x k float16 matrix product on the CPU and
# on the GPU.
# NOTE(review): with inputs in [0, 20) and k = 2000 the dot products probably
# exceed the float16 range (max 65504), so c1/c2 may contain inf. Only the
# timing is used here -- confirm before relying on the values.
k = 2000
a = tf.random.uniform(shape=[k, k], minval=0, maxval=20, dtype=tf.float16)
b = tf.random.uniform(shape=[k, k], minval=0, maxval=20, dtype=tf.float16)

cpu_slot = 0
gpu_slot = 0

print("start")
# Using CPU at slot 0
with tf.device('/CPU:' + str(cpu_slot)):
    # perf_counter() is a monotonic, high-resolution clock meant for intervals.
    start = time.perf_counter()
    c1 = tf.matmul(a, b)
    # tf.matmul may return before the computation has finished; fetching the
    # result with .numpy() blocks until it is complete (this adds a
    # device-to-host copy to the measured time).
    c1.numpy()
    # Stop the clock before printing so console I/O is not measured.
    end = time.perf_counter() - start
    print("Time on CPU:")
    print(end)

# Using the GPU at slot 0
with tf.device('/GPU:' + str(gpu_slot)):
    start = time.perf_counter()
    c2 = tf.matmul(a, b)
    # Same synchronisation as above: wait for the GPU kernel to finish.
    c2.numpy()
    end = time.perf_counter() - start
    print("Time on GPU:")
    print(end)
print("done")

In [2]:
import sys
import numpy as np
import tensorflow as tf
from datetime import datetime

# Multiply a 2x3 matrix by a 3x2 matrix on the first GPU and print the product.
device_name = "/gpu:0"

with tf.device(device_name):
    # Nested lists give the same 2x3 / 3x2 float32 tensors as a flat list
    # combined with an explicit shape argument.
    a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name='a')
    b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name='b')
    c = tf.matmul(a, b)

    print(c)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [3]:
# Log the device each op is placed on.
tf.debugging.set_log_device_placement(True)

# Run the same small matmul on every logical GPU, then add the per-GPU
# results together on the CPU.
gpus = tf.config.experimental.list_logical_devices('GPU')
if gpus:
    c = []
    for logical_gpu in gpus:
        with tf.device(logical_gpu.name):
            a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
            b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            product = tf.matmul(a, b)
            c.append(product)

    with tf.device('/CPU:0'):
        matmul_sum = tf.add_n(c)

    print(matmul_sum)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)
