In [1]:
import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving the
# whole card up front. Looping over the detected GPUs also covers the
# "no GPU present" case without relying on a swallowed IndexError.
for gpu in tf.config.list_physical_devices('GPU'):
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except (ValueError, RuntimeError) as err:
        # ValueError: invalid device.
        # RuntimeError: the runtime is already initialized, so the setting
        # can no longer be changed. Report it instead of hiding it.
        print(f"Could not enable memory growth on {gpu.name}: {err}")
print(tf.__version__)
# Print list of all available physical devices (including GPUs)
physical_devices = tf.config.list_physical_devices()
print("Available Physical Devices:", physical_devices)

# Print list of all available logical devices
logical_devices = tf.config.list_logical_devices()
print("Available Logical Devices:", logical_devices)

# Check if GPU is available
gpu_available = tf.config.list_physical_devices('GPU')
if gpu_available:
    print("GPU is available and recognized by TensorFlow.")
else:
    print("No GPU is available or TensorFlow doesn't recognize the GPU.")

2.10.0
Available Physical Devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Available Logical Devices: [LogicalDevice(name='/device:CPU:0', device_type='CPU'), LogicalDevice(name='/device:GPU:0', device_type='GPU')]
GPU is available and recognized by TensorFlow.


In [2]:
import tensorflow as tf
print(tf.__version__)
import numpy as np
import cv2,os,math,keras,torch,time
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input
import tensorflow.keras.backend as K
data_size = 960
# Directory holding the validation arrays. Set the POH_DATA_DIR environment
# variable to run the notebook on another machine; the default keeps the
# location that was hard-coded here before.
data_dir = os.environ.get('POH_DATA_DIR', 'C:/Users/Unknown/Documents/VSCode/Diploma')
x_test = np.load(f'{data_dir}/Validation{data_size}_half.npy')
# (height, width) used below for resizing and for the propagation grid.
output_size = (640, 640)
def imresize(array,shape):
    """Resize every image of a stack with bilinear interpolation.

    Args:
        array: Stack of 2-D images indexed as ``array[i, :, :]`` (image index
            on the first axis).
        shape: Target size. Either a single int for a square
            ``shape x shape`` result, or a ``(height, width)`` pair.

    Returns:
        A new array of shape ``(array.shape[0], height, width)`` with the
        same dtype as ``array``.
    """
    if isinstance(shape, (tuple, list)):
        height, width = shape
    else:
        height = width = shape
    resized_images = np.empty((array.shape[0], height, width), dtype=array.dtype)
    for i in range(array.shape[0]):
        # cv2.resize expects the target size as (width, height).
        resized_images[i, :, :] = cv2.resize(
            array[i, :, :], (width, height), interpolation=cv2.INTER_LINEAR
        )
    return resized_images

# Switch for the resizing step: any falsy value keeps x_test at the
# resolution it was loaded with.
resize = 1
if resize:
    shape = output_size[0]
    x_test = imresize(x_test, shape)

# Convert to float16 and append a trailing channel axis in one go.
# (Author's note: do not use bfloat16. it is only for TPU and AVX-512 for now.)
x_test = tf.expand_dims(
    tf.image.convert_image_dtype(x_test, dtype='float16'),
    axis=-1,
)

# Show the resulting shape and dtype as a sanity check.
print(x_test.shape, x_test.dtype, sep='\n')

def PropagationLayer(input,mode=None):
    """Propagate a phase map with the band-limited angular spectrum method.

    Args:
        input: Batch of single-channel maps shaped (batch, height, width, 1).
            With ``mode="input"`` the values are used directly as a phase in
            radians; otherwise they are treated as 0..1 values (e.g. a
            sigmoid output) and multiplied by 2*pi.
        mode: ``"input"`` propagates over ``-z`` (reverse direction) and
            returns the complex amplitude. Any other value propagates over
            ``+z`` and returns the intensity.

    Returns:
        For ``mode="input"``: float32 tensor (batch, height, width, 2) with
        the real and imaginary parts stacked on the last axis.
        Otherwise: float32 tensor (batch, height, width) holding ``|U|**2``.
    """
    input = tf.cast(input, dtype='float32')
    # Read the grid size from the tensor itself rather than building a
    # throwaway Keras Input layer on every call.
    n_rows, n_cols = input.shape[1], input.shape[2]
    input = tf.squeeze(input, axis=-1)  # drop the channel axis before the FFTs
    lamda = 532e-9  # wavelength ("lambda" is a reserved word)
    dp = 8e-6       # pixel size of the SLM
    z = 100e-3      # assumed propagation distance
    if mode == "input":
        field_real = tf.math.cos(input)
        field_imag = tf.math.sin(input)
        z = -z  # same distance, reverse direction
    else:  # input is 0 to 1 data by sigmoid
        field_real = tf.math.cos(2*math.pi*input)
        field_imag = tf.math.sin(2*math.pi*input)
    inputsc = tf.complex(field_real, field_imag)  # same as exp(1j*phase)

    Lx = dp * n_cols
    Ly = dp * n_rows
    f_max = 0.5 / dp
    du = 1.0 / Lx
    dv = 1.0 / Ly
    # Build the frequency axes from an integer index so each axis always has
    # exactly n_cols / n_rows samples; a float-step range can gain an extra
    # sample through rounding, which would break the multiply below.
    x_range = -f_max + tf.range(n_cols, dtype=tf.float32) * du
    y_range = -f_max + tf.range(n_rows, dtype=tf.float32) * dv
    fx, fy = tf.meshgrid(x_range, y_range)

    # Shift only the two spatial axes. Without `axes`, fftshift also rolls
    # the batch axis, which reorders the outputs for odd batch sizes.
    spatial_axes = (-2, -1)
    FH = tf.signal.fftshift(
        tf.signal.ifft2d(tf.signal.fftshift(inputsc, axes=spatial_axes)),
        axes=spatial_axes,
    )

    # NOTE(review): this phase is on the order of 1e6 rad and is evaluated in
    # float32, so it is only resolved to roughly 0.1 rad. Left unchanged to
    # keep the numerical results identical.
    phase = 2 * math.pi * z * tf.sqrt(1 /(lamda**2) - fx**2 - fy**2)
    P = tf.complex(tf.math.cos(phase), tf.math.sin(phase))  # normal ASM kernel

    # Frequency limits from the sampling theorem. fx runs along the width
    # (Lx) and fy along the height (Ly).
    fx_max = tf.math.divide(Lx, lamda * tf.math.sqrt((2 * z) ** 2 + Lx ** 2))
    fy_max = tf.math.divide(Ly, lamda * tf.math.sqrt((2 * z) ** 2 + Ly ** 2))
    # Band-limiting mask: zero the kernel outside the allowed frequencies.
    P = tf.where(tf.math.logical_and(tf.abs(fx) < fx_max, tf.abs(fy) < fy_max), x=P, y=0.0)

    P = tf.expand_dims(P, axis=0)  # add a batch dimension for broadcasting
    # Apply the kernel in the Fourier domain and transform back.
    U = tf.signal.fftshift(
        tf.signal.fft2d(tf.signal.fftshift(FH * P, axes=spatial_axes)),
        axes=spatial_axes,
    )
    if mode == "input":
        U = tf.expand_dims(U, axis=-1)
        asm = tf.concat([tf.math.real(U), tf.math.imag(U)], axis=-1)  # complex amp
    else:
        asm = tf.square(tf.abs(U))  # observed intensity
    return asm

2.10.0
(50, 640, 640, 1)
<dtype: 'float16'>


TensorRT Engine creation takes some time.

In [3]:
import onnx,onnxruntime
# Parse the ONNX graph; the tensor names are read from it further down.
model = onnx.load("./Unet_POH_640fixed_16bit.onnx")
# Optional model tooling, kept for reference:
#!onnxsim "./Unet_POH_512.onnx" "./Unet_POH_512_sim.onnx"
#onnx.checker.check_model(model)
#print(onnx.helper.printable_graph(model.graph))

options = onnxruntime.SessionOptions()
#options.enable_profiling=True
print(onnxruntime.get_available_providers())#list available ep

# TensorRT execution-provider switches, set before the session is created.
os.environ.update({
    "ORT_TENSORRT_FP16_ENABLE": "1",          # Enable FP16 precision
    #"ORT_TENSORRT_INT8_ENABLE": "1",         # Enable INT8 precision
    "ORT_TENSORRT_ENGINE_CACHE_ENABLE": "1",  # Enable engine caching
})

ort_session = onnxruntime.InferenceSession(
    './Unet_POH_640fixed_16bit.onnx',
    sess_options=options,
    providers=['TensorrtExecutionProvider'],
)

# Names used to feed and fetch tensors when running the session.
input_name = model.graph.input[0].name #usually it is input_1
output_names = [node.name for node in model.graph.output]

['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']


In [9]:
input = x_test[21:31]
print(input.shape)
# x_test is (N, H, W, 1): a single image has rank 3, a batch has rank 4.
# Checking the rank avoids mistaking a batch whose length happens to equal
# the image height for a single image.
if len(input.shape) == 3:#single image case
    data_num=1
else:
    data_num=input.shape[0]
# perf_counter is monotonic and high-resolution, which suits interval timing
# better than the wall clock.
# NOTE(review): the first run after creating the session may include
# TensorRT engine build time; consider a warm-up run before timing.
#torch.cuda.synchronize() #it is for accurate time measurement with GPU, but I chose profile feature of onnxruntime
start_time = time.perf_counter()
input_datas = (tf.cast(PropagationLayer(input,mode="input"),dtype='float16')).numpy()
start_time2 = time.perf_counter()
holo = ort_session.run(output_names, {input_name: input_datas})

#torch.cuda.synchronize()
end_time = time.perf_counter()
inference_time = (end_time - start_time)/(data_num)
inference_time2 = (end_time - start_time2)/(data_num)
print(f"Inference + Data prep time for each image: {inference_time} seconds")#with data prep
print(f"Inference time for each image: {inference_time2} seconds")#inference only
print(f"total inference:{(end_time - start_time)}")
holo=holo[0]  # run() returns a list; keep the first fetched output
output=PropagationLayer(holo,mode="output")
print(output.shape)

(10, 640, 640, 1)
Inference + Data prep time for each image: 0.01660282611846924 seconds
Inference time for each image: 0.013902831077575683 seconds
total inference:0.16602826118469238
(10, 640, 640)


In [None]:
ort_session.end_profiling()#stop onnx profiling

# ---- Overview grid: one column per sample, three rows --------------------
n = 11
plt.figure(figsize=(n+2, 4), dpi=250)
plt.subplots_adjust(hspace=0.5)
input = tf.cast(input, dtype='float32')  # cast to float32 before display
# (row title, image stack) in top-to-bottom order; row r uses subplot
# index i + r * n.
rows = (
    ("Original", input),
    ("Reconstructed", output),
    ("Hologram", holo),
)
for i in range(1, n):#1 to n-1
    for row_idx, (row_title, images) in enumerate(rows):
        ax = plt.subplot(3, n, i + row_idx * n)
        plt.imshow(images[i-1])
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if i == 1:
            ax.set_title(row_title)  # label only the first column
plt.show()


# ---- Close-up of one sample ----------------------------------------------
# num - 21 is presumably the offset into the x_test[21:31] slice taken in
# the inference cell -- verify if that slice changes.
num = 25
plt.imshow(output[num-21], cmap='gray')
plt.figure(figsize=(6, 5), dpi=250)
panels = (
    (output[num-21], 'Reconstructed hologram with DNN'),
    (tf.cast(input[num-21], dtype="float32"), 'Original image'),
)
for position, (picture, caption) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.imshow(picture, cmap='gray')
    plt.title(caption)
    plt.axis('image')
plt.show()


Evaluate single image with several criteria

In [8]:
# Disabled experiment, kept as a bare string literal so nothing here runs:
# dynamic QUInt8 quantization of the 512 model, followed by a session on the
# quantized file using the DirectML execution provider.
'''from onnxruntime.quantization import QuantizationMode, quantize_dynamic,QuantType,quantize_static
quantized_model = quantize_dynamic("./Unet_POH_512.onnx", "./Unet_POH_512_quantized.onnx", weight_type=QuantType.QUInt8)

model = onnx.load("./Unet_POH_512_quantized.onnx")
#!onnxsim "./Unet_POH_512.onnx" "./Unet_POH_512_sim.onnx"
#onnx.checker.check_model(model)
#print(onnx.helper.printable_graph(model.graph))
options = onnxruntime.SessionOptions()
#options.enable_profiling=True
ort_session = onnxruntime.InferenceSession('./Unet_POH_512_quantized.onnx',sess_options=options,providers=['DmlExecutionProvider'])
# Get the input name from the model
input_name = model.graph.input[0].name
output_names = [output.name for output in model.graph.output]'''

'from onnxruntime.quantization import QuantizationMode, quantize_dynamic,QuantType,quantize_static\nquantized_model = quantize_dynamic("./Unet_POH_512.onnx", "./Unet_POH_512_quantized.onnx", weight_type=QuantType.QUInt8)\n\nmodel = onnx.load("./Unet_POH_512_quantized.onnx")\n#!onnxsim "./Unet_POH_512.onnx" "./Unet_POH_512_sim.onnx"\n#onnx.checker.check_model(model)\n#print(onnx.helper.printable_graph(model.graph))\noptions = onnxruntime.SessionOptions()\n#options.enable_profiling=True\nort_session = onnxruntime.InferenceSession(\'./Unet_POH_512_quantized.onnx\',sess_options=options,providers=[\'DmlExecutionProvider\'])\n# Get the input name from the model\ninput_name = model.graph.input[0].name\noutput_names = [output.name for output in model.graph.output]'

In [9]:
# Disabled variant of the inference cell, kept as a bare string literal so
# nothing here runs: it feeds the propagated inputs to the session one image
# at a time, collects the results and propagates the stacked holograms.
'''all_holo = []

input = x_test[20:30]
print(input.shape)
# Run the model
start_time = time.time()
input_datas = (PropagationLayer(input,mode="input"))
start_time2 = time.time()
for input_data in input_datas:
    input_data = np.expand_dims(input_data, axis=0)
    holo = ort_session.run(output_names, {input_name: input_data})
    all_holo.append(holo)
end_time = time.time()
inference_time = (end_time - start_time)/(input.shape[0])
inference_time2 = (end_time - start_time2)/(input.shape[0])
print(f"Inference time for each image: {inference_time} seconds")#with data prep
print(f"Prediction time for each image: {inference_time2} seconds")
holo = np.squeeze(all_holo)
holo= tf.expand_dims(holo, axis=-1)
output=PropagationLayer(holo,mode="output")
print(output.shape)'''

'all_holo = []\n\ninput = x_test[20:30]\nprint(input.shape)\n# Run the model\nstart_time = time.time()\ninput_datas = (PropagationLayer(input,mode="input"))\nstart_time2 = time.time()\nfor input_data in input_datas:\n    input_data = np.expand_dims(input_data, axis=0)\n    holo = ort_session.run(output_names, {input_name: input_data})\n    all_holo.append(holo)\nend_time = time.time()\ninference_time = (end_time - start_time)/(input.shape[0])\ninference_time2 = (end_time - start_time2)/(input.shape[0])\nprint(f"Inference time for each image: {inference_time} seconds")#with data prep\nprint(f"Prediction time for each image: {inference_time2} seconds")\nholo = np.squeeze(all_holo)\nholo= tf.expand_dims(holo, axis=-1)\noutput=PropagationLayer(holo,mode="output")\nprint(output.shape)'

In [None]:
# Disabled variant of the inference cell, kept as a bare string literal so
# nothing here runs: per-image session calls on float16 inputs, with the
# same per-image and total timing printout as the batched version.
'''all_holo = []
input = x_test[21:31]
print(input.shape)
if input.shape[0] == x_test.shape[1]:#single image case
    data_num=1
else:
    data_num=input.shape[0]
#torch.cuda.synchronize() #it is for accurate time measurement with GPU, but I chose profile feature of onnxruntime
start_time = time.time()
input_datas = (tf.cast(PropagationLayer(input,mode="input"),dtype='float16'))
start_time2 = time.time()
for input_data in input_datas:
    input_data = np.expand_dims(input_data, axis=0)
    holo = ort_session.run(output_names, {input_name: input_data})
    all_holo.append(holo)
#torch.cuda.synchronize()
end_time = time.time()
inference_time = (end_time - start_time)/(data_num)
inference_time2 = (end_time - start_time2)/(data_num)
print(f"Inference + Data prep time for each image: {inference_time} seconds")#with data prep
print(f"Inference time for each image: {inference_time2} seconds")#inference only
print(f"total inference:{(end_time - start_time)}")
holo = np.squeeze(all_holo)
holo= tf.expand_dims(holo, axis=-1)
output=PropagationLayer(holo,mode="output")
print(output.shape)'''