zaccharieramzi · zaccharieramzi · Jun 24, 2020 · Jun 18, 2020 · Jun 18, 2020 · Jun 18, 2020
diff --git a/README.md b/README.md
@@ -13,6 +13,25 @@ pip install tfkbnufft
 This package is a verly early-stage and modest adaptation to TensorFlow of the [torchkbnufft](https://github.com/mmuckley/torchkbnufft) package written by Matthew Muckley for PyTorch.
 Please cite his work appropriately if you use this package.
 
+## Computation speed
+
+The computation speeds are given in seconds, for a 256x256 image with a spokelength of 512 and 405 spokes.
+These numbers are not to be directly compared to those of [torchkbnufft](https://github.com/mmuckley/torchkbnufft#computation-speed), since the computation is not the same.
+They are just to give a sense of the time required for computation.
+
+| Operation     | CPU    | GPU    |
+|---------------|--------|--------|
+| Forward NUFFT | 0.1676 | 0.0626 |
+| Adjoint NUFFT | 0.7005 | 0.0635 |
+
+To obtain these numbers for your machine, run the following commands, after installing this package:
+```
+pip install scikit-image Pillow
+python profile_tfkbnufft.py
+```
+
+These numbers were obtained with a Quadro P5000.
+
 ## References
 
 1. Fessler, J. A., & Sutton, B. P. (2003). Nonuniform fast Fourier transforms using min-max interpolation. *IEEE transactions on signal processing*, 51(2), 560-574.

diff --git a/profile_tfkbnufft.py b/profile_tfkbnufft.py
@@ -0,0 +1,94 @@
+import time
+
+import numpy as np
+from PIL import Image
+from skimage.data import camera
+import tensorflow as tf
+
+from tfkbnufft import kbnufft_forward, kbnufft_adjoint
+from tfkbnufft.kbnufft import KbNufftModule
+
+
+def profile_tfkbnufft(
+        image,
+        ktraj,
+        im_size,
+        device,
+    ):
+    if device == 'CPU':
+        num_nuffts = 20
+    else:
+        num_nuffts = 50
+    print(f'Using {device}')
+    device_name = f'/{device}:0'
+    with tf.device(device_name):
+        image = tf.constant(image)
+        if device == 'GPU':
+            image = tf.cast(image, tf.complex64)
+        ktraj = tf.constant(ktraj)
+        nufft_ob = KbNufftModule(im_size=im_size, grid_size=None, norm='ortho')
+        forward_op = kbnufft_forward(nufft_ob._extract_nufft_interpob())
+        adjoint_op = kbnufft_adjoint(nufft_ob._extract_nufft_interpob())
+
+        # warm-up computation
+        for _ in range(2):
+            y = forward_op(image, ktraj)
+
+        start_time = time.perf_counter()
+        for _ in range(num_nuffts):
+            y = forward_op(image, ktraj)
+        end_time = time.perf_counter()
+        avg_time = (end_time-start_time) / num_nuffts
+        print('forward average time: {}'.format(avg_time))
+
+        # warm-up computation
+        for _ in range(2):
+            x = adjoint_op(y, ktraj)
+
+        # run the adjoint speed tests
+        start_time = time.perf_counter()
+        for _ in range(num_nuffts):
+            x = adjoint_op(y, ktraj)
+        end_time = time.perf_counter()
+        avg_time = (end_time-start_time) / num_nuffts
+        print('backward average time: {}'.format(avg_time))
+
+
+def run_all_profiles():
+    print('running profiler...')
+    spokelength = 512
+    nspokes = 405
+
+    print('problem size (radial trajectory, 2-factor oversampling):')
+    print('number of spokes: {}'.format(nspokes))
+    print('spokelength: {}'.format(spokelength))
+
+    # create an example to run on
+    image = np.array(Image.fromarray(camera()).resize((256, 256)))
+    image = image.astype(np.complex)
+    im_size = image.shape
+
+    image = image[None, None, ...]
+
+    # create k-space trajectory
+    ga = np.deg2rad(180 / ((1 + np.sqrt(5)) / 2))
+    kx = np.zeros(shape=(spokelength, nspokes))
+    ky = np.zeros(shape=(spokelength, nspokes))
+    ky[:, 0] = np.linspace(-np.pi, np.pi, spokelength)
+    for i in range(1, nspokes):
+        kx[:, i] = np.cos(ga) * kx[:, i - 1] - np.sin(ga) * ky[:, i - 1]
+        ky[:, i] = np.sin(ga) * kx[:, i - 1] + np.cos(ga) * ky[:, i - 1]
+
+    ky = np.transpose(ky)
+    kx = np.transpose(kx)
+
+    ktraj = np.stack((ky.flatten(), kx.flatten()), axis=0)
+
+    ktraj = ktraj[None, ...]
+
+    profile_tfkbnufft(image, ktraj, im_size, device='CPU')
+    profile_tfkbnufft(image, ktraj, im_size, device='GPU')
+
+
+if __name__ == '__main__':
+    run_all_profiles()
diff --git a/tfkbnufft/__init__.py b/tfkbnufft/__init__.py
@@ -1,6 +1,6 @@
 """Package info"""
 
-__version__ = '0.0.5'
+__version__ = '0.1.0'
 __author__ = 'Zaccharie Ramzi'
 __author_email__ = 'zaccharie.ramzi@inria.fr'
 __license__ = 'MIT'

diff --git a/tfkbnufft/nufft/interp_functions.py b/tfkbnufft/nufft/interp_functions.py
@@ -47,8 +47,13 @@ def calc_coef_and_indices(tm, kofflist, Jval, table, centers, L, dims, conjcoef=
             coef = coef * tf.math.conj(sliced_table)
         else:
             coef = coef * sliced_table
-        arr_ind = arr_ind + tf.math.floormod(gridind[d, :], dims[d]) * \
-            tf.reduce_prod(dims[d + 1:])
+
+        floormod = tf.where(
+            tf.less(gridind[d, :], 0),
+            gridind[d, :] + dims[d],
+            gridind[d, :],
+        )
+        arr_ind = arr_ind + floormod * tf.reduce_prod(dims[d + 1:])
 
     return coef, arr_ind
 
@@ -136,9 +141,11 @@ def run_interp_back(kdat, tm, params):
 
     # initialize output array
     griddat = tf.zeros(
-        shape=(tf.shape(kdat)[0], tf.cast(tf.reduce_prod(dims), tf.int32)),
+        shape=(tf.cast(tf.reduce_prod(dims), tf.int32), tf.shape(kdat)[0]),
         dtype=kdat.dtype,
     )
+    griddat_real = tf.math.real(griddat)
+    griddat_imag = tf.math.imag(griddat)
 
     # loop over offsets and take advantage of numpy broadcasting
     for J in Jlist:
@@ -148,8 +155,12 @@ def run_interp_back(kdat, tm, params):
         updates = tf.transpose(coef[None, ...] * kdat)
         # TODO: change because the array of indexes was only in one dimension
         arr_ind = arr_ind[:, None]
-        griddat = tf.transpose(tf.tensor_scatter_nd_add(tf.transpose(griddat), arr_ind, updates))
+        # a hack related to https://github.com/tensorflow/tensorflow/issues/40672
+        # is to deal with real and imag parts separately
+        griddat_real = tf.tensor_scatter_nd_add(griddat_real, arr_ind, tf.math.real(updates))
+        griddat_imag = tf.tensor_scatter_nd_add(griddat_imag, arr_ind, tf.math.imag(updates))
 
+    griddat = tf.transpose(tf.complex(griddat_real, griddat_imag))
     return griddat
 
 @tf.function(experimental_relax_shapes=True)