This demonstrates two methods of scattering angle computation.

In [1]:
import numpy as np
import cupy as cp
from cupyx import jit
print(f"CuPy version {cp.__version__}")
mempool = cp.get_default_memory_pool()
mempool.free_all_blocks()
print(f"mempool.used_bytes {mempool.used_bytes()}")

CuPy version 11.0.0
mempool.used_bytes 0


In [2]:
@cp.fuse()
def get_theta_fuse(random_in, g):
    temp = (1 - g * g) / (1 - g + random_in)
    cost = (1 + g * g - temp * temp) / (2 * g)
    return cp.arccos(cost)

# deposit the output in the input location
@jit.rawkernel()
def get_theta_raw(random_inout, size):
    tid = jit.blockIdx.x * jit.blockDim.x + jit.threadIdx.x
    ntid = jit.gridDim.x * jit.blockDim.x
    for i in range(tid, size, ntid):
        temp = (1 - g * g) / (1 - g + random_inout[i])
        cost = (1 + g * g - temp * temp) / (2 * g)
        random_inout[i] = cp.arccos(cost)

g = 0.9
size = 300000000

  cupy._util.experimental('cupyx.jit.rawkernel')


In [3]:
#%%time
#random_input_fuse = cp.random.uniform(0, 2 * g, size, dtype=np.float32)
#theta_fuse = get_theta_fuse(random_input_fuse, g)

In [4]:
#del random_input_fuse, theta_fuse

In [5]:
#%%time
#random_input_fuse = cp.random.uniform(0, 2 * g, size, dtype=np.float32)
#theta_fuse = get_theta_fuse(random_input_fuse, g)

In [6]:
#del random_input_fuse, theta_fuse

In [7]:
%%time
random_input = cp.random.uniform(0, 2 * g, size, dtype=np.float32)
get_theta_raw((128,), (1024,), (random_input, size))

CPU times: user 230 ms, sys: 32.9 ms, total: 263 ms
Wall time: 262 ms


In [8]:
print(f"mempool.used_bytes {mempool.used_bytes()}")
del random_input

mempool.used_bytes 1200000000


In [9]:
%%time
random_input = cp.random.uniform(0, 2 * g, size, dtype=np.float32)
get_theta_raw((128,), (1024,), (random_input, size))

CPU times: user 195 µs, sys: 60 µs, total: 255 µs
Wall time: 189 µs


In [10]:
#del random_input_fuse, theta_fuse
del random_input