In [15]:
import torch
import numpy as np


# Pick the accelerator when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

# Element type used for every array built in this notebook.
dtype = np.complex64

# Problem sizes and the step `dt` that scales the exponent in the transforms.
N = 100
M = 20
dt = 0.2

Using cuda device


In [16]:
# Seeded generator so every Restart & Run All benchmarks exactly the same data.
rng = np.random.default_rng(0)

# Rank-4 test tensor with entries drawn from [0, 10), stored as `dtype`.
a_np = np.array(rng.random((N, N, N, N)) * 10, dtype=dtype)
# torch.tensor() copies its input and returns a tensor that does not require
# grad, so no extra .detach() is needed on the CPU / device copies.
a_cpu = torch.tensor(a_np, device="cpu")
a_gpu = torch.tensor(a_np, device=device)

# One N x N matrix per axis of `a_np`, symmetrised so that b == b.T.
bs_np = [np.array(rng.random((N, N)) * 10, dtype=dtype) for _ in range(a_np.ndim)]
bs_np = [(b + b.T) * 0.5 for b in bs_np]
bs_cpu = [torch.tensor(b, device="cpu") for b in bs_np]
bs_gpu = [torch.tensor(b, device=device) for b in bs_np]


- Testing the partial transforms on the CPU using PyTorch:

In [17]:
%%time
for i, b in enumerate(bs_cpu):
    u = torch.matrix_exp(-1.0j * dt * b)
    a_cpu = torch.tensordot(a_cpu, u, ([i], [0]))
    a_cpu = torch.moveaxis(a_cpu, -1, i)
print(a_cpu.shape, a_cpu.device)

torch.Size([100, 100, 100, 100]) cpu
CPU times: user 6.95 s, sys: 554 ms, total: 7.5 s
Wall time: 627 ms


- Testing the partial transforms on the GPU using PyTorch:

In [18]:
%%time
for i, b in enumerate(bs_gpu):
    u = torch.matrix_exp(-1.0j * dt * b)
    a_gpu = torch.tensordot(a_gpu, u, ([i], [0]))
    a_gpu = torch.moveaxis(a_gpu, -1, i)
print(a_gpu.shape, a_gpu.device)

torch.Size([100, 100, 100, 100]) cuda:0
CPU times: user 317 ms, sys: 0 ns, total: 317 ms
Wall time: 322 ms
