In [3]:
import cudaq

# Select the "nvidia" target for every kernel executed after this cell.
# No `option` is passed here, so the target's default settings apply.
cudaq.set_target("nvidia")  # GPU state-vector simulator


In [4]:
import cudaq

# Two-qubit Bell-pair kernel: Hadamard on q[0], X on q[1] controlled by q[0],
# then measure both qubits.
@cudaq.kernel
def bell():
    q = cudaq.qvector(2)
    # H puts q[0] into superposition; the controlled-X then ties q[1] to q[0],
    # which is why only the correlated outcomes 00 and 11 are expected.
    h(q[0]); x.ctrl(q[0], q[1])
    mz(q)

# Run the Bell kernel on the active target and print the histogram of
# measured bitstrings.
counts = cudaq.sample(bell)   # defaults to 1000 shots
print(counts)                 # expect ~{'00': ~500, '11': ~500}



{ 00:491 11:509 }



In [6]:
# Print the currently active target (simulator backend, precision, and the
# options it accepts) to confirm which backend the kernels are running on.
print(cudaq.get_target())


Target nvidia
	simulator=cusvsim_fp32
	platform=default
	description=The NVIDIA Target provides a simulated QPU via cuStateVec (state-vector simulation) integration.
	precision=fp32
Supported Arguments:
  - option (Specify the target options as a comma-separated list.
Supported options are 'fp32', 'fp64', 'mgpu', 'mqpu'.
For example, the 'fp32,mgpu' option combination will activate multi-GPU distribution with single-precision. Not all option combinations are supported.)



In [None]:
import time, math, random
import cudaq

# ---------- knobs ----------
N_QUBITS = 22         # width of the benchmark circuit
DEPTH    = 40         # number of CX-chain + RZ layers; read by the `heavy` kernel
SHOTS    = 1000       # shots used for the timed run
GPU_OPTS = "fp32"     # try "fp64" or "mgpu", e.g., "fp32,mgpu"; only applied to "nvidia*" targets
# ---------------------------

# Fixed seed so the random rotation angles generated further down are reproducible.
random.seed(0)

# Benchmark kernel: a Hadamard on every qubit, then DEPTH layers each made of
# a nearest-neighbour CX chain followed by one RZ per qubit, then a full
# measurement.
#   n_qubits: number of qubits to allocate.
#   thetas:   RZ angles, consumed in order; needs at least n_qubits * DEPTH entries.
# DEPTH is not a parameter: it is read from the notebook-level variable.
@cudaq.kernel
def heavy(n_qubits: int, thetas: list[float]):
    q = cudaq.qvector(n_qubits)
    for i in range(n_qubits): h(q[i])
    # t is the running index into thetas; it advances once per RZ gate.
    t = 0
    for _ in range(DEPTH):
        # Entangling chain: q[i] controls X on q[i+1].
        for i in range(n_qubits - 1):
            x.ctrl(q[i], q[i+1])
        # One rotation per qubit, each with its own angle.
        for i in range(n_qubits):
            rz(thetas[t], q[i]); t += 1
    mz(q)

# One RZ angle per (layer, qubit) pair, drawn uniformly from [0, 2π).
thetas = [math.tau * random.random() for _ in range(N_QUBITS * DEPTH)]

def do_sample(*args, **kwargs):
    """Call ``cudaq.sample``, adapting the shot-count keyword to this build.

    Callers pass the number of shots as ``shots=...``. The call is attempted
    with the keywords exactly as given, then with ``shots`` renamed to
    ``shots_count``, and finally with no shot keyword at all. When that last
    fallback succeeds a ``RuntimeWarning`` is emitted, because the backend's
    default shot count was used instead of the requested one.

    Args:
        *args: Positional arguments forwarded unchanged to ``cudaq.sample``
            (the kernel first, then the kernel's own arguments).
        **kwargs: Keyword arguments forwarded to ``cudaq.sample``. ``shots``
            is the only keyword that is ever renamed or dropped.

    Returns:
        Whatever ``cudaq.sample`` returns.

    Raises:
        TypeError: If no ``shots`` keyword was supplied and the call fails,
            or if every fallback spelling is rejected as well.
    """
    import warnings

    if "shots" not in kwargs:
        # Nothing to translate: a retry would only repeat the identical call,
        # and discarding unrelated keywords would silently change the run.
        return cudaq.sample(*args, **kwargs)

    shots = kwargs["shots"]
    other = {key: value for key, value in kwargs.items() if key != "shots"}

    # Try the caller's spelling first, then the `shots_count` spelling.
    for shot_kw in ("shots", "shots_count"):
        try:
            return cudaq.sample(*args, **other, **{shot_kw: shots})
        except TypeError:
            continue

    # Last resort: no shot keyword at all. Warn only after the call succeeded,
    # so a genuine TypeError is not accompanied by a misleading warning.
    result = cudaq.sample(*args, **other)
    warnings.warn(
        f"cudaq.sample accepted no shot-count keyword; requested shots={shots} "
        "was ignored and the default shot count was used.",
        RuntimeWarning,
        stacklevel=2,
    )
    return result

def pick_targets():
    """Choose one CPU and one GPU simulator target from those this build lists.

    Each kind is looked up first in a fixed preference order; if none of the
    preferred names is available, the first available name that matches a
    loose substring rule is used instead.

    Returns:
        A ``(cpu_name, gpu_name)`` tuple of target names.

    Raises:
        RuntimeError: If no CPU target or no GPU target could be found.
    """
    names = [getattr(target, "name", str(target)) for target in cudaq.get_targets()]

    def first_preferred(preferred):
        # First entry of the preference list that this build actually offers.
        for candidate in preferred:
            if candidate in names:
                return candidate
        return None

    def first_matching(accepts):
        # First available name accepted by the heuristic predicate.
        for candidate in names:
            if accepts(candidate):
                return candidate
        return None

    cpu = first_preferred(["qpp-cpu", "density-matrix-cpu", "stim", "qpp"])
    gpu = first_preferred(
        ["nvidia", "nvidia-fp64", "nvidia-mgpu", "nvidia-mgpu-fp64", "tensornet", "tensornet-mps"]
    )

    # Heuristic fallbacks, used only when nothing from the preference lists matched.
    if not cpu:
        cpu = first_matching(lambda n: "cpu" in n or n in ("qpp", "stim"))
    if not gpu:
        gpu = first_matching(lambda n: "nvidia" in n or "tensor" in n)

    if not (cpu and gpu):
        raise RuntimeError(f"Couldn’t find CPU/GPU targets. Found: {names}")
    return cpu, gpu

def set_target(name: str, opts: str | None = None):
    # Some builds require keyword 'option', not positional.
    if opts:
        cudaq.set_target(name, option=opts)
    else:
        cudaq.set_target(name)

def time_target(name: str, opts: str | None = None):
    set_target(name, opts)
    # warm-up (JIT, caches)
    _ = do_sample(heavy, N_QUBITS, thetas, shots=10)
    t0 = time.perf_counter()
    counts = do_sample(heavy, N_QUBITS, thetas, shots=SHOTS)
    dt = time.perf_counter() - t0
    return dt, sum(counts.values())

# Benchmark driver: pick a CPU and a GPU target, time the same circuit on
# each, and report the ratio. The GPU target is the one left active afterwards.
cpu_name, gpu_name = pick_targets()

# CPU run first; the second return value (total counts) is not needed here.
cpu_t, _ = time_target(cpu_name)
print(f"CPU  ({cpu_name}):      {cpu_t:.3f} s")

gpu_opts = GPU_OPTS if "nvidia" in gpu_name else None  # only nvidia uses 'option'
gpu_t, _ = time_target(gpu_name, gpu_opts)
# Label shows the target name plus its option string, when one was used.
label = f"{gpu_name}{' '+gpu_opts if gpu_opts else ''}"
print(f"GPU  ({label}): {gpu_t:.3f} s")

# Values above 1 mean the GPU run was faster than the CPU run.
print(f"\nSpeedup (CPU/GPU): {cpu_t/gpu_t:.2f}×")

# Memory hint: a state vector holds 2**N_QUBITS complex amplitudes.
state_bytes_fp32 = (2**N_QUBITS) * 8   # complex64
state_bytes_fp64 = (2**N_QUBITS) * 16  # complex128
print(f"\nApprox state size: ~{state_bytes_fp32/1e9:.2f} GB (fp32) / {state_bytes_fp64/1e9:.2f} GB (fp64)")


In [15]:
import time, math, random, cudaq

# --- knobs ---
# NOTE: DEPTH and SHOTS reuse the names of the knobs in the CPU-vs-GPU
# benchmark cell; running this cell rebinds them for the whole notebook.
START_N = 20     # starting qubits
MAX_N   = 28     # try up to here (stop earlier if OOM)
DEPTH   = 20     # number of CX-chain + RZ layers; read by the `heavy` kernel below
SHOTS   = 200    # shots for each timed run
# -----------

# Every run of the sweep uses the single-precision NVIDIA simulator.
cudaq.set_target("nvidia", option="fp32")   # try "fp64" or "fp32,mgpu" if you want

def approx_gb(n, bytes_per_amplitude=8):
    """Approximate size in GB (10**9 bytes) of an ``n``-qubit state vector.

    Args:
        n: Number of qubits; the state vector holds ``2**n`` amplitudes.
        bytes_per_amplitude: Storage per amplitude. The default of 8 is a
            complex64 amplitude (fp32); pass 16 for complex128 (fp64).

    Returns:
        The size as a float, in units of 10**9 bytes.
    """
    return (2**n * bytes_per_amplitude) / 1e9

# Scaling-sweep kernel: a Hadamard on every qubit, then DEPTH layers of a
# nearest-neighbour CX chain followed by one RZ per qubit, then a full
# measurement.
#   n:     number of qubits to allocate.
#   theta: RZ angles, consumed in order; needs at least n * DEPTH entries.
# NOTE: this redefines the `heavy` kernel from the CPU-vs-GPU benchmark cell
# and reads DEPTH from the notebook-level variable.
@cudaq.kernel
def heavy(n: int, theta: list[float]):
    q = cudaq.qvector(n)
    for i in range(n): h(q[i])
    # k is the running index into theta; it advances once per RZ gate.
    k = 0
    for _ in range(DEPTH):
        for i in range(n-1): x.ctrl(q[i], q[i+1])
        for i in range(n): rz(theta[k], q[i]); k += 1
    mz(q)

# Sweep the qubit count from START_N to MAX_N, timing one SHOTS-shot run per
# size after an untimed 10-shot warm-up. The sweep stops at the first size
# that fails (for example when the state vector no longer fits in memory).
for n in range(START_N, MAX_N + 1):
    # Re-seed on every iteration so each size gets a reproducible angle list.
    random.seed(0)
    thetas = [2*math.pi*random.random() for _ in range(n*DEPTH)]
    try:
        cudaq.sample(heavy, n, thetas, shots_count=10)  # warm-up
        t0 = time.perf_counter()
        cudaq.sample(heavy, n, thetas, shots_count=SHOTS)
        dt = time.perf_counter() - t0
    except Exception as e:
        # Show the message as well as the class, so an out-of-memory stop can
        # be told apart from an unrelated failure such as a kernel error.
        print(f"N={n:2d}  ~{approx_gb(n):5.2f} GB fp32  -> stopped ({type(e).__name__}: {e})")
        break
    else:
        print(f"N={n:2d}  ~{approx_gb(n):5.2f} GB fp32  time={dt:6.3f}s")


N=20  ~ 0.01 GB fp32  time= 0.016s
N=21  ~ 0.02 GB fp32  time= 0.017s
N=22  ~ 0.03 GB fp32  time= 0.031s
N=23  ~ 0.07 GB fp32  time= 0.060s
N=24  ~ 0.13 GB fp32  time= 0.121s
N=25  ~ 0.27 GB fp32  time= 0.248s
N=26  ~ 0.54 GB fp32  time= 0.503s
N=27  ~ 1.07 GB fp32  time= 1.073s
N=28  ~ 2.15 GB fp32  time= 2.185s


In [16]:
import cudaq

# Single-qubit kernel: rotate about X by theta, then about Z by phi, then
# measure the qubit.
@cudaq.kernel
def one_qubit(theta: float, phi: float):
    q = cudaq.qvector(1)
    rx(theta, q[0])
    rz(phi, q[0])
    mz(q)

# Set GPU target (single-precision NVIDIA simulator)
cudaq.set_target("nvidia", option="fp32")

# Run sampling: theta=1.234, phi=0.789, 1000 shots.
# For theta=1.234 the ideal P(1) is sin²(theta/2) ≈ 0.33; the RZ only adds a
# phase and does not change the measured probabilities.
result = cudaq.sample(one_qubit, 1.234, 0.789, shots_count=1000)
print(result)


{ 0:641 1:359 }



In [27]:
import cudaq, numpy as np
from cudaq import spin

# Single-qubit ansatz with no measurement: rotate about X by theta, then about
# Y by phi. It is evaluated with cudaq.observe rather than sampled.
@cudaq.kernel
def circuit(theta: float, phi: float):
    q = cudaq.qvector(1)
    rx(theta, q[0])
    ry(phi, q[0])

# Expectation value of Z on qubit 0 for theta=0, phi=π.
# expect ≈ -1 at (0, π): RX(0) does nothing and RY(π) takes |0> to |1>.
val = cudaq.observe(circuit, spin.z(0), 0.0, np.pi).expectation()
print("⟨Z⟩ at (θ=0, φ=π):", val)  # -> -1.0


⟨Z⟩ at (θ=0, φ=π): -0.9999999999999981


In [35]:
import numpy as np, cudaq
from cudaq import spin

# (optional) GPU
# cudaq.set_target("nvidia", option="fp32")

# Same RX-then-RY ansatz as the earlier `circuit`, but taking both angles as
# one list: p[0] is the RX angle (θ) and p[1] is the RY angle (φ).
# NOTE: this redefines `circuit` with a different signature.
@cudaq.kernel
def circuit(p: list[float]):
    q = cudaq.qvector(1)
    rx(p[0], q[0])
    ry(p[1], q[0])

# Observable to minimise: Z on qubit 0.
ham = spin.z(0)
# Adam optimizer, fed with gradients from central finite differences.
opt = cudaq.optimizers.Adam()
grad = cudaq.gradients.CentralDifference()

# non-stationary starting point; the optimizer itself works on an offset d
# that is added to x0 (see objective_delta).
x0 = [0.54, 0.12]

def objective_delta(d: list[float]):
    """Cost and gradient of ⟨ham⟩ at the circuit parameters ``x0 + d``.

    Args:
        d: Two-element offset from the starting point ``x0``.

    Returns:
        ``(cost, gradient)``: the expectation value at ``x0 + d`` and the
        gradient computed by ``grad`` at that same point.
    """
    shifted = [x0[k] + d[k] for k in range(2)]

    def expectation(params):
        return cudaq.observe(circuit, ham, params).expectation()

    value = expectation(shifted)
    # The shift by x0 is constant, so the gradient with respect to d equals
    # the gradient with respect to the circuit parameters.
    return value, grad.compute(shifted, expectation, value)

# Minimise over the two-element offset d, then convert the optimal offset
# back into circuit parameters by adding the starting point x0.
energy, d_opt = opt.optimize(dimensions=2, function=objective_delta)
p_opt = [x0[0] + d_opt[0], x0[1] + d_opt[1]]

print("final <Z>:", energy)          # ≈ -1
print("params (θ, φ):", p_opt)       # ⟨Z⟩ = -1 at both (0, π) and (π, 0); the recorded run ended near (π, 0)


final <Z>: -0.9982628213474527
params (θ, φ): [3.0842350694048486, -0.0004203485768833337]
