In [1]:
import torch
import time

In [2]:
# Create a small tensor (on the CPU by default).
x = torch.tensor([1.0, 2.0, 3.0])

# Query CUDA availability once so every branch below agrees.
cuda_available = torch.cuda.is_available()

# Move the tensor to GPU if available
if cuda_available:
    x = x.to('cuda')

print("CUDA available:", cuda_available)
print("Device count:", torch.cuda.device_count())

if cuda_available:
    current_device = torch.cuda.current_device()
    print("Current device:", current_device)
    # Report the name of the device actually in use rather than a
    # hard-coded index 0.
    print("Device name:", torch.cuda.get_device_name(current_device))
    print("CUDA version (torch build):", torch.version.cuda)

# Last top-level expression of the cell, so Jupyter displays its value.
# Guarded so the cell also runs on CPU-only machines, where it evaluates
# to None and nothing is displayed.
torch.cuda.get_device_capability() if cuda_available else None


CUDA available: True
Device count: 1
Current device: 0
Device name: NVIDIA GeForce RTX 4080
CUDA version (torch build): 13.0


(8, 9)

In [3]:
def measure_time(device, size=5000):
    """Time a single ``size x size`` matrix multiplication on ``device``.

    Args:
        device: Target device, given either as a string (``'cpu'``,
            ``'cuda'``, ``'cuda:0'``, ...) or as a ``torch.device``.
        size: Side length of the two square random operands.
            Defaults to 5000.

    Returns:
        float: Elapsed wall-clock seconds for the ``torch.mm`` call only;
        creating the operands is not part of the measurement.
    """
    # Normalise first so 'cuda:0' and torch.device objects are recognised
    # as CUDA too, instead of only the exact string 'cuda'.
    device = torch.device(device)
    on_cuda = device.type == 'cuda'

    x = torch.randn(size, size, device=device)
    y = torch.randn(size, size, device=device)

    if on_cuda:
        # CUDA work is queued asynchronously: wait for the operand creation
        # to finish so it does not leak into the timed region.
        torch.cuda.synchronize(device)

    start = time.perf_counter()
    torch.mm(x, y)

    if on_cuda:
        # Wait for the multiplication itself to complete before stopping
        # the clock; otherwise only the kernel launch would be timed.
        torch.cuda.synchronize(device)

    return time.perf_counter() - start


# Warmup: one throwaway GPU run whose timing is discarded.
has_cuda = torch.cuda.is_available()
if has_cuda:
    measure_time('cuda')

# Timed CPU run.
cpu_time = measure_time('cpu')
print(f"Time on CPU: {cpu_time:.6f} seconds")

# Timed GPU run, only when a CUDA device is present.
if has_cuda:
    gpu_time = measure_time('cuda')
    print(f"Time on GPU: {gpu_time:.6f} seconds")

Time on CPU: 0.256516 seconds
Time on GPU: 0.011312 seconds


In [4]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()


def _print_memory_usage(header, device):
    """Print the allocated and reserved CUDA memory of ``device`` in GB.

    Args:
        header: Line printed above the two figures.
        device: CUDA ``torch.device`` to query. Querying the same device
            the tensors are placed on keeps the numbers consistent, instead
            of always reading a hard-coded index 0.
    """
    bytes_per_gb = 1024**3
    print(header)
    print('Allocated:', round(torch.cuda.memory_allocated(device)/bytes_per_gb, 1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(device)/bytes_per_gb, 1), 'GB')


# Additional info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(device))
    _print_memory_usage('Memory Usage:', device)

# Example of tensor moved to GPU: created on the CPU, then copied to
# `device` (a no-op move when `device` is the CPU).
x = torch.randn(9000, 9000).to(device)

# Check memory usage again
if device.type == 'cuda':
    _print_memory_usage('Memory Usage After Tensor Allocation:', device)

# Releases cached blocks that are no longer in use; `x` is still referenced,
# so the memory backing it stays allocated.
torch.cuda.empty_cache()

Using device: cuda

NVIDIA GeForce RTX 4080
Memory Usage:
Allocated: 0.0 GB
Cached:    0.3 GB
Memory Usage After Tensor Allocation:
Allocated: 0.3 GB
Cached:    0.6 GB
