## Test whether my GPUs support peer-to-peer (P2P) access

In [1]:
import torch

def check_p2p_support():
    """Print whether each ordered pair of distinct GPUs can use P2P access.

    When CUDA is unavailable a notice is printed and the function returns
    early. Otherwise the number of visible GPUs is printed, followed by one
    line per ordered pair ``(i, j)`` with ``i != j``.

    The check is a pure query: it does not switch the process's current CUDA
    device, so code that runs afterwards sees the same default device as
    before the call.

    Returns:
        None. All results are reported through ``print``.
    """
    if not torch.cuda.is_available():
        print("CUDA is not available. No GPUs detected.")
        return

    num_gpus = torch.cuda.device_count()
    print(f"Found {num_gpus} GPU(s)")

    for i in range(num_gpus):
        for j in range(num_gpus):
            if i == j:
                continue  # Skip self-check

            try:
                # Both device indices are passed explicitly, so there is no
                # need to call torch.cuda.set_device() first; doing so would
                # leave the current device changed as a side effect.
                can_access = torch.cuda.can_device_access_peer(i, j)
            except RuntimeError as e:
                print(f"P2P between GPU {i} and GPU {j} not supported: {e}")
            else:
                print(f"GPU {i} can access GPU {j} via P2P: {'✅ Yes' if can_access else '❌ No'}")

# Run the check once; the results are printed rather than returned.
check_p2p_support()

Found 2 GPU(s)
GPU 0 can access GPU 1 via P2P: ❌ No
GPU 1 can access GPU 0 via P2P: ❌ No


In [2]:
import torch
import ctypes

# Load the native helper library. The path is relative, so it is resolved
# against the kernel's current working directory.
lib = ctypes.CDLL('./cuda_tools/libipc_tensor_tool.so')
# Declare the C signature of export_ipc_handle for ctypes: two pointer
# arguments and an int return value (the caller below treats non-zero as failure).
lib.export_ipc_handle.argtypes = [ctypes.c_void_p, ctypes.c_void_p]
lib.export_ipc_handle.restype = ctypes.c_int

def get_ipc_handle(tensor: torch.Tensor) -> "tuple[bytes, dict]":
    """Export an IPC handle for a CUDA tensor together with its metadata.

    Args:
        tensor: A tensor that lives on a CUDA device.

    Returns:
        A ``(handle, meta)`` pair. ``handle`` is the raw content of the
        64-byte buffer filled in by ``lib.export_ipc_handle``. ``meta`` is a
        dict with the keys ``"shape"``, ``"dtype"`` (as a string) and
        ``"device"`` (the CUDA device index).

    Raises:
        ValueError: If ``tensor`` is not on a CUDA device.
        RuntimeError: If the native ``export_ipc_handle`` call returns a
            non-zero status code.
    """
    # Validate before touching device attributes: a CPU tensor has
    # ``device.index`` set to None, so building the metadata first would fail
    # with an unrelated TypeError from ``int(None)``.
    if not tensor.is_cuda:
        raise ValueError("Tensor must be on CUDA device")

    meta = {
        "shape": tensor.shape,
        "dtype": str(tensor.dtype),
        "device": int(tensor.device.index),
    }

    # NOTE(review): only the raw data pointer is exported; strides and storage
    # offset are not part of ``meta``, so this presumably assumes a contiguous
    # tensor -- confirm against the receiving side.
    dev_ptr = tensor.data_ptr()
    handle_size = 64  # size in bytes of the output buffer handed to the native call
    out = ctypes.create_string_buffer(handle_size)

    result = lib.export_ipc_handle(ctypes.c_void_p(dev_ptr), out)
    if result != 0:
        raise RuntimeError(f"export_ipc_handle failed with code {result}")

    return out.raw, meta  # (64-byte IPC handle, metadata dict)

In [3]:
# Create a small float32 test tensor on GPU 0, then export its IPC handle
# together with the metadata dict returned by get_ipc_handle.
tensor = torch.randn(10, device='cuda:0',dtype=torch.float32)
handler, meta = get_ipc_handle(tensor)

In [4]:
# Sanity check: the handle should be as long as the 64-byte buffer that
# get_ipc_handle allocates for it.
len(handler)

64

In [5]:
import requests
import json

# Prepare the payload.
byte_data = handler  # raw bytes of the exported IPC handle
map_data = meta  # metadata dict describing the tensor

# Build the multipart/form-data request: the handle travels as a binary file
# part, the metadata as a JSON-encoded form field.
files = {
    'byte_data': ('data.bin', byte_data, 'application/octet-stream')
}

data = {
    'map_data': json.dumps(map_data)
}

# Send the POST request. A timeout keeps the cell from hanging forever if the
# local server is not running or never answers.
response = requests.post(
    'http://localhost:1177/upload',
    files=files,
    data=data,
    timeout=30,
)
# Fail with a clear HTTP error instead of a confusing KeyError / JSON decode
# error further down when the server rejects the upload.
response.raise_for_status()

# Rebuild a tensor on GPU 0 from the values the server sends back.
restored_tensor = torch.tensor(response.json()['restored_tensor'], device='cuda:0')


In [6]:
# Verify that the tensor sent back by the server has the same size and
# elements as the original tensor.
torch.equal(tensor, restored_tensor)

True