In [1]:
import torch
import ctypes

# Bind the native IPC helper library and tell ctypes the C signatures to use
# when marshalling arguments and return values for its two entry points.
lib = ctypes.CDLL('./cuda_tools/libipc_tensor_tool.so')

# open_ipc_tensor(void *, int) -> void *
lib.open_ipc_tensor.restype = ctypes.c_void_p
lib.open_ipc_tensor.argtypes = [ctypes.c_void_p, ctypes.c_int]

# close_ipc_tensor(void *) -> int
lib.close_ipc_tensor.restype = ctypes.c_int
lib.close_ipc_tensor.argtypes = [ctypes.c_void_p]


# Bytes per element for each dtype that restore_tensor will size a buffer for.
# The widths are read from torch itself instead of being typed in by hand.
DTYPE_SIZE = {
    dt: torch.empty(0, dtype=dt).element_size()
    for dt in (
        torch.float32,
        torch.int32,
        torch.int64,
        torch.float64,
        torch.uint8,
        torch.bfloat16,
        # add more if needed
    )
}

# Lookup from a dtype's string form (e.g. 'torch.float32') to the torch.dtype
# object; each key is exactly what str(dtype) produces for its value.
DTYPE_MAP = {
    str(dt): dt
    for dt in (
        torch.float32,
        torch.int32,
        torch.int64,
        torch.float64,
        torch.uint8,
        torch.float16,
        torch.bfloat16,
    )
}

def restore_tensor(ipc_handle_bytes: bytes, meta):
    """Rebuild a tensor on a CUDA device from a 64-byte IPC handle and metadata.

    Args:
        ipc_handle_bytes: The serialized IPC handle; must be exactly 64 bytes.
        meta: Mapping with the keys
            'shape'  - iterable of dimension sizes (may be empty for a scalar),
            'dtype'  - dtype name as found in DTYPE_MAP, e.g. 'torch.float32',
            'device' - CUDA device index (an int, or anything int() accepts,
                       such as the string "0" coming from a JSON form field).

    Returns:
        The tensor produced by ``.to('cuda:<device>')`` on the buffer view.
        The handle is closed before returning, so the result is expected not
        to alias the IPC mapping.

    Raises:
        ValueError: The handle is not 64 bytes long, or the dtype is unknown
            to DTYPE_MAP / DTYPE_SIZE.
        RuntimeError: ``open_ipc_tensor`` returned a null pointer.
    """
    shape = meta['shape']
    dtype_name = meta['dtype']
    # The native signature takes a C int; metadata decoded from a form field
    # may carry the index as a string.
    device = int(meta['device'])

    if len(ipc_handle_bytes) != 64:
        raise ValueError("Invalid IPC handle size")

    # Everything that can be rejected is rejected BEFORE the handle is opened,
    # so an invalid request never acquires a native resource.
    dtype = DTYPE_MAP.get(dtype_name)
    if dtype not in DTYPE_SIZE:
        # Report what the caller asked for (the mapped value is None for
        # unknown names, which would make the message useless).
        raise ValueError(f"Unsupported dtype: {dtype_name}")

    # Plain integer product: stays an int for every shape, including the
    # empty shape of a 0-dim tensor (product == 1).
    numel = 1
    for dim in shape:
        numel *= int(dim)
    nbytes = numel * DTYPE_SIZE[dtype]

    handle_buf = ctypes.create_string_buffer(ipc_handle_bytes, 64)
    dev_ptr = lib.open_ipc_tensor(handle_buf, device)
    if not dev_ptr:
        raise RuntimeError("Failed to open IPC handle")

    try:
        # NOTE(review): from_address/frombuffer read `dev_ptr` through the
        # host address space. That is only valid if open_ipc_tensor returns
        # host-accessible memory - confirm against the native library.
        raw = (ctypes.c_char * nbytes).from_address(dev_ptr)
        # torch.frombuffer takes the dtype directly, so no typed ctypes
        # pointer is needed; every dtype listed in DTYPE_SIZE (bfloat16
        # included) goes through the same path.
        t = (
            torch.frombuffer(raw, dtype=dtype)
            .view(tuple(shape))
            .to(f'cuda:{device}')
        )
    finally:
        # Always release the handle, even when building the tensor fails.
        lib.close_ipc_tensor(dev_ptr)
    return t

In [1]:
import cupy as cp
import torch
import ctypes

# NOTE: this rebinds the module-level name `lib`, which an earlier cell of this
# notebook also assigns (to libipc_tensor_tool.so). After this cell runs, any
# code that looks up functions on `lib` resolves them against ipc_handle.so.
lib = ctypes.CDLL('/root/vllm_test/vllm/ipc_handler_demo/ipc_handle.so')
# Declare the C signature: open_ipc_handle(void *) -> void *
lib.open_ipc_handle.restype = ctypes.c_void_p
lib.open_ipc_handle.argtypes = [ctypes.c_void_p]


def xuezhang_de_fangfa(handler_bytes, meta):
    """Open an IPC handle and expose its memory as a torch tensor via CuPy/DLPack.

    Args:
        handler_bytes: Serialized IPC handle passed straight to
            ``lib.open_ipc_handle``.
        meta: Mapping with the keys
            'dtype' - dtype name such as 'torch.float32',
            'numel' - total number of elements,
            'shape' - shape the flat buffer is viewed as.

    Returns:
        A torch tensor of shape ``meta['shape']`` wrapping the opened memory
        (no copy is made here), or ``None`` if the handle could not be opened.

    Raises:
        ValueError: ``meta['dtype']`` is not one of the supported names.
    """
    dtype_map = {
        'torch.float32': cp.float32,
        'torch.float64': cp.float64,
        'torch.int32': cp.int32,
        'torch.int64': cp.int64,
        'torch.uint8': cp.uint8,
        'torch.int8': cp.int8,
        'torch.int16': cp.int16,
        'torch.float16': cp.float16,
        # CuPy doesn't support bfloat16, so float16 is used purely as a
        # 16-bit carrier; the bits are reinterpreted as bfloat16 below.
        'torch.bfloat16': cp.float16,
    }
    # Use the dtype information supplied by the caller, and reject unknown
    # dtypes before opening the handle so nothing is acquired for a request
    # that cannot be served.
    cp_dtype = dtype_map.get(meta['dtype'])
    if cp_dtype is None:
        raise ValueError(f"Unsupported dtype: {meta['dtype']}")

    torch.cuda.set_device(0)
    device_ptr = lib.open_ipc_handle(handler_bytes)
    if not device_ptr:
        print("Failed to open IPC handle")
        return None

    tensor_size = meta['numel']
    # Wrap the raw pointer without taking ownership (owner is None).
    # NOTE(review): nothing here closes the IPC mapping or ties its lifetime
    # to the returned tensor - confirm who is responsible for releasing it.
    unownedmemory = cp.cuda.UnownedMemory(
        device_ptr, tensor_size * cp.dtype(cp_dtype).itemsize, None)
    gpu_array = cp.ndarray(
        (tensor_size,),
        dtype=cp_dtype,
        memptr=cp.cuda.MemoryPointer(unownedmemory, 0),
    )

    # Hand the CuPy array to PyTorch through DLPack, then apply the shape.
    dlpack = gpu_array.toDlpack()
    restored = torch.utils.dlpack.from_dlpack(dlpack).view(meta['shape'])

    if meta['dtype'] == 'torch.bfloat16':
        # Reinterpret the 16-bit payload as bfloat16. A numeric conversion
        # (.to) would first decode the bits as float16 and yield wrong values.
        restored = restored.view(torch.bfloat16)

    return restored

In [None]:
# ... existing code ...

from flask import Flask, request, jsonify, Response
import json
from tensor_utils import restore_tensor_torch

app = Flask(__name__)

@app.route('/upload', methods=['POST'])
def upload():
    """Restore a tensor from an uploaded IPC handle and echo it back as JSON.

    Expects a multipart POST with:
        byte_data - file part holding the raw IPC handle bytes,
        map_data  - form field holding a JSON object with the tensor metadata
                    consumed by ``restore_tensor`` (defined in an earlier cell).

    Returns:
        200 with ``{'message': 'ok', 'restored_tensor': [...]}`` on success;
        400 with ``{'message': <reason>}`` when the metadata is not valid JSON
        or ``restore_tensor`` rejects the request.
    """
    # 1. Binary payload (uploaded as a file).
    byte_data = request.files['byte_data'].read()

    # 2. Metadata dictionary (JSON encoded). Malformed JSON is a client error,
    #    not a server fault, so answer 400 instead of letting it become a 500.
    try:
        map_data = json.loads(request.form['map_data'])
    except json.JSONDecodeError as exc:
        return jsonify({'message': f"map_data is not valid JSON: {exc}"}), 400

    # Missing keys, wrong value types, bad handle size and unsupported dtypes
    # are all caused by the request contents; report them as 400 as well.
    try:
        t = restore_tensor(byte_data, map_data)
    except (KeyError, TypeError, ValueError) as exc:
        return jsonify({'message': f"invalid tensor request: {exc!r}"}), 400

    print(t)
    # 3. Return the result.
    response = {
        'message':"ok",
        'restored_tensor':t.cpu().tolist(),  # Convert to list for JSON serialization
    }

    return jsonify(response)

if __name__ == '__main__':
    # 1177 remains the default port. Setting IPC_DEMO_PORT lets the server be
    # started elsewhere when that port is already taken, without editing code.
    import os

    app.run(port=int(os.environ.get('IPC_DEMO_PORT', '1177')))

 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 1177 is in use by another program. Either identify and stop that program, or start the server with a different port.


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
