### dependencies

In [4]:
import os
os.path.exists("/var/run/docker.sock")

import os
import stat

st = os.stat("/var/run/docker.sock")
print(oct(st.st_mode))
print(st.st_uid, st.st_gid)  # 文件所属的 UID 和 GID


0o140660
0 1001


In [2]:
! docker ps

CONTAINER ID   IMAGE     COMMAND   CREATED   STATUS    PORTS     NAMES


In [7]:
import docker
import requests
import torch
import time
import random

client = docker.from_env()

# --- Configs ---


RANKs = list(range(3))
LOADED_EXPERTS = [list(range(0,20)),list(range(20,40)),list(range(40,60))]
GPU_IDX = [0,0,0]
WEIGHT_PATH = "/app/weights"


NUM_CONTAINERS = 1 
BASE_PORT = 5001
IMAGE_NAME = "moe_expert"
EXPERT_TIMEOUT = 20  # seconds



### Launching

In [9]:
# --- Step 1: Build Image ---
print(" Building Docker image...")
client.images.build(path=".", tag=IMAGE_NAME)

# --- Step 2: Launch Expert Containers ---
containers = []
for i in range(NUM_CONTAINERS):
    port = BASE_PORT + i
    container = client.containers.run(
        image=IMAGE_NAME,
        # runtime="nvidia",
        device_requests=[
        docker.types.DeviceRequest(count=-1, capabilities=[['gpu']])
        ],
        volumes={
            '/home/ubuntu/vllm_test_field/vllm/demo/weights': {
                'bind': '/app/weights',
                'mode': 'ro'
            }
        },
        detach=True,
        ports={f"5000/tcp": port},
        environment={
                    "RANK": str(RANKs[i]),
                    "LOADED_EXPERTS" : str(LOADED_EXPERTS[i]),
                    "GPU_IDX" : str(GPU_IDX[i]),
                    "WEIGHT_PATH" : str(WEIGHT_PATH),
                     },
        name=f"fused_expert_{i}",
        stdout=True,
        stderr=True,
    )
    # 获取并打印日志
    logs = container.logs(stream=True)
    for line in logs:
        print(line.decode().strip())
    containers.append((container, port))

    print(f"🚀 Launched expert_{i} at http://localhost:{port}")



 Building Docker image...


INFO 05-08 05:04:38 [__init__.py:243] No platform detected, vLLM is running on UnspecifiedPlatform
* Serving Flask app 'expert'
* Debug mode: off
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5000
* Running on http://172.17.0.2:5000
Press CTRL+C to quit


KeyboardInterrupt: 

### Testing container function

In [None]:
import torch
import requests
def load_tensor(path):
    tensors = {}
    try:
        tensors["hidden_states"] = torch.load(path + "hidden_states.pt",map_location="cuda:0").to("cuda")
        tensors["w1"] = torch.load(path + "w1.pt",map_location="cuda:0").to("cuda")
        tensors["w2"] = torch.load(path + "w2.pt",map_location="cuda:0").to("cuda")
        tensors["topk_weights"] = torch.load(path + "topk_weights.pt",map_location="cuda:0").to("cuda")
        tensors["topk_ids"] = torch.load(path + "topk_ids.pt",map_location="cuda:0").to("cuda")
        tensors["expert_map"] = torch.load(path + "expert_map.pt",map_location="cuda:0").to("cuda")
        tensors["out_hidden_states"] = torch.load(path + "out_hidden_states.pt",map_location="cuda:0").to("cuda")
        tensors["final_hidden_states"] = torch.load(path + "final_hidden_states.pt",map_location="cuda:0").to("cuda")
    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
    except RuntimeError as e:
        print(f"Error: CUDA runtime issue - {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")
    return tensors

rank0 = load_tensor("/home/ubuntu/vllm_test_field/saved_tensors/rank_0/")

hidden_states = rank0["hidden_states"].cpu().tolist()
topk_weights = rank0["topk_weights"].cpu().tolist()
topk_ids = rank0["topk_ids"].cpu().tolist()

response = requests.post(
    'http://localhost:5000/forward',
    json={'hidden_states': hidden_states,
          "topk_weights":topk_weights,
          "topk_ids":topk_ids}
)
output= torch.tensor(response.json()["hidden_output"],dtype=torch.bfloat16)
print(output)
print(output.shape)

ConnectionError: HTTPConnectionPool(host='localhost', port=5000): Max retries exceeded with url: /forward (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7ad1f7d831a0>: Failed to establish a new connection: [Errno 111] Connection refused'))

### testing

In [10]:

# --- Step 4: Routing and Dispatch ---
def select_expert():
    return random.choice(containers)

def dispatch(hidden_state):
    _, port = select_expert()
    try:
        res = requests.post(
            f"http://localhost:{port}/forward",
            json={"hidden": hidden_state.tolist()},
            timeout=EXPERT_TIMEOUT,
        )
        output = torch.tensor(res.json()["output"])
        print(f"✅ Expert at port {port} returned: {output.shape}")
        return output
    except Exception as e:
        print(f"❌ Error contacting expert at port {port}: {e}")
        return None

# Simulate a request
hidden = torch.randn(1, HIDDEN_SIZE)
output = dispatch(hidden)

# --- Step 5: Cleanup ---
input("Press Enter to stop and remove containers...")




NameError: name 'HIDDEN_SIZE' is not defined

### turning off


In [None]:
for container, _ in containers:
    print(f"🛑 Stopping container {container.name}")
    container.stop()
    container.remove()