107 changes: 107 additions & 0 deletions tests/test_config.py
@@ -1,7 +1,9 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os
from dataclasses import MISSING, Field, asdict, dataclass, field
from unittest.mock import patch

import pytest

@@ -388,3 +390,108 @@ def test_get_and_verify_max_len(model_id, max_model_len, expected_max_len,
    else:
        actual_max_len = model_config.get_and_verify_max_len(max_model_len)
        assert actual_max_len == expected_max_len


class MockConfig:
"""Simple mock object for testing maybe_pull_model_tokenizer_for_runai"""

def __init__(self, model: str, tokenizer: str):
self.model = model
self.tokenizer = tokenizer
self.model_weights = None


@pytest.mark.parametrize("s3_url", [
"s3://example-bucket-1/model/",
"s3://example-bucket-2/model/",
])
@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files')
def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url):
"""Test that S3 URLs create deterministic local directories for model and
tokenizer."""
# Mock pull_files to avoid actually downloading files during tests
mock_pull_files.return_value = None

# Create first mock and run the method
config1 = MockConfig(model=s3_url, tokenizer=s3_url)
ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url)

# Check that model and tokenizer point to existing directories
assert os.path.exists(
config1.model), f"Model directory does not exist: {config1.model}"
assert os.path.isdir(
config1.model), f"Model path is not a directory: {config1.model}"
assert os.path.exists(
config1.tokenizer
), f"Tokenizer directory does not exist: {config1.tokenizer}"
assert os.path.isdir(
config1.tokenizer
), f"Tokenizer path is not a directory: {config1.tokenizer}"

# Verify that the paths are different from the original S3 URL
assert config1.model != s3_url, (
"Model path should be converted to local directory")
assert config1.tokenizer != s3_url, (
"Tokenizer path should be converted to local directory")

# Store the original paths
created_model_dir = config1.model
    created_tokenizer_dir = config1.tokenizer

    # Create a new mock and run the method with the same S3 URL
    config2 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url)

    # Check that the new directories exist
    assert os.path.exists(
        config2.model), f"Model directory does not exist: {config2.model}"
    assert os.path.isdir(
        config2.model), f"Model path is not a directory: {config2.model}"
    assert os.path.exists(
        config2.tokenizer
    ), f"Tokenizer directory does not exist: {config2.tokenizer}"
    assert os.path.isdir(
        config2.tokenizer
    ), f"Tokenizer path is not a directory: {config2.tokenizer}"

    # Verify that the paths are deterministic (same as before)
    assert config2.model == created_model_dir, (
        f"Model paths are not deterministic. "
        f"Original: {created_model_dir}, New: {config2.model}")
    assert config2.tokenizer == created_tokenizer_dir, (
        f"Tokenizer paths are not deterministic. "
        f"Original: {created_tokenizer_dir}, New: {config2.tokenizer}")


@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files')
def test_s3_url_different_models_create_different_directories(mock_pull_files):
"""Test that different S3 URLs create different local directories."""
# Mock pull_files to avoid actually downloading files during tests
mock_pull_files.return_value = None

s3_url1 = "s3://example-bucket-1/model/"
s3_url2 = "s3://example-bucket-2/model/"

# Create mocks with different S3 URLs and run the method
config1 = MockConfig(model=s3_url1, tokenizer=s3_url1)
ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1)

config2 = MockConfig(model=s3_url2, tokenizer=s3_url2)
ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2)

# Verify that different URLs produce different directories
assert config1.model != config2.model, (
f"Different S3 URLs should create different model directories. "
f"URL1 model: {config1.model}, URL2 model: {config2.model}")
assert config1.tokenizer != config2.tokenizer, (
f"Different S3 URLs should create different tokenizer directories. "
f"URL1 tokenizer: {config1.tokenizer}, "
f"URL2 tokenizer: {config2.tokenizer}")

# Verify that both sets of directories exist
assert os.path.exists(config1.model) and os.path.isdir(config1.model)
assert os.path.exists(config1.tokenizer) and os.path.isdir(
config1.tokenizer)
assert os.path.exists(config2.model) and os.path.isdir(config2.model)
assert os.path.exists(config2.tokenizer) and os.path.isdir(
config2.tokenizer)
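The determinism these tests rely on comes from deriving each local directory from a hash of the source URL instead of a fresh tempfile.mkdtemp() call (see the vllm/transformers_utils/runai_utils.py hunk below). The following standalone sketch mirrors that derivation; the helper name is illustrative, and the 8-character sha256 truncation matches the implementation in this PR:

import hashlib
import os
import tempfile


def expected_local_dir(url: str) -> str:
    # Hash the URL, truncate to 8 hex characters, and place the directory
    # under the system temp dir, as ObjectStorageModel now does.
    return os.path.join(tempfile.gettempdir(),
                        hashlib.sha256(url.encode()).hexdigest()[:8])


# Same URL -> same directory; different URLs -> different directories.
assert expected_local_dir("s3://example-bucket-1/model/") == \
    expected_local_dir("s3://example-bucket-1/model/")
assert expected_local_dir("s3://example-bucket-1/model/") != \
    expected_local_dir("s3://example-bucket-2/model/")
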
5 changes: 3 additions & 2 deletions vllm/config/model.py
@@ -699,11 +699,12 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str,
            model: Model name or path
            tokenizer: Tokenizer name or path
        """

        if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)):
            return

        if is_runai_obj_uri(model):
-            object_storage_model = ObjectStorageModel()
+            object_storage_model = ObjectStorageModel(url=model)
            object_storage_model.pull_files(
                model, allow_pattern=["*.model", "*.py", "*.json"])
            self.model_weights = model
@@ -722,7 +723,7 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str,

        # Only download tokenizer if needed and not already handled
        if is_runai_obj_uri(tokenizer):
-            object_storage_tokenizer = ObjectStorageModel()
+            object_storage_tokenizer = ObjectStorageModel(url=tokenizer)
            object_storage_tokenizer.pull_files(model,
                                                ignore_pattern=[
                                                    "*.pt", "*.safetensors",
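A minimal usage sketch of the code path changed above, following the same pattern as the tests: pull_files is patched out so nothing is downloaded, and a duck-typed stand-in (the _Cfg class and bucket name are illustrative, not part of vLLM) provides the model, tokenizer, and model_weights attributes the method touches:

from unittest.mock import patch

from vllm.config import ModelConfig


class _Cfg:

    def __init__(self, url: str):
        self.model = url
        self.tokenizer = url
        self.model_weights = None


cfg = _Cfg("s3://example-bucket/model/")
with patch(
        "vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files"):
    ModelConfig.maybe_pull_model_tokenizer_for_runai(cfg, cfg.model,
                                                     cfg.tokenizer)

print(cfg.model)          # rewritten to a hash-derived local directory
print(cfg.tokenizer)      # likewise rewritten to a local directory
print(cfg.model_weights)  # keeps the original s3:// URI (set in the hunk above)
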
11 changes: 9 additions & 2 deletions vllm/transformers_utils/runai_utils.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import hashlib
import os
import shutil
import signal
@@ -56,12 +57,18 @@ class ObjectStorageModel:
        pull_files(): Pull model from object storage to the temporary directory.
    """

-    def __init__(self) -> None:
+    def __init__(self, url: str) -> None:
        for sig in (signal.SIGINT, signal.SIGTERM):
            existing_handler = signal.getsignal(sig)
            signal.signal(sig, self._close_by_signal(existing_handler))

-        self.dir = tempfile.mkdtemp()
+        dir_name = os.path.join(
+            tempfile.gettempdir(),
+            hashlib.sha256(str(url).encode()).hexdigest()[:8])
+        if os.path.exists(dir_name):
+            shutil.rmtree(dir_name)
+        os.makedirs(dir_name)
+        self.dir = dir_name

    def __del__(self):
        self._close()
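With the constructor change above, the same URL always maps to the same directory path, and any stale copy at that path is removed and recreated before files are pulled. A short sketch of that behavior, assuming vllm and the optional Run:ai object-storage dependencies are installed; the bucket names are illustrative:

from vllm.transformers_utils.runai_utils import ObjectStorageModel

a = ObjectStorageModel(url="s3://example-bucket-1/model/")
b = ObjectStorageModel(url="s3://example-bucket-1/model/")
c = ObjectStorageModel(url="s3://example-bucket-2/model/")

assert a.dir == b.dir  # same URL -> same directory path every time
assert a.dir != c.dir  # different URLs -> different directories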