Revert "REF: support query for engine feature" #1329

Merged · 1 commit · Apr 19, 2024

23 changes: 0 additions & 23 deletions xinference/api/restful_api.py
@@ -275,16 +275,6 @@ def serve(self, logging_conf: Optional[dict] = None):
         self._router.add_api_route(
             "/v1/cluster/auth", self.is_cluster_authenticated, methods=["GET"]
         )
-        self._router.add_api_route(
-            "/v1/engines/{model_name}",
-            self.query_engines_by_model_name,
-            methods=["GET"],
-            dependencies=(
-                [Security(self._auth_service, scopes=["models:list"])]
-                if self.is_authenticated()
-                else None
-            ),
-        )
         # running instances
         self._router.add_api_route(
             "/v1/models/instances",
@@ -1428,19 +1418,6 @@ async def stream_results():
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
-    async def query_engines_by_model_name(self, model_name: str) -> JSONResponse:
-        try:
-            content = await (
-                await self._get_supervisor_ref()
-            ).query_engines_by_model_name(model_name)
-            return JSONResponse(content=content)
-        except ValueError as re:
-            logger.error(re, exc_info=True)
-            raise HTTPException(status_code=400, detail=str(re))
-        except Exception as e:
-            logger.error(e, exc_info=True)
-            raise HTTPException(status_code=500, detail=str(e))
-
     async def register_model(self, model_type: str, request: Request) -> JSONResponse:
         body = RegisterModelRequest.parse_obj(await request.json())
         model = body.model
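For context, the route removed above exposed the engine-query feature over HTTP. A minimal sketch of how a client would have called it before this revert; the host, port, and model name below are illustrative assumptions, not taken from this PR:

    # Hypothetical client call against the removed endpoint; assumes a local
    # Xinference server on 127.0.0.1:9997 and a registered model named
    # "demo-model" (both are assumptions for illustration).
    import requests

    resp = requests.get("http://127.0.0.1:9997/v1/engines/demo-model")
    resp.raise_for_status()
    # The payload maps engine name -> list of parameter combinations, e.g.:
    # {"vLLM": [{"model_name": "demo-model", "model_format": "pytorch", ...}]}
    print(resp.json())
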
18 changes: 0 additions & 18 deletions xinference/core/supervisor.py
@@ -591,24 +591,6 @@ def get_model_registration(self, model_type: str, model_name: str) -> Any:
         else:
             raise ValueError(f"Unsupported model type: {model_type}")
 
-    @log_async(logger=logger)
-    async def query_engines_by_model_name(self, model_name: str):
-        from copy import deepcopy
-
-        from ..model.llm.llm_family import LLM_ENGINES
-
-        if model_name not in LLM_ENGINES:
-            raise ValueError(f"Model {model_name} not found")
-
-        # filter llm_class
-        engine_params = deepcopy(LLM_ENGINES[model_name])
-        for engine in engine_params:
-            params = engine_params[engine]
-            for param in params:
-                del param["llm_class"]
-
-        return engine_params
-
     @log_async(logger=logger)
     async def register_model(self, model_type: str, model: str, persist: bool):
         if model_type in self._custom_register_type_to_cls:
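The supervisor method deleted here deep-copies the registry entry before deleting each "llm_class" key: that field holds a Python class object, which cannot be serialized into the JSON response, and mutating the shared LLM_ENGINES structure in place would corrupt it for later lookups. A standalone sketch of that filtering, with made-up registry contents:

    from copy import deepcopy

    # Hypothetical entry shaped like LLM_ENGINES[model_name].
    registry_entry = {
        "vLLM": [
            {
                "model_name": "demo-model",
                "model_format": "pytorch",
                "model_size_in_billions": 7,
                "quantizations": ["none"],
                "llm_class": object,  # stand-in for an LLM subclass
            }
        ]
    }

    engine_params = deepcopy(registry_entry)
    for engine, params in engine_params.items():
        for param in params:
            del param["llm_class"]  # drop the non-JSON-serializable class object

    print(engine_params)  # now safe to return as a JSON payload
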
88 changes: 0 additions & 88 deletions xinference/model/llm/__init__.py
@@ -30,14 +30,8 @@
     BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
     BUILTIN_LLM_PROMPT_STYLE,
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
-    LLAMA_CLASSES,
     LLM_CLASSES,
-    LLM_ENGINES,
     PEFT_SUPPORTED_CLASSES,
-    PYTORCH_CLASSES,
-    SGLANG_CLASSES,
-    SUPPORTED_ENGINES,
-    VLLM_CLASSES,
     CustomLLMFamilyV1,
     GgmlLLMSpecV1,
     LLMFamilyV1,
@@ -53,50 +47,6 @@
 )
 
 
-def generate_engine_config_by_model_family(model_family):
-    model_name = model_family.model_name
-    specs = model_family.model_specs
-    engines = {}  # structure for engine query
-    for spec in specs:
-        model_format = spec.model_format
-        model_size_in_billions = spec.model_size_in_billions
-        quantizations = spec.quantizations
-        for quantization in quantizations:
-            # traverse all supported engines to match the model's name, format, size in billions and quantization
-            for engine in SUPPORTED_ENGINES:
-                CLASSES = SUPPORTED_ENGINES[engine]
-                for cls in CLASSES:
-                    if cls.match(model_family, spec, quantization):
-                        engine_params = engines.get(engine, [])
-                        already_exists = False
-                        # if the name, format and size in billions already exist in the structure, add the new quantization
-                        for param in engine_params:
-                            if (
-                                model_name == param["model_name"]
-                                and model_format == param["model_format"]
-                                and model_size_in_billions
-                                == param["model_size_in_billions"]
-                                and quantization not in param["quantizations"]
-                            ):
-                                param["quantizations"].append(quantization)
-                                already_exists = True
-                                break
-                        # first successful match for these params: add a new entry to the structure
-                        if not already_exists:
-                            engine_params.append(
-                                {
-                                    "model_name": model_name,
-                                    "model_format": model_format,
-                                    "model_size_in_billions": model_size_in_billions,
-                                    "quantizations": [quantization],
-                                    "llm_class": cls,
-                                }
-                            )
-                        engines[engine] = engine_params
-                        break
-    LLM_ENGINES[model_name] = engines
-
-
 def _install():
     from .ggml.chatglm import ChatglmCppChatModel
     from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
@@ -126,17 +76,8 @@ def _install():
             ChatglmCppChatModel,
         ]
     )
-    LLAMA_CLASSES.extend(
-        [
-            ChatglmCppChatModel,
-            LlamaCppChatModel,
-            LlamaCppModel,
-        ]
-    )
     LLM_CLASSES.extend([SGLANGModel, SGLANGChatModel])
-    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
     LLM_CLASSES.extend([VLLMModel, VLLMChatModel])
-    VLLM_CLASSES.extend([VLLMModel, VLLMChatModel])
     LLM_CLASSES.extend(
         [
             BaichuanPytorchChatModel,
@@ -155,24 +96,6 @@ def _install():
             PytorchModel,
         ]
     )
-    PYTORCH_CLASSES.extend(
-        [
-            BaichuanPytorchChatModel,
-            VicunaPytorchChatModel,
-            FalconPytorchChatModel,
-            ChatglmPytorchChatModel,
-            LlamaPytorchModel,
-            LlamaPytorchChatModel,
-            PytorchChatModel,
-            FalconPytorchModel,
-            Internlm2PytorchChatModel,
-            QwenVLChatModel,
-            OmniLMMModel,
-            YiVLChatModel,
-            DeepSeekVLChatModel,
-            PytorchModel,
-        ]
-    )
     PEFT_SUPPORTED_CLASSES.extend(
         [
             BaichuanPytorchChatModel,
@@ -190,12 +113,6 @@ def _install():
         ]
     )
 
-    # support 4 engines for now
-    SUPPORTED_ENGINES["vLLM"] = VLLM_CLASSES
-    SUPPORTED_ENGINES["SGLang"] = SGLANG_CLASSES
-    SUPPORTED_ENGINES["PyTorch"] = PYTORCH_CLASSES
-    SUPPORTED_ENGINES["llama-cpp-python"] = LLAMA_CLASSES
-
     json_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
     )
@@ -246,11 +163,6 @@ def _install():
         if llm_spec.model_name not in LLM_MODEL_DESCRIPTIONS:
             LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(llm_spec))
 
-    # traverse all families and add engine parameters corresponding to the model name
-    for families in [BUILTIN_LLM_FAMILIES, BUILTIN_MODELSCOPE_LLM_FAMILIES]:
-        for family in families:
-            generate_engine_config_by_model_family(family)
-
     from ...constants import XINFERENCE_MODEL_DIR
 
     user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "llm")
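Taken together, the deletions in this file revert the engine registry that generate_engine_config_by_model_family built at import time: for every builtin family it matched each (format, size, quantization) combination against the classes registered per engine and recorded the results in LLM_ENGINES. A sketch of the shape that registry had; the model name and values below are invented, only the nesting follows the reverted code:

    # Illustrative shape of the reverted LLM_ENGINES registry.
    LLM_ENGINES = {
        "demo-model": {
            "vLLM": [
                {
                    "model_name": "demo-model",
                    "model_format": "pytorch",
                    "model_size_in_billions": 7,
                    "quantizations": ["none"],
                    "llm_class": object,  # the matched LLM subclass
                }
            ],
            # ...one list of parameter dicts per matching engine, e.g.
            # "PyTorch", "SGLang", "llama-cpp-python"
        }
    }
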
40 changes: 0 additions & 40 deletions xinference/model/llm/llm_family.py
@@ -227,25 +227,16 @@ def parse_raw(
 CustomLLMFamilyV1.update_forward_refs()
 
 
-LLAMA_CLASSES: List[Type[LLM]] = []
 LLM_CLASSES: List[Type[LLM]] = []
 PEFT_SUPPORTED_CLASSES: List[Type[LLM]] = []
 
 BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
-SGLANG_CLASSES: List[Type[LLM]] = []
-PYTORCH_CLASSES: List[Type[LLM]] = []
-
 UD_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
 UD_LLM_FAMILIES_LOCK = Lock()
 
-VLLM_CLASSES: List[Type[LLM]] = []
-
-LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
-SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}
-
 LLM_LAUNCH_VERSIONS: Dict[str, List[str]] = {}
 
 
@@ -913,7 +904,6 @@ def _apply_format_to_model_id(spec: LLMSpecV1, q: str) -> LLMSpecV1:
 
 def register_llm(llm_family: LLMFamilyV1, persist: bool):
     from ..utils import is_valid_model_name
-    from . import generate_engine_config_by_model_family
 
     if not is_valid_model_name(llm_family.model_name):
         raise ValueError(f"Invalid model name {llm_family.model_name}.")
@@ -926,7 +916,6 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
     )
 
     UD_LLM_FAMILIES.append(llm_family)
-    generate_engine_config_by_model_family(llm_family)
 
     if persist:
         # We only validate model URL when persist is True.
@@ -952,7 +941,6 @@ def unregister_llm(model_name: str, raise_error: bool = True):
             break
     if llm_family:
         UD_LLM_FAMILIES.remove(llm_family)
-        del LLM_ENGINES[model_name]
 
     persist_path = os.path.join(
         XINFERENCE_MODEL_DIR, "llm", f"{llm_family.model_name}.json"
@@ -1002,31 +990,3 @@ def match_llm_cls(
         if cls.match(family, llm_spec, quantization):
             return cls
     return None
-
-
-def check_engine_by_spec_parameters(
-    model_engine: str,
-    model_name: str,
-    model_format: str,
-    model_size_in_billions: Union[str, int],
-    quantization: str,
-) -> Optional[Type[LLM]]:
-    if model_name not in LLM_ENGINES:
-        logger.debug(f"Cannot find model {model_name}.")
-        return None
-    if model_engine not in LLM_ENGINES[model_name]:
-        logger.debug(f"Model {model_name} cannot be run on engine {model_engine}.")
-        return None
-    match_params = LLM_ENGINES[model_name][model_engine]
-    for param in match_params:
-        if (
-            model_name == param["model_name"]
-            and model_format == param["model_format"]
-            and model_size_in_billions == param["model_size_in_billions"]
-            and quantization in param["quantizations"]
-        ):
-            return param["llm_class"]
-    logger.debug(
-        f"Model {model_name} with format {model_format}, size {model_size_in_billions} and quantization {quantization} cannot be run on engine {model_engine}."
-    )
-    return None
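
Before the revert, check_engine_by_spec_parameters was the lookup that resolved a concrete LLM implementation for a launch request. A hedged usage sketch; the argument values are hypothetical:

    # Pre-revert import path for the removed helper.
    from xinference.model.llm.llm_family import check_engine_by_spec_parameters

    # Returns the registered LLM subclass when the (engine, name, format,
    # size, quantization) tuple matches a registry entry, otherwise None.
    cls = check_engine_by_spec_parameters(
        model_engine="vLLM",
        model_name="demo-model",
        model_format="pytorch",
        model_size_in_billions=7,
        quantization="none",
    )
    if cls is None:
        raise ValueError("demo-model cannot run on vLLM with these parameters")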