diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index bb739c1e41..ec58594cc3 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -275,16 +275,6 @@ def serve(self, logging_conf: Optional[dict] = None):
         self._router.add_api_route(
             "/v1/cluster/auth", self.is_cluster_authenticated, methods=["GET"]
         )
-        self._router.add_api_route(
-            "/v1/engines/{model_name}",
-            self.query_engines_by_model_name,
-            methods=["GET"],
-            dependencies=(
-                [Security(self._auth_service, scopes=["models:list"])]
-                if self.is_authenticated()
-                else None
-            ),
-        )
         # running instances
         self._router.add_api_route(
             "/v1/models/instances",
@@ -1428,19 +1418,6 @@ async def stream_results():
             self.handle_request_limit_error(e)
             raise HTTPException(status_code=500, detail=str(e))
 
-    async def query_engines_by_model_name(self, model_name: str) -> JSONResponse:
-        try:
-            content = await (
-                await self._get_supervisor_ref()
-            ).query_engines_by_model_name(model_name)
-            return JSONResponse(content=content)
-        except ValueError as re:
-            logger.error(re, exc_info=True)
-            raise HTTPException(status_code=400, detail=str(re))
-        except Exception as e:
-            logger.error(e, exc_info=True)
-            raise HTTPException(status_code=500, detail=str(e))
-
     async def register_model(self, model_type: str, request: Request) -> JSONResponse:
         body = RegisterModelRequest.parse_obj(await request.json())
         model = body.model
diff --git a/xinference/core/supervisor.py b/xinference/core/supervisor.py
index 8da7d1947f..115baf1e73 100644
--- a/xinference/core/supervisor.py
+++ b/xinference/core/supervisor.py
@@ -591,24 +591,6 @@ def get_model_registration(self, model_type: str, model_name: str) -> Any:
         else:
             raise ValueError(f"Unsupported model type: {model_type}")
 
-    @log_async(logger=logger)
-    async def query_engines_by_model_name(self, model_name: str):
-        from copy import deepcopy
-
-        from ..model.llm.llm_family import LLM_ENGINES
-
-        if model_name not in LLM_ENGINES:
-            raise ValueError(f"Model {model_name} not found")
-
-        # filter llm_class
-        engine_params = deepcopy(LLM_ENGINES[model_name])
-        for engine in engine_params:
-            params = engine_params[engine]
-            for param in params:
-                del param["llm_class"]
-
-        return engine_params
-
     @log_async(logger=logger)
     async def register_model(self, model_type: str, model: str, persist: bool):
         if model_type in self._custom_register_type_to_cls:
diff --git a/xinference/model/llm/__init__.py b/xinference/model/llm/__init__.py
index c6af541362..ec25095c3d 100644
--- a/xinference/model/llm/__init__.py
+++ b/xinference/model/llm/__init__.py
@@ -30,14 +30,8 @@
     BUILTIN_LLM_MODEL_TOOL_CALL_FAMILIES,
     BUILTIN_LLM_PROMPT_STYLE,
     BUILTIN_MODELSCOPE_LLM_FAMILIES,
-    LLAMA_CLASSES,
     LLM_CLASSES,
-    LLM_ENGINES,
     PEFT_SUPPORTED_CLASSES,
-    PYTORCH_CLASSES,
-    SGLANG_CLASSES,
-    SUPPORTED_ENGINES,
-    VLLM_CLASSES,
     CustomLLMFamilyV1,
     GgmlLLMSpecV1,
     LLMFamilyV1,
@@ -53,50 +47,6 @@
 )
 
 
-def generate_engine_config_by_model_family(model_family):
-    model_name = model_family.model_name
-    specs = model_family.model_specs
-    engines = {}  # structure for engine query
-    for spec in specs:
-        model_format = spec.model_format
-        model_size_in_billions = spec.model_size_in_billions
-        quantizations = spec.quantizations
-        for quantization in quantizations:
-            # traverse all supported engines to match the name, format, size in billions and quatization of model
-            for engine in SUPPORTED_ENGINES:
-                CLASSES = SUPPORTED_ENGINES[engine]
-                for cls in CLASSES:
-                    if cls.match(model_family, spec, quantization):
-                        engine_params = engines.get(engine, [])
-                        already_exists = False
-                        # if the name, format and size in billions of model already exists in the structure, add the new quantization
-                        for param in engine_params:
-                            if (
-                                model_name == param["model_name"]
-                                and model_format == param["model_format"]
-                                and model_size_in_billions
-                                == param["model_size_in_billions"]
-                                and quantization not in param["quantizations"]
-                            ):
-                                param["quantizations"].append(quantization)
-                                already_exists = True
-                                break
-                        # successfully match the params for the first time, add to the structure
-                        if not already_exists:
-                            engine_params.append(
-                                {
-                                    "model_name": model_name,
-                                    "model_format": model_format,
-                                    "model_size_in_billions": model_size_in_billions,
-                                    "quantizations": [quantization],
-                                    "llm_class": cls,
-                                }
-                            )
-                        engines[engine] = engine_params
-                        break
-    LLM_ENGINES[model_name] = engines
-
-
 def _install():
     from .ggml.chatglm import ChatglmCppChatModel
     from .ggml.llamacpp import LlamaCppChatModel, LlamaCppModel
@@ -126,17 +76,8 @@ def _install():
             ChatglmCppChatModel,
         ]
     )
-    LLAMA_CLASSES.extend(
-        [
-            ChatglmCppChatModel,
-            LlamaCppChatModel,
-            LlamaCppModel,
-        ]
-    )
     LLM_CLASSES.extend([SGLANGModel, SGLANGChatModel])
-    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
     LLM_CLASSES.extend([VLLMModel, VLLMChatModel])
-    VLLM_CLASSES.extend([VLLMModel, VLLMChatModel])
     LLM_CLASSES.extend(
         [
             BaichuanPytorchChatModel,
@@ -155,24 +96,6 @@ def _install():
             PytorchModel,
         ]
     )
-    PYTORCH_CLASSES.extend(
-        [
-            BaichuanPytorchChatModel,
-            VicunaPytorchChatModel,
-            FalconPytorchChatModel,
-            ChatglmPytorchChatModel,
-            LlamaPytorchModel,
-            LlamaPytorchChatModel,
-            PytorchChatModel,
-            FalconPytorchModel,
-            Internlm2PytorchChatModel,
-            QwenVLChatModel,
-            OmniLMMModel,
-            YiVLChatModel,
-            DeepSeekVLChatModel,
-            PytorchModel,
-        ]
-    )
     PEFT_SUPPORTED_CLASSES.extend(
         [
             BaichuanPytorchChatModel,
@@ -190,12 +113,6 @@ def _install():
         ]
     )
 
-    # support 4 engines for now
-    SUPPORTED_ENGINES["vLLM"] = VLLM_CLASSES
-    SUPPORTED_ENGINES["SGLang"] = SGLANG_CLASSES
-    SUPPORTED_ENGINES["PyTorch"] = PYTORCH_CLASSES
-    SUPPORTED_ENGINES["llama-cpp-python"] = LLAMA_CLASSES
-
     json_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
     )
@@ -246,11 +163,6 @@ def _install():
         if llm_spec.model_name not in LLM_MODEL_DESCRIPTIONS:
             LLM_MODEL_DESCRIPTIONS.update(generate_llm_description(llm_spec))
 
-    # traverse all families and add engine parameters corresponding to the model name
-    for families in [BUILTIN_LLM_FAMILIES, BUILTIN_MODELSCOPE_LLM_FAMILIES]:
-        for family in families:
-            generate_engine_config_by_model_family(family)
-
     from ...constants import XINFERENCE_MODEL_DIR
 
     user_defined_llm_dir = os.path.join(XINFERENCE_MODEL_DIR, "llm")
diff --git a/xinference/model/llm/llm_family.py b/xinference/model/llm/llm_family.py
index ecf2e3071e..6f5a01d6e7 100644
--- a/xinference/model/llm/llm_family.py
+++ b/xinference/model/llm/llm_family.py
@@ -227,25 +227,16 @@ def parse_raw(
 CustomLLMFamilyV1.update_forward_refs()
 
 
-LLAMA_CLASSES: List[Type[LLM]] = []
 LLM_CLASSES: List[Type[LLM]] = []
 PEFT_SUPPORTED_CLASSES: List[Type[LLM]] = []
 
 BUILTIN_LLM_FAMILIES: List["LLMFamilyV1"] = []
 BUILTIN_MODELSCOPE_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
-SGLANG_CLASSES: List[Type[LLM]] = []
-PYTORCH_CLASSES: List[Type[LLM]] = []
-
 UD_LLM_FAMILIES: List["LLMFamilyV1"] = []
 
 UD_LLM_FAMILIES_LOCK = Lock()
 
-VLLM_CLASSES: List[Type[LLM]] = []
-
-LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
-SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}
-
 LLM_LAUNCH_VERSIONS: Dict[str, List[str]] = {}
 
 
@@ -913,7 +904,6 @@ def _apply_format_to_model_id(spec: LLMSpecV1, q: str) -> LLMSpecV1:
 
 def register_llm(llm_family: LLMFamilyV1, persist: bool):
     from ..utils import is_valid_model_name
-    from . import generate_engine_config_by_model_family
 
     if not is_valid_model_name(llm_family.model_name):
         raise ValueError(f"Invalid model name {llm_family.model_name}.")
@@ -926,7 +916,6 @@ def register_llm(llm_family: LLMFamilyV1, persist: bool):
         )
 
         UD_LLM_FAMILIES.append(llm_family)
-        generate_engine_config_by_model_family(llm_family)
 
     if persist:
         # We only validate model URL when persist is True.
@@ -952,7 +941,6 @@ def unregister_llm(model_name: str, raise_error: bool = True):
             break
     if llm_family:
         UD_LLM_FAMILIES.remove(llm_family)
-        del LLM_ENGINES[model_name]
 
         persist_path = os.path.join(
             XINFERENCE_MODEL_DIR, "llm", f"{llm_family.model_name}.json"
@@ -1002,31 +990,3 @@ def match_llm_cls(
         if cls.match(family, llm_spec, quantization):
             return cls
     return None
-
-
-def check_engine_by_spec_parameters(
-    model_engine: str,
-    model_name: str,
-    model_format: str,
-    model_size_in_billions: Union[str, int],
-    quantization: str,
-) -> Optional[Type[LLM]]:
-    if model_name not in LLM_ENGINES:
-        logger.debug(f"Cannot find model {model_name}.")
-        return None
-    if model_engine not in LLM_ENGINES[model_name]:
-        logger.debug(f"Model {model_name} cannot be run on engine {model_engine}.")
-        return None
-    match_params = LLM_ENGINES[model_name][model_engine]
-    for param in match_params:
-        if (
-            model_name == param["model_name"]
-            and model_format == param["model_format"]
-            and model_size_in_billions == param["model_size_in_billions"]
-            and quantization in param["quantizations"]
-        ):
-            return param["llm_class"]
-    logger.debug(
-        f"Model {model_name} with format {model_format}, size {model_size_in_billions} and quantization {quantization} cannot be run on engine {model_engine}."
-    )
-    return None
diff --git a/xinference/model/llm/tests/test_llm_family.py b/xinference/model/llm/tests/test_llm_family.py
index 81dc586e38..ec119b43ce 100644
--- a/xinference/model/llm/tests/test_llm_family.py
+++ b/xinference/model/llm/tests/test_llm_family.py
@@ -14,19 +14,13 @@
 import codecs
 import json
 import os
-import platform
 import shutil
 import tempfile
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
 
-from ....constants import (
-    XINFERENCE_DISABLE_VLLM,
-    XINFERENCE_ENABLE_SGLANG,
-    XINFERENCE_ENV_MODEL_SRC,
-)
-from ....utils import cuda_count
+from ....constants import XINFERENCE_ENV_MODEL_SRC
 from ...utils import is_locale_chinese_simplified, valid_model_revision
 from ..llm_family import (
     AWSRegion,
@@ -45,8 +39,6 @@
     match_model_size,
     parse_uri,
 )
-from ..sglang.core import SGLANG_INSTALLED
-from ..vllm.core import VLLM_INSTALLED
 
 
 def test_deserialize_llm_family_v1():
@@ -1072,251 +1064,3 @@ def test_match_model_size():
     assert not match_model_size("1.8", 18)
     assert not match_model_size("1.8", 1)
     assert match_model_size("001", 1)
-
-
-@pytest.mark.skipif(
-    XINFERENCE_DISABLE_VLLM
-    or platform.system() != "Linux"
-    or cuda_count() <= 0
-    or not VLLM_INSTALLED,
-    reason="Current system does not support vLLM",
-)
-def test_quert_engine_vLLM():
-    from ..llm_family import LLM_ENGINES, check_engine_by_spec_parameters
-
-    model_name = "qwen1.5-chat"
-    assert model_name in LLM_ENGINES
-
-    assert (
-        "vLLM" in LLM_ENGINES[model_name] and len(LLM_ENGINES[model_name]["vLLM"]) == 21
-    )
-
-    assert check_engine_by_spec_parameters(
-        model_engine="vLLM",
-        model_name=model_name,
-        model_format="gptq",
-        model_size_in_billions="1_8",
-        quantization="Int4",
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="vLLM",
-            model_name=model_name,
-            model_format="gptq",
-            model_size_in_billions="1_8",
-            quantization="Int8",
-        )
-        is None
-    )
-    assert check_engine_by_spec_parameters(
-        model_engine="vLLM",
-        model_name=model_name,
-        model_format="pytorch",
-        model_size_in_billions="1_8",
-        quantization="none",
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="vLLM",
-            model_name=model_name,
-            model_format="pytorch",
-            model_size_in_billions="1_8",
-            quantization="4-bit",
-        )
-        is None
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="vLLM",
-            model_name=model_name,
-            model_format="ggmlv3",
-            model_size_in_billions="1_8",
-            quantization="q2_k",
-        )
-        is None
-    )
-
-
-@pytest.mark.skipif(
-    not XINFERENCE_ENABLE_SGLANG
-    or platform.system() != "Linux"
-    or cuda_count() <= 0
-    or not SGLANG_INSTALLED,
-    reason="Current system does not support SGLang",
-)
-def test_quert_engine_SGLang():
-    from ..llm_family import LLM_ENGINES, check_engine_by_spec_parameters
-
-    model_name = "qwen1.5-chat"
-    assert model_name in LLM_ENGINES
-
-    assert (
-        "SGLang" in LLM_ENGINES[model_name]
-        and len(LLM_ENGINES[model_name]["SGLang"]) == 21
-    )
-
-    assert check_engine_by_spec_parameters(
-        model_engine="SGLang",
-        model_name=model_name,
-        model_format="gptq",
-        model_size_in_billions="1_8",
-        quantization="Int4",
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="SGLang",
-            model_name=model_name,
-            model_format="gptq",
-            model_size_in_billions="1_8",
-            quantization="Int8",
-        )
-        is None
-    )
-    assert check_engine_by_spec_parameters(
-        model_engine="SGLang",
-        model_name=model_name,
-        model_format="pytorch",
-        model_size_in_billions="1_8",
-        quantization="none",
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="SGLang",
-            model_name=model_name,
-            model_format="pytorch",
-            model_size_in_billions="1_8",
-            quantization="4-bit",
-        )
-        is None
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="SGLang",
-            model_name=model_name,
-            model_format="ggmlv3",
-            model_size_in_billions="1_8",
-            quantization="q2_k",
-        )
-        is None
-    )
-
-
-def test_query_engine_general():
-    from ..ggml.chatglm import ChatglmCppChatModel
-    from ..ggml.llamacpp import LlamaCppChatModel
-    from ..llm_family import (
-        LLM_ENGINES,
-        check_engine_by_spec_parameters,
-        get_user_defined_llm_families,
-        register_llm,
-        unregister_llm,
-    )
-
-    model_name = "qwen1.5-chat"
-    assert model_name in LLM_ENGINES
-
-    assert (
-        "PyTorch" in LLM_ENGINES[model_name]
-        and len(LLM_ENGINES[model_name]["PyTorch"]) == 28
-    )
-    assert (
-        "llama-cpp-python" in LLM_ENGINES[model_name]
-        and len(LLM_ENGINES[model_name]["llama-cpp-python"]) == 7
-    )
-
-    assert check_engine_by_spec_parameters(
-        model_engine="PyTorch",
-        model_name=model_name,
-        model_format="gptq",
-        model_size_in_billions="1_8",
-        quantization="Int4",
-    )
-    assert check_engine_by_spec_parameters(
-        model_engine="PyTorch",
-        model_name=model_name,
-        model_format="gptq",
-        model_size_in_billions="1_8",
-        quantization="Int8",
-    )
-    assert check_engine_by_spec_parameters(
-        model_engine="PyTorch",
-        model_name=model_name,
-        model_format="pytorch",
-        model_size_in_billions="1_8",
-        quantization="none",
-    )
-    assert check_engine_by_spec_parameters(
-        model_engine="PyTorch",
-        model_name=model_name,
-        model_format="pytorch",
-        model_size_in_billions="1_8",
-        quantization="4-bit",
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="llama-cpp-python",
-            model_name=model_name,
-            model_format="ggufv2",
-            model_size_in_billions="1_8",
-            quantization="q2_k",
-        )
-        is LlamaCppChatModel
-    )
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="llama-cpp-python",
-            model_name=model_name,
-            model_format="ggmlv3",
-            model_size_in_billions="1_8",
-            quantization="q2_k",
-        )
-        is None
-    )
-
-    assert (
-        check_engine_by_spec_parameters(
-            model_engine="llama-cpp-python",
-            model_name="chatglm",
-            model_format="ggmlv3",
-            model_size_in_billions=6,
-            quantization="q4_0",
-        )
-        is ChatglmCppChatModel
-    )
-
-    spec = GgmlLLMSpecV1(
-        model_format="ggmlv3",
-        model_size_in_billions=3,
-        model_id="TheBloke/orca_mini_3B-GGML",
-        quantizations=[""],
-        model_file_name_template="README.md",
-    )
-    family = LLMFamilyV1(
-        version=1,
-        context_length=2048,
-        model_type="LLM",
-        model_name="custom_model",
-        model_lang=["en"],
-        model_ability=["embed", "chat"],
-        model_specs=[spec],
-        prompt_style=None,
-    )
-
-    register_llm(family, False)
-
-    assert family in get_user_defined_llm_families()
-    assert (
-        "custom_model" in LLM_ENGINES
-        and "llama-cpp-python" in LLM_ENGINES["custom_model"]
-    )
-    assert check_engine_by_spec_parameters(
-        model_engine="llama-cpp-python",
-        model_name="custom_model",
-        model_format="ggmlv3",
-        model_size_in_billions=3,
-        quantization="",
-    )
-
-    unregister_llm(family.model_name)
-    assert family not in get_user_defined_llm_families()
-    assert "custom_model" not in LLM_ENGINES