Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT: Support Launching Model with Uid #358

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 5 additions & 1 deletion xinference/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ def launch_model(
model_size_in_billions: Optional[int] = None,
model_format: Optional[str] = None,
quantization: Optional[str] = None,
model_uid: Optional[str] = None,
**kwargs,
) -> str:
"""
Expand All @@ -656,6 +657,8 @@ def launch_model(
The format of the model.
quantization: Optional[str]
The quantization of the model.
model_uid: Optional[str]
The UID to assign to the model. If omitted, a UID is generated automatically.
**kwargs:
Any other parameters that have been specified.

Expand All @@ -668,7 +671,7 @@ def launch_model(

url = f"{self.base_url}/v1/models"

model_uid = self._gen_model_uid()
model_uid = self._gen_model_uid() if model_uid is None else model_uid

payload = {
"model_uid": model_uid,
Expand All @@ -689,6 +692,7 @@ def launch_model(

response_data = response.json()
model_uid = response_data["model_uid"]
assert model_uid is not None
return model_uid

def terminate_model(self, model_uid: str):
Expand Down
2 changes: 1 addition & 1 deletion xinference/core/restful_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ async def launch_model(self, request: Request) -> JSONResponse:
key: value for key, value in payload.items() if key not in exclude_keys
}

if model_uid is None or model_uid is None:
if model_uid is None or model_name is None:
raise HTTPException(
status_code=400,
detail="Invalid input. Please specify the model UID and the model name",
Expand Down
3 changes: 3 additions & 0 deletions xinference/deploy/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,12 +157,14 @@ def worker(log_level: str, endpoint: Optional[str], host: str):
@click.option("--size-in-billions", "-s", default=None, type=int)
@click.option("--model-format", "-f", default=None, type=str)
@click.option("--quantization", "-q", default=None, type=str)
@click.option("--model-uid", "-i", default=None, type=str)
def model_launch(
endpoint: Optional[str],
model_name: str,
size_in_billions: int,
model_format: str,
quantization: str,
model_uid: str,
):
endpoint = get_endpoint(endpoint)

Expand All @@ -172,6 +174,7 @@ def model_launch(
model_size_in_billions=size_in_billions,
model_format=model_format,
quantization=quantization,
model_uid=model_uid,
)

print(f"Model uid: {model_uid}", file=sys.stderr)
Expand Down