Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Container Manager Wait Until Ready #417

Merged
merged 10 commits into the base branch on Mar 8, 2018
6 changes: 3 additions & 3 deletions clipper_admin/clipper_admin/clipper_admin.py
Expand Up @@ -282,7 +282,7 @@ def build_and_deploy_model(self,
:py:meth:`clipper.ClipperConnection.set_num_replicas`.
batch_size : int, optional
The user-defined query batch size for the model. Replicas of the model will attempt
to process at most `batch_size` queries simultaneously. They may process smaller
to process at most `batch_size` queries simultaneously. They may process smaller
batches if `batch_size` queries are not immediately available.
If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
replicas of this model.
Expand Down Expand Up @@ -446,7 +446,7 @@ def deploy_model(self,
:py:meth:`clipper.ClipperConnection.set_num_replicas`.
batch_size : int, optional
The user-defined query batch size for the model. Replicas of the model will attempt
to process at most `batch_size` queries simultaneously. They may process smaller
to process at most `batch_size` queries simultaneously. They may process smaller
batches if `batch_size` queries are not immediately available.
If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
replicas of this model.
Expand Down Expand Up @@ -521,7 +521,7 @@ def register_model(self,
and used purely for user annotations.
batch_size : int, optional
The user-defined query batch size for the model. Replicas of the model will attempt
to process at most `batch_size` queries simultaneously. They may process smaller
to process at most `batch_size` queries simultaneously. They may process smaller
batches if `batch_size` queries are not immediately available.
If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
replicas of this model.
Expand Down
4 changes: 2 additions & 2 deletions clipper_admin/clipper_admin/deployers/python.py
Expand Up @@ -73,7 +73,7 @@ def create_endpoint(
:py:meth:`clipper.ClipperConnection.set_num_replicas`.
batch_size : int, optional
The user-defined query batch size for the model. Replicas of the model will attempt
to process at most `batch_size` queries simultaneously. They may process smaller
to process at most `batch_size` queries simultaneously. They may process smaller
batches if `batch_size` queries are not immediately available.
If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
replicas of this model.
Expand Down Expand Up @@ -136,7 +136,7 @@ def deploy_python_closure(
:py:meth:`clipper.ClipperConnection.set_num_replicas`.
batch_size : int, optional
The user-defined query batch size for the model. Replicas of the model will attempt
to process at most `batch_size` queries simultaneously. They may process smaller
to process at most `batch_size` queries simultaneously. They may process smaller
batches if `batch_size` queries are not immediately available.
If the default value of -1 is used, Clipper will adaptively calculate the batch size for individual
replicas of this model.
Expand Down
16 changes: 15 additions & 1 deletion clipper_admin/clipper_admin/docker/docker_container_manager.py
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import random
import time
from ..container_manager import (
create_model_container_label, parse_model_container_label,
ContainerManager, CLIPPER_DOCKER_LABEL, CLIPPER_MODEL_CONTAINER_LABEL,
Expand Down Expand Up @@ -226,6 +227,9 @@ def _add_replica(self, name, version, input_type, image):
add_to_metric_config(model_container_name,
CLIPPER_INTERNAL_METRIC_PORT)

# Return model_container_name so we can check if it's up and running later
return model_container_name

def set_num_replicas(self, name, version, input_type, image, num_replicas):
current_replicas = self._get_replicas(name, version)
if len(current_replicas) < num_replicas:
Expand All @@ -237,8 +241,18 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas):
name=name,
version=version,
missing=(num_missing)))

model_container_names = []
for _ in range(num_missing):
self._add_replica(name, version, input_type, image)
container_name = self._add_replica(name, version, input_type,
image)
model_container_names.append(container_name)

for name in model_container_names:
container = self.docker_client.containers.get(name)
while container.attrs.get("State").get("Status") != "running":
time.sleep(3)

elif len(current_replicas) > num_replicas:
num_extra = len(current_replicas) - num_replicas
logger.info(
Expand Down
Expand Up @@ -215,6 +215,11 @@ def deploy_model(self, name, version, input_type, image, num_replicas=1):
self._k8s_beta.create_namespaced_deployment(
body=body, namespace='default')

while self._k8s_beta.read_namespaced_deployment_status(
name=deployment_name, namespace='default').status.available_replicas \
!= num_replicas:
time.sleep(3)

def get_num_replicas(self, name, version):
deployment_name = get_model_deployment_name(name, version)
response = self._k8s_beta.read_namespaced_deployment_scale(
Expand All @@ -235,6 +240,12 @@ def set_num_replicas(self, name, version, input_type, image, num_replicas):
}
})


while self._k8s_beta.read_namespaced_deployment_status(
name=deployment_name, namespace='default').status.available_replicas \
!= num_replicas:
time.sleep(3)

def get_logs(self, logging_dir):
logging_dir = os.path.abspath(os.path.expanduser(logging_dir))

Expand Down