Skip to content

Commit

Permalink
vdk-server: status returns if server is not running (#754)
Browse files Browse the repository at this point in the history
When running `vdk server --status` command it was returning that the
control service was installed but it was not running (the process was
down). So added a check to explicitly verify that.
Added a few more debug messages.

Testing Done: vdk server -s locally and verified message.

Signed-off-by: Antoni Ivanov <aivanov@vmware.com>
  • Loading branch information
antoniivanov committed Mar 16, 2022
1 parent 1b4e66f commit 2f598d2
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 69 deletions.
165 changes: 96 additions & 69 deletions projects/vdk-plugins/vdk-server/src/vdk/plugin/server/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
)
from vdk.internal.core.errors import BaseVdkError
from vdk.internal.core.errors import ErrorMessage
from vdk.plugin.server import server_plugin_utils

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -102,13 +103,15 @@ def check_status(self):
self.__get_kind_cluster()
and self.__docker_container_exists(self.git_server_container_name)
and self.__docker_container_exists(self.docker_registry_container_name)
and self.__control_service_is_up()
):
log.info("The Versatile Data Kit Control Service is installed")
log.info("The Versatile Data Kit Control Service is installed and running.")
log.info(
"Access the REST API at http://localhost:8092/data-jobs/swagger-ui.html\n"
)
else:
log.info("No installation found")
log.info("No running installation found.")
sys.exit(1)

@staticmethod
def __get_current_directory() -> pathlib.Path:
Expand All @@ -121,14 +124,18 @@ def __docker_container_exists(container_name) -> bool:
"""
docker_client = docker.from_env()
try:
return next(
container_exists = next(
(
c
for c in docker_client.api.containers(all=True)
if f"/{container_name}" in c["Names"]
),
None,
)
log.debug(
f"Container {container_name} is {'' if container_exists else 'not'} present"
)
return container_exists
except Exception as ex:
log.error(f"Failed to search for a Docker container. {str(ex)}")
sys.exit(1)
Expand Down Expand Up @@ -531,7 +538,12 @@ def __get_kind_cluster(self) -> bool:
log.error(f"Stderr output: {stderr_as_str}")
sys.exit(result.returncode)
stdout_as_str = result.stdout.decode("utf-8")
return self.kind_cluster_name in stdout_as_str.splitlines()
kind_cluster_exists = self.kind_cluster_name in stdout_as_str.splitlines()
if kind_cluster_exists:
log.debug(f"Kind cluster {self.kind_cluster_name} is present.")
else:
log.debug(f"Kind cluster {self.kind_cluster_name} is not present.")
return kind_cluster_exists
except Exception as ex:
log.error(
f'Failed to obtain information about the Kind cluster "{self.kind_cluster_name}". '
Expand Down Expand Up @@ -604,71 +616,7 @@ def __install_helm_chart(self):
log.error(f"Stderr output: {stderr_as_str}")
exit(result.returncode)
result = subprocess.run(
[
"helm",
"install",
self.helm_installation_name,
self.helm_chart_name,
"--atomic",
"--set",
"service.type=ClusterIP",
"--set",
"deploymentBuilderResourcesDefault.limits.cpu=0",
"--set",
"deploymentBuilderResourcesDefault.requests.cpu=0",
"--set",
"deploymentBuilderResourcesDefault.limits.memory=0",
"--set",
"deploymentBuilderResourcesDefault.requests.memory=0",
"--set",
"deploymentDefaultDataJobsResources.limits.cpu=0",
"--set",
"deploymentDefaultDataJobsResources.requests.cpu=0",
"--set",
"deploymentDefaultDataJobsResources.limits.memory=0",
"--set",
"deploymentDefaultDataJobsResources.requests.memory=0",
"--set",
"resources.limits.cpu=0",
"--set",
"resources.requests.cpu=0",
"--set",
"resources.limits.memory=0",
"--set",
"resources.requests.memory=0",
"--set",
"cockroachdb.statefulset.replicas=1",
"--set",
"replicas=1",
"--set",
"ingress.enabled=true",
"--set",
"deploymentGitBranch=master",
"--set",
"deploymentDockerRegistryType=generic",
"--set",
f"deploymentDockerRepository={self.docker_registry_container_name}:5000",
"--set",
"proxyRepositoryURL=localhost:5000",
"--set",
f"deploymentGitUrl={git_server_ip}/{self.git_server_admin_user}/{self.git_server_repository_name}.git",
"--set",
f"deploymentGitUsername={self.git_server_admin_user}",
"--set",
f"deploymentGitPassword={self.git_server_admin_password}",
"--set",
f"uploadGitReadWriteUsername={self.git_server_admin_user}",
"--set",
f"uploadGitReadWritePassword={self.git_server_admin_password}",
"--set",
"extraEnvVars.GIT_SSL_ENABLED=false",
"--set",
"extraEnvVars.DATAJOBS_DEPLOYMENT_BUILDER_EXTRAARGS=--insecure",
"--set",
"datajobTemplate.template.spec.successfulJobsHistoryLimit=5",
"--set",
"datajobTemplate.template.spec.failedJobsHistoryLimit=5",
],
self.__helm_install_command(git_server_ip),
capture_output=True,
)
if result.returncode != 0:
Expand All @@ -683,6 +631,73 @@ def __install_helm_chart(self):
)
sys.exit(1)

def __helm_install_command(self, git_server_ip):
return [
"helm",
"install",
self.helm_installation_name,
self.helm_chart_name,
"--atomic",
"--set",
"service.type=ClusterIP",
"--set",
"deploymentBuilderResourcesDefault.limits.cpu=0",
"--set",
"deploymentBuilderResourcesDefault.requests.cpu=0",
"--set",
"deploymentBuilderResourcesDefault.limits.memory=0",
"--set",
"deploymentBuilderResourcesDefault.requests.memory=0",
"--set",
"deploymentDefaultDataJobsResources.limits.cpu=0",
"--set",
"deploymentDefaultDataJobsResources.requests.cpu=0",
"--set",
"deploymentDefaultDataJobsResources.limits.memory=0",
"--set",
"deploymentDefaultDataJobsResources.requests.memory=0",
"--set",
"resources.limits.cpu=0",
"--set",
"resources.requests.cpu=0",
"--set",
"resources.limits.memory=0",
"--set",
"resources.requests.memory=0",
"--set",
"cockroachdb.statefulset.replicas=1",
"--set",
"replicas=1",
"--set",
"ingress.enabled=true",
"--set",
"deploymentGitBranch=master",
"--set",
"deploymentDockerRegistryType=generic",
"--set",
f"deploymentDockerRepository={self.docker_registry_container_name}:5000",
"--set",
"proxyRepositoryURL=localhost:5000",
"--set",
f"deploymentGitUrl={git_server_ip}/{self.git_server_admin_user}/{self.git_server_repository_name}.git",
"--set",
f"deploymentGitUsername={self.git_server_admin_user}",
"--set",
f"deploymentGitPassword={self.git_server_admin_password}",
"--set",
f"uploadGitReadWriteUsername={self.git_server_admin_user}",
"--set",
f"uploadGitReadWritePassword={self.git_server_admin_password}",
"--set",
"extraEnvVars.GIT_SSL_ENABLED=false",
"--set",
"extraEnvVars.DATAJOBS_DEPLOYMENT_BUILDER_EXTRAARGS=--insecure",
"--set",
"datajobTemplate.template.spec.successfulJobsHistoryLimit=5",
"--set",
"datajobTemplate.template.spec.failedJobsHistoryLimit=5",
]

def __uninstall_helm_chart(self):
log.info("Uninstalling Control Service...")
try:
Expand Down Expand Up @@ -711,6 +726,7 @@ def __install_ingress_prerequisites(self):
config.load_kube_config()
with client.ApiClient() as k8s_client:
try:
log.debug("Deploy ingress controller...")
utils.create_from_yaml(
k8s_client,
self.__current_directory.joinpath("ingress-nginx-deploy.yaml"),
Expand All @@ -727,6 +743,7 @@ def __install_ingress_prerequisites(self):
w = watch.Watch()
k8s_client = client.CoreV1Api()
try:
log.debug("Wait for ingress controller to be ready ...")
for event in w.stream(
func=k8s_client.list_namespaced_pod,
namespace="ingress-nginx",
Expand Down Expand Up @@ -772,3 +789,13 @@ def __cleanup_configuration():
log.error(f"Failed to clean up. {str(ex)}")
exit(1)
log.info("Done")

@staticmethod
def __control_service_is_up():
with server_plugin_utils.requests_retry_session() as s:
response: requests.Response = s.get("http://localhost:8092")
if response.status_code < 300:
log.debug("Control Service at http://localhost:8092 is UP.")
else:
log.debug("Control Service at http://localhost:8092 is DOWN.")
return response.status_code < 300
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2021 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


def requests_retry_session(
retries=3,
backoff_factor=0.3,
status_forcelist=(500, 502, 503, 504),
session=None,
) -> requests.Session:
session = session or requests.Session()
retry = Retry(
total=retries,
read=retries,
connect=retries,
backoff_factor=backoff_factor,
status_forcelist=status_forcelist,
)
adapter = HTTPAdapter(max_retries=retry)
session.mount("http://", adapter)
session.mount("https://", adapter)
return session

0 comments on commit 2f598d2

Please sign in to comment.