diff --git a/.ci/_job_test.sh b/.ci/_job_test.sh
new file mode 100644
index 000000000..12fd63d5f
--- /dev/null
+++ b/.ci/_job_test.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+# ========== Utility functions ==========
+function get_khjob_status_ok {
+  kubectl get khjob -n $NS kh-test-job -ojsonpath='{.status.ok}'
+}
+
+function get_khstate_ok {
+  kubectl get khstate -n $NS kh-test-job -ojsonpath='{.spec.OK}'
+}
+
+function job_phase {
+  kubectl get khjob -n $NS kh-test-job -ojsonpath='{.spec.phase}'
+}
+
+function fail_test {
+  # Print debug information
+  echo ---
+  kubectl get khjob -n $NS kh-test-job -oyaml
+  echo ---
+  kubectl get khstate -n $NS kh-test-job -oyaml
+  exit 1
+}
+
+echo ========== Job E2E test - Job successful case ==========
+sed s/REPORT_FAILURE_VALUE/false/ .ci/khjob.yaml |kubectl apply -n $NS -f-
+
+if [ "$(get_khjob_status_ok)" != "" ]; then
+  echo "There should not be any OK field initially"; fail_test
+fi
+
+if [ "$(job_phase)" != "Running" ]; then
+  echo "Job should be in running phase"; fail_test
+fi
+
+# Wait until the field is available
+TIMEOUT=30
+while [ "$(job_phase)" == "Running" ] && [ $TIMEOUT -gt 0 ]; do sleep 1; echo Job phase: $(job_phase), timeout: ${TIMEOUT}; let TIMEOUT-=1; done
+
+# Check the result
+if [ "$(get_khjob_status_ok)" != "true" ]; then
+  echo "khjob status should have returned OK"; fail_test
+fi
+
+if [ "$(get_khstate_ok)" != "true" ]; then
+  echo "khstate should have returned OK"; fail_test
+fi
+
+if [ "$(job_phase)" != "Completed" ]; then
+  echo "Job phase should be Completed."; fail_test
+fi
+
+# Delete the job
+kubectl delete khjob -n $NS kh-test-job
+
+
+
+echo ========== Job E2E test - Job fail case ==========
+
+sed s/REPORT_FAILURE_VALUE/true/ .ci/khjob.yaml |kubectl apply -n $NS -f-
+
+if [ "$(get_khjob_status_ok)" != "" ]; then
+  echo "There should not be any OK field initially"; fail_test
+fi
+
+if [ "$(job_phase)" != "Running" ]; then
+  echo "Job should be in running phase"; fail_test
+fi
+
+# Wait until the field is available
+TIMEOUT=30
+while [ "$(job_phase)" == "Running" ] && [ $TIMEOUT -gt 0 ]; do sleep 1; echo Job phase: $(job_phase), timeout: ${TIMEOUT}; let TIMEOUT-=1; done
+
+# Check the result
+if [ "$(get_khjob_status_ok)" != "false" ]; then
+  echo "khjob status should have NOT returned OK"; fail_test
+fi
+
+if [ "$(get_khstate_ok)" != "false" ]; then
+  echo "khstate should have NOT returned OK"; fail_test
+fi
+
+if [ "$(job_phase)" != "Completed" ]; then
+  echo "Job phase should be Completed."; fail_test
+fi
+
+# Delete the job
+kubectl delete khjob -n $NS kh-test-job
diff --git a/.ci/e2e.sh b/.ci/e2e.sh
index 7d7badf70..616be46d8 100755
--- a/.ci/e2e.sh
+++ b/.ci/e2e.sh
@@ -58,6 +58,7 @@ kubectl logs -n $NS --selector $selector
 for i in {1..60}
 do
   khsCount=$(kubectl get -n $NS khs -o yaml |grep "OK: true" |wc -l)
+  kcStatusCount=$(kubectl get -n $NS khcheck -o yaml |grep "ok: true" |wc -l)
   cDeploy=$(kubectl -n $NS get pods -l app=kuberhealthy-check |grep deployment |grep Completed |wc -l)
   cDNS=$(kubectl -n $NS get pods -l app=kuberhealthy-check |grep dns-status-internal |grep Completed |wc -l)
   cDS=$(kubectl -n $NS get pods -l app=kuberhealthy-check |grep daemonset |grep Completed |wc -l)
@@ -65,7 +66,7 @@ do
   cPS=$(kubectl -n $NS get pods -l app=kuberhealthy-check |grep pod-status |grep Completed |wc -l)
   failCount=$(kubectl get -n $NS khs -o yaml |grep "OK: false" |wc -l)
 
-  if [ $khsCount -ge 5 ] && [ $cDeploy -ge 1 ] && [ $cDS -ge 1 ] && [ $cDNS -ge 1 ] && [ $cPR -ge 1 ] && [ $cPS -ge 1 ]
+  if [ $khsCount -ge 5 ] && [ $khsCount -eq $kcStatusCount ] && [ $cDeploy -ge 1 ] && [ $cDS -ge 1 ] && [ $cDNS -ge 1 ] && [ $cPR -ge 1 ] && [ $cPS -ge 1 ]
   then
     echo "Kuberhealthy is working like it should and all tests passed"
     break
@@ -113,3 +114,5 @@ then
 else
   echo "No Error deployment pods found"
 fi
+
+. .ci/_job_test.sh
diff --git a/.ci/khjob.yaml b/.ci/khjob.yaml
new file mode 100644
index 000000000..cb632599c
--- /dev/null
+++ b/.ci/khjob.yaml
@@ -0,0 +1,20 @@
+apiVersion: comcast.github.io/v1
+kind: KuberhealthyJob
+metadata:
+  name: kh-test-job
+spec:
+  timeout: 2m
+  podSpec:
+    containers:
+    - env:
+      - name: REPORT_FAILURE
+        value: "REPORT_FAILURE_VALUE"
+      - name: REPORT_DELAY
+        value: 5s
+      image: kuberhealthy/test-check:latest
+      imagePullPolicy: Always
+      name: main
+      resources:
+        requests:
+          cpu: 10m
+          memory: 50Mi
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 17ea0e343..a70482089 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -25,4 +25,5 @@
 - [Joel Kulesa](https://github.com/jkulesa)
 - [McKenna Jones](https://github.com/mckennajones)
 - [Allan Ramirez](https://github.com/ramirezag)
-- [Erich Stoekl](https://github.com/erichstoekl)
\ No newline at end of file
+- [Erich Stoekl](https://github.com/erichstoekl)
+- [Ugur Zongur](https://github.com/ugurzongur)
diff --git a/cmd/kuberhealthy/crd.go b/cmd/kuberhealthy/crd.go
index 600b24121..e0a776bed 100644
--- a/cmd/kuberhealthy/crd.go
+++ b/cmd/kuberhealthy/crd.go
@@ -28,11 +28,6 @@ func setCheckStateResource(checkName string, checkNamespace string, state khstat
 	}
 	resourceVersion := existingState.GetResourceVersion()
 
-	// set the pod name that wrote the khstate
-	state.AuthoritativePod = podHostname
-	now := metav1.Now() // set the time the khstate was last
-	state.LastRun = &now
-
 	khState := khstatev1.NewKuberhealthyState(name, state)
 	khState.SetResourceVersion(resourceVersion)
 	// TODO - if "try again" message found in error, then try again
@@ -127,18 +122,13 @@ func getJobState(j *external.Checker) (khstatev1.WorkloadDetails, error) {
 
 // setJobPhase updates the kuberhealthy job phase depending on the state of its run.
 func setJobPhase(jobName string, jobNamespace string, jobPhase khjobv1.JobPhase) error {
-
 	kj, err := khJobClient.KuberhealthyJobs(jobNamespace).Get(jobName, metav1.GetOptions{})
 	if err != nil {
 		log.Errorln("error getting khjob:", jobName, err)
 		return err
 	}
-	resourceVersion := kj.GetResourceVersion()
-	updatedJob := khjobv1.NewKuberhealthyJob(jobName, jobNamespace, kj.Spec)
-	updatedJob.SetResourceVersion(resourceVersion)
 	log.Infoln("Setting khjob phase to:", jobPhase)
-	updatedJob.Spec.Phase = jobPhase
-
-	_, err = khJobClient.KuberhealthyJobs(jobNamespace).Update(&updatedJob)
+	kj.Spec.Phase = jobPhase
+	_, err = khJobClient.KuberhealthyJobs(jobNamespace).Update(&kj)
 	return err
 }
diff --git a/cmd/kuberhealthy/kuberhealthy.go b/cmd/kuberhealthy/kuberhealthy.go
index 96f7c79ad..8f370d9fe 100644
--- a/cmd/kuberhealthy/kuberhealthy.go
+++ b/cmd/kuberhealthy/kuberhealthy.go
@@ -82,7 +82,7 @@ func (k *Kuberhealthy) setCheckExecutionError(checkName string, checkNamespace s
 	log.Debugln("Setting execution state of check", checkName, "to", details.OK, details.Errors, details.CurrentUUID, details.GetKHWorkload())
 
 	// store the check state with the CRD
-	err = k.storeCheckState(checkName, checkNamespace, details)
+	err = k.updateWorkloadStatus(checkName, checkNamespace, details)
 	if err != nil {
 		return fmt.Errorf("Was unable to write an execution error to the CRD status with error: %w", err)
 	}
@@ -116,7 +116,7 @@ func (k *Kuberhealthy) setJobExecutionError(jobName string, jobNamespace string,
 	log.Debugln("Setting execution state of job", jobName, "to", details.OK, details.Errors, details.CurrentUUID, details.GetKHWorkload())
 
 	// store the check state with the CRD
-	err = k.storeCheckState(jobName, jobNamespace, details)
+	err = k.updateWorkloadStatus(jobName, jobNamespace, details)
 	if err != nil {
 		return fmt.Errorf("Was unable to write an execution error to the CRD status with error: %w", err)
 	}
@@ -135,7 +135,7 @@ func (k *Kuberhealthy) Shutdown(doneChan chan struct{}) {
 		log.Infoln("shutdown: aborting control context")
 		k.shutdownCtxFunc() // stop the control system
 	}
-	time.Sleep(5) // help prevent more checks from starting in a race before control system stop happens
+	time.Sleep(time.Second * 5) // help prevent more checks from starting in a race before control system stop happens
 	log.Infoln("shutdown: stopping checks")
 	k.StopChecks() // stop all checks
 	log.Infoln("shutdown: ready for main program shutdown")
@@ -848,7 +848,7 @@ func (k *Kuberhealthy) masterMonitor(ctx context.Context, becameMasterChan chan
 	// event, then we calculate if we should become or lose master.
 	for range ticker.C {
 
-		if time.Now().Sub(lastMasterChangeTime) < interval {
+		if time.Since(lastMasterChangeTime) < interval {
 			log.Println("control: waiting for master changes to settle...")
 			continue
 		}
@@ -918,7 +918,7 @@ func (k *Kuberhealthy) runJob(ctx context.Context, job khjobv1.KuberhealthyJob)
 	// Subtract 10 seconds from run time since there are two 5 second sleeps during the job run where kuberhealthy
 	// waits for all pods to clear before running the check and waits for all pods to exit once the check has finished
 	// running. Both occur before and after the kh job pod completes its run.
-	jobRunDuration := time.Now().Sub(jobStartTime) - time.Second*10
+	jobRunDuration := time.Since(jobStartTime) - time.Second*10
 
 	// make a new state for this job and fill it from the job's current status
 	jobDetails, err := getJobState(j)
@@ -930,6 +930,10 @@ func (k *Kuberhealthy) runJob(ctx context.Context, job khjobv1.KuberhealthyJob)
 	details.OK, details.Errors = j.CurrentStatus()
 	details.RunDuration = jobRunDuration.String()
 	details.CurrentUUID = jobDetails.CurrentUUID
+	details.AuthoritativePod = podHostname
+
+	lastRun := metav1.Now()
+	details.LastRun = &lastRun
 
 	// Fetch node information from running check pod using kh run uuid
 	selector := "kuberhealthy-run-id=" + details.CurrentUUID
@@ -972,12 +976,12 @@ func (k *Kuberhealthy) runJob(ctx context.Context, job khjobv1.KuberhealthyJob)
 	log.Infoln("Setting state of job", j.Name(), "in namespace", j.CheckNamespace(), "to", details.OK, details.Errors, details.RunDuration, details.CurrentUUID, details.GetKHWorkload())
 
 	// store the job state with the CRD
-	err = k.storeCheckState(j.Name(), j.CheckNamespace(), details)
+	err = k.updateWorkloadStatus(j.Name(), j.CheckNamespace(), details)
 	if err != nil {
 		log.Errorln("Error storing CRD state for job:", j.Name(), "in namespace", j.CheckNamespace(), err)
 	}
-	// set KHJob phase to running:
+	// set KHJob phase to completed:
 	err = setJobPhase(j.Name(), j.CheckNamespace(), khjobv1.JobCompleted)
 	if err != nil {
 		log.Errorln("Error setting job phase:", err)
 	}
@@ -1031,7 +1035,7 @@ func (k *Kuberhealthy) runCheck(ctx context.Context, c *external.Checker) {
 	// Subtract 10 seconds from run time since there are two 5 second sleeps during the check run where kuberhealthy
 	// waits for all pods to clear before running the check and waits for all pods to exit once the check has finished
 	// running. Both occur before and after the checker pod completes its run.
-	checkRunDuration := time.Now().Sub(checkStartTime) - time.Second*10
+	checkRunDuration := time.Since(checkStartTime) - time.Second*10
 
 	// make a new state for this check and fill it from the check's current status
 	checkDetails, err := getCheckState(c)
@@ -1043,6 +1047,10 @@ func (k *Kuberhealthy) runCheck(ctx context.Context, c *external.Checker) {
 	details.OK, details.Errors = c.CurrentStatus()
 	details.RunDuration = checkRunDuration.String()
 	details.CurrentUUID = checkDetails.CurrentUUID
+	details.AuthoritativePod = podHostname
+
+	lastRun := metav1.Now()
+	details.LastRun = &lastRun
 
 	// Fetch node information from running check pod using kh run uuid
 	selector := "kuberhealthy-run-id=" + details.CurrentUUID
@@ -1085,7 +1093,7 @@ func (k *Kuberhealthy) runCheck(ctx context.Context, c *external.Checker) {
 	log.Infoln("Setting state of check", c.Name(), "in namespace", c.CheckNamespace(), "to", details.OK, details.Errors, details.RunDuration, details.CurrentUUID, details.GetKHWorkload())
 
 	// store the check state with the CRD
-	err = k.storeCheckState(c.Name(), c.CheckNamespace(), details)
+	err = k.updateWorkloadStatus(c.Name(), c.CheckNamespace(), details)
 	if err != nil {
 		log.Errorln("Error storing CRD state for check:", c.Name(), "in namespace", c.CheckNamespace(), err)
 	}
@@ -1095,8 +1103,64 @@ func (k *Kuberhealthy) runCheck(ctx context.Context, c *external.Checker) {
 	}
 }
 
-// storeCheckState stores the check state in its cluster CRD
-func (k *Kuberhealthy) storeCheckState(checkName string, checkNamespace string, details khstatev1.WorkloadDetails) error {
+func (k *Kuberhealthy) updateWorkloadStatus(name string, namespace string, details khstatev1.WorkloadDetails) error {
+	var updateFunction func() error
+	khWorkload := details.GetKHWorkload()
+	switch khWorkload {
+	case khstatev1.KHCheck:
+		status := khcheckv1.CheckStatus{}
+		status.OK = details.OK
+		status.Errors = details.Errors
+		status.RunDuration = details.RunDuration
+		status.Node = details.Node
+		status.LastRun = details.LastRun
+		status.AuthoritativePod = details.AuthoritativePod
+		status.CurrentUUID = details.CurrentUUID
+		updateFunction = func() error { return k.updateCheckStatus(name, namespace, status) }
+	case khstatev1.KHJob:
+		status := khjobv1.JobStatus{}
+		status.OK = details.OK
+		status.Errors = details.Errors
+		status.RunDuration = details.RunDuration
+		status.Node = details.Node
+		status.LastRun = details.LastRun
+		status.AuthoritativePod = details.AuthoritativePod
+		status.CurrentUUID = details.CurrentUUID
+		updateFunction = func() error { return k.updateJobStatus(name, namespace, status) }
+	}
+	resourceDescription := fmt.Sprintf("%s %s/%s", khWorkload, namespace, name)
+	err := updateRetryingObjectModifiedError(resourceDescription, updateFunction)
+	if err != nil {
+		log.Errorln("error updating state:", khWorkload, namespace, name, err)
+		return err
+	}
+
+	// Now store the state custom resource
+	return k.storeKHState(name, namespace, details)
+}
+
+func (k *Kuberhealthy) updateCheckStatus(name string, namespace string, status khcheckv1.CheckStatus) error {
+	check, err := khCheckClient.KuberhealthyChecks(namespace).Get(name, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+	check.Status = status
+	_, err = khCheckClient.KuberhealthyChecks(namespace).UpdateStatus(&check)
+	return err
+}
+
+func (k *Kuberhealthy) updateJobStatus(name string, namespace string, status khjobv1.JobStatus) error {
+	job, err := khJobClient.KuberhealthyJobs(namespace).Get(name, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+	job.Status = status
+	_, err = khJobClient.KuberhealthyJobs(namespace).UpdateStatus(&job)
+	return err
+}
+
+// storeKHState stores the check state in its cluster CRD
+func (k *Kuberhealthy) storeKHState(checkName string, checkNamespace string, details khstatev1.WorkloadDetails) error {
 
 	// ensure the CRD resource exits
 	err := ensureStateResourceExists(checkName, checkNamespace, details.GetKHWorkload())
@@ -1104,8 +1168,13 @@ func (k *Kuberhealthy) storeCheckState(checkName string, checkNamespace string,
 		return err
 	}
 
+	resourceDescription := fmt.Sprintf("khstate %s/%s", checkNamespace, checkName)
+	return updateRetryingObjectModifiedError(resourceDescription, func() error { return setCheckStateResource(checkName, checkNamespace, details) })
+}
+
+func updateRetryingObjectModifiedError(resourceDescription string, updateFunction func() error) error {
 	// put the status on the CRD from the check
-	err = setCheckStateResource(checkName, checkNamespace, details)
+	err := updateFunction()
 
 	//TODO: Make this retry of updating custom resources repeatable
 	//
@@ -1121,7 +1190,7 @@ func (k *Kuberhealthy) storeCheckState(checkName string, checkNamespace string,
 
 		// if too many retires have occurred, we fail up the stack further
 		if tries > maxTries {
-			return fmt.Errorf("failed to update khstate for check %s in namespace %s after %d with error %w", checkName, checkNamespace, maxTries, err)
+			return fmt.Errorf("failed to update %s after %d with error %w", resourceDescription, maxTries, err)
 		}
 		log.Infoln("Failed to update khstate for check because object was modified by another process. Retrying in " + delay.String() + ". Try " + strconv.Itoa(tries) + " of " + strconv.Itoa(maxTries) + ".")
 
@@ -1130,7 +1199,7 @@ func (k *Kuberhealthy) storeCheckState(checkName string, checkNamespace string,
 		delay = delay + delay
 
 		// try setting the check state again
-		err = setCheckStateResource(checkName, checkNamespace, details)
+		err = updateFunction()
 
 		// count how many times we've retried
 		tries++
@@ -1459,10 +1528,14 @@ func (k *Kuberhealthy) externalCheckReportHandler(w http.ResponseWriter, r *http
 	details.RunDuration = checkRunDuration
 	details.Namespace = podReport.Namespace
 	details.CurrentUUID = podReport.UUID
+	details.AuthoritativePod = podHostname
+
+	lastRun := metav1.Now()
+	details.LastRun = &lastRun
 
 	// since the check is validated, we can proceed to update the status now
 	k.externalCheckReportHandlerLog(requestID, "Setting check with name", podReport.Name, "in namespace", podReport.Namespace, "to 'OK' state:", details.OK, "uuid", details.CurrentUUID, details.GetKHWorkload())
-	err = k.storeCheckState(podReport.Name, podReport.Namespace, details)
+	err = k.updateWorkloadStatus(podReport.Name, podReport.Namespace, details)
 	if err != nil {
 		w.WriteHeader(http.StatusInternalServerError)
 		k.externalCheckReportHandlerLog(requestID, "failed to store check state for %s: %w", podReport.Name, err)
diff --git a/deploy/helm/kuberhealthy/crds/comcast.github.io_khchecks.yaml b/deploy/helm/kuberhealthy/crds/comcast.github.io_khchecks.yaml
index 83632995a..a3055d773 100644
--- a/deploy/helm/kuberhealthy/crds/comcast.github.io_khchecks.yaml
+++ b/deploy/helm/kuberhealthy/crds/comcast.github.io_khchecks.yaml
@@ -19,7 +19,20 @@ spec:
   scope: Namespaced
   preserveUnknownFields: false
   versions:
-  - name: v1
+  - additionalPrinterColumns:
+    - description: OK status
+      jsonPath: .status.ok
+      name: OK
+      type: string
+    - description: Last Run
+      jsonPath: .status.lastRun
+      name: Age LastRun
+      type: date
+    - description: Age
+      jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1
     schema:
       openAPIV3Schema:
         description: KuberhealthyCheck represents the data in the CRD for configuring
@@ -6451,9 +6464,40 @@ spec:
             - runInterval
             - timeout
             type: object
+          status:
+            description: Status holds the results of the last run
+            properties:
+              authoritativePod:
+                type: string
+              errors:
+                items:
+                  type: string
+                type: array
+              lastRun:
+                format: date-time
+                nullable: true
+                type: string
+              node:
+                type: string
+              ok:
+                type: boolean
+              runDuration:
+                type: string
+              uuid:
+                type: string
+            required:
+            - authoritativePod
+            - errors
+            - node
+            - ok
+            - runDuration
+            - uuid
+            type: object
         type: object
     served: true
     storage: true
+    subresources:
+      status: {}
 status:
   acceptedNames:
     kind: ""
diff --git a/deploy/helm/kuberhealthy/crds/comcast.github.io_khjobs.yaml b/deploy/helm/kuberhealthy/crds/comcast.github.io_khjobs.yaml
index c1b1f9898..1fd4a6506 100644
--- a/deploy/helm/kuberhealthy/crds/comcast.github.io_khjobs.yaml
+++ b/deploy/helm/kuberhealthy/crds/comcast.github.io_khjobs.yaml
@@ -19,7 +19,20 @@ spec:
   scope: Namespaced
   preserveUnknownFields: false
   versions:
-  - name: v1
+  - additionalPrinterColumns:
+    - description: OK status
+      jsonPath: .status.ok
+      name: OK
+      type: string
+    - description: Last Run
+      jsonPath: .status.lastRun
+      name: Age LastRun
+      type: date
+    - description: Age
+      jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1
     schema:
       openAPIV3Schema:
         description: KuberhealthyJob represents the data in the CRD for configuring
@@ -6452,9 +6465,40 @@ spec:
             - podSpec
             - timeout
             type: object
+          status:
+            description: Status holds the results of the job
+            properties:
+              authoritativePod:
+                type: string
+              errors:
+                items:
+                  type: string
+                type: array
+              lastRun:
+                format: date-time
+                nullable: true
+                type: string
+              node:
+                type: string
+              ok:
+                type: boolean
+              runDuration:
+                type: string
+              uuid:
+                type: string
+            required:
+            - authoritativePod
+            - errors
+            - node
+            - ok
+            - runDuration
+            - uuid
+            type: object
         type: object
     served: true
     storage: true
+    subresources:
+      status: {}
 status:
   acceptedNames:
     kind: ""
diff --git a/deploy/helm/kuberhealthy/templates/clusterrole.yaml b/deploy/helm/kuberhealthy/templates/clusterrole.yaml
index 3ba1c4013..ed47c994b 100644
--- a/deploy/helm/kuberhealthy/templates/clusterrole.yaml
+++ b/deploy/helm/kuberhealthy/templates/clusterrole.yaml
@@ -51,6 +51,13 @@ rules:
       - khjobs
     verbs:
       - "*"
+  - apiGroups:
+      - comcast.github.io
+    resources:
+      - khchecks/status
+      - khjobs/status
+    verbs:
+      - "*"
   - apiGroups:
       - ""
     resources:
diff --git a/pkg/apis/khcheck/v1/kuberhealthycheck.go b/pkg/apis/khcheck/v1/kuberhealthycheck.go
index 24c2efc0a..1b8291dad 100644
--- a/pkg/apis/khcheck/v1/kuberhealthycheck.go
+++ b/pkg/apis/khcheck/v1/kuberhealthycheck.go
@@ -38,6 +38,7 @@ type KuberhealthyChecksGetter interface {
 type KuberhealthyCheckInterface interface {
 	Create(*KuberhealthyCheck) (KuberhealthyCheck, error)
 	Update(*KuberhealthyCheck) (KuberhealthyCheck, error)
+	UpdateStatus(*KuberhealthyCheck) (KuberhealthyCheck, error)
 	Delete(name string, options *metav1.DeleteOptions) error
 	DeleteCollection(options *metav1.DeleteOptions, listOptions metav1.ListOptions) error
 	Get(name string, options metav1.GetOptions) (KuberhealthyCheck, error)
@@ -130,6 +131,21 @@ func (c *kuberhealthyChecks) Update(kuberhealthyCheck *KuberhealthyCheck) (resul
 	return
 }
 
+// UpdateStatus was generated because the type contains a Status member.
+// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+func (c *kuberhealthyChecks) UpdateStatus(kuberhealthyCheck *KuberhealthyCheck) (result KuberhealthyCheck, err error) {
+	result = KuberhealthyCheck{}
+	err = c.client.Put().
+		Namespace(c.ns).
+		Resource("khchecks").
+		Name(kuberhealthyCheck.Name).
+		SubResource("status").
+		Body(kuberhealthyCheck).
+		Do(context.TODO()).
+		Into(&result)
+	return
+}
+
 // Delete takes name of the kuberhealthyCheck and deletes it. Returns an error if one occurs.
 func (c *kuberhealthyChecks) Delete(name string, options *metav1.DeleteOptions) error {
 	return c.client.Delete().
diff --git a/pkg/apis/khcheck/v1/types.go b/pkg/apis/khcheck/v1/types.go
index 32b628b0b..5a6d94f8d 100644
--- a/pkg/apis/khcheck/v1/types.go
+++ b/pkg/apis/khcheck/v1/types.go
@@ -11,9 +11,13 @@ import (
 // KuberhealthyCheck represents the data in the CRD for configuring an
 // external check for Kuberhealthy
 // +k8s:openapi-gen=true
+// +kubebuilder:printcolumn:name="OK",type=string,JSONPath=`.status.ok`,description="OK status"
+// +kubebuilder:printcolumn:name="Age LastRun",type=date,JSONPath=`.status.lastRun`,description="Last Run"
+// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`,description="Age"
 // +kubebuilder:resource:path="khchecks"
 // +kubebuilder:resource:singular="khcheck"
 // +kubebuilder:resource:shortName="khc"
+// +kubebuilder:subresource:status
 type KuberhealthyCheck struct {
 	metav1.TypeMeta `json:",inline" yaml:",inline"`
 	// +optional
@@ -22,6 +26,9 @@ type KuberhealthyCheck struct {
 	// Spec holds the desired state of the KuberhealthyCheck (from the client).
 	// +optional
 	Spec CheckConfig `json:"spec,omitempty" yaml:"spec,omitempty"`
+
+	// Status holds the results of the last run
+	Status CheckStatus `json:"status,omitempty" yaml:"status,omitempty"`
 }
 
 // CheckConfig represents a configuration for a kuberhealthy external
@@ -39,6 +46,17 @@ type CheckConfig struct {
 	ExtraLabels map[string]string `json:"extraLabels" yaml:"extraLabels"` // a map of extra labels that will be applied to the pod
 }
 
+type CheckStatus struct {
+	OK bool `json:"ok" yaml:"ok"` // true or false status of the check, whether or not it completed successfully
+	Errors []string `json:"errors" yaml:"errors"` // the list of errors reported from the check run
+	RunDuration string `json:"runDuration" yaml:"runDuration"` // the time it took for the check to complete
+	Node string `json:"node" yaml:"node"` // the node the check ran on
+	// +nullable
+	LastRun *metav1.Time `json:"lastRun,omitempty" yaml:"lastRun,omitempty"` // the time the check was last run
+	AuthoritativePod string `json:"authoritativePod" yaml:"authoritativePod"` // the main kuberhealthy pod creating and updating the status
+	CurrentUUID string `json:"uuid" yaml:"uuid"` // the UUID that is authorized to report statuses into the kuberhealthy endpoint
+}
+
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
 
 // KuberhealthyCheckList is a list of KuberhealthyCheck resources
diff --git a/pkg/apis/khjob/v1/kuberhealthyjob.go b/pkg/apis/khjob/v1/kuberhealthyjob.go
index b87bcf282..106b5e695 100644
--- a/pkg/apis/khjob/v1/kuberhealthyjob.go
+++ b/pkg/apis/khjob/v1/kuberhealthyjob.go
@@ -38,6 +38,7 @@ type KuberhealthyJobsGetter interface {
 type KuberhealthyJobInterface interface {
 	Create(*KuberhealthyJob) (KuberhealthyJob, error)
 	Update(*KuberhealthyJob) (KuberhealthyJob, error)
+	UpdateStatus(*KuberhealthyJob) (KuberhealthyJob, error)
 	Delete(name string, options *metav1.DeleteOptions) error
 	DeleteCollection(options *metav1.DeleteOptions, listOptions metav1.ListOptions) error
 	Get(name string, options metav1.GetOptions) (KuberhealthyJob, error)
@@ -130,6 +131,21 @@ func (c *kuberhealthyJobs) Update(kuberhealthyJob *KuberhealthyJob) (result Kube
 	return
 }
 
+// UpdateStatus was generated because the type contains a Status member.
+// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus().
+func (c *kuberhealthyJobs) UpdateStatus(kuberhealthyJob *KuberhealthyJob) (result KuberhealthyJob, err error) {
+	result = KuberhealthyJob{}
+	err = c.client.Put().
+		Namespace(c.ns).
+		Resource("khjobs").
+		Name(kuberhealthyJob.Name).
+		SubResource("status").
+		Body(kuberhealthyJob).
+		Do(context.TODO()).
+		Into(&result)
+	return
+}
+
 // Delete takes name of the kuberhealthyJob and deletes it. Returns an error if one occurs.
 func (c *kuberhealthyJobs) Delete(name string, options *metav1.DeleteOptions) error {
 	return c.client.Delete().
diff --git a/pkg/apis/khjob/v1/types.go b/pkg/apis/khjob/v1/types.go
index 95273ee80..8492a5d7c 100644
--- a/pkg/apis/khjob/v1/types.go
+++ b/pkg/apis/khjob/v1/types.go
@@ -11,9 +11,13 @@ import (
 // KuberhealthyJob represents the data in the CRD for configuring an
 // external checker job for Kuberhealthy
 // +k8s:openapi-gen=true
+// +kubebuilder:printcolumn:name="OK",type=string,JSONPath=`.status.ok`,description="OK status"
+// +kubebuilder:printcolumn:name="Age LastRun",type=date,JSONPath=`.status.lastRun`,description="Last Run"
+// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`,description="Age"
 // +kubebuilder:resource:path="khjobs"
 // +kubebuilder:resource:singular="khjob"
 // +kubebuilder:resource:shortName="khj"
+// +kubebuilder:subresource:status
 type KuberhealthyJob struct {
 	metav1.TypeMeta `json:",inline" yaml:",inline"`
 	// +optional
@@ -22,6 +26,9 @@ type KuberhealthyJob struct {
 	// Spec holds the desired state of the KuberhealthyJob (from the client).
 	// +optional
 	Spec JobConfig `json:"spec,omitempty" yaml:"spec,omitempty"`
+
+	// Status holds the results of the job
+	Status JobStatus `json:"status,omitempty" yaml:"status,omitempty"`
 }
 
 // JobConfig represents a configuration for a kuberhealthy external
@@ -40,6 +47,17 @@ type JobConfig struct {
 	ExtraLabels map[string]string `json:"extraLabels" yaml:"extraLabels"` // a map of extra labels that will be applied to the pod
 }
 
+type JobStatus struct {
+	OK bool `json:"ok" yaml:"ok"` // true or false status of the job, whether or not it completed successfully
+	Errors []string `json:"errors" yaml:"errors"` // the list of errors reported from the job run
+	RunDuration string `json:"runDuration" yaml:"runDuration"` // the time it took for the job to complete
+	Node string `json:"node" yaml:"node"` // the node the job ran on
+	// +nullable
+	LastRun *metav1.Time `json:"lastRun,omitempty" yaml:"lastRun,omitempty"` // the time the job was last run
+	AuthoritativePod string `json:"authoritativePod" yaml:"authoritativePod"` // the main kuberhealthy pod creating and updating the state
+	CurrentUUID string `json:"uuid" yaml:"uuid"` // the UUID that is authorized to report statuses into the kuberhealthy endpoint
+}
+
 // JobPhase is a label for the condition of the job at the current time.
 type JobPhase string
diff --git a/scripts/generated/comcast.github.io_khchecks.yaml b/scripts/generated/comcast.github.io_khchecks.yaml
index 83632995a..a3055d773 100644
--- a/scripts/generated/comcast.github.io_khchecks.yaml
+++ b/scripts/generated/comcast.github.io_khchecks.yaml
@@ -19,7 +19,20 @@ spec:
   scope: Namespaced
   preserveUnknownFields: false
   versions:
-  - name: v1
+  - additionalPrinterColumns:
+    - description: OK status
+      jsonPath: .status.ok
+      name: OK
+      type: string
+    - description: Last Run
+      jsonPath: .status.lastRun
+      name: Age LastRun
+      type: date
+    - description: Age
+      jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1
     schema:
       openAPIV3Schema:
         description: KuberhealthyCheck represents the data in the CRD for configuring
@@ -6451,9 +6464,40 @@ spec:
             - runInterval
            - timeout
             type: object
+          status:
+            description: Status holds the results of the last run
+            properties:
+              authoritativePod:
+                type: string
+              errors:
+                items:
+                  type: string
+                type: array
+              lastRun:
+                format: date-time
+                nullable: true
+                type: string
+              node:
+                type: string
+              ok:
+                type: boolean
+              runDuration:
+                type: string
+              uuid:
+                type: string
+            required:
+            - authoritativePod
+            - errors
+            - node
+            - ok
+            - runDuration
+            - uuid
+            type: object
         type: object
     served: true
     storage: true
+    subresources:
+      status: {}
 status:
   acceptedNames:
     kind: ""
diff --git a/scripts/generated/comcast.github.io_khjobs.yaml b/scripts/generated/comcast.github.io_khjobs.yaml
index c1b1f9898..1fd4a6506 100644
--- a/scripts/generated/comcast.github.io_khjobs.yaml
+++ b/scripts/generated/comcast.github.io_khjobs.yaml
@@ -19,7 +19,20 @@ spec:
   scope: Namespaced
   preserveUnknownFields: false
   versions:
-  - name: v1
+  - additionalPrinterColumns:
+    - description: OK status
+      jsonPath: .status.ok
+      name: OK
+      type: string
+    - description: Last Run
+      jsonPath: .status.lastRun
+      name: Age LastRun
+      type: date
+    - description: Age
+      jsonPath: .metadata.creationTimestamp
+      name: Age
+      type: date
+    name: v1
     schema:
       openAPIV3Schema:
         description: KuberhealthyJob represents the data in the CRD for configuring
@@ -6452,9 +6465,40 @@ spec:
             - podSpec
             - timeout
             type: object
+          status:
+            description: Status holds the results of the job
+            properties:
+              authoritativePod:
+                type: string
+              errors:
+                items:
+                  type: string
+                type: array
+              lastRun:
+                format: date-time
+                nullable: true
+                type: string
+              node:
+                type: string
+              ok:
+                type: boolean
+              runDuration:
+                type: string
+              uuid:
+                type: string
+            required:
+            - authoritativePod
+            - errors
+            - node
+            - ok
+            - runDuration
+            - uuid
+            type: object
         type: object
     served: true
     storage: true
+    subresources:
+      status: {}
 status:
   acceptedNames:
     kind: ""