diff --git a/cmd/kar-controllers/app/options/options.go b/cmd/kar-controllers/app/options/options.go index ada825490..2bfb45d1d 100644 --- a/cmd/kar-controllers/app/options/options.go +++ b/cmd/kar-controllers/app/options/options.go @@ -56,6 +56,7 @@ type ServerOption struct { QuotaRestURL string HealthProbeListenAddr string DispatchResourceReservationTimeout int64 + ExternalDispatch bool // if true, will use external plugin to dispatch workloads } // NewServerOption creates a new CMServer with a default config. @@ -83,6 +84,8 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) { fs.IntVar(&s.SecurePort, "secure-port", 6443, "The port on which to serve secured, authenticated access for metrics.") fs.StringVar(&s.HealthProbeListenAddr, "healthProbeListenAddr", ":8081", "Listen address for health probes. Defaults to ':8081'") fs.Int64Var(&s.DispatchResourceReservationTimeout, "dispatchResourceReservationTimeout", s.DispatchResourceReservationTimeout, "Resource reservation timeout for pods to be created once AppWrapper is dispatched, in millisecond. Defaults to '300000', 5 minutes") + fs.BoolVar(&s.ExternalDispatch,"externalDispatch", s.ExternalDispatch,"Use external workload dispatch plugin. Default is false.") + flag.Parse() klog.V(4).Infof("[AddFlags] Controller configuration: %#v", s) } @@ -147,6 +150,12 @@ func (s *ServerOption) loadDefaultsFromEnvVars() { s.DispatchResourceReservationTimeout = to } } + externalDispatch, envVarExists := os.LookupEnv("EXTERNAL_DISPATCH") + s.ExternalDispatch = false + if envVarExists && strings.EqualFold(externalDispatch, "true") { + s.ExternalDispatch = true + } + } func (s *ServerOption) CheckOptionOrDie() { diff --git a/config/crd/bases/mcad.ibm.com_appwrappers.yaml b/config/crd/bases/mcad.ibm.com_appwrappers.yaml index b43d6d3cf..3f09fc279 100644 --- a/config/crd/bases/mcad.ibm.com_appwrappers.yaml +++ b/config/crd/bases/mcad.ibm.com_appwrappers.yaml @@ -62,10 +62,10 @@ spec: completionstatus: description: Optional field that drives completion status of appwrapper. This field within an item of an appwrapper determines the full state of appwrapper. - The completionstatus field contains a list of conditions that make the associate item considered + The completionstatus field contains a list of conditions that make the associate item considered completed, for instance :- completion conditions could be "Complete" or "Failed". - The associated item's level .status.conditions[].type field is monitored for any one of these conditions. Once all items with this - option is set and the conditionstatus is met the entire appwrapper state will be changed to one of the valid appwrapper completion state. Note :- this is an AND + The associated item's level .status.conditions[].type field is monitored for any one of these conditions. Once all items with this + option is set and the conditionstatus is met the entire appwrapper state will be changed to one of the valid appwrapper completion state. Note :- this is an AND operation for all items where this option is set. See the list of appwrapper states for a list of valid complete states. type: string @@ -299,6 +299,98 @@ spec: type: integer default: 0 type: object + clusterScheduling: + description: Stanza to define cluster allocation restrictions. + Used only in multiCluster mode. + properties: + clusters: + description: Specifies cluster IDs for allocation + restrictions. Used only in + multiCluster mode. + items: + properties: + name: + description: Cluster ID name. Used only in + multiCluster mode. + type: string + type: object + type: array + clusterSelector: + description: (UNSUPPORTED FIELD) A label selector is a label + query over a set of cluster objects. The result of + matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector + matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + policyResult: + properties: + policySource: + items: + properties: + lastUpdateMicroTime: + description: The latest time this condition was updated. + format: date-time + type: string + message: + description: A human readable message indicating details + about the cluster decision. + type: string + name: + description: ID/Name of the policy decision maker. Most + often this will be MCAD but design can support alternatives + type: string + type: object + type: array + targetCluster: + properties: + name: + type: string + required: + - name + type: object + type: object + type: object dispatchDuration: description: Wall clock duration time of appwrapper in seconds properties: @@ -822,6 +914,39 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer + itemCompletionStatus: + properties: + genericItems: + items: + properties: + condition: + type: string + gvk: + description: Group Version Kind + properties: + Group: + type: string + Version: + type: string + Kind: + type: string + required: + - Group + - Version + - Kind + type: object + name: + type: string + namespace: + type: string + required: + - condition + - gvk + - name + - namespace + type: object + type: array + type: object type: object required: - spec diff --git a/config/crd/bases/mcad.ibm.com_schedulingspecs.yaml b/config/crd/bases/mcad.ibm.com_schedulingspecs.yaml index 3de2af124..a0ae2b0ce 100644 --- a/config/crd/bases/mcad.ibm.com_schedulingspecs.yaml +++ b/config/crd/bases/mcad.ibm.com_schedulingspecs.yaml @@ -74,6 +74,98 @@ spec: type: integer default: 0 type: object + clusterScheduling: + description: Stanza to define cluster allocation restrictions. + Used only in multiCluster mode. + properties: + clusters: + description: Specifies cluster IDs for allocation + restrictions. Used only in + multiCluster mode. + items: + properties: + name: + description: Cluster ID name. Used only in + multiCluster mode. + type: string + type: object + type: array + clusterSelector: + description: (UNSUPPORTED FIELD) A label selector is a label + query over a set of cluster objects. The result of + matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector + matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + policyResult: + properties: + policySource: + items: + properties: + lastUpdateMicroTime: + description: The latest time this condition was updated. + format: date-time + type: string + message: + description: A human readable message indicating details + about the cluster decision. + type: string + name: + description: ID/Name of the policy decision maker. Most + often this will be MCAD but design can support alternatives + type: string + type: object + type: array + targetCluster: + properties: + name: + type: string + required: + - name + type: object + type: object + type: object dispatchDuration: description: Wall clock duration time of appwrapper in seconds properties: diff --git a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml index b43d6d3cf..3f09fc279 100644 --- a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml +++ b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml @@ -62,10 +62,10 @@ spec: completionstatus: description: Optional field that drives completion status of appwrapper. This field within an item of an appwrapper determines the full state of appwrapper. - The completionstatus field contains a list of conditions that make the associate item considered + The completionstatus field contains a list of conditions that make the associate item considered completed, for instance :- completion conditions could be "Complete" or "Failed". - The associated item's level .status.conditions[].type field is monitored for any one of these conditions. Once all items with this - option is set and the conditionstatus is met the entire appwrapper state will be changed to one of the valid appwrapper completion state. Note :- this is an AND + The associated item's level .status.conditions[].type field is monitored for any one of these conditions. Once all items with this + option is set and the conditionstatus is met the entire appwrapper state will be changed to one of the valid appwrapper completion state. Note :- this is an AND operation for all items where this option is set. See the list of appwrapper states for a list of valid complete states. type: string @@ -299,6 +299,98 @@ spec: type: integer default: 0 type: object + clusterScheduling: + description: Stanza to define cluster allocation restrictions. + Used only in multiCluster mode. + properties: + clusters: + description: Specifies cluster IDs for allocation + restrictions. Used only in + multiCluster mode. + items: + properties: + name: + description: Cluster ID name. Used only in + multiCluster mode. + type: string + type: object + type: array + clusterSelector: + description: (UNSUPPORTED FIELD) A label selector is a label + query over a set of cluster objects. The result of + matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector + matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + policyResult: + properties: + policySource: + items: + properties: + lastUpdateMicroTime: + description: The latest time this condition was updated. + format: date-time + type: string + message: + description: A human readable message indicating details + about the cluster decision. + type: string + name: + description: ID/Name of the policy decision maker. Most + often this will be MCAD but design can support alternatives + type: string + type: object + type: array + targetCluster: + properties: + name: + type: string + required: + - name + type: object + type: object + type: object dispatchDuration: description: Wall clock duration time of appwrapper in seconds properties: @@ -822,6 +914,39 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer + itemCompletionStatus: + properties: + genericItems: + items: + properties: + condition: + type: string + gvk: + description: Group Version Kind + properties: + Group: + type: string + Version: + type: string + Kind: + type: string + required: + - Group + - Version + - Kind + type: object + name: + type: string + namespace: + type: string + required: + - condition + - gvk + - name + - namespace + type: object + type: array + type: object type: object required: - spec diff --git a/deployment/mcad-controller/crds/mcad.ibm.com_schedulingspecs.yaml b/deployment/mcad-controller/crds/mcad.ibm.com_schedulingspecs.yaml index 3de2af124..a0ae2b0ce 100644 --- a/deployment/mcad-controller/crds/mcad.ibm.com_schedulingspecs.yaml +++ b/deployment/mcad-controller/crds/mcad.ibm.com_schedulingspecs.yaml @@ -74,6 +74,98 @@ spec: type: integer default: 0 type: object + clusterScheduling: + description: Stanza to define cluster allocation restrictions. + Used only in multiCluster mode. + properties: + clusters: + description: Specifies cluster IDs for allocation + restrictions. Used only in + multiCluster mode. + items: + properties: + name: + description: Cluster ID name. Used only in + multiCluster mode. + type: string + type: object + type: array + clusterSelector: + description: (UNSUPPORTED FIELD) A label selector is a label + query over a set of cluster objects. The result of + matchLabels and matchExpressions are ANDed. An empty + label selector matches all objects. A null label selector + matches no objects. + properties: + matchExpressions: + description: matchExpressions is a list of label selector requirements. + The requirements are ANDed. + items: + description: A label selector requirement is a selector that + contains values, a key, and an operator that relates the key + and values. + properties: + key: + description: key is the label key that the selector applies + to. + type: string + operator: + description: operator represents a key's relationship to + a set of values. Valid operators are In, NotIn, Exists + and DoesNotExist. + type: string + values: + description: values is an array of string values. If the + operator is In or NotIn, the values array must be non-empty. + If the operator is Exists or DoesNotExist, the values + array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A single + {key,value} in the matchLabels map is equivalent to an element + of matchExpressions, whose key field is "key", the operator + is "In", and the values array contains only "value". The requirements + are ANDed. + type: object + type: object + policyResult: + properties: + policySource: + items: + properties: + lastUpdateMicroTime: + description: The latest time this condition was updated. + format: date-time + type: string + message: + description: A human readable message indicating details + about the cluster decision. + type: string + name: + description: ID/Name of the policy decision maker. Most + often this will be MCAD but design can support alternatives + type: string + type: object + type: array + targetCluster: + properties: + name: + type: string + required: + - name + type: object + type: object + type: object dispatchDuration: description: Wall clock duration time of appwrapper in seconds properties: diff --git a/deployment/mcad-controller/templates/configmap.yaml b/deployment/mcad-controller/templates/configmap.yaml index b25f71f77..6ab34efd9 100644 --- a/deployment/mcad-controller/templates/configmap.yaml +++ b/deployment/mcad-controller/templates/configmap.yaml @@ -7,6 +7,7 @@ metadata: data: QUOTA_ENABLED: {{ .Values.configMap.quotaEnabled }} DISPATCHER_MODE: {{ .Values.configMap.dispatcherMode }} + EXTERNAL_DISPATCH: {{.Values.configMap.externalDispatch }} {{ if .Values.configMap.agentConfigs }}DISPATCHER_AGENT_CONFIGS: {{ .Values.configMap.agentConfigs }}{{ end }} PREEMPTION: {{ .Values.configMap.preemptionEnabled }} {{ if .Values.configMap.quotaRestUrl }}QUOTA_REST_URL: {{ .Values.configMap.quotaRestUrl }}{{ end }} diff --git a/deployment/mcad-controller/templates/deployment.yaml b/deployment/mcad-controller/templates/deployment.yaml index 56a7445d0..7e2e54896 100644 --- a/deployment/mcad-controller/templates/deployment.yaml +++ b/deployment/mcad-controller/templates/deployment.yaml @@ -33,7 +33,7 @@ spec: #{{ end }} --- #{{ if (eq .Values.configMap.multiCluster true) }} -apiVersion: apiregistration.k8s.io/v1beta1 +apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: name: v1beta1.external.metrics.k8s.io @@ -47,7 +47,7 @@ spec: groupPriorityMinimum: 100 versionPriority: 100 --- -apiVersion: apiregistration.k8s.io/v1beta1 +apiVersion: apiregistration.k8s.io/v1 kind: APIService metadata: name: v1beta1.custom.metrics.k8s.io diff --git a/deployment/mcad-controller/values.yaml b/deployment/mcad-controller/values.yaml index 1d289f2f8..741985b82 100644 --- a/deployment/mcad-controller/values.yaml +++ b/deployment/mcad-controller/values.yaml @@ -47,6 +47,7 @@ configMap: quotaEnabled: '"false"' multiCluster: false dispatcherMode: '"false"' + externalDispatch: '"false"' preemptionEnabled: '"false"' agentConfigs: "" quotaRestUrl: "" diff --git a/deployment/mcad-operator/bundle/manifests/mcad-operator.clusterserviceversion.yaml b/deployment/mcad-operator/bundle/manifests/mcad-operator.clusterserviceversion.yaml index a82f6f43d..7166e0460 100644 --- a/deployment/mcad-operator/bundle/manifests/mcad-operator.clusterserviceversion.yaml +++ b/deployment/mcad-operator/bundle/manifests/mcad-operator.clusterserviceversion.yaml @@ -3,7 +3,7 @@ kind: ClusterServiceVersion metadata: annotations: alm-examples: >- - [{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}] + [{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","externalDispatch":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}] capabilities: Basic Install description: A Kubernetes Native Holistic Lifecycle Resource Manager for Applications name: mcad-operator.v0.1.9 diff --git a/deployment/mcad-operator/deploy/crds/mcad.ibm.com_v1beta1_mcadhelmconfig_cr.yaml b/deployment/mcad-operator/deploy/crds/mcad.ibm.com_v1beta1_mcadhelmconfig_cr.yaml index 00fa8b085..ea01e1c11 100644 --- a/deployment/mcad-operator/deploy/crds/mcad.ibm.com_v1beta1_mcadhelmconfig_cr.yaml +++ b/deployment/mcad-operator/deploy/crds/mcad.ibm.com_v1beta1_mcadhelmconfig_cr.yaml @@ -8,6 +8,7 @@ spec: configMap: agentConfigs: null dispatcherMode: "false" + externalDispatch: "false" name: null deploymentName: xqueuejob-controller image: diff --git a/deployment/mcad-operator/deploy/olm-catalog/mcad-operator/manifests/mcad-operator.clusterserviceversion.yaml b/deployment/mcad-operator/deploy/olm-catalog/mcad-operator/manifests/mcad-operator.clusterserviceversion.yaml index 58afc99f6..7dba6e615 100644 --- a/deployment/mcad-operator/deploy/olm-catalog/mcad-operator/manifests/mcad-operator.clusterserviceversion.yaml +++ b/deployment/mcad-operator/deploy/olm-catalog/mcad-operator/manifests/mcad-operator.clusterserviceversion.yaml @@ -3,7 +3,7 @@ kind: ClusterServiceVersion metadata: annotations: alm-examples: >- - [{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}] + [{"apiVersion":"mcad.ibm.com/v1beta1","kind":"MCADHelmConfig","metadata":{"name":"example-mcadhelmconfig"},"spec":{"configMap":{"agentConfigs":null,"dispatcherMode":"false","externalDispatch":"false","name":null},"deploymentName":"xqueuejob-controller","image":{"pullPolicy":"Always","repository":"darroyo/mcad-controller","tag":"v1.29.0"},"imagePullSecret":{"name":null,"password":"dummyvalue","registry":"registry.stage1.ng.bluemix.net","username":"iamapikey"},"loglevel":4,"namespace":"kube-system","nodeSelector":{"hostname":null},"replicaCount":1,"resources":{"limits":{"cpu":"2000m","memory":"2048Mi"},"requests":{"cpu":"2000m","memory":"2048Mi"}},"serviceAccount":"xqueuejob-controller","volumes":{"hostPath":null}}}] capabilities: Basic Install description: A Kubernetes Native Holistic Lifecycle Resource Manager for Applications name: mcad-operator.v0.1.9 diff --git a/deployment/mcad-operator/helm-charts/mcad-controller/templates/configmap.yaml b/deployment/mcad-operator/helm-charts/mcad-controller/templates/configmap.yaml index 451bd3337..5fc7df1c6 100644 --- a/deployment/mcad-operator/helm-charts/mcad-controller/templates/configmap.yaml +++ b/deployment/mcad-operator/helm-charts/mcad-controller/templates/configmap.yaml @@ -6,5 +6,6 @@ metadata: namespace: kube-system data: DISPATCHER_MODE: {{ .Values.configMap.dispatcherMode }} + EXTERNAL_DISPATCH: {{.Values.configMap.externalDispatch }} DISPATCHER_AGENT_CONFIGS: {{ .Values.configMap.agentConfigs }} #{{ end }} diff --git a/deployment/mcad-operator/helm-charts/mcad-controller/values.yaml b/deployment/mcad-operator/helm-charts/mcad-controller/values.yaml index c2031f7cd..8a5cbac5c 100644 --- a/deployment/mcad-operator/helm-charts/mcad-controller/values.yaml +++ b/deployment/mcad-operator/helm-charts/mcad-controller/values.yaml @@ -32,6 +32,7 @@ configMap: name: multiCluster: false dispatcherMode: "false" + externalDispatch: "false" agentConfigs: volumes: diff --git a/doc/deploy/deployment.md b/doc/deploy/deployment.md index af81b4f7b..61cc07a3a 100644 --- a/doc/deploy/deployment.md +++ b/doc/deploy/deployment.md @@ -150,6 +150,7 @@ The following table lists the configurable parameters of the helm chart and thei | ----------------------- | ------------------------------------ | ------------- | ------------------------------------------------ | | `configMap.agentConfigs` | *For Every Agent Cluster separated by commas(,):* Name of *agent* config file _:_ Set the dispatching mode for the _*Agent Cluster*_. Note:For the dispatching mode `uncordon`, indicating _MCAD_ controller is allowed to dispatched jobs to the _*Agent Cluster*_, is only supported. | <_No default for agent config file_>:`uncordon` | `agent101config:uncordon,agent110config:uncordon` | | `configMap.dispatcherMode` | Whether the _MCAD_ Controller should be launched in Dispatcher mode or not | `false` | `true` | +| `configMap.externalDispatch` | Whether the _MCAD_ Controller should use external plugin to dispatch workloads or not | `false` | `true` | | `configMap.name` | Name of the Kubernetes *ConfigMap* resource to configure the _MCAD_ Controller | | `mcad-deployer` | | `deploymentName` | Name of _MCAD_ Controller Deployment Object | `mcad-controller` | `my-mcad-controller` | | `image.pullPolicy` | Policy that dictates when the specified image is pulled | `Always` | `Never` | diff --git a/doc/usage/examples/aw-1-k8s-job1.yaml b/doc/usage/examples/aw-1-k8s-job1.yaml index 35e3b372e..02fa69f59 100644 --- a/doc/usage/examples/aw-1-k8s-job1.yaml +++ b/doc/usage/examples/aw-1-k8s-job1.yaml @@ -8,7 +8,7 @@ spec: minAvailable: 2 resources: GenericItems: - replicas: 1 + - replicas: 1 metadata: name: aw-generic-statefulset-2 namespace: test1 diff --git a/doc/usage/examples/aw-1-k8s-job4-with-cluster-sched.yaml b/doc/usage/examples/aw-1-k8s-job4-with-cluster-sched.yaml new file mode 100644 index 000000000..53ef6b25c --- /dev/null +++ b/doc/usage/examples/aw-1-k8s-job4-with-cluster-sched.yaml @@ -0,0 +1,41 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: aw-generic-ss-cluster-sched-2 + namespace: test1 +spec: + schedulingSpec: + minAvailable: 2 + clusterScheduling: + clusters: + - name: helloworld + resources: + GenericItems: + - replicas: 1 + metadata: + name: aw-generic-ss-cluster-sched-2 + namespace: test1 + generictemplate: + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: aw-generic-ss-cluster-sched-2 + namespace: test1 + labels: + app: aw-generic-ss-cluster-sched-2 + spec: + replicas: 2 + selector: + matchLabels: + app: aw-generic-ss-cluster-sched-2 + template: + metadata: + labels: + app: aw-generic-ss-cluster-sched-2 + spec: + containers: + - name: aw-generic-ss-cluster-sched-2 + image: k8s.gcr.io/echoserver:1.4 + imagePullPolicy: Never + ports: + - containerPort: 80 diff --git a/go.sum b/go.sum index 84487dcf2..ab1361640 100644 --- a/go.sum +++ b/go.sum @@ -406,6 +406,7 @@ github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijb github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8= github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0= @@ -547,6 +548,7 @@ golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/pkg/apis/controller/v1beta1/appwrapper.go b/pkg/apis/controller/v1beta1/appwrapper.go index f5302b3d7..e2af9cbab 100644 --- a/pkg/apis/controller/v1beta1/appwrapper.go +++ b/pkg/apis/controller/v1beta1/appwrapper.go @@ -34,6 +34,7 @@ import ( v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" ) const AppWrapperPlural string = "appwrappers" @@ -101,7 +102,7 @@ type AppWrapperService struct { } // AppWrapperResource is App Wrapper aggregation resource -//todo: To be depricated +// todo: To be depricated type AppWrapperResource struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` @@ -263,11 +264,24 @@ type AppWrapperStatus struct { // Represents the latest available observations of pods under appwrapper PendingPodConditions []PendingPodSpec `json:"pendingpodconditions"` + + ItemCompletionStatus GenericItemCompletionStatus `json:"itemCompletionStatus,omitempty"` +} + +type GenericItemCompletionStatus struct { + GenericItems [] GenericItem `json:"genericItems,omitempty"` +} + +type GenericItem struct { + schema.GroupVersionKind `json:"gvk"` + Name string `json:"name"` + Namespace string `json:"namespace"` + Condition string `json:"condition"` } type AppWrapperState string -//enqueued, active, deleting, succeeded, failed +// enqueued, active, deleting, succeeded, failed const ( AppWrapperStateEnqueued AppWrapperState = "Pending" AppWrapperStateActive AppWrapperState = "Running" diff --git a/pkg/apis/controller/v1beta1/schedulingspec.go b/pkg/apis/controller/v1beta1/schedulingspec.go index 457cd1540..1caa7d582 100644 --- a/pkg/apis/controller/v1beta1/schedulingspec.go +++ b/pkg/apis/controller/v1beta1/schedulingspec.go @@ -80,6 +80,21 @@ type ClusterReference struct { type ClusterSchedulingSpec struct { Clusters []ClusterReference `json:"clusters,omitempty"` ClusterSelector *metav1.LabelSelector `json:"clusterSelector,omitempty"` + PolicyResult ClusterDecision `json:"policyResult,omitempty"` +} + +type ClusterDecision struct { + TargetCluster ClusterReference `json:"targetCluster,omitempty"` + PolicySource []PolicySourceReference `json:"policySource,omitempty"` +} + +type PolicySourceReference struct { + // ID/Name of the policy decision maker. Most often this will be MCAD but design can support alternatives + Name string `json:"name,omitempty"` + // The latest time this condition was updated. + LastUpdateMicroTime metav1.MicroTime `json:"lastUpdateMicroTime,omitempty"` + // A human readable message indicating details about the cluster decision. + Message string `json:"message,omitempty"` } type ScheduleTimeSpec struct { diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 009192998..93fb32dd2 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -34,6 +34,7 @@ import ( "fmt" "math" "math/rand" + "path" "reflect" "runtime/debug" "sort" @@ -679,8 +680,9 @@ func (qjm *XController) GetAggregatedResourcesPerGenericItem(cqj *arbv1.AppWrapp } // Gets all objects owned by AW from API server, check user supplied status and set whole AW status -func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arbv1.AppWrapperState { +func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) (arbv1.AppWrapperState, arbv1.GenericItemCompletionStatus) { + completionStatus := arbv1.GenericItemCompletionStatus{} // Get all pods and related resources countCompletionRequired := 0 for _, genericItem := range caw.Spec.AggrResources.GenericItems { @@ -694,21 +696,55 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } unstruct.Object = blob.(map[string]interface{}) //set object to the content of the blob after Unmarshalling name := "" + namespace := "" if md, ok := unstruct.Object["metadata"]; ok { metadata := md.(map[string]interface{}) if objectName, ok := metadata["name"]; ok { name = objectName.(string) } + if objectNamespace, ok := metadata["namespace"]; ok { + if objectNamespace != nil { + namespace = objectNamespace.(string) + } + } + } + var gvk = schema.GroupVersionKind{} + _, groupversionkind, err := unstructured.UnstructuredJSONScheme.Decode(objectName.Raw, nil, nil) + if err != nil || groupversionkind == nil { + klog.Errorf("[getAppWrapperCompletionStatus] Error unmarshalling gvk, err=%#v", err) + unknown := "Unknown" + gvk = schema.GroupVersionKind { + Group: unknown, + Version: unknown, + Kind: unknown, + } + } + if groupversionkind != nil { + gvk = *groupversionkind } + if len(name) == 0 { klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: %v in namespace: %v", caw.Name, caw.Namespace) } klog.Infof("[getAppWrapperCompletionStatus] Checking items completed for appwrapper: %v in namespace: %v", caw.Name, caw.Namespace) - status := qjm.genericresources.IsItemCompleted(&genericItem, caw.Namespace, caw.Name, name) + status, condition := qjm.genericresources.IsItemCompleted(&genericItem, caw.Namespace, caw.Name, name) if !status { //early termination because a required item is not completed - return caw.Status.State + return caw.Status.State, completionStatus + } else { + genItemCompletionStatus := arbv1.GenericItem{ + GroupVersionKind: schema.GroupVersionKind { + Group: gvk.Group, + Version: gvk.Version, + Kind: gvk.Kind, + }, + + Name: name, + Namespace: namespace, + Condition: condition, + } + completionStatus.GenericItems = append(completionStatus.GenericItems, genItemCompletionStatus) } //only consider count completion required for valid items @@ -721,15 +757,15 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb //Set new status only when completion required flag is present in genericitems array if countCompletionRequired > 0 { if caw.Status.Running == 0 && caw.Status.Pending == 0 { - return arbv1.AppWrapperStateCompleted + return arbv1.AppWrapperStateCompleted, completionStatus } if caw.Status.Pending > 0 || caw.Status.Running > 0 { - return arbv1.AppWrapperStateRunningHoldCompletion + return arbv1.AppWrapperStateRunningHoldCompletion, completionStatus } } //return previous condition - return caw.Status.State + return caw.Status.State, completionStatus } func (qjm *XController) GetAggregatedResources(cqj *arbv1.AppWrapper) *clusterstateapi.Resource { @@ -1012,6 +1048,31 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust func (qjm *XController) chooseAgent(qj *arbv1.AppWrapper) string { + if qjm.serverOption.ExternalDispatch { + clusters := qj.Spec.SchedSpec.ClusterScheduling.Clusters + var agentId = "" + apath := path.Dir(qjm.agentList[0]) + var agentIdList = make([]string, len(clusters)) + clustersProvided := false // assume clusters not provided + for _, clusterRef := range clusters { + if clusterRef.Name != "" { + clustersProvided = true + agentIdList = append(agentIdList, apath+"/"+clusterRef.Name) + } + } + // target clusters no defined by the submitter of workload. Just pick a target + // from a known list of clusters provided in serverOption.AgentConfigs + if !clustersProvided { + agentId = qjm.agentList[rand.Int()%len(qjm.agentList)] + klog.V(1).Infof("ClusterId %s is chosen randomly from a list provided by mcad\n", agentId) + } else { + // choose target clusterId at random + agentId = agentIdList[rand.Int()%len(agentIdList)] + klog.V(1).Infof("ClusterId %s is chosen randomly from a list provided in Spec.SchedSpec.ClusterScheduling.Clusters: %s\n", agentId, agentIdList) + } + return agentId + } + qjAggrResources := qjm.GetAggregatedResources(qj) klog.V(2).Infof("[chooseAgent] Aggregated Resources of XQJ %s: %v\n", qj.Name, qjAggrResources) @@ -1923,40 +1984,40 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool klog.V(10).Infof("[worker-manageQJ] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) }() - if !cc.isDispatcher { // Agent Mode - - if qj.DeletionTimestamp != nil { + if qj.DeletionTimestamp != nil { - klog.V(4).Infof("[manageQueueJob] AW job=%s/%s set for deletion.", qj.Name, qj.Namespace) + klog.V(4).Infof("[manageQueueJob] AW job=%s/%s set for deletion.", qj.Name, qj.Namespace) - // cleanup resources for running job - err = cc.Cleanup(qj) - if err != nil { - return err - } - //empty finalizers and delete the queuejob again - accessor, err := meta.Accessor(qj) - if err != nil { - return err - } - accessor.SetFinalizers(nil) + // cleanup resources for running job + err = cc.Cleanup(qj) + if err != nil { + return err + } + //empty finalizers and delete the queuejob again + accessor, err := meta.Accessor(qj) + if err != nil { + return err + } + accessor.SetFinalizers(nil) - // we delete the job from the queue if it is there - cc.qjqueue.Delete(qj) + // we delete the job from the queue if it is there + cc.qjqueue.Delete(qj) - return nil - } - //Job is Complete only update pods if needed. - if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { - if podPhaseChanges { - // Only update etcd if AW status has changed. This can happen for periodic - // updates of pod phase counts done in caller of this function. - if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { - klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) - } + return nil + } + //Job is Complete only update pods if needed. + if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { + if podPhaseChanges { + // Only update etcd if AW status has changed. This can happen for periodic + // updates of pod phase counts done in caller of this function. + if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) } - return nil } + return nil + } + + if !cc.isDispatcher { // Agent Mode // First execution of qj to set Status.State = Enqueued if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { @@ -2098,12 +2159,14 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { //set appwrapper status to Complete or RunningHoldCompletion - derivedAwStatus := cc.getAppWrapperCompletionStatus(qj) + derivedAwStatus, genericItemsCompletionStatus := cc.getAppWrapperCompletionStatus(qj) //Set Appwrapper state to complete if all items in Appwrapper //are completed if derivedAwStatus == arbv1.AppWrapperStateRunningHoldCompletion { qj.Status.State = derivedAwStatus + klog.V(1).Infof("[manageQueueJob] Setting ItemCompletionStatus 1") + qj.Status.ItemCompletionStatus = genericItemsCompletionStatus var updateQj *arbv1.AppWrapper index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondRunningHoldCompletion, "SomeItemsCompleted") if index < 0 { @@ -2119,8 +2182,11 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool } cc.updateEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion") } + //Set appwrapper status to complete if derivedAwStatus == arbv1.AppWrapperStateCompleted { + klog.V(1).Infof("[manageQueueJob] Setting ItemCompletionStatus 2") + qj.Status.ItemCompletionStatus = genericItemsCompletionStatus qj.Status.State = derivedAwStatus qj.Status.CanRun = false var updateQj *arbv1.AppWrapper @@ -2140,6 +2206,10 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool cc.quotaManager.Release(updateQj) } + if derivedAwStatus == arbv1.AppWrapperStateCompleted { + qj.Status.ItemCompletionStatus = genericItemsCompletionStatus + qj.Status.State = derivedAwStatus + } // Bugfix to eliminate performance problem of overloading the event queue. } else if podPhaseChanges { // Continued bug fix // Only update etcd if AW status has changed. This can happen for periodic @@ -2147,29 +2217,13 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) } - } - // Finish adding qj to Etcd for dispatch - - } else { // Dispatcher Mode - - if qj.DeletionTimestamp != nil { - // cleanup resources for running job - err = cc.Cleanup(qj) - if err != nil { - return err - } - //empty finalizers and delete the queuejob again - accessor, err := meta.Accessor(qj) - if err != nil { - return err - } - accessor.SetFinalizers(nil) - - cc.qjqueue.Delete(qj) - + // Finish adding qj to Etcd for dispatch return nil } + + } else { // Dispatcher Mode + if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { // if there are running resources for this job then delete them because the job was put in // pending state... @@ -2196,33 +2250,43 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } } - - //_, err = cc.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(qj) - //if err != nil { - // return err - //} return nil } - // if !qj.Status.CanRun && qj.Status.State == arbv1.QueueJobStateEnqueued { if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { cc.qjqueue.AddIfNotPresent(qj) return nil } - if qj.Status.CanRun && !qj.Status.IsDispatched { + if qj.Status.CanRun && !qj.Status.IsDispatched { + if klog.V(10).Enabled() { current_time := time.Now() klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead Before Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) klog.V(10).Infof("[TTime] %s, %s: WorkerBeforeDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } - queuejobKey, _ := GetQueueJobKey(qj) - // agentId:=cc.dispatchMap[queuejobKey] - // if agentId!=nil { if agentId, ok := cc.dispatchMap[queuejobKey]; ok { klog.V(10).Infof("[Dispatcher Controller] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) - cc.agentMap[agentId].CreateJob(qj) + if cc.serverOption.ExternalDispatch { + values := strings.Split(agentId, "/") + klog.V(10).Infof("[Dispatcher Controller] Dispatching AppWrapper %s to Agent ID: %s Through External Dispatcher.", qj.Name, values[len(values)-1]) + qj.Spec.SchedSpec.ClusterScheduling.PolicyResult = arbv1.ClusterDecision { + TargetCluster: arbv1.ClusterReference{ + // TODO check if values len > 0. + Name: values[len(values)-1], + }, + PolicySource : []arbv1.PolicySourceReference { + { + Name: "MCAD", + LastUpdateMicroTime: metav1.MicroTime{Time: time.Now()}, + Message: "Assigned by MCAD", + }, + }, + } + } else { + cc.agentMap[agentId].CreateJob(qj) + } qj.Status.IsDispatched = true } else { klog.Errorf("[Dispatcher Controller] AppWrapper %s not found in dispatcher mapping.", qj.Name) @@ -2232,14 +2296,14 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool klog.V(10).Infof("[Dispatcher Controller] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) klog.V(10).Infof("[TTime] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } - + if _, err := cc.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(qj); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", qj.Namespace, qj.Name, err) return err } + klog.V(10).Infof("[Dispatcher Controller] Update Successfull - AppWrapper %s .", qj.Name) } - } return err } @@ -2277,7 +2341,10 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { if appwrapper.Status.IsDispatched { queuejobKey, _ := GetQueueJobKey(appwrapper) if obj, ok := cc.dispatchMap[queuejobKey]; ok { - cc.agentMap[obj].DeleteJob(appwrapper) + if !cc.serverOption.ExternalDispatch { + cc.agentMap[obj].DeleteJob(appwrapper) + } + delete(cc.dispatchMap,queuejobKey) } appwrapper.Status.IsDispatched = false } diff --git a/pkg/controller/queuejobresources/genericresource/genericresource.go b/pkg/controller/queuejobresources/genericresource/genericresource.go index 4b2879305..21a6d498c 100644 --- a/pkg/controller/queuejobresources/genericresource/genericresource.go +++ b/pkg/controller/queuejobresources/genericresource/genericresource.go @@ -598,24 +598,25 @@ func getContainerResources(container v1.Container, replicas float64) *clustersta } //returns status of an item present in etcd -func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResource, namespace string, appwrapperName string, genericItemName string) (completed bool) { +func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResource, namespace string, appwrapperName string, genericItemName string) (completed bool, condition string) { dd := gr.clients.Discovery() + klog.V(8).Infof("[IsItemCompleted] - checking status !!!!!!!") apigroups, err := restmapper.GetAPIGroupResources(dd) if err != nil { klog.Errorf("[IsItemCompleted] Error getting API resources, err=%#v", err) - return false + return false, "" } restmapper := restmapper.NewDiscoveryRESTMapper(apigroups) _, gvk, err := unstructured.UnstructuredJSONScheme.Decode(awgr.GenericTemplate.Raw, nil, nil) if err != nil { klog.Errorf("[IsItemCompleted] Decoding error, please check your CR! Aborting handling the resource creation, err: `%v`", err) - return false + return false, "" } mapping, err := restmapper.RESTMapping(gvk.GroupKind(), gvk.Version) if err != nil { klog.Errorf("[IsItemCompleted] mapping error from raw object: `%v`", err) - return false + return false, "" } restconfig := gr.kubeClientConfig restconfig.GroupVersion = &schema.GroupVersion{ @@ -626,14 +627,14 @@ func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResourc dclient, err := dynamic.NewForConfig(restconfig) if err != nil { klog.Errorf("[IsItemCompleted] Error creating new dynamic client, err %v", err) - return false + return false, "" } labelSelector := fmt.Sprintf("%s=%s", appwrapperJobName, appwrapperName) inEtcd, err := dclient.Resource(rsrc).Namespace(namespace).List(context.Background(), metav1.ListOptions{LabelSelector: labelSelector}) if err != nil { klog.Errorf("[IsItemCompleted] Error listing object: %v", err) - return false + return false, "" } for _, job := range inEtcd.Items { @@ -660,7 +661,7 @@ func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResourc if jobMap == nil { continue } - + if job.Object["status"] != nil { status := job.Object["status"].(map[string]interface{}) if status["conditions"] != nil { @@ -676,7 +677,9 @@ func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResourc userSpecfiedCompletionConditions := strings.Split(awgr.CompletionStatus, ",") for _, condition := range userSpecfiedCompletionConditions { if strings.Contains(strings.ToLower(completionType), strings.ToLower(condition)) { - return true + klog.V(8).Infof("[IsItemCompleted] condition `%v`.\n", condition) + + return true, condition } } } @@ -685,5 +688,5 @@ func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResourc klog.Errorf("[IsItemCompleted] Found item with name %v that has status nil in namespace %v with labels %v", job.GetName(), job.GetNamespace(), job.GetLabels()) } } - return false + return false, "" } diff --git a/test/e2e-kuttl/status-check/00-assert.yaml b/test/e2e-kuttl/status-check/00-assert.yaml new file mode 100644 index 000000000..f0edec999 --- /dev/null +++ b/test/e2e-kuttl/status-check/00-assert.yaml @@ -0,0 +1,12 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: appwrappers.mcad.ibm.com +status: + acceptedNames: + kind: AppWrapper + listKind: AppWrapperList + plural: appwrappers + singular: appwrapper + storedVersions: + - v1beta1 \ No newline at end of file diff --git a/test/e2e-kuttl/status-check/01-assert.yaml b/test/e2e-kuttl/status-check/01-assert.yaml new file mode 100644 index 000000000..5e0105d10 --- /dev/null +++ b/test/e2e-kuttl/status-check/01-assert.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test \ No newline at end of file diff --git a/test/e2e-kuttl/status-check/01-install.yaml b/test/e2e-kuttl/status-check/01-install.yaml new file mode 100644 index 000000000..7c265c019 --- /dev/null +++ b/test/e2e-kuttl/status-check/01-install.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: test diff --git a/test/e2e-kuttl/status-check/02-assert.yaml b/test/e2e-kuttl/status-check/02-assert.yaml new file mode 100644 index 000000000..a31d7743a --- /dev/null +++ b/test/e2e-kuttl/status-check/02-assert.yaml @@ -0,0 +1,16 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: my-job-1 + namespace: test +status: + state: Completed + itemCompletionStatus: + genericItems: + - condition: Complete + gvk: + Group: batch + Kind: Job + Version: v1 + name: my-job-2 + namespace: test \ No newline at end of file diff --git a/test/e2e-kuttl/status-check/02-install.yaml b/test/e2e-kuttl/status-check/02-install.yaml new file mode 100644 index 000000000..538aaf4cb --- /dev/null +++ b/test/e2e-kuttl/status-check/02-install.yaml @@ -0,0 +1,63 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: my-job-1 + namespace: test + labels: + quota_context: bronze + quota_service: service-root +spec: + schedulingSpec: + minAvailable: 1 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: my-job-2 + namespace: test + labels: + appwrapper.mcad.ibm.com: my-job-2 + spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: my-job-2 + namespace: test + labels: + appwrapper.mcad.ibm.com: my-job-2 + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + containers: + - name: ubuntu + image: ubuntu:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + sleep 30 + resources: + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi \ No newline at end of file