[Addon kubevela#579] Refactor the spark-workload parameter definition and add spark-py example

Signed-off-by: yanghua <yanghua1127@gmail.com>
yanghua committed Mar 2, 2023
1 parent 8916021 commit 4b52ee4
Showing 3 changed files with 106 additions and 2 deletions.
49 changes: 49 additions & 0 deletions examples/spark-kubernetes-operator/sparkapp-py.yaml
@@ -0,0 +1,49 @@
apiVersion: core.oam.dev/v1beta1
kind: Application
metadata:
name: spark-app-v1
namespace: spark-cluster
spec:
components:
- name: spark-workload-component
type: spark-workload
properties:
name: my-spark-py-app
namespace: spark-cluster
type: Python
pythonVersion: "3"
mode: cluster
image: "gcr.io/spark-operator/spark-py:v3.1.1"
imagePullPolicy: Always
mainApplicationFile: "local:///opt/spark/examples/src/main/python/pi.py"
sparkVersion: "3.1.1"
restartPolicy:
type: OnFailure
onFailureRetries: 3
onFailureRetryInterval: 10
onSubmissionFailureRetries: 5
onSubmissionFailureRetryInterval: 20
volumes:
- name: "test-volume"
hostPath:
path: "/tmp"
type: Directory
driver:
cores: 1
coreLimit: "1200m"
memory: "1024m"
labels:
version: 3.1.1
volumeMounts:
- name: "test-volume"
mountPath: "/tmp"
executor:
cores: 1
instances: 1
memory: "1024m"
labels:
version: 3.1.1
volumeMounts:
- name: "test-volume"
mountPath: "/tmp"
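
Assuming the vela CLI and the spark-kubernetes-operator addon are installed, the new example can be deployed with vela up -f examples/spark-kubernetes-operator/sparkapp-py.yaml. For context, here is a sketch of the SparkApplication resource this component is expected to render. It assumes the definition's output block (collapsed further down in this diff) maps the properties one-to-one onto the spark-on-k8s-operator v1beta2 CRD, so treat the field mapping as illustrative rather than exact:

  # Hypothetical rendered resource, assuming a one-to-one mapping of the
  # component properties onto the spark-on-k8s-operator v1beta2 API.
  apiVersion: sparkoperator.k8s.io/v1beta2
  kind: SparkApplication
  metadata:
    name: my-spark-py-app
    namespace: spark-cluster
  spec:
    type: Python
    pythonVersion: "3"
    mode: cluster
    image: "gcr.io/spark-operator/spark-py:v3.1.1"
    imagePullPolicy: Always
    mainApplicationFile: "local:///opt/spark/examples/src/main/python/pi.py"
    sparkVersion: "3.1.1"
    restartPolicy:
      type: OnFailure
      onFailureRetries: 3
      onFailureRetryInterval: 10
      onSubmissionFailureRetries: 5
      onSubmissionFailureRetryInterval: 20
    driver:
      cores: 1
      coreLimit: "1200m"
      memory: "1024m"
    executor:
      cores: 1
      instances: 1
      memory: "1024m"
    # volumes, labels and volumeMounts carry over in the same shape as the
    # properties above; omitted here for brevity.
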
10 changes: 10 additions & 0 deletions examples/spark-kubernetes-operator/sparkapp.yaml
@@ -17,18 +17,28 @@ spec:
mainClass: org.apache.spark.examples.streaming.JavaQueueStream
mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar"
sparkVersion: "3.1.1"
restartPolicy:
type: Never
volumes:
- name: "test-volume"
hostPath:
path: "/tmp"
type: Directory
driver:
cores: 1
coreLimit: "1200m"
memory: "1024m"
labels:
version: 3.1.1
volumeMounts:
- name: "test-volume"
mountPath: "/tmp"
executor:
cores: 1
instances: 1
memory: "1024m"
labels:
version: 3.1.1
volumeMounts:
- name: "test-volume"
mountPath: "/tmp"
49 changes: 47 additions & 2 deletions definitions/spark-workload.cue
@@ -28,17 +28,47 @@ template: {
mainApplicationFile: string
// +usage=Specify the version of Spark the application uses
sparkVersion: string
// +usage=Specify the policy governing whether and under which conditions the controller should restart the application
restartPolicy?: {
// +usage=Specify the restart policy type; one of "Always", "Never" or "OnFailure"
type: string
// +usage=Specify the number of times to retry submitting an application before giving up. This is best effort; due to caching, the actual number of retry attempts can be greater than or equal to the specified value. Required if the restart policy type is "OnFailure"
onSubmissionFailureRetries?: int
// +usage=Specify the number of times to retry running an application before giving up
onFailureRetries?: int
// +usage=Specify the interval in seconds between retries on failed submissions
onSubmissionFailureRetryInterval?: int
// +usage=Specify the interval in seconds between retries on failed runs
onFailureRetryInterval?: int
}
// +usage=Specify the spec requested for the driver pod
driver: {
// +usage=Specify the number of CPU cores to request for the driver; maps to spark.driver.cores
cores?: int
// +usage=Specify a hard limit on CPU cores for the pod
coreLimit?: string
// +usage=Specify the amount of memory to request for the pod
memory?: string
// +usage=Specify the Kubernetes labels to be added to the pod
labels?: [string]: string
// +usage=Specify the volumes listed in “.spec.volumes” to mount into the main container’s filesystem
volumeMounts?: [...{
name: string
mountPath: string
}]
}
// +usage=Specify the spec requested for the executor pods
executor: {
// +usage=Specify the number of CPU cores to request for each executor; maps to spark.executor.cores
cores?: int
// +usage=Specify a hard limit on CPU cores for the pod
coreLimit?: string
// +usage=Specify the amount of memory to request for the pod
memory?: string
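// +usage=Specify the number of executor instances to request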
instances?: int
// +usage=Specify the Kubernetes labels to be added to the pod
labels?: [string]: string
// +usage=Specify the volumes listed in “.spec.volumes” to mount into the main container’s filesystem
volumeMounts?: [...{
name: string
mountPath: string
@@ -62,6 +92,21 @@
type: *"Directory" | string
}
}]
// +usage=Specify the dependencies of the Spark application; this captures all possible dependency types
deps?: {
// +usage=Specify a list of JAR files the Spark application depends on
jars?: [...string]
// +usage=Specify a list of files the Spark application depends on
files?: [...string]
// +usage=Specify a list of Python files the Spark application depends on
pyFiles?: [...string]
// +usage=Specify a list of Maven coordinates of jars to include on the driver and executor classpaths. This searches the local Maven repository, then Maven Central, and any additional remote repositories given by the “repositories” option. Each package should be of the form “groupId:artifactId:version”
packages?: [...string]
// +usage=Specify a list of “groupId:artifactId” coordinates to exclude while resolving the dependencies provided in “packages”, to avoid dependency conflicts
excludePackages?: [...string]
// +usage=Specify a list of additional remote repositories to search for the Maven coordinates given with the “packages” option
repositories?: [...string]
}
}

output: {

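Neither example above exercises the new deps parameter. Below is a minimal sketch of how it could be set on a spark-workload component; the application name, jar path, extra Maven coordinates and repository URL are illustrative assumptions, not part of this commit:

  apiVersion: core.oam.dev/v1beta1
  kind: Application
  metadata:
    name: spark-app-with-deps        # hypothetical example
    namespace: spark-cluster
  spec:
    components:
      - name: spark-workload-component
        type: spark-workload
        properties:
          name: my-spark-app-with-deps
          namespace: spark-cluster
          type: Scala
          mode: cluster
          image: "gcr.io/spark-operator/spark:v3.1.1"
          mainClass: org.apache.spark.examples.SparkPi
          mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.12-3.1.1.jar"
          sparkVersion: "3.1.1"
          driver:
            cores: 1
          executor:
            cores: 1
            instances: 1
          deps:
            # Extra jars shipped alongside the application (illustrative path).
            jars:
              - "local:///opt/spark/extra/utils.jar"
            # Resolved from Maven repositories as groupId:artifactId:version.
            packages:
              - "org.apache.spark:spark-sql-kafka-0-10_2.12:3.1.1"
            # Excluded while resolving the packages above.
            excludePackages:
              - "org.slf4j:slf4j-api"
            # Searched in addition to the local repo and Maven Central (illustrative URL).
            repositories:
              - "https://repo.example.com/maven"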