Skip to content

Commit fffe912

Browse files
committed
fix aw yaml
1 parent 6bbd4ae commit fffe912

File tree

1 file changed

+138
-112
lines changed

1 file changed

+138
-112
lines changed

Diff for: doc/usage/examples/kuberay/config/aw-raycluster.yaml

+138-112
Original file line numberDiff line numberDiff line change
@@ -11,120 +11,146 @@ spec:
1111
custompodresources:
1212
- replicas: 2
1313
requests:
14-
cpu: 1
14+
cpu: 10
1515
memory: 512Mi
1616
limits:
17-
cpu: 1
17+
cpu: 10
1818
memory: 1G
1919
generictemplate:
20-
apiVersion: v1
21-
items:
22-
- apiVersion: ray.io/v1alpha1
23-
kind: RayCluster
24-
metadata:
25-
labels:
26-
controller-tools.k8s.io: "1.0"
27-
name: raycluster-autoscaler
28-
namespace: default
29-
spec:
30-
autoscalerOptions:
31-
idleTimeoutSeconds: 60
32-
imagePullPolicy: Always
33-
resources:
34-
limits:
35-
cpu: 500m
36-
memory: 512Mi
37-
requests:
38-
cpu: 500m
39-
memory: 512Mi
40-
upscalingMode: Default
41-
enableInTreeAutoscaling: true
42-
headGroupSpec:
43-
rayStartParams:
44-
block: "true"
45-
dashboard-host: 0.0.0.0
46-
serviceType: ClusterIP
47-
template:
48-
spec:
49-
containers:
50-
- image: rayproject/ray:2.0.0
51-
imagePullPolicy: Always
52-
lifecycle:
53-
preStop:
54-
exec:
55-
command:
56-
- /bin/sh
57-
- -c
58-
- ray stop
59-
name: ray-head
60-
ports:
61-
- containerPort: 6379
62-
name: gcs
63-
protocol: TCP
64-
- containerPort: 8265
65-
name: dashboard
66-
protocol: TCP
67-
- containerPort: 10001
68-
name: client
69-
protocol: TCP
70-
resources:
71-
limits:
72-
cpu: "1"
73-
memory: 1G
74-
requests:
75-
cpu: 500m
76-
memory: 512Mi
77-
rayVersion: 2.0.0
78-
workerGroupSpecs:
79-
- groupName: small-group
80-
maxReplicas: 300
81-
minReplicas: 1
82-
rayStartParams:
83-
block: "true"
84-
replicas: 1
85-
template:
86-
metadata:
87-
annotations:
88-
key: value
89-
labels:
90-
key: value
91-
spec:
92-
containers:
93-
- image: rayproject/ray:2.0.0
94-
lifecycle:
95-
preStop:
96-
exec:
97-
command:
98-
- /bin/sh
99-
- -c
100-
- ray stop
101-
name: machine-learning
102-
resources:
103-
limits:
104-
cpu: "1"
105-
memory: 512Mi
106-
requests:
107-
cpu: 500m
108-
memory: 256Mi
109-
initContainers:
110-
- command:
111-
- sh
112-
- -c
113-
- until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
114-
do echo waiting for myservice; sleep 2; done
115-
image: busybox:1.28
116-
name: init-myservice
117-
status:
118-
availableWorkerReplicas: 2
119-
desiredWorkerReplicas: 1
120-
endpoints:
121-
client: "10001"
122-
dashboard: "8265"
123-
gcs: "6379"
124-
lastUpdateTime: "2022-09-26T14:17:54Z"
125-
maxWorkerReplicas: 300
126-
minWorkerReplicas: 1
127-
state: ready
128-
kind: List
20+
# This config demonstrates KubeRay's Ray autoscaler integration.
21+
# The resource requests and limits in this config are too small for production!
22+
# For an example with more realistic resource configuration, see
23+
# ray-cluster.autoscaler.large.yaml.
24+
apiVersion: ray.io/v1alpha1
25+
kind: RayCluster
12926
metadata:
130-
resourceVersion: ""
27+
labels:
28+
controller-tools.k8s.io: "1.0"
29+
# A unique identifier for the head node and workers of this cluster.
30+
name: raycluster-autoscaler
31+
spec:
32+
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
33+
rayVersion: '2.0.0'
34+
# If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
35+
# Ray autoscaler integration is supported only for Ray versions >= 1.11.0
36+
# Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
37+
enableInTreeAutoscaling: true
38+
# autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
39+
# The example configuration shown below represents the DEFAULT values.
40+
# (You may delete autoscalerOptions if the defaults are suitable.)
41+
autoscalerOptions:
42+
# upscalingMode is "Conservative", "Default", or "Aggressive."
43+
# Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
44+
# Default: Upscaling is not rate-limited.
45+
# Aggressive: An alias for Default; upscaling is not rate-limited.
46+
upscalingMode: Default
47+
# idleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
48+
idleTimeoutSeconds: 60
49+
# image optionally overrides the autoscaler's container image.
50+
# If instance.spec.rayVersion is at least "2.0.0", the autoscaler will default to the same image as
51+
# the ray container. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image.
52+
## image: "my-repo/my-custom-autoscaler-image:tag"
53+
# imagePullPolicy optionally overrides the autoscaler container's image pull policy.
54+
imagePullPolicy: Always
55+
# resources specifies optional resource request and limit overrides for the autoscaler container.
56+
# For large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required.
57+
resources:
58+
limits:
59+
cpu: "500m"
60+
memory: "512Mi"
61+
requests:
62+
cpu: "500m"
63+
memory: "512Mi"
64+
######################headGroupSpec#################################
65+
# head group template and specs, (perhaps 'group' is not needed in the name)
66+
headGroupSpec:
67+
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
68+
serviceType: ClusterIP
69+
# logical group name; here it is called head-group, but it can also be a functional name
70+
# pod type head or worker
71+
# rayNodeType: head # Not needed since it is under the headgroup
72+
# the following params are used to complete the ray start: ray start --head --block ...
73+
rayStartParams:
74+
# Flag "no-monitor" will be automatically set when autoscaling is enabled.
75+
dashboard-host: '0.0.0.0'
76+
block: 'true'
77+
# num-cpus: '1' # can be auto-completed from the limits
78+
# Use `resources` to optionally specify custom resource annotations for the Ray node.
79+
# The value of `resources` is a string-integer mapping.
80+
# Currently, `resources` must be provided in the specific format demonstrated below:
81+
# resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
82+
#pod template
83+
template:
84+
spec:
85+
containers:
86+
# The Ray head pod
87+
- name: ray-head
88+
image: rayproject/ray:2.0.0
89+
imagePullPolicy: Always
90+
ports:
91+
- containerPort: 6379
92+
name: gcs
93+
- containerPort: 8265
94+
name: dashboard
95+
- containerPort: 10001
96+
name: client
97+
lifecycle:
98+
preStop:
99+
exec:
100+
command: ["/bin/sh","-c","ray stop"]
101+
resources:
102+
limits:
103+
cpu: "1"
104+
memory: "1G"
105+
requests:
106+
cpu: "500m"
107+
memory: "512Mi"
108+
workerGroupSpecs:
109+
# the number of pod replicas in this group (pod type: worker)
110+
- replicas: 1
111+
minReplicas: 1
112+
maxReplicas: 300
113+
# logical group name; here it is called small-group, but it can also be a functional name
114+
groupName: small-group
115+
# if worker pods need to be added, we can simply increment the replicas
116+
# if worker pods need to be removed, we decrement the replicas, and populate the podsToDelete list
117+
# the operator will remove pods from the list until the number of replicas is satisfied
118+
# when a pod is confirmed to be deleted, its name will be removed from the list below
119+
#scaleStrategy:
120+
# workersToDelete:
121+
# - raycluster-complete-worker-small-group-bdtwh
122+
# - raycluster-complete-worker-small-group-hv457
123+
# - raycluster-complete-worker-small-group-k8tj7
124+
# the following params are used to complete the ray start: ray start --block ...
125+
rayStartParams:
126+
block: 'true'
127+
#pod template
128+
template:
129+
metadata:
130+
labels:
131+
key: value
132+
# annotations for pod
133+
annotations:
134+
key: value
135+
spec:
136+
initContainers:
137+
# the env var $RAY_IP is set by the operator if missing, with the value of the head service name
138+
- name: init-myservice
139+
image: busybox:1.28
140+
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
141+
containers:
142+
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
143+
image: rayproject/ray:2.0.0
144+
# environment variables to set in the container. Optional.
145+
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
146+
lifecycle:
147+
preStop:
148+
exec:
149+
command: ["/bin/sh","-c","ray stop"]
150+
resources:
151+
limits:
152+
cpu: "1"
153+
memory: "512Mi"
154+
requests:
155+
cpu: "500m"
156+
memory: "256Mi"

0 commit comments

Comments
 (0)