You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# head group template and specs (perhaps 'group' is not needed in the name)
71
+
headGroupSpec:
  # Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
  serviceType: ClusterIP
  # logical group name, for this called head-group, also can be functional
  # pod type head or worker
  # rayNodeType: head # Not needed since it is under the headgroup
  # the following params are used to complete the ray start: ray start --head --block ...
  rayStartParams:
    # Flag "no-monitor" will be automatically set when autoscaling is enabled.
    dashboard-host: '0.0.0.0'
    block: 'true'
    # num-cpus: '1' # can be auto-completed from the limits
    # Use `resources` to optionally specify custom resource annotations for the Ray node.
    # The value of `resources` is a string-integer mapping.
    # Currently, `resources` must be provided in the specific format demonstrated below:
    # resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
    num-gpus: '0'
  # pod template
  template:
    spec:
      containers:
        # The Ray head pod
        - name: ray-head
          image: projectcodeflare/codeflare-glue:latest
          # Credentials and endpoint are injected from the `glue-s3-creds`
          # Secret rather than being hard-coded in this manifest.
          env:
            - name: AWS_ACCESS_KEY_ID
              valueFrom:
                secretKeyRef:
                  name: glue-s3-creds
                  key: AWS_ACCESS_KEY_ID
            - name: AWS_SECRET_ACCESS_KEY
              valueFrom:
                secretKeyRef:
                  name: glue-s3-creds
                  key: AWS_SECRET_ACCESS_KEY
            - name: ENDPOINT_URL
              valueFrom:
                secretKeyRef:
                  name: glue-s3-creds
                  key: ENDPOINT_URL
          imagePullPolicy: Always
          ports:
            - containerPort: 6379
              name: gcs
            - containerPort: 8265
              name: dashboard
            - containerPort: 10001
              name: client
          lifecycle:
            # Run `ray stop` in the container before it is terminated.
            preStop:
              exec:
                command: ["/bin/sh", "-c", "ray stop"]
          resources:
            limits:
              cpu: "2"
              memory: "16G"
              nvidia.com/gpu: "0"
            requests:
              cpu: "2"
              memory: "16G"
              nvidia.com/gpu: "0"
132
+
workerGroupSpecs:
133
+
# the number of pod replicas in this worker group
134
+
- replicas: 1
135
+
minReplicas: 1
136
+
maxReplicas: 1
137
+
# logical group name, here called small-group; it can also be a functional name
138
+
groupName: small-group
139
+
# if worker pods need to be added, we can simply increment the replicas
140
+
# if worker pods need to be removed, we decrement the replicas, and populate the podsToDelete list
141
+
# the operator will remove pods from the list until the number of replicas is satisfied
142
+
# when a pod is confirmed to be deleted, its name will be removed from the list below
143
+
#scaleStrategy:
144
+
# workersToDelete:
145
+
# - raycluster-complete-worker-small-group-bdtwh
146
+
# - raycluster-complete-worker-small-group-hv457
147
+
# - raycluster-complete-worker-small-group-k8tj7
148
+
# the following params are used to complete the ray start: ray start --block ...
149
+
rayStartParams:
150
+
block: 'true'
151
+
num-gpus: '1'
152
+
# pod template
153
+
template:
154
+
metadata:
155
+
labels:
156
+
key: value
157
+
# annotations for pod
158
+
annotations:
159
+
key: value
160
+
# finalizers:
161
+
# - kubernetes
162
+
spec:
163
+
initContainers:
164
+
# the env var $RAY_IP is set by the operator if missing, with the value of the head service name
165
+
- name: init-myservice
166
+
image: busybox:1.28
167
+
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
168
+
containers:
169
+
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
170
+
image: projectcodeflare/codeflare-glue:latest
171
+
env:
172
+
- name: AWS_ACCESS_KEY_ID
173
+
valueFrom:
174
+
secretKeyRef:
175
+
name: glue-s3-creds
176
+
key: AWS_ACCESS_KEY_ID
177
+
- name: AWS_SECRET_ACCESS_KEY
178
+
valueFrom:
179
+
secretKeyRef:
180
+
name: glue-s3-creds
181
+
key: AWS_SECRET_ACCESS_KEY
182
+
- name: ENDPOINT_URL
183
+
valueFrom:
184
+
secretKeyRef:
185
+
name: glue-s3-creds
186
+
key: ENDPOINT_URL
187
+
# environment variables to set in the container. Optional.
188
+
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
0 commit comments