-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathwfapi.ipldsch
481 lines (401 loc) · 15.8 KB
/
wfapi.ipldsch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
# Warpforge API Schema
# This document defines the Warpforge API schema using the IPLD schema DSL.
# For more info, see the IPLD schema docs: https://ipld.io/specs/schemas/.
###
### Layer 0 -- content snapshot identification
###
# The Layer 0 API is just content, represented as WareIDs.
# A WareID is a simple tuple of what kind of packing is used, and a hash.
# WareIDs are content-addressable.
#
# We use opaque strings for the packtype and the hash for simplicity.
type WareID struct {
packtype Packtype # names the packing format; typically "tar", "git", etc.
hash String # usually an actual hash, but handed to the io plugin verbatim.
# The stringjoin representation below serializes the two fields, in order,
# as one string joined by ":" -- i.e. "<packtype>:<hash>".
} representation stringjoin {
join ":"
}
# Packtype is a string that identifies a format for fileset packing.
# Typical examples are "tar", "git", etc.
# An enum could be used here; however, we use an opaque string
# rather than an enum because fileset packing is regarded as a plugin-style system.
type Packtype string
###
### Layer 1 -- computation atoms
###
# The Layer 1 API consists of the things we need to do execution. This provides
# the minimal set of features needed to define inputs, an action, and outputs.
# FormulaCapsule wraps the Formula to allow for new versions in the future.
# With the keyed representation, it serializes as a single-entry map
# whose key ("formula.v1") identifies which member the value is.
type FormulaCapsule union {
| Formula "formula.v1"
} representation keyed
# Formula describes a single computation.
# What exactly the computation is is defined by the action
# (of which there are several kinds, but typically, it's commands
# which will be evaluated in some kind of hermetic container),
# and the environment the action will run in is described by the inputs map.
# What data we collect after the action is completed is defined by the outputs.
type Formula struct {
inputs {SandboxPort:FormulaInput} # keyed by the place in the sandbox that receives the input.
action Action
outputs {OutputName:GatherDirective} # keyed by the name the gathered data is reported under.
}
# SandboxPort defines someplace within the sandbox we'll run the action in
# where data can be either put in or pulled out.
#
# With the stringprefix representation, the leading characters of the string
# select the member; that prefix is stripped before the member itself is parsed.
type SandboxPort union {
| SandboxPath "/" # a path inside the sandbox.
| SandboxVar "$" # a variable.
} representation stringprefix
# SandboxPath is one of the members of the SandboxPort sum type.
# It's a unix-like path, e.g. something like "foo/bar/baz".
#
# (Despite not beginning with a slash, it's interpreted as an absolute path,
# because the leading slash was stripped by the SandboxPort union parse step.)
type SandboxPath string
# SandboxVar is one of the members of the SandboxPort sum type.
# It's used to describe a variable (as contrasted with a path).
# When the action is one of the unixy-container actions,
# these will correspond to environment variables.
#
# Note that though SandboxVar is syntactically accepted in output declarations,
# it's not guaranteed to be supported by all actions.
#
# Mapping a SandboxVar to any kind of input other than a literal is strange
# and currently undefined.
type SandboxVar string
# FilterMap is a map of string keys to string values.
#
# TODO placeholder type, we may want something more structured here.
# TODO if it does get more structured, some of its uses might need a union wrapper.
# TODO or this might be a good place to use multi-phase pattern recognition.
#
# The representation clause below is commented out, so FilterMap is currently
# declared as a plain String-to-String map with no custom representation.
type FilterMap {String:String} # TODO not supported by ipld yet. representation stringpairs {
# entrySep ","
# pairSep "="
#}
# FormulaInput is the value type of a Formula's inputs map.
# With the kinded representation, the kind of the serialized data selects the member:
# a string is a FormulaInputSimple; a map is a FormulaInputComplex.
type FormulaInput union {
| FormulaInputSimple string
| FormulaInputComplex map
} representation kinded
# FormulaInputSimple is a string-encoded input: the string's prefix selects
# which member it is, and the remainder is parsed as that member.
#
# FIXME revisit name, this happens to also be the RunRecord.results value type!
# ... except for the mount part, which would make no sense there. Meh.
type FormulaInputSimple union {
| WareID "ware:" # this is most of the time!
| Mount "mount:" # not hermetic! we'll warn about the use of these.
| Literal "literal:" # a fun escape valve, isn't it.
} representation stringprefix
# FormulaInputComplex decorates a FormulaInputSimple with filters.
# It is the map-kinded member of the FormulaInput union.
type FormulaInputComplex struct {
basis FormulaInputSimple # the input being decorated.
filters FilterMap # both fields are required (neither is marked optional).
}
# Literal is an opaque string value.
# It appears as the "literal:" member of FormulaInputSimple and PlotInputSimple.
type Literal string
# Mount pairs a path on the host with the mode it should be mounted in.
# Mounts are not hermetic (see the "mount:" member of FormulaInputSimple).
#
# The stringjoin representation serializes it as "<mode>:<hostPath>".
type Mount struct {
mode MountMode
hostPath String
} representation stringjoin {
join ":"
}
# MountMode enumerates the modes a Mount can use.
# The parenthesized string after each member is its serialized form.
type MountMode enum {
| readonly ("ro")
| readwrite ("rw")
| overlay ("overlay")
}
# OutputName is a plain freetext string which a Formula (or Plot) author uses
# to identify the output data they want to collect.
# It's used when writing the Formula's outputs description,
# and will be seen again in the RunRecord
# which is produced by a tool when it evaluates the Formula.
type OutputName string
# GatherDirective is the value type of a Formula's (or Protoformula's) outputs map:
# it says where in the sandbox to collect data from, and how.
type GatherDirective struct {
from SandboxPort # the place in the sandbox to gather from.
packtype optional Packtype # should be absent iff `from` is a SandboxVar.
filters optional FilterMap # must be absent if `from` is a SandboxVar.
}
# Action is the union of the kinds of action a Formula (or Protoformula) can specify.
# With the keyed representation, it serializes as a single-entry map
# whose key ("echo", "exec", or "script") identifies the member.
#
# NOTE(review): Action_Noop is declared in this schema but is not a member
# of this union -- confirm whether that is intentional.
type Action union {
| Action_Echo "echo"
| Action_Exec "exec"
| Action_Script "script"
} representation keyed
# Action_Echo is an action which will cause a formula to execute by
# just echoing its own formula.
# It's not useful on its own, except as a debugging and demo tool.
type Action_Echo struct {
# Not much to say in this one! (The struct has no fields.)
}
# Action_Exec describes launching a container, and running a single process in it.
# (Consider using Action_Script; it's more user-friendly.)
#
# The commented-out fields below are not part of the schema.
type Action_Exec struct {
command [String] # fairly literally, what will be handed to exec syscall.
# cwd optional String
# network: when absent from the serialized data, the value is false.
network optional Bool (implicit false)
# userinfo optional ActionUserinfo
}
# Action_Script describes launching a container, launching a shell process
# within the container, and then feeding your commands to that shell process.
# This is somewhat more complicated than Action_Exec, but also offers more
# opportunities for debugging, and lets you easily run several commands
# while in the same container.
#
# The commented-out fields below are not part of the schema.
type Action_Script struct {
interpreter String # specifies what's going to parse your commands.
contents [String] # very different than exec's string list, though! is parsed.
# future: consider an optional enum here for what features to expect from shell.
# cwd optional String
# network: when absent from the serialized data, the value is false.
network optional Bool (implicit false)
# userinfo optional ActionUserinfo
}
# Action_Noop is an action which does... nothing!
# It's sometimes useful if you have data munging work to do that's so simple
# that you can do it using the FilterMap on a FormulaInputComplex.
# (This is fairly rare to see in practice.)
#
# NOTE(review): this type is not listed as a member of the Action union,
# so it cannot currently be selected in a Formula -- confirm whether that is intentional.
type Action_Noop struct {
# Not much to say in this one! (The struct has no fields.)
}
# ActionUserinfo can describe optional configuration for unix-like environments.
# Actions that launch containers will optionally contain this information.
#
# NOTE(review): in this schema the only references to ActionUserinfo are the
# commented-out `userinfo` fields of Action_Exec and Action_Script.
#
# Every field has an implicit value, which applies when the field is absent
# from the serialized data.
type ActionUserinfo struct {
uid Int (implicit 0)
gid Int (implicit 0)
username String (implicit "luser")
homedir String (implicit "/home/luser")
}
# WarehouseAddr is a string addressing a warehouse,
# e.g. "ca+https://warpsys-wares.s3.fr-par.scw.cloud".
# It is used as the value type in FormulaContext.warehouses,
# as the element type of WarehouseAddrList, and as the key type of MirroringConfig.
type WarehouseAddr string
# FormulaAndContext is what we actually use as the document root
# when parsing a formula file.
type FormulaAndContext struct {
formula FormulaCapsule # the versioned Formula itself.
context optional FormulaContextCapsule # may be omitted entirely.
}
# FormulaContextCapsule wraps FormulaContext in a keyed union,
# in the same way FormulaCapsule wraps Formula:
# the single map key ("context.v1") identifies the member.
type FormulaContextCapsule union {
| FormulaContext "context.v1"
} representation keyed
# FormulaContext carries information that accompanies a Formula
# (see FormulaAndContext) without being part of the Formula itself.
type FormulaContext struct {
# Maps each WareID to one WarehouseAddr.
# NOTE(review): presumably the address the ware can be fetched from -- confirm.
warehouses {WareID:WarehouseAddr}
}
# RunRecord is the record produced by a tool when it evaluates a Formula.
type RunRecord struct {
guid String # purely to force uniqueness.
time Int # again, to force uniqueness.
formulaID String # hash of the Formula that triggered this.
exitcode Int # what it says on the tin. zero is success, per unix.
results {OutputName:FormulaInputSimple} # map corresponding to output gathers.
}
# Logging Types

# LogOutput carries a single log message.
# It is the "log" member of the ApiOutput union.
type LogOutput struct {
Msg String
}
# ApiOutput is the union of the kinds of output value defined by this schema.
# With the keyed representation, it serializes as a single-entry map
# whose key identifies which member the value is.
type ApiOutput union {
| String "output"
| LogOutput "log"
| RunRecord "runrecord"
| PlotResults "plotresults"
} representation keyed
###
### Layer 2 -- group A: evaluation graphs
###
# ModuleCapsule wraps Module in a keyed union so the format can be versioned:
# the single map key ("module.v1") identifies the member.
type ModuleCapsule union {
| Module "module.v1"
} representation keyed
# Module is the info about a module in the local workspace.
# Its only field at present is the module's name.
type Module struct {
name ModuleName
# note: "Module" is the info in the local workspace; "CatalogModule" is a distinct type.
# semantically: also contains `plot optional Plot`... but practically speaking: that's in a sibling file.
# future: other optional fields used by "override modules".
# future: maybe something about recommended update patterns for any catalog inputs in the plot?
# future: maybe something about a gadget that can be invoked to generate the plot?
}
# ModuleName strings tend to look a bit like URLs.
# For example: "foo.org/teamname/projectname".
#
# Characters like "/" and "." are allowed, but ":" and whitespace are forbidden,
# and the use of other punctuation characters is Not Recommended.
type ModuleName string
# PlotCapsule wraps Plot in a keyed union so the format can be versioned:
# the single map key ("plot.v1") identifies the member.
type PlotCapsule union {
| Plot "plot.v1"
} representation keyed
# Plot is the type that outlines a series of steps of related computations.
# It has inputs and outputs itself, which label things relative to the plot;
# and the steps can be more plots (recursively, for namespacing),
# or Protoformulas (which are resolvable to Formulas, and are how work gets done).
type Plot struct {
inputs {LocalLabel:PlotInput} # note: contrast to Protoformula, which is `inputs {SandboxPort:PlotInput}`.
steps {StepName:Step} # each Step is either a nested Plot or a Protoformula.
outputs {LocalLabel:PlotOutput} # each PlotOutput is a Pipe.
}
# StepName is for assigning string names to Steps in a Plot.
# StepName values also appear as part of wiring things together using Pipes.
#
# Must not contain ':' characters or unprintables or whitespace.
# (StepName is used in types with a stringjoin representation,
# such as Pipe, which use the ':' character as a separator.)
type StepName string
# LocalLabel is for referencing data within a Plot.
# Input data gets assigned a LocalLabel;
# Pipes pull info from a LocalLabel (possibly together with a StepName to scope it);
# when a Step is evaluated (e.g. turned into a Formula, executed, and produces results),
# the results will become identifiable by a LocalLabel (scoped by the StepName).
#
# (LocalLabel and OutputName are essentially the same thing: an OutputName
# is cast to a LocalLabel when a Formula's results are hoisted
# into the Plot.)
#
# Must not contain ':' characters or unprintables or whitespace.
# (LocalLabel is used in types with a stringjoin representation,
# such as Pipe, which use the ':' character as a separator.)
type LocalLabel string
# PlotInput is the value type of the inputs maps of Plot and Protoformula.
# With the kinded representation, the kind of the serialized data selects the member:
# a string is a PlotInputSimple; a map is a PlotInputComplex.
type PlotInput union {
| PlotInputSimple string
| PlotInputComplex map
} representation kinded
# PlotInputSimple is extremely comparable to FormulaInputSimple --
# and it's a superset of it: all the same things are acceptable here.
# PlotInputSimple adds more features:
# some are for getting data from the wider universe (mediated by Catalogs);
# some are for getting data ingested from a host environment (unhermetic!);
# and some are simply for wiring all the steps in a Plot together
# into a computable graph!
#
# As with FormulaInputSimple, the string's prefix selects the member.
type PlotInputSimple union {
| WareID "ware:" # same as in FormulaInputSimple.
| Mount "mount:" # same as in FormulaInputSimple.
| Literal "literal:" # same as in FormulaInputSimple.
| Pipe "pipe:" # allows wiring outputs from one formula into inputs of another!
| CatalogRef "catalog:" # allows lookup of a WareID via the catalog!
| Ingest "ingest:" # allows demanding ingest of data from the environment!
#| CandidateRef "candidate:" # TODO Like catalog, but dangling a bit.
} representation stringprefix
# PlotInputComplex allows decorating a PlotInputSimple with filters.
# It is the map-kinded member of the PlotInput union.
type PlotInputComplex struct {
basis PlotInputSimple # the input being decorated.
filters FilterMap # both fields are required (neither is marked optional).
}
# PlotOutput is the value type of a Plot's outputs map.
# Its only member is Pipe, so a PlotOutput is always a string beginning with "pipe:".
type PlotOutput union {
| Pipe "pipe:"
} representation stringprefix
# Pipe references data within a Plot by a LocalLabel, optionally scoped by a StepName.
#
# The stringjoin representation serializes it as "<stepName>:<label>";
# with an empty stepName that is ":<label>".
type Pipe struct {
stepName StepName # if set, should be a sibling; if empty, means it's a reference to the parent's input map.
label LocalLabel
} representation stringjoin {
join ":"
}
# PlotResults maps LocalLabels to WareIDs.
# It is the "plotresults" member of the ApiOutput union.
# NOTE(review): presumably the keys correspond to a Plot's outputs -- confirm.
type PlotResults {LocalLabel:WareID}
# Step is one entry in a Plot's steps map: either another Plot
# (recursively, for namespacing) or a Protoformula.
# With the keyed representation, it serializes as a single-entry map
# whose key ("plot" or "protoformula") identifies the member.
type Step union {
| Plot "plot"
| Protoformula "protoformula"
} representation keyed
# Protoformula is a Step that is resolvable to a Formula.
# It has the same three fields as Formula, with the type differences noted per field.
type Protoformula struct {
inputs {SandboxPort:PlotInput} # same as Formula -- but value is PlotInput.
action Action # literally verbatim passed through to the Formula.
outputs {LocalLabel:GatherDirective} # same as Formula -- but key is LocalLabel.
}
# Ingests are a special kind of PlotInput.
# They look at the host filesystem and derive a Ware from it.
#
# Ingests are only contextually allowed. Using one in a Plot will cause
# Warpforge to warn users about evaluating that Plot, and may require more flags
# to be used in the CLI in order to make that Plot actually be evaluated.
# Ingests are also not permissible in Replays, because they're not reproducible.
#
# The string's prefix selects the member; "git:" is the only one defined.
type Ingest union {
| GitIngest "git:"
} representation stringprefix
# GitIngest is the "git:" member of the Ingest union.
# The stringjoin representation serializes it as "<hostPath>:<ref>".
type GitIngest struct {
hostPath String # NOTE(review): presumably the path of a git repository on the host -- confirm.
ref String # NOTE(review): presumably the git ref to ingest from that repository -- confirm.
} representation stringjoin {
join ":"
}
###
### Layer 2 -- group B: catalog exchange datatypes
###
# ReleaseName is a string naming a release.
# It is the key type of CatalogModule.releases and the middle element of a CatalogRef.
# Since CatalogRef joins its fields with ":", a ReleaseName containing ":" would be ambiguous there.
type ReleaseName string
# ItemLabel is a string naming an item within a release.
# It is the key type of CatalogRelease.items and the final element of a CatalogRef.
type ItemLabel string
# CatalogRef is a tuple that allows lookup of a WareID in a Catalog.
#
# A typical value might look something like "foobar.org/frob:v1.2.3:linux-amd64-zapp".
# CatalogRef values are often seen in serialized documents with a "catalog:" prefix,
# in the same way that WareIDs are often seen with a "ware:" prefix;
# they're usually used with a wrapper type with that prefix for clarity purposes.
type CatalogRef struct {
moduleName ModuleName # e.g. "foobar.org/frob"
releaseName ReleaseName # e.g. "v1.2.3"
itemName ItemLabel # e.g. "linux-amd64-zapp"
# The stringjoin representation below serializes the three fields, in order,
# as one string joined by ":".
} representation stringjoin {
join ":"
}
# Not currently defined in the schema: a proper type for the Catalog as a whole.
# If we could: it should be something like:
# `type Catalog {ModuleName:&CatalogModuleCapsule} using ADL "prollytree"`

# CatalogModuleCapsule wraps CatalogModule in a keyed union so the format can be versioned:
# the single map key ("catalogmodule.v1") identifies the member.
type CatalogModuleCapsule union {
| CatalogModule "catalogmodule.v1"
} representation keyed
# CatalogModule is the catalog's info about one module.
# (It is a distinct type from Module, which is the info in the local workspace.)
type CatalogModule struct {
name ModuleName
releases {ReleaseName:CatalogReleaseCID} # each value is a CID (as a string) pointing to a CatalogRelease.
metadata {String:String} # free-form string pairs; required (not marked optional).
}
# CatalogReleaseCID probably ought to just be `&CatalogRelease` if we were doing IPLD properly.
# Unfortunately, it's not, because we're using JSON, and not DAG-JSON
# (which in turn is because DAG-JSON has unsolved escaping problems),
# so we've been forced into treating CIDs as strings and handling them at the application level.
# This is the result.
type CatalogReleaseCID string
# CatalogRelease is part of a Catalog's tree structure, and the root type in one of the blocks within a Catalog.
# It is pointed to by the CatalogModule type.
# The releaseName value here reiterates the same name indicated in the CatalogModule pointing at this.
#
# There's a lack of "capsule" type for versioning this structure because we assume that if this part
# of the protocol evolves, it will do so in tandem with the CatalogModule type,
# and therefore the CatalogModuleCapsule type provides enough versioning hints for this area too.
type CatalogRelease struct {
releaseName ReleaseName # reiterates the name used as the key in CatalogModule.releases.
items {ItemLabel:WareID} # the wares in this release, keyed by item label.
metadata {String:String} # free-form string pairs; required (not marked optional).
}
# CatalogMirrorsCapsule wraps CatalogMirrors in a keyed union so the format can be versioned:
# the single map key ("catalogmirrors.v1") identifies the member.
type CatalogMirrorsCapsule union {
| CatalogMirrors "catalogmirrors.v1"
} representation keyed
# CatalogMirrors lists warehouse addresses, indexed in up to two ways.
# Both fields are optional, so either (or both) may be omitted.
type CatalogMirrors struct {
byWare optional CatalogMirrorsByWare # addresses listed per individual WareID.
byModule optional CatalogMirrorsByModule # addresses listed per ModuleName, then per Packtype.
}
# WarehouseAddrList is a list of warehouse addresses.
type WarehouseAddrList [WarehouseAddr]
# CatalogMirrorsByWare maps an individual WareID to a list of warehouse addresses.
type CatalogMirrorsByWare {WareID:WarehouseAddrList}
# CatalogMirrorsByModule maps a ModuleName to that module's per-Packtype address lists.
type CatalogMirrorsByModule {ModuleName:CatalogMirrorsByPacktype}
# CatalogMirrorsByPacktype maps a Packtype to a list of warehouse addresses.
type CatalogMirrorsByPacktype {Packtype:WarehouseAddrList}
# ReplayCapsule is a keyed union whose only member is a Plot under the key "plot.v1".
# NOTE(review): this is the same member and key as PlotCapsule; presumably it
# exists as a separate type to mark a Plot used as a replay -- confirm.
type ReplayCapsule union {
| Plot "plot.v1"
} representation keyed
# MirroringConfig defines the mirroring configuration for an entire workspace.
# This maps the addresses which we want to publish to (of type WarehouseAddr)
# to specific configs, which may be backed by various types of storage.
#
# For instance, if a catalog has an entry which refers to
# "ca+https://warpsys-wares.s3.fr-par.scw.cloud" as a mirror, this configuration
# will be checked for that entry when attempting to do mirroring operations.
# If it exists, that configuration will be used.
#
# Here is a full example mirroring configuration for an S3 endpoint:
# {
# "mirroring.v1": {
# "ca+https://warpsys-wares.s3.fr-par.scw.cloud": {
# "pushConfig": {
# "s3": {
# "endpoint": "https://s3.fr-par.scw.cloud",
# "region": "fr-par",
# "bucket": "warpsys-wares"
# }
# }
# }
# }
# }
#
# MirroringConfigCapsule is the versioning wrapper around MirroringConfig:
# the single map key ("mirroring.v1", as in the example above) identifies the member.
type MirroringConfigCapsule union {
| MirroringConfig "mirroring.v1"
} representation keyed
# MirroringConfig itself is the map from WarehouseAddr to that warehouse's config.
type MirroringConfig {WarehouseAddr:WarehouseMirroringConfig}
# WarehouseMirroringConfig is the mirroring configuration for a single warehouse address.
# It is the value type of the MirroringConfig map.
type WarehouseMirroringConfig struct {
pushConfig WarehousePushConfig # required; selects and configures the push backend.
}
# WarehousePushConfig is the union of the supported push configurations.
# With the keyed representation, it serializes as a single-entry map
# whose key ("s3" or "mock") identifies the member.
type WarehousePushConfig union {
| S3PushConfig "s3"
| MockPushConfig "mock"
} representation keyed
# S3PushConfig is the push configuration for an S3 endpoint.
type S3PushConfig struct {
endpoint String # e.g. "https://s3.fr-par.scw.cloud"
region String # e.g. "fr-par"
bucket String # e.g. "warpsys-wares"
path optional String # NOTE(review): presumably a path (key prefix) within the bucket -- confirm.
}
# MockPushConfig is the "mock" member of WarehousePushConfig. It has no fields.
# NOTE(review): presumably a stand-in backend for testing -- confirm.
type MockPushConfig struct {}