Skip to content

Commit a37bde5

Browse files
feat: introduce special immutables address space (#2689)
This commit introduces a new address space for manipulating immutables. It also modifies the existing "code" address space so that it resolves pointers relative to the start of the "data" section instead of the start of the "text" section of the code (air quotes because these sections are conceptual rather than part of an object format). Lastly, it slightly changes the semantics of the `lll` opcode (and, for clarity, renames it to `deploy`) in order to save a few instructions in the constructor. This is important for two reasons. First, other VMs like zksync model immutables differently by mapping them onto storage. In order to capture different handling of the address space (immutables -> storage in zksync vs immutables -> runtime code in EVM), the new pseudo-opcodes `iload` and `istore` can be compiled differently depending on the backend. For EVM, they compile to `mload` and `mstore` at magic locations calculated at assembly time. Note that `iload` and `istore` are not valid outside of constructor code (that is, in code which does not have a child `deploy` section), and assembly will panic if they are used in non-constructor code. The second reason has to do with runtime code layout. Currently, the IR codegen has no idea what the codesize will be (as that can only be known after assembly), so we have runtime logic to calculate offsets of data that is stored in code (e.g., to get data at position `x` in the runtime code, we currently need to issue pointer arithmetic `(add ~codelen x)` to get the runtime location of the data). By introducing new pseudo-opcodes, we can resolve these locations at assembly time instead of at runtime. This commit also renames the "code" address space to "data", to make it clearer that it is used to access the data section of the code. The pseudo-opcode `codeload` has been renamed to `dload`.
For the same efficiency reason described above, `data` is considered to start at the end of the runtime code, so `dload` usage is `dload x` instead of `codeload (add ~codelen x)`. To accomplish all this, a couple of magic locations have been introduced into the assembly: `_mem_deploy_start` and `_mem_deploy_end`. These are resolved at assembly time to the end of the runtime code just prior to deploy; immutables are stored starting from `_mem_deploy_end`. To support the assembly-time calculation of these offsets, two magic opcodes have been added to the assembly: `_DEPLOY_MEM_OFST_<N>` and `_OFST`. `_DEPLOY_MEM_OFST_<N>` is used to pass information from the memory allocator to the assembler (which is required to calculate where immutables will be stored in memory), and `_OFST` is used to resolve offsets from `_sym_code_end` and `_mem_deploy_end` at compile time.
1 parent d03a808 commit a37bde5

File tree

14 files changed

+303
-172
lines changed

14 files changed

+303
-172
lines changed

tests/compiler/LLL/test_compile_lll.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,14 @@ def test_compile_lll_good(good_lll, get_contract_from_lll):
3838
def test_lll_from_s_expression(get_contract_from_lll):
3939
code = """
4040
(seq
41-
(return
41+
(deploy
4242
0
43-
(lll ; just return 32 byte of calldata back
44-
0
45-
(seq
46-
(calldatacopy 0 4 32)
47-
(return 0 32)
48-
stop
49-
)
50-
)))
43+
(seq ; just return 32 byte of calldata back
44+
(calldatacopy 0 4 32)
45+
(return 0 32)
46+
stop
47+
)
48+
0))
5149
"""
5250
abi = [
5351
{

vyper/builtin_functions/functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ def build_LLL(self, expr, context):
564564
if arg.typ.maxlen == 0:
565565
continue
566566
# Get the length of the current argument
567-
if arg.location in ("memory", "calldata", "code"):
567+
if arg.location in ("memory", "calldata", "data", "immutables"):
568568
length = LLLnode.from_list(
569569
[load_op(arg.location), "_arg"], typ=BaseType("int128")
570570
)

vyper/codegen/context.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ def _new_variable(
152152
self, name: str, typ: NodeType, var_size: int, is_internal: bool, is_mutable: bool = True
153153
) -> int:
154154
if is_internal:
155+
# TODO CMC 2022-03-02 change this to `.allocate_memory()`
156+
# and make `expand_memory()` private.
155157
var_pos = self.memory_allocator.expand_memory(var_size)
156158
else:
157159
var_pos = self.memory_allocator.allocate_memory(var_size)

vyper/codegen/core.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def make_byte_array_copier(dst, src, pos=None):
115115

116116
# TODO maybe move me to types.py
117117
def wordsize(location):
118-
if location in ("memory", "calldata", "code"):
118+
if location in ("memory", "calldata", "data", "immutables"):
119119
return 32
120120
if location == "storage":
121121
return 1
@@ -232,21 +232,28 @@ def copy_bytes(dst, src, length, length_bound, pos=None):
232232
ret = LLLnode.from_list(copy_op, annotation=annotation)
233233
return b1.resolve(b2.resolve(b3.resolve(ret)))
234234

235-
if dst.location == "memory" and src.location in ("memory", "calldata", "code"):
235+
if dst.location == "memory" and src.location in ("memory", "calldata", "data"):
236236
# special cases: batch copy to memory
237+
# TODO: iloadbytes
237238
if src.location == "memory":
238239
copy_op = ["staticcall", "gas", 4, src, length, dst, length]
239240
gas_bound = _identity_gas_bound(length_bound)
240241
elif src.location == "calldata":
241242
copy_op = ["calldatacopy", dst, src, length]
242243
gas_bound = _calldatacopy_gas_bound(length_bound)
243-
elif src.location == "code":
244-
copy_op = ["codecopy", dst, src, length]
244+
elif src.location == "data":
245+
copy_op = ["dloadbytes", dst, src, length]
246+
# note: dloadbytes compiles to CODECOPY
245247
gas_bound = _codecopy_gas_bound(length_bound)
246248

247249
ret = LLLnode.from_list(copy_op, annotation=annotation, add_gas_estimate=gas_bound)
248250
return b1.resolve(b2.resolve(b3.resolve(ret)))
249251

252+
if dst.location == "immutables" and src.location in ("memory", "data"):
253+
# TODO istorebytes-from-mem, istorebytes-from-calldata(?)
254+
# compile to identity, CODECOPY respectively.
255+
pass
256+
250257
# general case, copy word-for-word
251258
# pseudocode for our approach (memory-storage as example):
252259
# for i in range(len, bound=MAX_LEN):
@@ -259,15 +266,15 @@ def copy_bytes(dst, src, length, length_bound, pos=None):
259266

260267
i = LLLnode.from_list(_freshname("copy_bytes_ix"), typ="uint256")
261268

262-
if src.location in ("memory", "calldata", "code"):
269+
if src.location in ("memory", "calldata", "data", "immutables"):
263270
loader = [load_op(src.location), ["add", src, _mul(32, i)]]
264271
elif src.location == "storage":
265272
loader = [load_op(src.location), ["add", src, i]]
266273
else:
267274
raise CompilerPanic(f"Unsupported location: {src.location}") # pragma: notest
268275

269-
if dst.location == "memory":
270-
setter = ["mstore", ["add", dst, _mul(32, i)], loader]
276+
if dst.location in ("memory", "immutables"):
277+
setter = [store_op(dst.location), ["add", dst, _mul(32, i)], loader]
271278
elif dst.location == "storage":
272279
setter = ["sstore", ["add", dst, i], loader]
273280
else:
@@ -454,7 +461,7 @@ def _get_element_ptr_tuplelike(parent, key, pos):
454461
if parent.location == "storage":
455462
for i in range(index):
456463
ofst += typ.members[attrs[i]].storage_size_in_words
457-
elif parent.location in ("calldata", "memory", "code"):
464+
elif parent.location in ("calldata", "memory", "data", "immutables"):
458465
for i in range(index):
459466
ofst += typ.members[attrs[i]].memory_bytes_required
460467
else:
@@ -522,7 +529,7 @@ def _get_element_ptr_array(parent, key, pos, array_bounds_check):
522529

523530
if parent.location == "storage":
524531
element_size = subtype.storage_size_in_words
525-
elif parent.location in ("calldata", "memory", "code"):
532+
elif parent.location in ("calldata", "memory", "data", "immutables"):
526533
element_size = subtype.memory_bytes_required
527534

528535
ofst = _mul(ix, element_size)
@@ -578,8 +585,13 @@ def load_op(location):
578585
return "sload"
579586
if location == "calldata":
580587
return "calldataload"
581-
if location == "code":
582-
return "codeload"
588+
if location == "data":
589+
# refers to data section of currently executing code
590+
return "dload"
591+
if location == "immutables":
592+
# special address space for manipulating immutables before deploy
593+
# only makes sense in a constructor
594+
return "iload"
583595
raise CompilerPanic(f"unreachable {location}") # pragma: notest
584596

585597

@@ -588,12 +600,14 @@ def store_op(location):
588600
return "mstore"
589601
if location == "storage":
590602
return "sstore"
603+
if location == "immutables":
604+
return "istore"
591605
raise CompilerPanic(f"unreachable {location}") # pragma: notest
592606

593607

594608
# Unwrap location
595609
def unwrap_location(orig):
596-
if orig.location in ("memory", "storage", "calldata", "code"):
610+
if orig.location in ("memory", "storage", "calldata", "data", "immutables"):
597611
return LLLnode.from_list([load_op(orig.location), orig], typ=orig.typ)
598612
else:
599613
# CMC 20210909 TODO double check if this branch can be removed

vyper/codegen/expr.py

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -315,39 +315,26 @@ def parse_Name(self):
315315
return LLLnode.from_list(
316316
[obj], typ=BaseType(typ, is_literal=True), pos=getpos(self.expr)
317317
)
318+
318319
elif self.expr._metadata["type"].is_immutable:
319-
# immutable variable
320-
# need to handle constructor and outside constructor
321320
var = self.context.globals[self.expr.id]
322-
is_constructor = self.expr.get_ancestor(vy_ast.FunctionDef).get("name") == "__init__"
323-
if is_constructor:
324-
# store memory position for later access in module.py in the variable record
325-
memory_loc = self.context.new_variable(self.expr.id, var.typ)
326-
self.context.global_ctx._globals[self.expr.id].pos = memory_loc
327-
# store the data offset in the variable record as well for accessing
328-
data_offset = self.expr._metadata["type"].position.offset
329-
self.context.global_ctx._globals[self.expr.id].data_offset = data_offset
321+
ofst = self.expr._metadata["type"].position.offset
330322

331-
return LLLnode.from_list(
332-
memory_loc,
333-
typ=var.typ,
334-
location="memory",
335-
pos=getpos(self.expr),
336-
annotation=self.expr.id,
337-
mutable=True,
338-
)
323+
if self.context.sig.is_init_func:
324+
mutable = True
325+
location = "immutables"
339326
else:
340-
immutable_section_size = self.context.global_ctx.immutable_section_size
341-
offset = self.expr._metadata["type"].position.offset
342-
# TODO: resolve code offsets for immutables at compile time
343-
return LLLnode.from_list(
344-
["sub", "codesize", immutable_section_size - offset],
345-
typ=var.typ,
346-
location="code",
347-
pos=getpos(self.expr),
348-
annotation=self.expr.id,
349-
mutable=False,
350-
)
327+
mutable = False
328+
location = "data"
329+
330+
return LLLnode.from_list(
331+
ofst,
332+
typ=var.typ,
333+
location=location,
334+
pos=getpos(self.expr),
335+
annotation=self.expr.id,
336+
mutable=mutable,
337+
)
351338

352339
# x.y or x[5]
353340
def parse_Attribute(self):
@@ -481,7 +468,7 @@ def parse_Attribute(self):
481468
return LLLnode.from_list(["chainid"], typ="uint256", pos=getpos(self.expr))
482469
# Other variables
483470
else:
484-
sub = Expr.parse_variable_location(self.expr.value, self.context)
471+
sub = Expr(self.expr.value, self.context).lll_node
485472
# contract type
486473
if isinstance(sub.typ, InterfaceType):
487474
return sub

vyper/codegen/function_definitions/external_function.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,7 @@ def _register_function_args(context: Context, sig: FunctionSignature) -> List[LL
4646

4747
# tuple with the abi_encoded args
4848
if sig.is_init_func:
49-
base_args_ofst = LLLnode(
50-
"~codelen", location="code", typ=base_args_t, encoding=Encoding.ABI
51-
)
49+
base_args_ofst = LLLnode(0, location="data", typ=base_args_t, encoding=Encoding.ABI)
5250
else:
5351
base_args_ofst = LLLnode(4, location="calldata", typ=base_args_t, encoding=Encoding.ABI)
5452

vyper/codegen/global_context.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ def parse_type(self, ast_node):
236236
custom_structs=self._structs,
237237
)
238238

239+
@property
240+
def immutables(self):
241+
return [t for t in self._globals.values() if t.is_immutable]
242+
239243
@cached_property
240-
def immutable_section_size(self):
241-
return sum([imm.size * 32 for imm in self._globals.values() if imm.is_immutable])
244+
def immutable_section_bytes(self):
245+
return sum([imm.size * 32 for imm in self.immutables])

vyper/codegen/lll_node.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,9 @@ def _check(condition, err):
242242
)
243243
self.valency = sum([arg.valency for arg in self.args])
244244
self.gas = sum([arg.gas for arg in self.args])
245-
# LLL brackets (don't bother gas counting)
246-
elif self.value == "lll":
247-
self.valency = 1
248-
self.gas = NullAttractor()
245+
elif self.value == "deploy":
246+
self.valency = 0
247+
self.gas = NullAttractor() # unknown
249248
# Stack variables
250249
else:
251250
self.valency = 1

vyper/codegen/memory_allocator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def allocate_memory(self, size: int) -> int:
100100
# if no deallocated slots are available, expand memory
101101
return self.expand_memory(size)
102102

103+
# TODO this should be an internal function
103104
def expand_memory(self, size: int) -> int:
104105
"""
105106
Allocate `size` bytes in memory, starting from the free memory pointer.

vyper/codegen/module.py

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from vyper import ast as vy_ast
66
from vyper.ast.signatures.function_signature import FunctionSignature, FunctionSignatures
7-
from vyper.codegen.core import make_setter
87
from vyper.codegen.function_definitions import (
98
generate_lll_for_function,
109
is_default_func,
@@ -90,7 +89,7 @@ def parse_external_interfaces(external_interfaces, global_ctx):
9089

9190

9291
def parse_regular_functions(
93-
o, regular_functions, sigs, external_interfaces, global_ctx, default_function
92+
regular_functions, sigs, external_interfaces, global_ctx, default_function, init_function
9493
):
9594
# check for payable/nonpayable external functions to optimize nonpayable assertions
9695
func_types = [i._metadata["type"] for i in global_ctx._defs]
@@ -179,50 +178,7 @@ def parse_regular_functions(
179178
]
180179
runtime.extend(internal_funcs)
181180

182-
immutables = [_global for _global in global_ctx._globals.values() if _global.is_immutable]
183-
184-
# TODO: enable usage of the data section beyond just user defined immutables
185-
# https://github.com/vyperlang/vyper/pull/2466#discussion_r722816358
186-
if len(immutables) > 0:
187-
# find position of the last immutable so we do not overwrite it in memory
188-
# when we codecopy the runtime code to memory
189-
immutables = sorted(immutables, key=lambda imm: imm.pos)
190-
start_pos = immutables[-1].pos + immutables[-1].size * 32
191-
192-
# create sequence of actions to copy immutables to the end of the runtime code in memory
193-
# TODO: if possible, just use identity precompile
194-
data_section = []
195-
for immutable in immutables:
196-
# store each immutable at the end of the runtime code
197-
memory_loc, offset = (
198-
immutable.pos,
199-
immutable.data_offset,
200-
)
201-
lhs = LLLnode.from_list(
202-
["add", start_pos + offset, "_lllsz"], typ=immutable.typ, location="memory"
203-
)
204-
rhs = LLLnode.from_list(memory_loc, typ=immutable.typ, location="memory")
205-
data_section.append(make_setter(lhs, rhs, pos=None))
206-
207-
# TODO: use GlobalContext.immutable_section_size
208-
data_section_size = sum([immutable.size * 32 for immutable in immutables])
209-
o.append(
210-
[
211-
"with",
212-
"_lllsz", # keep size of runtime bytecode in sz var
213-
["lll", start_pos, runtime], # store runtime code at `start_pos`
214-
# sequence of copying immutables, with final action of returning the runtime code
215-
["seq", *data_section, ["return", start_pos, ["add", data_section_size, "_lllsz"]]],
216-
]
217-
)
218-
219-
else:
220-
# NOTE: lll macro first argument is the location in memory to store
221-
# the compiled bytecode
222-
# https://lll-docs.readthedocs.io/en/latest/lll_reference.html#code-lll
223-
o.append(["return", 0, ["lll", 0, runtime]])
224-
225-
return o, runtime
181+
return runtime
226182

227183

228184
# Main python parse tree => LLL method
@@ -257,10 +213,10 @@ def parse_tree_to_lll(global_ctx: GlobalContext) -> Tuple[LLLnode, LLLnode, Func
257213
if global_ctx._contracts or global_ctx._interfaces:
258214
external_interfaces = parse_external_interfaces(external_interfaces, global_ctx)
259215

260-
# TODO: fix for #2251 is to move this after parse_regular_functions
216+
init_func_lll = None
261217
if init_function:
262218
o.append(init_func_init_lll())
263-
init_func_lll, _frame_start, _frame_size = generate_lll_for_function(
219+
init_func_lll, _frame_start, init_frame_size = generate_lll_for_function(
264220
init_function,
265221
{**{"self": sigs}, **external_interfaces},
266222
global_ctx,
@@ -269,15 +225,27 @@ def parse_tree_to_lll(global_ctx: GlobalContext) -> Tuple[LLLnode, LLLnode, Func
269225
o.append(init_func_lll)
270226

271227
if regular_functions or default_function:
272-
o, runtime = parse_regular_functions(
273-
o,
228+
runtime = parse_regular_functions(
274229
regular_functions,
275230
sigs,
276231
external_interfaces,
277232
global_ctx,
278233
default_function,
234+
init_func_lll,
279235
)
280236
else:
281-
runtime = o.copy()
237+
# for some reason, somebody may want to deploy a contract with no code,
238+
# or more likely, a "pure data" contract which contains immutables
239+
runtime = LLLnode.from_list(["seq"])
240+
241+
immutables_len = global_ctx.immutable_section_bytes
242+
243+
if init_function:
244+
memsize = init_func_lll.context.memory_allocator.size_of_mem # type: ignore
245+
else:
246+
memsize = 0
247+
248+
# note: (deploy mem_ofst, code, extra_padding)
249+
o.append(["deploy", memsize, runtime, immutables_len]) # type: ignore
282250

283251
return LLLnode.from_list(o), LLLnode.from_list(runtime), sigs

vyper/compiler/output.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,9 @@ def _build_asm(asm_list):
174174
output_string += " "
175175
in_push -= 1
176176
else:
177-
assert isinstance(node, str), node
177+
output_string += str(node) + " "
178178

179-
output_string += node + " "
180-
181-
if node.startswith("PUSH"):
179+
if isinstance(node, str) and node.startswith("PUSH"):
182180
assert in_push == 0
183181
in_push = int(node[4:])
184182
output_string += "0x"

0 commit comments

Comments
 (0)