Skip to content

Commit

Permalink
feat: add runtime code layout to initcode (#3584)
Browse files Browse the repository at this point in the history
this commit adds the runtime code layout to the initcode payload (as a
suffix), so that the runtime code can be analyzed without source code.
this is particularly important for disassemblers, which need
demarcations for where the data section starts as distinct from the
runtime code segment itself.

the layout is:

CBOR-encoded list:
  runtime code length
  [<length of data section> for data section in runtime data sections]
  immutable section length
  {"vyper": (major, minor, patch)}
length of CBOR-encoded list + 2, encoded as two big-endian bytes.

note that the specific format for the CBOR payload was chosen to avoid
changing the last 13 bytes of the signature compared to previous
versions of vyper. that is, the last 13 bytes still look like
b"\xa1evyper\x83...". this is because the version dict is the last item
in the list, so its encoding is the same as when it was the only item in the payload.

this commit also changes the meaning of the two footer bytes: they now
indicate the length of the entire footer (including the two bytes
indicating the footer length). the sole purpose of this is to be more
intuitive: the two footer bytes now give the offset from the end at which
the CBOR-encoded metadata starts, rather than the length of the CBOR
payload (without the two length bytes).

lastly, this commit renames the internal `insert_vyper_signature=` kwarg
to `insert_compiler_metadata=` as the metadata includes more than just
the vyper version now.
  • Loading branch information
charles-cooper committed Sep 5, 2023
1 parent 39a2313 commit 96d2042
Show file tree
Hide file tree
Showing 5 changed files with 180 additions and 32 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def _global_version(version):
python_requires=">=3.10,<4",
py_modules=["vyper"],
install_requires=[
"cbor2>=5.4.6,<6",
"asttokens>=2.0.5,<3",
"pycryptodome>=3.5.1,<4",
"semantic-version>=2.10,<3",
Expand Down
133 changes: 127 additions & 6 deletions tests/compiler/test_bytecode_runtime.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,135 @@
import vyper
import cbor2
import pytest

import vyper
from vyper.compiler.settings import OptimizationLevel, Settings

def test_bytecode_runtime():
code = """
simple_contract_code = """
@external
def a() -> bool:
return True
"""
"""

# Vyper source with five empty external functions (foo1..foo5); compiled by
# the dense/sparse jumptable tests and by the deployed-bytecode test.
many_functions = """
@external
def foo1():
pass
@external
def foo2():
pass
@external
def foo3():
pass
@external
def foo4():
pass
@external
def foo5():
pass
"""

# Vyper source declaring one public immutable uint256 that is assigned in the
# constructor; the immutables test expects a 32-byte immutables section for it.
has_immutables = """
A_GOOD_PRIME: public(immutable(uint256))
@external
def __init__():
A_GOOD_PRIME = 967
"""


def _parse_cbor_metadata(initcode):
    """
    Decode the CBOR metadata footer found at the end of `initcode`.

    The last two bytes of `initcode` are read as a big-endian integer giving
    the length of the whole footer, *including* those two length bytes. The
    CBOR payload is the part of the footer that precedes the length bytes.

    Returns whatever `cbor2.loads` produces for the payload.

    Raises ValueError if the footer length cannot describe a non-empty
    payload that fits inside `initcode`.
    """
    if len(initcode) < 2:
        raise ValueError("initcode is too short to contain a metadata footer")

    metadata_ofst = int.from_bytes(initcode[-2:], "big")

    # Without this check a footer length of 0 would turn the slice below into
    # `initcode[0:-2]` (i.e. nearly the whole initcode), and a length larger
    # than the input would be silently clamped by the negative slice.
    if not 2 < metadata_ofst <= len(initcode):
        raise ValueError(f"invalid metadata footer length: {metadata_ofst}")

    metadata = cbor2.loads(initcode[-metadata_ofst:-2])
    return metadata

out = vyper.compile_code(code, ["bytecode_runtime", "bytecode"])

def test_bytecode_runtime():
out = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"])

assert len(out["bytecode"]) > len(out["bytecode_runtime"])
assert out["bytecode_runtime"][2:] in out["bytecode"][2:]
assert out["bytecode_runtime"].removeprefix("0x") in out["bytecode"].removeprefix("0x")


def test_bytecode_signature():
    """
    Check the CBOR footer of the simple contract: it must report the runtime
    code length, no data sections, no immutables, and the compiler version.
    """
    compiled = vyper.compile_code(simple_contract_code, ["bytecode_runtime", "bytecode"])

    runtime_bytes = bytes.fromhex(compiled["bytecode_runtime"].removeprefix("0x"))
    deploy_bytes = bytes.fromhex(compiled["bytecode"].removeprefix("0x"))

    # footer layout: runtime length, data section lengths, immutables length, compiler info
    footer = _parse_cbor_metadata(deploy_bytes)
    runtime_len, data_section_lengths, immutables_len, compiler = footer

    assert runtime_len == len(runtime_bytes)
    assert data_section_lengths == []
    assert immutables_len == 0
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


def test_bytecode_signature_dense_jumptable():
    """
    Compile `many_functions` with OptimizationLevel.CODESIZE and check that the
    CBOR footer lists two data sections of 5 and 35 bytes.
    """
    compiled = vyper.compile_code(
        many_functions,
        ["bytecode_runtime", "bytecode"],
        settings=Settings(optimize=OptimizationLevel.CODESIZE),
    )

    runtime_bytes = bytes.fromhex(compiled["bytecode_runtime"].removeprefix("0x"))
    deploy_bytes = bytes.fromhex(compiled["bytecode"].removeprefix("0x"))

    # footer layout: runtime length, data section lengths, immutables length, compiler info
    footer = _parse_cbor_metadata(deploy_bytes)
    runtime_len, data_section_lengths, immutables_len, compiler = footer

    assert runtime_len == len(runtime_bytes)
    assert data_section_lengths == [5, 35]
    assert immutables_len == 0
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


def test_bytecode_signature_sparse_jumptable():
    """
    Compile `many_functions` with OptimizationLevel.GAS and check that the
    CBOR footer lists a single data section of 8 bytes.
    """
    compiled = vyper.compile_code(
        many_functions,
        ["bytecode_runtime", "bytecode"],
        settings=Settings(optimize=OptimizationLevel.GAS),
    )

    runtime_bytes = bytes.fromhex(compiled["bytecode_runtime"].removeprefix("0x"))
    deploy_bytes = bytes.fromhex(compiled["bytecode"].removeprefix("0x"))

    # footer layout: runtime length, data section lengths, immutables length, compiler info
    footer = _parse_cbor_metadata(deploy_bytes)
    runtime_len, data_section_lengths, immutables_len, compiler = footer

    assert runtime_len == len(runtime_bytes)
    assert data_section_lengths == [8]
    assert immutables_len == 0
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


def test_bytecode_signature_immutables():
    """
    Check the CBOR footer of the `has_immutables` contract: no data sections,
    and an immutables section of 32 bytes.
    """
    compiled = vyper.compile_code(has_immutables, ["bytecode_runtime", "bytecode"])

    runtime_bytes = bytes.fromhex(compiled["bytecode_runtime"].removeprefix("0x"))
    deploy_bytes = bytes.fromhex(compiled["bytecode"].removeprefix("0x"))

    # footer layout: runtime length, data section lengths, immutables length, compiler info
    footer = _parse_cbor_metadata(deploy_bytes)
    runtime_len, data_section_lengths, immutables_len, compiler = footer

    assert runtime_len == len(runtime_bytes)
    assert data_section_lengths == []
    assert immutables_len == 32
    assert compiler == {"vyper": list(vyper.version.version_tuple)}


# the CBOR metadata in the initcode must agree with the code that is actually deployed
@pytest.mark.parametrize("code", [simple_contract_code, has_immutables, many_functions])
def test_bytecode_signature_deployed(code, get_contract, w3):
    """
    Deploy `code`, then compare the length of the deployed bytecode with the
    lengths recorded in the initcode's CBOR footer.
    """
    contract = get_contract(code)
    onchain_code = w3.eth.get_code(contract.address)

    footer = _parse_cbor_metadata(contract._classic_contract.bytecode)
    runtime_len, _section_lengths, immutables_len, compiler = footer

    assert compiler == {"vyper": list(vyper.version.version_tuple)}

    # runtime_len already counts the data sections, but not the immutables
    assert len(onchain_code) == runtime_len + immutables_len
2 changes: 1 addition & 1 deletion vyper/compiler/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def _build_asm(asm_list):

def build_source_map_output(compiler_data: CompilerData) -> OrderedDict:
_, line_number_map = compile_ir.assembly_to_evm(
compiler_data.assembly_runtime, insert_vyper_signature=False
compiler_data.assembly_runtime, insert_compiler_metadata=False
)
# Sort line_number_map
out = OrderedDict()
Expand Down
12 changes: 7 additions & 5 deletions vyper/compiler/phases.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,12 @@ def assembly_runtime(self) -> list:

@cached_property
def bytecode(self) -> bytes:
insert_vyper_signature = not self.no_bytecode_metadata
return generate_bytecode(self.assembly, insert_vyper_signature=insert_vyper_signature)
insert_compiler_metadata = not self.no_bytecode_metadata
return generate_bytecode(self.assembly, insert_compiler_metadata=insert_compiler_metadata)

@cached_property
def bytecode_runtime(self) -> bytes:
return generate_bytecode(self.assembly_runtime, insert_vyper_signature=False)
return generate_bytecode(self.assembly_runtime, insert_compiler_metadata=False)

@cached_property
def blueprint_bytecode(self) -> bytes:
Expand Down Expand Up @@ -331,7 +331,7 @@ def _find_nested_opcode(assembly, key):
return any(_find_nested_opcode(x, key) for x in sublists)


def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes:
def generate_bytecode(assembly: list, insert_compiler_metadata: bool) -> bytes:
"""
Generate bytecode from assembly instructions.
Expand All @@ -345,4 +345,6 @@ def generate_bytecode(assembly: list, insert_vyper_signature: bool) -> bytes:
bytes
Final compiled bytecode.
"""
return compile_ir.assembly_to_evm(assembly, insert_vyper_signature=insert_vyper_signature)[0]
return compile_ir.assembly_to_evm(assembly, insert_compiler_metadata=insert_compiler_metadata)[
0
]
64 changes: 44 additions & 20 deletions vyper/ir/compile_ir.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import copy
import functools
import math
from dataclasses import dataclass

import cbor2

from vyper.codegen.ir_node import IRnode
from vyper.compiler.settings import OptimizationLevel
Expand Down Expand Up @@ -507,9 +510,9 @@ def _height_of(witharg):
elif code.value == "deploy":
memsize = code.args[0].value # used later to calculate _mem_deploy_start
ir = code.args[1]
padding = code.args[2].value
immutables_len = code.args[2].value
assert isinstance(memsize, int), "non-int memsize"
assert isinstance(padding, int), "non-int padding"
assert isinstance(immutables_len, int), "non-int immutables_len"

runtime_begin = mksymbol("runtime_begin")

Expand All @@ -521,14 +524,14 @@ def _height_of(witharg):
o.extend(["_sym_subcode_size", runtime_begin, "_mem_deploy_start", "CODECOPY"])

# calculate the len of runtime code
o.extend(["_OFST", "_sym_subcode_size", padding]) # stack: len
o.extend(["_OFST", "_sym_subcode_size", immutables_len]) # stack: len
o.extend(["_mem_deploy_start"]) # stack: len mem_ofst
o.extend(["RETURN"])

# since the asm data structures are very primitive, to make sure
# assembly_to_evm is able to calculate data offsets correctly,
# we pass the memsize via magic opcodes to the subcode
subcode = [_RuntimeHeader(runtime_begin, memsize)] + subcode
subcode = [_RuntimeHeader(runtime_begin, memsize, immutables_len)] + subcode

# append the runtime code after the ctor code
# `append(...)` call here is intentional.
Expand Down Expand Up @@ -1051,18 +1054,19 @@ def _length_of_data(assembly):
return ret


@dataclass
class _RuntimeHeader:
def __init__(self, label, ctor_mem_size):
self.label = label
self.ctor_mem_size = ctor_mem_size
label: str
ctor_mem_size: int
immutables_len: int

def __repr__(self):
return f"<RUNTIME {self.label} mem @{self.ctor_mem_size}>"
return f"<RUNTIME {self.label} mem @{self.ctor_mem_size} imms @{self.immutables_len}>"


@dataclass
class _DataHeader:
def __init__(self, label):
self.label = label
label: str

def __repr__(self):
return f"DATA {self.label}"
Expand Down Expand Up @@ -1092,21 +1096,21 @@ def _relocate_segments(assembly):


# TODO: change API to split assembly_to_evm and assembly_to_source/symbol_maps
def assembly_to_evm(assembly, pc_ofst=0, insert_vyper_signature=False):
def assembly_to_evm(assembly, pc_ofst=0, insert_compiler_metadata=False):
bytecode, source_maps, _ = assembly_to_evm_with_symbol_map(
assembly, pc_ofst=pc_ofst, insert_vyper_signature=insert_vyper_signature
assembly, pc_ofst=pc_ofst, insert_compiler_metadata=insert_compiler_metadata
)
return bytecode, source_maps


def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=False):
def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_compiler_metadata=False):
"""
Assembles assembly into EVM
assembly: list of asm instructions
pc_ofst: when constructing the source map, the amount to offset all
pcs by (no effect until we add deploy code source map)
insert_vyper_signature: whether to append vyper metadata to output
insert_compiler_metadata: whether to append vyper metadata to output
(should be true only for initcode; runtime code is assembled without it)
"""
line_number_map = {
Expand All @@ -1122,12 +1126,6 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=

runtime_code, runtime_code_start, runtime_code_end = None, None, None

bytecode_suffix = b""
if insert_vyper_signature:
# CBOR encoded: {"vyper": [major,minor,patch]}
bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple))
bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big")

# to optimize the size of deploy code - we want to use the smallest
# PUSH instruction possible which can support all memory symbols
# (and also works with linear pass symbol resolution)
Expand Down Expand Up @@ -1155,6 +1153,9 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=
if runtime_code_end is not None:
mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst)

data_section_lengths = []
immutables_len = None

# go through the code, resolving symbolic locations
# (i.e. JUMPDEST locations) to actual code locations
for i, item in enumerate(assembly):
Expand Down Expand Up @@ -1198,18 +1199,41 @@ def assembly_to_evm_with_symbol_map(assembly, pc_ofst=0, insert_vyper_signature=
# [_OFST, _mem_foo, bar] -> PUSHN (foo+bar)
pc -= 1
elif isinstance(item, list) and isinstance(item[0], _RuntimeHeader):
# we are in initcode
symbol_map[item[0].label] = pc
# add source map for all items in the runtime map
t = adjust_pc_maps(runtime_map, pc)
for key in line_number_map:
line_number_map[key].update(t[key])
immutables_len = item[0].immutables_len
pc += len(runtime_code)
# grab lengths of data sections from the runtime
for t in item:
if isinstance(t, list) and isinstance(t[0], _DataHeader):
data_section_lengths.append(_length_of_data(t))

elif isinstance(item, list) and isinstance(item[0], _DataHeader):
symbol_map[item[0].label] = pc
pc += _length_of_data(item)
else:
pc += 1

bytecode_suffix = b""
if insert_compiler_metadata:
# this will hold true when we are in initcode
assert immutables_len is not None
metadata = (
len(runtime_code),
data_section_lengths,
immutables_len,
{"vyper": version_tuple},
)
bytecode_suffix += cbor2.dumps(metadata)
# append the length of the footer, *including* the length
# of the length bytes themselves.
suffix_len = len(bytecode_suffix) + 2
bytecode_suffix += suffix_len.to_bytes(2, "big")

pc += len(bytecode_suffix)

symbol_map["_sym_code_end"] = pc
Expand Down

0 comments on commit 96d2042

Please sign in to comment.