From 24bb63aa0bb4b9436fd859e0de5ba002c805bddb Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 13:00:41 +0200 Subject: [PATCH 01/82] add RJUMP and RJUMPI instructions and create a new EVM version to represent the new functionality environment --- vyper/evm/opcodes.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index b9f1e77ca8..ec246bb0e6 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -25,11 +25,13 @@ "istanbul": 2, "berlin": 3, "paris": 4, + "eof": 5, + # ETC Forks "atlantis": 0, "agharta": 1, } -DEFAULT_EVM_VERSION: str = "paris" +DEFAULT_EVM_VERSION: str = "eof" # opcode as hex value @@ -102,6 +104,8 @@ "MSIZE": (0x59, 0, 1, 2), "GAS": (0x5A, 0, 1, 2), "JUMPDEST": (0x5B, 0, 0, 1), + "RJUMP": (0x5C, 0, 0, (None, None, None, None, 2)), + "RJUMPI": (0x5D, 1, 0, (None, None, None, None, 4)), "PUSH1": (0x60, 0, 1, 3), "PUSH2": (0x61, 0, 1, 3), "PUSH3": (0x62, 0, 1, 3), From ce4b8ba9b9ab495aec1f10dfd7db74379c180fc2 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 13:01:12 +0200 Subject: [PATCH 02/82] get_opcode() utility function to get the opcode for a mnemonic --- vyper/evm/opcodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index ec246bb0e6..3a1b7df41d 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -262,6 +262,8 @@ def _mk_version_opcodes(opcodes: OpcodeMap, idx: int) -> OpcodeRulesetMap: def get_opcodes() -> OpcodeRulesetMap: return _evm_opcodes[active_evm_version] +def get_opcode(mnemonic: str) -> int: + return get_opcodes()[mnemonic.upper()][0] def get_ir_opcodes() -> OpcodeRulesetMap: return _ir_opcodes[active_evm_version] From 6105ac6aad733b3be5b5db0db39bf187e94d5a34 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 13:03:21 +0200 Subject: [PATCH 03/82] implement RJUMP * store pc for all instructions * calculate offsets for RJUMP and emit bytecode --- vyper/ir/compile_ir.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 7b05162f1f..c8edb7752e 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -3,7 +3,7 @@ import math from vyper.codegen.ir_node import IRnode -from vyper.evm.opcodes import get_opcodes +from vyper.evm.opcodes import get_opcodes, get_opcode from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions from vyper.version import version_tuple @@ -12,6 +12,8 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F +## TODO: replace with actual version handling +EOF_ENABLED = True def num_to_bytearray(x): o = [] @@ -670,7 +672,7 @@ def _height_of(witharg): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.extend(["_sym_" + str(code.args[0]), "JUMP"]) + o.extend(["RJUMP", "_sym_" + str(code.args[0])]) return o # push a literal symbol elif isinstance(code.value, str) and is_symbol(code.value): @@ -1031,9 +1033,12 @@ def assembly_to_evm( if runtime_code_end is not None: mem_ofst_size = calc_mem_ofst_size(runtime_code_end + max_mem_ofst) + instr_offsets = [] + # go through the code, resolving symbolic locations # (i.e. JUMPDEST locations) to actual code locations for i, item in enumerate(assembly): + instr_offsets.append(pc) note_line_num(line_number_map, item, pc) if item == "DEBUG": continue # skip debug @@ -1136,6 +1141,15 @@ def assembly_to_evm( o += bytecode to_skip = 2 + elif EOF_ENABLED and item in ["RJUMP", "RJUMPI"]: + sym = assembly[i + 1] + assert is_symbol(sym), "Internal compiler error: RJUMP not followed by symbol" + offset = symbol_map[sym] - instr_offsets[i] + print("\n", symbol_map[sym], instr_offsets[i], offset) + o += bytes([get_opcode("RJUMP")]) + o += bytes(offset.to_bytes(2, 'big', signed=True)) + to_skip = 1 + elif isinstance(item, int): o += bytes([item]) elif isinstance(item, str) and item.upper() in get_opcodes(): From 646c5be20ea25909323bfef0361cbe4d7bc8ffb7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 13:04:27 +0200 Subject: [PATCH 04/82] properly output RJUMP enabled opcode formating --- vyper/compiler/output.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 3c2808d0e6..b8ea88303f 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -285,10 +285,14 @@ def _build_opcodes(bytecode: bytes) -> str: while bytecode_sequence: op = bytecode_sequence.popleft() - opcode_output.append(opcode_map[op]) + mnemonic = opcode_map[op] + opcode_output.append(mnemonic) if "PUSH" in opcode_output[-1]: - push_len = int(opcode_map[op][4:]) + push_len = int(mnemonic[4:]) push_values = [hex(bytecode_sequence.popleft())[2:] for i in range(push_len)] opcode_output.append(f"0x{''.join(push_values).upper()}") + elif mnemonic in ['RJUMP', 'RJUMPI']: + offset = int.from_bytes([bytecode_sequence.popleft() for _i in range(2)], 'big', signed=True) + opcode_output.append(hex(offset)) return " ".join(opcode_output) From 708e2ae58b01dae85e93155d8b2f156a7e25182b Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 13:49:24 +0200 Subject: [PATCH 05/82] refactor symbol to be before RJUMP for consistency --- vyper/ir/compile_ir.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index c8edb7752e..0ffb81d479 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -672,7 +672,7 @@ def _height_of(witharg): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.extend(["RJUMP", "_sym_" + str(code.args[0])]) + o.extend(["_sym_" + str(code.args[0]), "RJUMP"]) return o # push a literal symbol elif isinstance(code.value, str) and is_symbol(code.value): @@ -1125,7 +1125,16 @@ def assembly_to_evm( continue elif is_symbol(item): - if assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK": + if EOF_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI"]: + sym = item + assert is_symbol(sym), "Internal compiler error: RJUMP not preceded by symbol" + pc_post_instruction = instr_offsets[i] + 3 + offset = symbol_map[sym] - pc_post_instruction + print("\n", symbol_map[sym], pc_post_instruction, offset) + o += bytes([get_opcode("RJUMP")]) + o += bytes(offset.to_bytes(2, 'big', signed=True)) + to_skip = 1 + elif assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK": bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE)) o += bytecode @@ -1141,15 +1150,6 @@ def assembly_to_evm( o += bytecode to_skip = 2 - elif EOF_ENABLED and item in ["RJUMP", "RJUMPI"]: - sym = assembly[i + 1] - assert is_symbol(sym), "Internal compiler error: RJUMP not followed by symbol" - offset = symbol_map[sym] - instr_offsets[i] - print("\n", symbol_map[sym], instr_offsets[i], offset) - o += bytes([get_opcode("RJUMP")]) - o += bytes(offset.to_bytes(2, 'big', signed=True)) - to_skip = 1 - elif isinstance(item, int): o += bytes([item]) elif isinstance(item, str) and item.upper() in get_opcodes(): From f107f98162af39786e4035ebc06d6bedf7e0f15f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 13:57:56 +0200 Subject: [PATCH 06/82] if supported emmit RJUMPI --- vyper/ir/compile_ir.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 0ffb81d479..f5560630dd 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -15,6 +15,9 @@ ## TODO: replace with actual version handling EOF_ENABLED = True +def JUMPI() -> str: + return "RJUMPI" if EOF_ENABLED else "JUMPI" + def num_to_bytearray(x): o = [] while x > 0: @@ -143,7 +146,7 @@ def _assert_false(): # use a shared failure block for common case of assert(x). # in the future we might want to change the code # at _sym_revert0 to: INVALID - return [_revert_label, "JUMPI"] + return [_revert_label, JUMPI()] def _add_postambles(asm_ops): @@ -340,7 +343,7 @@ def _height_of(witharg): o = [] o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)) end_symbol = mksymbol("join") - o.extend(["ISZERO", end_symbol, "JUMPI"]) + o.extend(["ISZERO", end_symbol, JUMPI()]) o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) o.extend([end_symbol, "JUMPDEST"]) return o @@ -350,7 +353,7 @@ def _height_of(witharg): o.extend(_compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height)) mid_symbol = mksymbol("else") end_symbol = mksymbol("join") - o.extend(["ISZERO", mid_symbol, "JUMPI"]) + o.extend(["ISZERO", mid_symbol, JUMPI()]) o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"]) o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height)) @@ -407,7 +410,7 @@ def _height_of(witharg): # stack: i, rounds # if (0 == rounds) { goto end_dest; } - o.extend(["DUP1", "ISZERO", exit_dest, "JUMPI"]) + o.extend(["DUP1", "ISZERO", exit_dest, JUMPI()]) # stack: start, rounds if start.value != 0: @@ -439,7 +442,7 @@ def _height_of(witharg): # stack: exit_i, i+1 (new_i) # if (exit_i != new_i) { goto entry_dest } - o.extend(["DUP2", "DUP2", "XOR", entry_dest, "JUMPI"]) + o.extend(["DUP2", "DUP2", "XOR", entry_dest, JUMPI()]) o.extend([exit_dest, "JUMPDEST", "POP", "POP"]) return o @@ -543,7 +546,7 @@ def _height_of(witharg): elif code.value == "assert_unreachable": o = _compile_to_assembly(code.args[0], withargs, existing_labels, break_dest, height) end_symbol = mksymbol("reachable") - o.extend([end_symbol, "JUMPI", "INVALID", end_symbol, "JUMPDEST"]) + o.extend([end_symbol, JUMPI(), "INVALID", end_symbol, "JUMPDEST"]) return o # Assert (if false, exit) elif code.value == "assert": @@ -878,7 +881,7 @@ def _merge_iszero(assembly): if ( assembly[i : i + 2] == ["ISZERO", "ISZERO"] and is_symbol(assembly[i + 2]) - and assembly[i + 3] == "JUMPI" + and assembly[i + 3] == JUMPI() ): changed = True del assembly[i : i + 2] @@ -1044,7 +1047,7 @@ def assembly_to_evm( continue # skip debug # update pc_jump_map - if item == "JUMP": + if item in ("RJUMP", "JUMP"): last = assembly[i - 1] if is_symbol(last) and last.startswith("_sym_internal"): if last.endswith("cleanup"): @@ -1056,7 +1059,7 @@ def assembly_to_evm( else: # everything else line_number_map["pc_jump_map"][pc] = "-" - elif item in ("JUMPI", "JUMPDEST"): + elif item in ("RJUMPI", "JUMPI", "JUMPDEST"): line_number_map["pc_jump_map"][pc] = "-" # update pc @@ -1131,7 +1134,7 @@ def assembly_to_evm( pc_post_instruction = instr_offsets[i] + 3 offset = symbol_map[sym] - pc_post_instruction print("\n", symbol_map[sym], pc_post_instruction, offset) - o += bytes([get_opcode("RJUMP")]) + o += bytes([get_opcode(assembly[i + 1])]) o += bytes(offset.to_bytes(2, 'big', signed=True)) to_skip = 1 elif assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK": From a825150688a9bae6e0524c5408fa0b0ac183945d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 14:17:38 +0200 Subject: [PATCH 07/82] fix pc progression for RJUMP RJUMPI --- vyper/ir/compile_ir.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f5560630dd..bddf4385c7 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1070,6 +1070,8 @@ def assembly_to_evm( raise CompilerPanic(f"duplicate jumpdest {item}") symbol_map[item] = pc + elif assembly[i + 1] in ("RJUMP", "RJUMPI"): + pc += CODE_OFST_SIZE # highbyte lowbyte only else: pc += CODE_OFST_SIZE + 1 # PUSH2 highbits lowbits elif is_mem_sym(item): From 0ee6d69ea448c059e484b7875e3f6279a4046794 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 14:22:18 +0200 Subject: [PATCH 08/82] emit RJUMP for if and repeat --- vyper/ir/compile_ir.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index bddf4385c7..e55fa9b8e1 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -18,6 +18,9 @@ def JUMPI() -> str: return "RJUMPI" if EOF_ENABLED else "JUMPI" +def JUMP() -> str: + return "RJUMP" if EOF_ENABLED else "JUMP" + def num_to_bytearray(x): o = [] while x > 0: @@ -355,7 +358,7 @@ def _height_of(witharg): end_symbol = mksymbol("join") o.extend(["ISZERO", mid_symbol, JUMPI()]) o.extend(_compile_to_assembly(code.args[1], withargs, existing_labels, break_dest, height)) - o.extend([end_symbol, "JUMP", mid_symbol, "JUMPDEST"]) + o.extend([end_symbol, JUMP(), mid_symbol, "JUMPDEST"]) o.extend(_compile_to_assembly(code.args[2], withargs, existing_labels, break_dest, height)) o.extend([end_symbol, "JUMPDEST"]) return o @@ -452,7 +455,7 @@ def _height_of(witharg): if not break_dest: raise CompilerPanic("Invalid break") dest, continue_dest, break_height = break_dest - return [continue_dest, "JUMP"] + return [continue_dest, JUMP()] # Break from inside a for loop elif code.value == "break": if not break_dest: @@ -462,7 +465,7 @@ def _height_of(witharg): n_local_vars = height - break_height # clean up any stack items declared in the loop body cleanup_local_vars = ["POP"] * n_local_vars - return cleanup_local_vars + [dest, "JUMP"] + return cleanup_local_vars + [dest, JUMP()] # Break from inside one or more for loops prior to a return statement inside the loop elif code.value == "cleanup_repeat": if not break_dest: @@ -1135,7 +1138,6 @@ def assembly_to_evm( assert is_symbol(sym), "Internal compiler error: RJUMP not preceded by symbol" pc_post_instruction = instr_offsets[i] + 3 offset = symbol_map[sym] - pc_post_instruction - print("\n", symbol_map[sym], pc_post_instruction, offset) o += bytes([get_opcode(assembly[i + 1])]) o += bytes(offset.to_bytes(2, 'big', signed=True)) to_skip = 1 From 7e605515fb71ae4614e02a85d9b9f761964b3a38 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 14:36:12 +0200 Subject: [PATCH 09/82] assert for very lange offsets --- vyper/ir/compile_ir.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e55fa9b8e1..ac00d2bad2 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1138,6 +1138,8 @@ def assembly_to_evm( assert is_symbol(sym), "Internal compiler error: RJUMP not preceded by symbol" pc_post_instruction = instr_offsets[i] + 3 offset = symbol_map[sym] - pc_post_instruction + # TODO: fallback to dynamic jumps? + assert offset > -32767 and offset <= 32767, "Offset too big for relative jump" o += bytes([get_opcode(assembly[i + 1])]) o += bytes(offset.to_bytes(2, 'big', signed=True)) to_skip = 1 From 1bae653a7f6df31c9a4b12865cbfec8f82275fa3 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 14:44:47 +0200 Subject: [PATCH 10/82] enable RJUMP ineffiecient optimization --- vyper/ir/compile_ir.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index ac00d2bad2..f94120d306 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -781,7 +781,7 @@ def _prune_unreachable_code(assembly): changed = False i = 0 while i < len(assembly) - 1: - if assembly[i] in ("JUMP", "RETURN", "REVERT", "STOP") and not ( + if assembly[i] in ("JUMP", "RJUMP", "RETURN", "REVERT", "STOP") and not ( is_symbol(assembly[i + 1]) or assembly[i + 1] == "JUMPDEST" ): changed = True @@ -799,7 +799,7 @@ def _prune_inefficient_jumps(assembly): while i < len(assembly) - 4: if ( is_symbol(assembly[i]) - and assembly[i + 1] == "JUMP" + and assembly[i + 1] in ("JUMP", "RJUMP") and assembly[i] == assembly[i + 2] and assembly[i + 3] == "JUMPDEST" ): @@ -811,7 +811,6 @@ def _prune_inefficient_jumps(assembly): return changed - def _merge_jumpdests(assembly): # When we have multiple JUMPDESTs in a row, or when a JUMPDEST # is immediately followed by another JUMP, we can skip the @@ -832,7 +831,7 @@ def _merge_jumpdests(assembly): if assembly[j] == current_symbol and i != j: assembly[j] = new_symbol changed = True - elif is_symbol(assembly[i + 2]) and assembly[i + 3] == "JUMP": + elif is_symbol(assembly[i + 2]) and assembly[i + 3] in ("JUMP", "RJUMP"): # _sym_x JUMPDEST _sym_y JUMP # replace all instances of _sym_x with _sym_y # (except for _sym_x JUMPDEST - don't want duplicate labels) From 54a2da01729482e0b578c19d6a261fd6150d46a4 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 15:10:27 +0200 Subject: [PATCH 11/82] update evm version and proper checking --- vyper/evm/opcodes.py | 2 +- vyper/ir/compile_ir.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 3a1b7df41d..4afd1e1110 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -3,7 +3,7 @@ from vyper.exceptions import CompilerPanic from vyper.typing import OpcodeGasCost, OpcodeMap, OpcodeRulesetMap, OpcodeRulesetValue, OpcodeValue -active_evm_version: int = 4 +active_evm_version: int = 5 # EVM version rules work as follows: # 1. Fork rules go from oldest (lowest value) to newest (highest value). diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f94120d306..73b16565c0 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -3,7 +3,7 @@ import math from vyper.codegen.ir_node import IRnode -from vyper.evm.opcodes import get_opcodes, get_opcode +from vyper.evm.opcodes import get_opcodes, get_opcode, version_check from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions from vyper.version import version_tuple @@ -12,8 +12,7 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F -## TODO: replace with actual version handling -EOF_ENABLED = True +EOF_ENABLED = version_check("eof") def JUMPI() -> str: return "RJUMPI" if EOF_ENABLED else "JUMPI" From 9d13915c3a424fc748862a54b5d86b8c6142d82c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 15:48:40 +0200 Subject: [PATCH 12/82] add CALLF, RETF, JUMPF definitions --- vyper/evm/opcodes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 4afd1e1110..6847d50225 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -187,6 +187,9 @@ "INVALID": (0xFE, 0, 0, 0), "DEBUG": (0xA5, 1, 0, 0), "BREAKPOINT": (0xA6, 0, 0, 0), + "CALLF": (0xB0, 0, 0, (None, None, None, None, 5)), + "RETF": (0xB1, 0, 0, (None, None, None, None, 4)), + "JUMPF": (0xB2, 0, 0, (None, None, None, None, 4)), } PSEUDO_OPCODES: OpcodeMap = { From a42ef8665f9cec70ae165d303b11655f07d9f6fd Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 17:44:12 +0200 Subject: [PATCH 13/82] fix tests --- tests/compiler/test_opcodes.py | 2 +- vyper/ir/compile_ir.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/compiler/test_opcodes.py b/tests/compiler/test_opcodes.py index 67ea10c311..d099b1ab1c 100644 --- a/tests/compiler/test_opcodes.py +++ b/tests/compiler/test_opcodes.py @@ -43,7 +43,7 @@ def test_version_check(evm_version): def test_get_opcodes(evm_version): op = opcodes.get_opcodes() - if evm_version in ("paris", "berlin"): + if evm_version in ("eof", "paris", "berlin"): assert "CHAINID" in op assert op["SLOAD"][-1] == 2100 elif evm_version == "istanbul": diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 73b16565c0..937090196e 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -677,7 +677,7 @@ def _height_of(witharg): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.extend(["_sym_" + str(code.args[0]), "RJUMP"]) + o.extend(["_sym_" + str(code.args[0]), JUMP()]) return o # push a literal symbol elif isinstance(code.value, str) and is_symbol(code.value): From ca268d21a3cb7bfd0f3a3c3018ae7505648a70f0 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 22:38:57 +0200 Subject: [PATCH 14/82] rename EOF evm version to shanghai --- tests/compiler/test_opcodes.py | 2 +- vyper/evm/opcodes.py | 4 ++-- vyper/ir/compile_ir.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/compiler/test_opcodes.py b/tests/compiler/test_opcodes.py index d099b1ab1c..de62e28bc0 100644 --- a/tests/compiler/test_opcodes.py +++ b/tests/compiler/test_opcodes.py @@ -43,7 +43,7 @@ def test_version_check(evm_version): def test_get_opcodes(evm_version): op = opcodes.get_opcodes() - if evm_version in ("eof", "paris", "berlin"): + if evm_version in ("shanghai", "paris", "berlin"): assert "CHAINID" in op assert op["SLOAD"][-1] == 2100 elif evm_version == "istanbul": diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 6847d50225..452a0bf0a2 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -25,13 +25,13 @@ "istanbul": 2, "berlin": 3, "paris": 4, - "eof": 5, + "shanghai": 5, # ETC Forks "atlantis": 0, "agharta": 1, } -DEFAULT_EVM_VERSION: str = "eof" +DEFAULT_EVM_VERSION: str = "shanghai" # opcode as hex value diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 937090196e..c5a641ac3b 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -12,7 +12,7 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F -EOF_ENABLED = version_check("eof") +EOF_ENABLED = version_check("shanghai") def JUMPI() -> str: return "RJUMPI" if EOF_ENABLED else "JUMPI" From 4c662540da27ad9c5e4bcd87af46aa036c3e85fa Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 20 Dec 2022 22:41:23 +0200 Subject: [PATCH 15/82] add RJUMPV --- vyper/evm/opcodes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 452a0bf0a2..de115d7923 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -106,6 +106,7 @@ "JUMPDEST": (0x5B, 0, 0, 1), "RJUMP": (0x5C, 0, 0, (None, None, None, None, 2)), "RJUMPI": (0x5D, 1, 0, (None, None, None, None, 4)), + "RJUMPV": (0x5E, 1, 0, (None, None, None, None, 4)), "PUSH1": (0x60, 0, 1, 3), "PUSH2": (0x61, 0, 1, 3), "PUSH3": (0x62, 0, 1, 3), From d4febb0d15c5108f867ad5d4b5a32b495a49c894 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 21 Dec 2022 09:48:35 +0200 Subject: [PATCH 16/82] add output for new opcodes * add JUMPF and CALLF output * throw exception when unknown opcode encountered to avoid the cryptic "KeyError" exception --- vyper/compiler/output.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index b8ea88303f..155895178a 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -13,6 +13,7 @@ from vyper.semantics.types.function import FunctionVisibility, StateMutability from vyper.typing import StorageLayout from vyper.warnings import ContractSizeLimitWarning +from vyper.exceptions import CompilerPanic def build_ast_dict(compiler_data: CompilerData) -> dict: @@ -285,13 +286,17 @@ def _build_opcodes(bytecode: bytes) -> str: while bytecode_sequence: op = bytecode_sequence.popleft() - mnemonic = opcode_map[op] + mnemonic = opcode_map.get(op) + + if mnemonic == None: + raise CompilerPanic(f"Unsupported opcode {hex(op)}") + opcode_output.append(mnemonic) if "PUSH" in opcode_output[-1]: push_len = int(mnemonic[4:]) push_values = [hex(bytecode_sequence.popleft())[2:] for i in range(push_len)] opcode_output.append(f"0x{''.join(push_values).upper()}") - elif mnemonic in ['RJUMP', 'RJUMPI']: + elif mnemonic in ['RJUMP', 'RJUMPI', 'JUMPF', 'CALLF']: offset = int.from_bytes([bytecode_sequence.popleft() for _i in range(2)], 'big', signed=True) opcode_output.append(hex(offset)) From 67b2c038a94471334bd76261ffdab998ae3d4932 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 21 Dec 2022 09:49:55 +0200 Subject: [PATCH 17/82] add CALLF emission and JUMPF CALLF handling --- vyper/ir/compile_ir.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index c5a641ac3b..ebc884c431 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -677,7 +677,10 @@ def _height_of(witharg): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - o.extend(["_sym_" + str(code.args[0]), JUMP()]) + if EOF_ENABLED: + o.extend(["_sym_" + str(code.args[0]), "CALLF"]) + else: + o.extend(["_sym_" + str(code.args[0]), JUMP()]) return o # push a literal symbol elif isinstance(code.value, str) and is_symbol(code.value): @@ -1048,7 +1051,7 @@ def assembly_to_evm( continue # skip debug # update pc_jump_map - if item in ("RJUMP", "JUMP"): + if item in ("RJUMP", "JUMP", "JUMPF", "CALLF"): last = assembly[i - 1] if is_symbol(last) and last.startswith("_sym_internal"): if last.endswith("cleanup"): @@ -1071,7 +1074,7 @@ def assembly_to_evm( raise CompilerPanic(f"duplicate jumpdest {item}") symbol_map[item] = pc - elif assembly[i + 1] in ("RJUMP", "RJUMPI"): + elif assembly[i + 1] in ("RJUMP", "RJUMPI", "JUMPF", "CALLF"): pc += CODE_OFST_SIZE # highbyte lowbyte only else: pc += CODE_OFST_SIZE + 1 # PUSH2 highbits lowbits @@ -1131,9 +1134,9 @@ def assembly_to_evm( continue elif is_symbol(item): - if EOF_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI"]: + if EOF_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "JUMPF", "CALLF"]: sym = item - assert is_symbol(sym), "Internal compiler error: RJUMP not preceded by symbol" + assert is_symbol(sym), f"Internal compiler error: {assembly[i + 1]} not preceded by symbol" pc_post_instruction = instr_offsets[i] + 3 offset = symbol_map[sym] - pc_post_instruction # TODO: fallback to dynamic jumps? From 66202cca1932e3a5cdbd8636b43342fe741d98de Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Sat, 24 Dec 2022 15:48:12 +0200 Subject: [PATCH 18/82] add validate_eof.py cli util --- vyper/cli/validate_eof.py | 208 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100755 vyper/cli/validate_eof.py diff --git a/vyper/cli/validate_eof.py b/vyper/cli/validate_eof.py new file mode 100755 index 0000000000..438fa8ef5b --- /dev/null +++ b/vyper/cli/validate_eof.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 +import sys +import argparse + +MAGIC = b'\xEF\x00' +VERSION = 0x01 +S_TERMINATOR = 0x00 +S_TYPE = 0x01 +S_CODE = 0x02 +S_DATA = 0x03 + +class ValidationException(Exception): + """Validation exception.""" + +class FunctionType: + def __init__(self, inputs, outputs) -> None: + self.inputs = inputs + self.outputs = outputs + +# The ranges below are as specified in the Yellow Paper. +# Note: range(s, e) excludes e, hence the +1 +valid_opcodes = [ + *range(0x00, 0x0b + 1), + *range(0x10, 0x1d + 1), + 0x20, + *range(0x30, 0x3f + 1), + *range(0x40, 0x48 + 1), + *range(0x50, 0x55 + 1), *range(0x58, 0x5d + 1), + *range(0x60, 0x6f + 1), + *range(0x70, 0x7f + 1), + *range(0x80, 0x8f + 1), + *range(0x90, 0x9f + 1), + *range(0xa0, 0xa4 + 1), + 0xb0, 0xb1, 0xb2, + # Note: 0xfe is considered assigned. + 0xf0, 0xf1, 0xf3, 0xf4, 0xf5, 0xfa, 0xfd, 0xfe +] + +# STOP, RETF, JUMPF, RETURN, REVERT, INVALID +terminating_opcodes = [0x00, 0xb1, 0xb2, 0xf3, 0xfd, 0xfe] + +immediate_sizes = 256 * [0] +immediate_sizes[0x5c] = 2 # RJUMP +immediate_sizes[0x5d] = 2 # RJUMPI +immediate_sizes[0xb0] = 2 # CALLF +immediate_sizes[0xb2] = 2 # JUMPF +for opcode in range(0x60, 0x7f + 1): # PUSH1..PUSH32 + immediate_sizes[opcode] = opcode - 0x60 + 1 + +# Validate EOF code. +# Raises ValidationException on invalid code +def validate_eof(code: bytes): + # Check version + if len(code) < 3 or code[2] != VERSION: + raise ValidationException("invalid version") + + # Process section headers + section_sizes = {S_TYPE: [], S_CODE: [], S_DATA: []} + pos = 3 + while True: + # Terminator not found + if pos >= len(code): + raise ValidationException("no section terminator") + + section_id = code[pos] + pos += 1 + if section_id == S_TERMINATOR: + break + + # Disallow unknown sections + if not section_id in section_sizes: + raise ValidationException("invalid section id") + + # Data section preceding code section (i.e. code section following data section) + if section_id == S_CODE and len(section_sizes[S_DATA]) != 0: + raise ValidationException("data section preceding code section") + + # Code section or data section preceding type section + if section_id == S_TYPE and (len(section_sizes[S_CODE]) != 0 or len(section_sizes[S_DATA]) != 0): + raise ValidationException("code or data section preceding type section") + + # Multiple type or data sections + if section_id == S_TYPE and len(section_sizes[S_TYPE]) != 0: + raise ValidationException("multiple type sections") + if section_id == S_DATA and len(section_sizes[S_DATA]) != 0: + raise ValidationException("multiple data sections") + + # Truncated section size + if (pos + 1) >= len(code): + raise ValidationException("truncated section size") + + section_count = (code[pos] << 8) | code[pos + 1] + pos += 2 + if section_id == S_TYPE: + section_sizes[S_TYPE].append(section_count) + elif section_id == S_CODE: + section_sizes[S_CODE] = [0] * section_count + pos += section_count * 2 + elif section_id == S_DATA: + section_sizes[S_DATA].append(section_count) + + # Code section cannot be absent + if len(section_sizes[S_CODE]) == 0: + raise ValidationException("no code section") + + # Not more than 1024 code sections + if len(section_sizes[S_CODE]) > 1024: + raise ValidationException("more than 1024 code sections") + + # Type section can be absent only if single code section is present + if len(section_sizes[S_TYPE]) == 0 and len(section_sizes[S_CODE]) != 1: + raise ValidationException("no obligatory type section") + + # Type section, if present, has size corresponding to number of code sections + if section_sizes[S_TYPE][0] != 0 and section_sizes[S_TYPE][0] != len(section_sizes[S_CODE]) * 4: + raise ValidationException("invalid type section size") + + # The entire container must be scanned + # print(section_sizes, (pos + sum(section_sizes[S_TYPE]) + sum(section_sizes[S_CODE]) + sum(section_sizes[S_DATA]))) + # if len(code) != (pos + sum(section_sizes[S_TYPE]) + sum(section_sizes[S_CODE]) + sum(section_sizes[S_DATA])): + # raise ValidationException("container size not equal to sum of section sizes") + + # First type section, if present, has 0 inputs and 0 outputs + if len(section_sizes[S_TYPE]) > 0 and (code[pos] != 0 or code[pos + 1] != 0): + raise ValidationException("invalid type of section 0") + +# Raises ValidationException on invalid code +def validate_code_section(func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0)]): + # Note that EOF1 already asserts this with the code section requirements + assert len(code) > 0 + + opcode = 0 + pos = 0 + rjumpdests = set() + immediates = set() + while pos < len(code): + # Ensure the opcode is valid + opcode = code[pos] + pos += 1 + if not opcode in valid_opcodes: + raise ValidationException("undefined instruction") + + if opcode == 0x5c or opcode == 0x5d: + if pos + 2 > len(code): + raise ValidationException("truncated relative jump offset") + offset = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = True) + + rjumpdest = pos + 2 + offset + if rjumpdest < 0 or rjumpdest >= len(code): + raise ValidationException("relative jump destination out of bounds") + + rjumpdests.add(rjumpdest) + elif opcode == 0xb0: + if pos + 2 > len(code): + raise ValidationException("truncated CALLF immediate") + section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) + + if section_id >= len(types): + raise ValidationException("invalid section id") + elif opcode == 0xb2: + if pos + 2 > len(code): + raise ValidationException("truncated JUMPF immediate") + section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) + + if section_id >= len(types): + raise ValidationException("invalid section id") + + if types[section_id].outputs != types[func_id].outputs: + raise ValidationException("incompatible function type for JUMPF") + + # Save immediate value positions + immediates.update(range(pos, pos + immediate_sizes[opcode])) + # Skip immediates + pos += immediate_sizes[opcode] + + # Ensure last opcode's immediate doesn't go over code end + if pos != len(code): + raise ValidationException("truncated immediate") + + # opcode is the *last opcode* + if not opcode in terminating_opcodes: + raise ValidationException("no terminating instruction") + + # Ensure relative jump destinations don't target immediates + if not rjumpdests.isdisjoint(immediates): + raise ValidationException("relative jump destination targets immediate") + + +def _parse_args(argv): + parser = argparse.ArgumentParser( + description="Vyper EOFv1 validation utility", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "input_file", + help="Input file containing the EOFv1 formated bytecode", + nargs="?", + ) + + args = parser.parse_args(argv) + + if args.input_file: + with open(args.input_file, "r") as f: + code = bytes.fromhex(f.read()) + validate_eof(code) + +if __name__ == "__main__": + _parse_args(sys.argv[1:]) \ No newline at end of file From 9c3972796350e32aef73597b8ae0833f5880a80c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Sat, 24 Dec 2022 16:24:37 +0200 Subject: [PATCH 19/82] update EOFv1 validation script --- vyper/cli/validate_eof.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/vyper/cli/validate_eof.py b/vyper/cli/validate_eof.py index 438fa8ef5b..516578bcf5 100755 --- a/vyper/cli/validate_eof.py +++ b/vyper/cli/validate_eof.py @@ -56,6 +56,9 @@ def validate_eof(code: bytes): # Process section headers section_sizes = {S_TYPE: [], S_CODE: [], S_DATA: []} + code_section_ios = [] + code_sections = [] + data_sections = [] pos = 3 while True: # Terminator not found @@ -92,12 +95,14 @@ def validate_eof(code: bytes): section_count = (code[pos] << 8) | code[pos + 1] pos += 2 if section_id == S_TYPE: - section_sizes[S_TYPE].append(section_count) + section_sizes[S_TYPE].append(section_count) elif section_id == S_CODE: - section_sizes[S_CODE] = [0] * section_count - pos += section_count * 2 + for i in range(section_count): + code_size = (code[pos] << 8) | code[pos + 1] + pos += 2 + section_sizes[S_CODE].append(code_size) elif section_id == S_DATA: - section_sizes[S_DATA].append(section_count) + section_sizes[S_DATA].append(section_count) # Code section cannot be absent if len(section_sizes[S_CODE]) == 0: @@ -120,9 +125,27 @@ def validate_eof(code: bytes): # if len(code) != (pos + sum(section_sizes[S_TYPE]) + sum(section_sizes[S_CODE]) + sum(section_sizes[S_DATA])): # raise ValidationException("container size not equal to sum of section sizes") - # First type section, if present, has 0 inputs and 0 outputs - if len(section_sizes[S_TYPE]) > 0 and (code[pos] != 0 or code[pos + 1] != 0): - raise ValidationException("invalid type of section 0") + # Read TYPE section + for i in range(len(section_sizes[S_CODE])): + input_count = code[pos] + output_count = code[pos + 1] + max_stack_height = (code[pos + 2] << 8) | code[pos + 3] + code_section_ios.append((input_count, output_count, max_stack_height)) + pos += 4 + + # Read CODE sections + for section_size in section_sizes[S_CODE]: + code_sections.append(code[pos:pos + section_size]) + pos += section_size + + # Read DATA sections + for section_size in section_sizes[S_DATA]: + data_sections.append(code[pos:pos + section_size]) + pos += section_size + + # First code section should have zero inputs and outputs + if code_section_ios[0][0] != 0 or code_section_ios[0][1] != 0: + raise ValidationException("invalid input/output count for code section 0") # Raises ValidationException on invalid code def validate_code_section(func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0)]): From bb37644e49b4524ebf3d5e2728cdcf5df38b1047 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Sat, 24 Dec 2022 16:41:02 +0200 Subject: [PATCH 20/82] track proper size --- vyper/cli/validate_eof.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/vyper/cli/validate_eof.py b/vyper/cli/validate_eof.py index 516578bcf5..91451b3ffa 100755 --- a/vyper/cli/validate_eof.py +++ b/vyper/cli/validate_eof.py @@ -125,6 +125,10 @@ def validate_eof(code: bytes): # if len(code) != (pos + sum(section_sizes[S_TYPE]) + sum(section_sizes[S_CODE]) + sum(section_sizes[S_DATA])): # raise ValidationException("container size not equal to sum of section sizes") + # Truncated section size + if (pos + len(section_sizes[S_CODE]) * 4) > len(code): + raise ValidationException("truncated TYPE section size") + # Read TYPE section for i in range(len(section_sizes[S_CODE])): input_count = code[pos] @@ -135,14 +139,23 @@ def validate_eof(code: bytes): # Read CODE sections for section_size in section_sizes[S_CODE]: + # Truncated section size + if (pos + section_size) > len(code): + raise ValidationException("truncated CODE section size") code_sections.append(code[pos:pos + section_size]) pos += section_size # Read DATA sections for section_size in section_sizes[S_DATA]: + # Truncated section size + if (pos + section_size) > len(code): + raise ValidationException("truncated DATA section size") data_sections.append(code[pos:pos + section_size]) pos += section_size + if (pos) != len(code): + raise ValidationException("Bad file size") + # First code section should have zero inputs and outputs if code_section_ios[0][0] != 0 or code_section_ios[0][1] != 0: raise ValidationException("invalid input/output count for code section 0") From fa5df339c5ac5943fceb0db17b98f2f45b80cf9c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Sat, 24 Dec 2022 16:54:39 +0200 Subject: [PATCH 21/82] validate code --- vyper/cli/validate_eof.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/vyper/cli/validate_eof.py b/vyper/cli/validate_eof.py index 91451b3ffa..a9ab4631bc 100755 --- a/vyper/cli/validate_eof.py +++ b/vyper/cli/validate_eof.py @@ -13,9 +13,10 @@ class ValidationException(Exception): """Validation exception.""" class FunctionType: - def __init__(self, inputs, outputs) -> None: + def __init__(self, inputs, outputs, max_stack_height) -> None: self.inputs = inputs self.outputs = outputs + self.max_stack_height = max_stack_height # The ranges below are as specified in the Yellow Paper. # Note: range(s, e) excludes e, hence the +1 @@ -134,17 +135,19 @@ def validate_eof(code: bytes): input_count = code[pos] output_count = code[pos + 1] max_stack_height = (code[pos + 2] << 8) | code[pos + 3] - code_section_ios.append((input_count, output_count, max_stack_height)) + code_section_ios.append(FunctionType(input_count, output_count, max_stack_height)) pos += 4 # Read CODE sections - for section_size in section_sizes[S_CODE]: + for i, section_size in enumerate(section_sizes[S_CODE]): # Truncated section size if (pos + section_size) > len(code): raise ValidationException("truncated CODE section size") code_sections.append(code[pos:pos + section_size]) pos += section_size + validate_code_section(i, code_sections[-1], code_section_ios) + # Read DATA sections for section_size in section_sizes[S_DATA]: # Truncated section size @@ -157,11 +160,11 @@ def validate_eof(code: bytes): raise ValidationException("Bad file size") # First code section should have zero inputs and outputs - if code_section_ios[0][0] != 0 or code_section_ios[0][1] != 0: + if code_section_ios[0].inputs != 0 or code_section_ios[0].outputs != 0: raise ValidationException("invalid input/output count for code section 0") # Raises ValidationException on invalid code -def validate_code_section(func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0)]): +def validate_code_section(func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0, 0)]): # Note that EOF1 already asserts this with the code section requirements assert len(code) > 0 From bbaa63ee05c99decf13e2118d883aad93db78a2a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 28 Dec 2022 15:11:31 +0200 Subject: [PATCH 22/82] EOFv1 enabled emits retf --- vyper/ir/compile_ir.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index ebc884c431..2d913b98a5 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -734,7 +734,13 @@ def _height_of(witharg): return [] elif code.value == "exit_to": - raise CodegenPanic("exit_to not implemented yet!") + if not EOF_ENABLED: + raise CodegenPanic("exit_to not implemented yet!") + + if code.args[0].value == "return_pc": + return ["RETF"] + else: + return [str(code.args[0]), "JUMPF"] # Jump to cleanup function # inject debug opcode. elif code.value == "debugger": From c703e20f48be39e901fb070e5a09aed5f3897703 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 28 Dec 2022 15:12:21 +0200 Subject: [PATCH 23/82] refactor EOF_ENABLED to EOFv1_ENABLED --- vyper/ir/compile_ir.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 2d913b98a5..1e0ac29881 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -12,13 +12,13 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F -EOF_ENABLED = version_check("shanghai") +EOFv1_ENABLED = version_check("shanghai") def JUMPI() -> str: - return "RJUMPI" if EOF_ENABLED else "JUMPI" + return "RJUMPI" if EOFv1_ENABLED else "JUMPI" def JUMP() -> str: - return "RJUMP" if EOF_ENABLED else "JUMP" + return "RJUMP" if EOFv1_ENABLED else "JUMP" def num_to_bytearray(x): o = [] @@ -677,7 +677,7 @@ def _height_of(witharg): o = [] for i, c in enumerate(reversed(code.args[1:])): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - if EOF_ENABLED: + if EOFv1_ENABLED: o.extend(["_sym_" + str(code.args[0]), "CALLF"]) else: o.extend(["_sym_" + str(code.args[0]), JUMP()]) @@ -734,7 +734,7 @@ def _height_of(witharg): return [] elif code.value == "exit_to": - if not EOF_ENABLED: + if not EOFv1_ENABLED: raise CodegenPanic("exit_to not implemented yet!") if code.args[0].value == "return_pc": @@ -1140,7 +1140,7 @@ def assembly_to_evm( continue elif is_symbol(item): - if EOF_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "JUMPF", "CALLF"]: + if EOFv1_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "JUMPF", "CALLF"]: sym = item assert is_symbol(sym), f"Internal compiler error: {assembly[i + 1]} not preceded by symbol" pc_post_instruction = instr_offsets[i] + 3 From 190261d48489bcfedb3574ce1daaad7675623939 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 28 Dec 2022 15:16:58 +0200 Subject: [PATCH 24/82] generate callf --- vyper/ir/compile_ir.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 1e0ac29881..de5682a54c 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -675,8 +675,11 @@ def _height_of(witharg): # jump to a symbol, and push variable # of arguments onto stack elif code.value == "goto": o = [] - for i, c in enumerate(reversed(code.args[1:])): + args = code.args[2:] if EOFv1_ENABLED and is_symbol(code.args[1].value) else code.args[1:] + + for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) + if EOFv1_ENABLED: o.extend(["_sym_" + str(code.args[0]), "CALLF"]) else: From e4ec072e96b87afaa840dfe7433255f8a0fb3b68 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 28 Dec 2022 20:49:08 +0200 Subject: [PATCH 25/82] disable return rewrites and only handle gotos that are not simple jumps --- vyper/ir/compile_ir.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index de5682a54c..a58d81440e 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -111,6 +111,9 @@ def calc_mem_ofst_size(ctor_mem_size): # by better liveness analysis. # NOTE: modifies input in-place def _rewrite_return_sequences(ir_node, label_params=None): + if EOFv1_ENABLED: + return + args = ir_node.args if ir_node.value == "return": @@ -675,7 +678,7 @@ def _height_of(witharg): # jump to a symbol, and push variable # of arguments onto stack elif code.value == "goto": o = [] - args = code.args[2:] if EOFv1_ENABLED and is_symbol(code.args[1].value) else code.args[1:] + args = code.args[2:] if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) @@ -743,7 +746,7 @@ def _height_of(witharg): if code.args[0].value == "return_pc": return ["RETF"] else: - return [str(code.args[0]), "JUMPF"] # Jump to cleanup function + return [str(code.args[0]), JUMP()] # Jump to cleanup function # inject debug opcode. elif code.value == "debugger": From 3314262288480f1898f557dd5771e6bd75b68d35 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 28 Dec 2022 20:59:14 +0200 Subject: [PATCH 26/82] skip JUMPDESTs when EOFv1 --- vyper/ir/compile_ir.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index a58d81440e..42f5524d41 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1101,6 +1101,8 @@ def assembly_to_evm( pc -= 1 elif item == "BLANK": pc += 0 + elif item == "JUMPDEST" and EOFv1_ENABLED: + pc += 0 elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"): # _DEPLOY_MEM_OFST is assembly magic which will # get removed during final assembly-to-bytecode @@ -1141,6 +1143,9 @@ def assembly_to_evm( if item in ("DEBUG", "BLANK"): continue # skippable opcodes + # When EOFv1 enabled skip emiting JUMPDESTs + elif item == "JUMPDEST" and EOFv1_ENABLED: + continue elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"): continue From 930b8ca9f1717967dceae7e85651f8303b1a1172 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 00:02:32 +0200 Subject: [PATCH 27/82] exit_to wip --- vyper/ir/compile_ir.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 42f5524d41..f14b2a3bf9 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -678,7 +678,7 @@ def _height_of(witharg): # jump to a symbol, and push variable # of arguments onto stack elif code.value == "goto": o = [] - args = code.args[2:] if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] + args = code.args[1:] # if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) @@ -688,6 +688,20 @@ def _height_of(witharg): else: o.extend(["_sym_" + str(code.args[0]), JUMP()]) return o + elif code.value == "exit_to": + o = [] + args = code.args[1:] # if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] + + for i, c in enumerate(reversed(args)): + o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) + + if EOFv1_ENABLED: + if str(code.args[0]) == "return_pc": + o.extend(["RETF"]) + else: + o.extend([str(code.args[0]), "CALLF"]) + + return o # push a literal symbol elif isinstance(code.value, str) and is_symbol(code.value): return [code.value] From 7af32150efffa6f6b360d3bc8bcb640971e6935f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 00:08:42 +0200 Subject: [PATCH 28/82] goto wip --- vyper/ir/compile_ir.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index f14b2a3bf9..eb3a8e54a8 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -683,23 +683,27 @@ def _height_of(witharg): for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - if EOFv1_ENABLED: - o.extend(["_sym_" + str(code.args[0]), "CALLF"]) + symbol = str(code.args[0]) + if symbol.startswith("internal"): + o.extend(["_sym_" + symbol, "CALLF"]) else: - o.extend(["_sym_" + str(code.args[0]), JUMP()]) + o.extend(["_sym_" + symbol, JUMP()]) return o elif code.value == "exit_to": + if not EOFv1_ENABLED: + raise CodegenPanic("exit_to not implemented on non EOFv1") + o = [] args = code.args[1:] # if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - if EOFv1_ENABLED: + if str(code.args[0]) == "return_pc": o.extend(["RETF"]) else: - o.extend([str(code.args[0]), "CALLF"]) + o.extend([str(code.args[0]), "RJUMP"]) return o # push a literal symbol From 540c764add0c120196c7d7b273abd05df14aad7e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 07:41:39 +0200 Subject: [PATCH 29/82] fix ident --- vyper/ir/compile_ir.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index eb3a8e54a8..26c364d4df 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -698,12 +698,11 @@ def _height_of(witharg): for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) - - if str(code.args[0]) == "return_pc": - o.extend(["RETF"]) - else: - o.extend([str(code.args[0]), "RJUMP"]) + if str(code.args[0]) == "return_pc": + o.extend(["RETF"]) + else: + o.extend([str(code.args[0]), "RJUMP"]) return o # push a literal symbol From 4911ab97b5dd6e5c4358bd0e77a64990c5b7c015 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 09:05:39 +0200 Subject: [PATCH 30/82] eofv1 header output wip --- vyper/compiler/phases.py | 30 +++++++++++++++++++++++------- vyper/ir/compile_ir.py | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 781887c427..ead1fa36bd 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -11,7 +11,7 @@ from vyper.ir import compile_ir, optimizer from vyper.semantics import set_data_positions, validate_semantics from vyper.typing import InterfaceImports, StorageLayout - +from vyper.evm.opcodes import version_check class CompilerData: """ @@ -146,15 +146,25 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - return generate_bytecode( - self.assembly, is_runtime=False, no_bytecode_metadata=self.no_bytecode_metadata - ) + if version_check("shanghai"): + return generate_EOFv1( + self.assembly, is_runtime=False, no_bytecode_metadata=self.no_bytecode_metadata + ) + else: + return generate_bytecode( + self.assembly, is_runtime=False, no_bytecode_metadata=self.no_bytecode_metadata + ) @cached_property def bytecode_runtime(self) -> bytes: - return generate_bytecode( - self.assembly_runtime, is_runtime=True, no_bytecode_metadata=self.no_bytecode_metadata - ) + if version_check("shanghai"): + return generate_EOFv1( + self.assembly, is_runtime=True, no_bytecode_metadata=self.no_bytecode_metadata + ) + else: + return generate_bytecode( + self.assembly_runtime, is_runtime=True, no_bytecode_metadata=self.no_bytecode_metadata + ) @cached_property def blueprint_bytecode(self) -> bytes: @@ -334,3 +344,9 @@ def generate_bytecode( return compile_ir.assembly_to_evm( assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata )[0] + +def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadata: bool = False) -> bytes: + bytecode = compile_ir.assembly_to_evm( + assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata + )[0] + return bytecode \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 26c364d4df..50f58bcb05 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -694,7 +694,7 @@ def _height_of(witharg): raise CodegenPanic("exit_to not implemented on non EOFv1") o = [] - args = code.args[1:] # if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] + args = code.args[1:] for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) @@ -1148,7 +1148,7 @@ def assembly_to_evm( # TODO refactor into two functions, create posmap and assemble - o = b"" + o = b"" # now that all symbols have been resolved, generate bytecode # using the symbol map @@ -1210,7 +1210,34 @@ def assembly_to_evm( # Should never reach because, assembly is create in _compile_to_assembly. raise Exception("Weird symbol in assembly: " + str(item)) # pragma: no cover - o += bytecode_suffix + if EOFv1_ENABLED: + code_sections_len = 1 # temporary, will calculate eventually + header = b"" + header += bytes([0xef, 0x00]) # EOFv1 signature + header += bytes([0x01]) # version 1 + + header += bytes([0x01]) # kind=type + header += (code_sections_len * 4).to_bytes(2, "big") + + header += bytes([0x02]) # kind=code + header += code_sections_len.to_bytes(2, "big") + + header += bytes([0x01]) # single code section + header += len(o).to_bytes(2, "big") + + header += bytes([0x02]) # kind=data + header += bytes([0x0, 0x0]) + + header += bytes([0x0]) # Terminator + + # Type section + header += bytes([0x0]) # inputs + header += bytes([0x0]) # outputs + header += (1024).to_bytes(2, "big") # max stack + + o = header + o + else: + o += bytecode_suffix line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) From 78d4c873cc9ac6bd986aeeb73b4a15f97a94fbf0 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 14:45:45 +0200 Subject: [PATCH 31/82] file should be handled higher --- vyper/compiler/phases.py | 3 ++- vyper/ir/compile_ir.py | 54 ++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index ead1fa36bd..d76427fce8 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -349,4 +349,5 @@ def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadat bytecode = compile_ir.assembly_to_evm( assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata )[0] - return bytecode \ No newline at end of file + + return compile_ir.decorateWithEOFHeader(bytecode) \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 50f58bcb05..0820ebf98d 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -990,6 +990,32 @@ def _optimize_assembly(assembly): raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: notest +def decorateWithEOFHeader(bytecode: bytes) -> bytes: + code_sections_len = 1 # temporary, will calculate eventually + header = b"" + header += bytes([0xef, 0x00]) # EOFv1 signature + header += bytes([0x01]) # version 1 + + header += bytes([0x01]) # kind=type + header += (code_sections_len * 4).to_bytes(2, "big") + + header += bytes([0x02]) # kind=code + header += code_sections_len.to_bytes(2, "big") + + header += bytes([0x01]) # single code section + header += len(bytecode).to_bytes(2, "big") + + header += bytes([0x02]) # kind=data + header += bytes([0x0, 0x0]) + + header += bytes([0x0]) # Terminator + + # Type section + header += bytes([0x0]) # inputs + header += bytes([0x0]) # outputs + header += (1024).to_bytes(2, "big") # max stack + + return header + bytecode def adjust_pc_maps(pc_maps, ofst): assert ofst >= 0 @@ -1210,33 +1236,7 @@ def assembly_to_evm( # Should never reach because, assembly is create in _compile_to_assembly. raise Exception("Weird symbol in assembly: " + str(item)) # pragma: no cover - if EOFv1_ENABLED: - code_sections_len = 1 # temporary, will calculate eventually - header = b"" - header += bytes([0xef, 0x00]) # EOFv1 signature - header += bytes([0x01]) # version 1 - - header += bytes([0x01]) # kind=type - header += (code_sections_len * 4).to_bytes(2, "big") - - header += bytes([0x02]) # kind=code - header += code_sections_len.to_bytes(2, "big") - - header += bytes([0x01]) # single code section - header += len(o).to_bytes(2, "big") - - header += bytes([0x02]) # kind=data - header += bytes([0x0, 0x0]) - - header += bytes([0x0]) # Terminator - - # Type section - header += bytes([0x0]) # inputs - header += bytes([0x0]) # outputs - header += (1024).to_bytes(2, "big") # max stack - - o = header + o - else: + if not EOFv1_ENABLED: o += bytecode_suffix line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) From 8ad038d664c7e143f239dc7ef9e7bdc1f6f6be2a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 14:57:43 +0200 Subject: [PATCH 32/82] eof handling code --- vyper/compiler/output.py | 16 ++++++++++++++-- vyper/evm/eof.py | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 vyper/evm/eof.py diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 155895178a..f905a0542a 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -9,6 +9,8 @@ from vyper.compiler.phases import CompilerData from vyper.compiler.utils import build_gas_estimates from vyper.evm import opcodes +from vyper.evm.opcodes import version_check +from vyper.evm import eof from vyper.ir import compile_ir from vyper.semantics.types.function import FunctionVisibility, StateMutability from vyper.typing import StorageLayout @@ -277,8 +279,7 @@ def build_opcodes_output(compiler_data: CompilerData) -> str: def build_opcodes_runtime_output(compiler_data: CompilerData) -> str: return _build_opcodes(compiler_data.bytecode_runtime) - -def _build_opcodes(bytecode: bytes) -> str: +def _build_legacy_opcodes(bytecode: bytes) -> str: bytecode_sequence = deque(bytecode) opcode_map = dict((v[0], k) for k, v in opcodes.get_opcodes().items()) @@ -301,3 +302,14 @@ def _build_opcodes(bytecode: bytes) -> str: opcode_output.append(hex(offset)) return " ".join(opcode_output) + +def _build_eof_opcodes(bytecode: bytes) -> str: + if not eof.verifyHeader(bytecode): + raise CompilerPanic("bytecode not in EOF format") + return "" + +def _build_opcodes(bytecode: bytes) -> str: + if version_check("shanghai"): + _build_eof_opcodes(bytecode) + else: + _build_legacy_opcodes(bytecode) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py new file mode 100644 index 0000000000..e93b004d5f --- /dev/null +++ b/vyper/evm/eof.py @@ -0,0 +1,3 @@ + +def verifyHeader(bytecode: bytes) -> bool: + return bytecode[0] == 0xef and bytecode[1] == 0x0 and bytecode[2] == 0x01 \ No newline at end of file From 40641a8a5177a23bc42406230de115b47775c37e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 15:05:59 +0200 Subject: [PATCH 33/82] function to get opcode's immediate size --- vyper/evm/opcodes.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index de115d7923..df7ab80c16 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -224,6 +224,16 @@ IR_OPCODES: OpcodeMap = {**OPCODES, **PSEUDO_OPCODES} +# Terminating opcodes for EOFv1 support +TERMINATING_OPCODES = ["STOP", "RETF", "JUMPF", "RETURN", "REVERT", "INVALID"] + +def immediate_size(op): + if op in ["RJUMP", "RJUMPI", "CALLF"]: + return 2 + elif op[:4] == "PUSH": + return int(op[4:]) + else: + return 0 def evm_wrapper(fn, *args, **kwargs): def _wrapper(*args, **kwargs): From 282af9dfef73c57d7d2193d683d62f1b13a6a647 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 15:18:07 +0200 Subject: [PATCH 34/82] eof reader class --- vyper/compiler/output.py | 3 +-- vyper/evm/eof.py | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index f905a0542a..c7b88a9af3 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -304,8 +304,7 @@ def _build_legacy_opcodes(bytecode: bytes) -> str: return " ".join(opcode_output) def _build_eof_opcodes(bytecode: bytes) -> str: - if not eof.verifyHeader(bytecode): - raise CompilerPanic("bytecode not in EOF format") + eofReader = eof.EOFReader(bytecode) return "" def _build_opcodes(bytecode: bytes) -> str: diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index e93b004d5f..2ed1131f53 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -1,3 +1,22 @@ +from vyper.exceptions import VyperInternalException -def verifyHeader(bytecode: bytes) -> bool: - return bytecode[0] == 0xef and bytecode[1] == 0x0 and bytecode[2] == 0x01 \ No newline at end of file +MAGIC = b'\xEF\x00' +VERSION = 0x01 +S_TERMINATOR = 0x00 +S_TYPE = 0x01 +S_CODE = 0x02 +S_DATA = 0x03 + +class ValidationException(VyperInternalException): + """Validation exception.""" + +class EOFReader: + bytecode: bytes + + def __init__(self, bytecode: bytes): + self.bytecode = bytecode + self._verifyHeader() + + def _verifyHeader(self) -> bool: + if self.bytecode[:2] != MAGIC or self.bytecode[2] != VERSION: + raise ValidationException(f"not an EOFv{VERSION} bytecode") \ No newline at end of file From 18c17c4e4963f1e5c9f717187cd8a4804987f8c8 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 15:22:50 +0200 Subject: [PATCH 35/82] fix naming --- vyper/evm/eof.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 2ed1131f53..0d939fff1e 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -15,8 +15,11 @@ class EOFReader: def __init__(self, bytecode: bytes): self.bytecode = bytecode - self._verifyHeader() + self._verify_header() - def _verifyHeader(self) -> bool: + def get_code_segments(self): + pass + + def _verify_header(self) -> bool: if self.bytecode[:2] != MAGIC or self.bytecode[2] != VERSION: raise ValidationException(f"not an EOFv{VERSION} bytecode") \ No newline at end of file From c2cfe42629e6cce8fa3c076323b98daf62ad7e99 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 29 Dec 2022 15:29:29 +0200 Subject: [PATCH 36/82] validator --- vyper/evm/eof.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 0d939fff1e..9f61d6e7e0 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -22,4 +22,56 @@ def get_code_segments(self): def _verify_header(self) -> bool: if self.bytecode[:2] != MAGIC or self.bytecode[2] != VERSION: - raise ValidationException(f"not an EOFv{VERSION} bytecode") \ No newline at end of file + raise ValidationException(f"not an EOFv{VERSION} bytecode") + + code = self.bytecode + + # Process section headers + section_sizes = {S_TYPE: [], S_CODE: [], S_DATA: []} + code_section_ios = [] + code_sections = [] + data_sections = [] + pos = 3 + while True: + # Terminator not found + if pos >= len(code): + raise ValidationException("no section terminator") + + section_id = code[pos] + pos += 1 + if section_id == S_TERMINATOR: + break + + # Disallow unknown sections + if not section_id in section_sizes: + raise ValidationException("invalid section id") + + # Data section preceding code section (i.e. code section following data section) + if section_id == S_CODE and len(section_sizes[S_DATA]) != 0: + raise ValidationException("data section preceding code section") + + # Code section or data section preceding type section + if section_id == S_TYPE and (len(section_sizes[S_CODE]) != 0 or len(section_sizes[S_DATA]) != 0): + raise ValidationException("code or data section preceding type section") + + # Multiple type or data sections + if section_id == S_TYPE and len(section_sizes[S_TYPE]) != 0: + raise ValidationException("multiple type sections") + if section_id == S_DATA and len(section_sizes[S_DATA]) != 0: + raise ValidationException("multiple data sections") + + # Truncated section size + if (pos + 1) >= len(code): + raise ValidationException("truncated section size") + + section_count = (code[pos] << 8) | code[pos + 1] + pos += 2 + if section_id == S_TYPE: + section_sizes[S_TYPE].append(section_count) + elif section_id == S_CODE: + for i in range(section_count): + code_size = (code[pos] << 8) | code[pos + 1] + pos += 2 + section_sizes[S_CODE].append(code_size) + elif section_id == S_DATA: + section_sizes[S_DATA].append(section_count) \ No newline at end of file From bc86be0d3bdef3f0c89609285c98f96a345e7788 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 10:24:08 +0200 Subject: [PATCH 37/82] create reverse mapping from opcode to mnemonic --- vyper/evm/opcodes.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index df7ab80c16..eb032bc0dd 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -227,14 +227,6 @@ # Terminating opcodes for EOFv1 support TERMINATING_OPCODES = ["STOP", "RETF", "JUMPF", "RETURN", "REVERT", "INVALID"] -def immediate_size(op): - if op in ["RJUMP", "RJUMPI", "CALLF"]: - return 2 - elif op[:4] == "PUSH": - return int(op[4:]) - else: - return 0 - def evm_wrapper(fn, *args, **kwargs): def _wrapper(*args, **kwargs): global active_evm_version @@ -282,6 +274,22 @@ def get_opcode(mnemonic: str) -> int: def get_ir_opcodes() -> OpcodeRulesetMap: return _ir_opcodes[active_evm_version] +OPCODE_TO_MNEMONIC_MAP = {ruleset[0]: mnemonic for mnemonic, ruleset in get_opcodes().items()} +def get_mnemonic(opcode: int) -> str: + return OPCODE_TO_MNEMONIC_MAP[opcode] + +VALID_OPCODES = OPCODE_TO_MNEMONIC_MAP.keys() + +def immediate_size(op): + if isinstance(op, int): + op = get_mnemonic(op) + + if op in ["RJUMP", "RJUMPI", "CALLF"]: + return 2 + elif op[:4] == "PUSH": + return int(op[4:]) + else: + return 0 def version_check(begin: Optional[str] = None, end: Optional[str] = None) -> bool: if begin is None and end is None: From 2b8bea9487657af3ce4d00c0d90ef15a7729ea96 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 10:24:28 +0200 Subject: [PATCH 38/82] read code sections and validate wip --- vyper/evm/eof.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 9f61d6e7e0..02553cac5a 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -1,4 +1,5 @@ from vyper.exceptions import VyperInternalException +from vyper.evm.opcodes import TERMINATING_OPCODES, VALID_OPCODES, immediate_size, get_mnemonic MAGIC = b'\xEF\x00' VERSION = 0x01 @@ -10,11 +11,20 @@ class ValidationException(VyperInternalException): """Validation exception.""" +class FunctionType: + def __init__(self, inputs, outputs, max_stack_height) -> None: + self.offset = 0 + self.size = 0 + self.inputs = inputs + self.outputs = outputs + self.max_stack_height = max_stack_height + class EOFReader: bytecode: bytes def __init__(self, bytecode: bytes): self.bytecode = bytecode + self.code_sections = [] self._verify_header() def get_code_segments(self): @@ -74,4 +84,120 @@ def _verify_header(self) -> bool: pos += 2 section_sizes[S_CODE].append(code_size) elif section_id == S_DATA: - section_sizes[S_DATA].append(section_count) \ No newline at end of file + section_sizes[S_DATA].append(section_count) + + # Code section cannot be absent + if len(section_sizes[S_CODE]) == 0: + raise ValidationException("no code section") + + # Not more than 1024 code sections + if len(section_sizes[S_CODE]) > 1024: + raise ValidationException("more than 1024 code sections") + + # Type section can be absent only if single code section is present + if len(section_sizes[S_TYPE]) == 0 and len(section_sizes[S_CODE]) != 1: + raise ValidationException("no obligatory type section") + + # Type section, if present, has size corresponding to number of code sections + if section_sizes[S_TYPE][0] != 0 and section_sizes[S_TYPE][0] != len(section_sizes[S_CODE]) * 4: + raise ValidationException("invalid type section size") + + # Truncated section size + if (pos + len(section_sizes[S_CODE]) * 4) > len(code): + raise ValidationException("truncated TYPE section size") + + # Read TYPE section + for i in range(len(section_sizes[S_CODE])): + input_count = code[pos] + output_count = code[pos + 1] + max_stack_height = (code[pos + 2] << 8) | code[pos + 3] + type = FunctionType(input_count, output_count, max_stack_height) + self.code_sections.append(type) + pos += 4 + + # Read CODE sections + for i, section_size in enumerate(section_sizes[S_CODE]): + # Truncated section size + if (pos + section_size) > len(code): + raise ValidationException("truncated CODE section size") + code_sections.append(code[pos:pos + section_size]) + pos += section_size + + self.validate_code_section(i, code_sections[-1], code_section_ios) + + # Read DATA sections + for section_size in section_sizes[S_DATA]: + # Truncated section size + if (pos + section_size) > len(code): + raise ValidationException("truncated DATA section size") + data_sections.append(code[pos:pos + section_size]) + pos += section_size + + if (pos) != len(code): + raise ValidationException("Bad file size") + + # First code section should have zero inputs and outputs + if code_section_ios[0].inputs != 0 or code_section_ios[0].outputs != 0: + raise ValidationException("invalid input/output count for code section 0") + + + # Raises ValidationException on invalid code + def validate_code_section(self, func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0, 0)]): + # Note that EOF1 already asserts this with the code section requirements + assert len(code) > 0 + + opcode = 0 + pos = 0 + rjumpdests = set() + immediates = set() + while pos < len(code): + # Ensure the opcode is valid + opcode = code[pos] + pos += 1 + if not opcode in VALID_OPCODES: + raise ValidationException("undefined instruction") + + if opcode == 0x5c or opcode == 0x5d: + if pos + 2 > len(code): + raise ValidationException("truncated relative jump offset") + offset = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = True) + + rjumpdest = pos + 2 + offset + if rjumpdest < 0 or rjumpdest >= len(code): + raise ValidationException("relative jump destination out of bounds") + + rjumpdests.add(rjumpdest) + elif opcode == 0xb0: + if pos + 2 > len(code): + raise ValidationException("truncated CALLF immediate") + section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) + + if section_id >= len(types): + raise ValidationException("invalid section id") + elif opcode == 0xb2: + if pos + 2 > len(code): + raise ValidationException("truncated JUMPF immediate") + section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) + + if section_id >= len(types): + raise ValidationException("invalid section id") + + if types[section_id].outputs != types[func_id].outputs: + raise ValidationException("incompatible function type for JUMPF") + + # Save immediate value positions + immediates.update(range(pos, pos + immediate_size(opcode))) + # Skip immediates + pos += immediate_size(opcode) + + # Ensure last opcode's immediate doesn't go over code end + if pos != len(code): + raise ValidationException("truncated immediate") + + # opcode is the *last opcode* + if not get_mnemonic(opcode) in TERMINATING_OPCODES: + raise ValidationException("no terminating instruction") + + # Ensure relative jump destinations don't target immediates + if not rjumpdests.isdisjoint(immediates): + raise ValidationException("relative jump destination targets immediate") From f335a7cd9e0709b544e876e355e8b51ae2260bec Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 10:36:47 +0200 Subject: [PATCH 39/82] use class members --- vyper/evm/eof.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 02553cac5a..f16ebb6f17 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -15,6 +15,7 @@ class FunctionType: def __init__(self, inputs, outputs, max_stack_height) -> None: self.offset = 0 self.size = 0 + self.code = bytes() self.inputs = inputs self.outputs = outputs self.max_stack_height = max_stack_height @@ -25,6 +26,7 @@ class EOFReader: def __init__(self, bytecode: bytes): self.bytecode = bytecode self.code_sections = [] + self.data_sections = [] self._verify_header() def get_code_segments(self): @@ -38,9 +40,8 @@ def _verify_header(self) -> bool: # Process section headers section_sizes = {S_TYPE: [], S_CODE: [], S_DATA: []} - code_section_ios = [] - code_sections = [] - data_sections = [] + self.code_sections = [] + self.data_sections = [] pos = 3 while True: # Terminator not found @@ -120,29 +121,31 @@ def _verify_header(self) -> bool: # Truncated section size if (pos + section_size) > len(code): raise ValidationException("truncated CODE section size") - code_sections.append(code[pos:pos + section_size]) + self.code_sections[i].code = code[pos:pos + section_size] pos += section_size - self.validate_code_section(i, code_sections[-1], code_section_ios) + self.validate_code_section(i) # Read DATA sections for section_size in section_sizes[S_DATA]: # Truncated section size if (pos + section_size) > len(code): raise ValidationException("truncated DATA section size") - data_sections.append(code[pos:pos + section_size]) + self.data_sections.append(code[pos:pos + section_size]) pos += section_size if (pos) != len(code): raise ValidationException("Bad file size") # First code section should have zero inputs and outputs - if code_section_ios[0].inputs != 0 or code_section_ios[0].outputs != 0: + if self.code_sections[0].inputs != 0 or self.code_sections[0].outputs != 0: raise ValidationException("invalid input/output count for code section 0") # Raises ValidationException on invalid code - def validate_code_section(self, func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0, 0)]): + def validate_code_section(self, func_id: int): + code = self.code_sections[func_id].code + # Note that EOF1 already asserts this with the code section requirements assert len(code) > 0 @@ -172,17 +175,17 @@ def validate_code_section(self, func_id: int, code: bytes, types: list[FunctionT raise ValidationException("truncated CALLF immediate") section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) - if section_id >= len(types): + if section_id >= len(self.code_sections): raise ValidationException("invalid section id") elif opcode == 0xb2: if pos + 2 > len(code): raise ValidationException("truncated JUMPF immediate") section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) - if section_id >= len(types): + if section_id >= len(self.code_sections): raise ValidationException("invalid section id") - if types[section_id].outputs != types[func_id].outputs: + if self.code_sections[section_id].outputs != self.code_sections[func_id].outputs: raise ValidationException("incompatible function type for JUMPF") # Save immediate value positions From 742b19e6f2f38f7a0fbf6cd87e3491c124781360 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 10:40:54 +0200 Subject: [PATCH 40/82] update cli verification tool to use vyper --- vyper/cli/validate_eof.py | 225 +------------------------------------- 1 file changed, 2 insertions(+), 223 deletions(-) diff --git a/vyper/cli/validate_eof.py b/vyper/cli/validate_eof.py index a9ab4631bc..4a64bcc235 100755 --- a/vyper/cli/validate_eof.py +++ b/vyper/cli/validate_eof.py @@ -2,228 +2,7 @@ import sys import argparse -MAGIC = b'\xEF\x00' -VERSION = 0x01 -S_TERMINATOR = 0x00 -S_TYPE = 0x01 -S_CODE = 0x02 -S_DATA = 0x03 - -class ValidationException(Exception): - """Validation exception.""" - -class FunctionType: - def __init__(self, inputs, outputs, max_stack_height) -> None: - self.inputs = inputs - self.outputs = outputs - self.max_stack_height = max_stack_height - -# The ranges below are as specified in the Yellow Paper. -# Note: range(s, e) excludes e, hence the +1 -valid_opcodes = [ - *range(0x00, 0x0b + 1), - *range(0x10, 0x1d + 1), - 0x20, - *range(0x30, 0x3f + 1), - *range(0x40, 0x48 + 1), - *range(0x50, 0x55 + 1), *range(0x58, 0x5d + 1), - *range(0x60, 0x6f + 1), - *range(0x70, 0x7f + 1), - *range(0x80, 0x8f + 1), - *range(0x90, 0x9f + 1), - *range(0xa0, 0xa4 + 1), - 0xb0, 0xb1, 0xb2, - # Note: 0xfe is considered assigned. - 0xf0, 0xf1, 0xf3, 0xf4, 0xf5, 0xfa, 0xfd, 0xfe -] - -# STOP, RETF, JUMPF, RETURN, REVERT, INVALID -terminating_opcodes = [0x00, 0xb1, 0xb2, 0xf3, 0xfd, 0xfe] - -immediate_sizes = 256 * [0] -immediate_sizes[0x5c] = 2 # RJUMP -immediate_sizes[0x5d] = 2 # RJUMPI -immediate_sizes[0xb0] = 2 # CALLF -immediate_sizes[0xb2] = 2 # JUMPF -for opcode in range(0x60, 0x7f + 1): # PUSH1..PUSH32 - immediate_sizes[opcode] = opcode - 0x60 + 1 - -# Validate EOF code. -# Raises ValidationException on invalid code -def validate_eof(code: bytes): - # Check version - if len(code) < 3 or code[2] != VERSION: - raise ValidationException("invalid version") - - # Process section headers - section_sizes = {S_TYPE: [], S_CODE: [], S_DATA: []} - code_section_ios = [] - code_sections = [] - data_sections = [] - pos = 3 - while True: - # Terminator not found - if pos >= len(code): - raise ValidationException("no section terminator") - - section_id = code[pos] - pos += 1 - if section_id == S_TERMINATOR: - break - - # Disallow unknown sections - if not section_id in section_sizes: - raise ValidationException("invalid section id") - - # Data section preceding code section (i.e. code section following data section) - if section_id == S_CODE and len(section_sizes[S_DATA]) != 0: - raise ValidationException("data section preceding code section") - - # Code section or data section preceding type section - if section_id == S_TYPE and (len(section_sizes[S_CODE]) != 0 or len(section_sizes[S_DATA]) != 0): - raise ValidationException("code or data section preceding type section") - - # Multiple type or data sections - if section_id == S_TYPE and len(section_sizes[S_TYPE]) != 0: - raise ValidationException("multiple type sections") - if section_id == S_DATA and len(section_sizes[S_DATA]) != 0: - raise ValidationException("multiple data sections") - - # Truncated section size - if (pos + 1) >= len(code): - raise ValidationException("truncated section size") - - section_count = (code[pos] << 8) | code[pos + 1] - pos += 2 - if section_id == S_TYPE: - section_sizes[S_TYPE].append(section_count) - elif section_id == S_CODE: - for i in range(section_count): - code_size = (code[pos] << 8) | code[pos + 1] - pos += 2 - section_sizes[S_CODE].append(code_size) - elif section_id == S_DATA: - section_sizes[S_DATA].append(section_count) - - # Code section cannot be absent - if len(section_sizes[S_CODE]) == 0: - raise ValidationException("no code section") - - # Not more than 1024 code sections - if len(section_sizes[S_CODE]) > 1024: - raise ValidationException("more than 1024 code sections") - - # Type section can be absent only if single code section is present - if len(section_sizes[S_TYPE]) == 0 and len(section_sizes[S_CODE]) != 1: - raise ValidationException("no obligatory type section") - - # Type section, if present, has size corresponding to number of code sections - if section_sizes[S_TYPE][0] != 0 and section_sizes[S_TYPE][0] != len(section_sizes[S_CODE]) * 4: - raise ValidationException("invalid type section size") - - # The entire container must be scanned - # print(section_sizes, (pos + sum(section_sizes[S_TYPE]) + sum(section_sizes[S_CODE]) + sum(section_sizes[S_DATA]))) - # if len(code) != (pos + sum(section_sizes[S_TYPE]) + sum(section_sizes[S_CODE]) + sum(section_sizes[S_DATA])): - # raise ValidationException("container size not equal to sum of section sizes") - - # Truncated section size - if (pos + len(section_sizes[S_CODE]) * 4) > len(code): - raise ValidationException("truncated TYPE section size") - - # Read TYPE section - for i in range(len(section_sizes[S_CODE])): - input_count = code[pos] - output_count = code[pos + 1] - max_stack_height = (code[pos + 2] << 8) | code[pos + 3] - code_section_ios.append(FunctionType(input_count, output_count, max_stack_height)) - pos += 4 - - # Read CODE sections - for i, section_size in enumerate(section_sizes[S_CODE]): - # Truncated section size - if (pos + section_size) > len(code): - raise ValidationException("truncated CODE section size") - code_sections.append(code[pos:pos + section_size]) - pos += section_size - - validate_code_section(i, code_sections[-1], code_section_ios) - - # Read DATA sections - for section_size in section_sizes[S_DATA]: - # Truncated section size - if (pos + section_size) > len(code): - raise ValidationException("truncated DATA section size") - data_sections.append(code[pos:pos + section_size]) - pos += section_size - - if (pos) != len(code): - raise ValidationException("Bad file size") - - # First code section should have zero inputs and outputs - if code_section_ios[0].inputs != 0 or code_section_ios[0].outputs != 0: - raise ValidationException("invalid input/output count for code section 0") - -# Raises ValidationException on invalid code -def validate_code_section(func_id: int, code: bytes, types: list[FunctionType] = [FunctionType(0, 0, 0)]): - # Note that EOF1 already asserts this with the code section requirements - assert len(code) > 0 - - opcode = 0 - pos = 0 - rjumpdests = set() - immediates = set() - while pos < len(code): - # Ensure the opcode is valid - opcode = code[pos] - pos += 1 - if not opcode in valid_opcodes: - raise ValidationException("undefined instruction") - - if opcode == 0x5c or opcode == 0x5d: - if pos + 2 > len(code): - raise ValidationException("truncated relative jump offset") - offset = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = True) - - rjumpdest = pos + 2 + offset - if rjumpdest < 0 or rjumpdest >= len(code): - raise ValidationException("relative jump destination out of bounds") - - rjumpdests.add(rjumpdest) - elif opcode == 0xb0: - if pos + 2 > len(code): - raise ValidationException("truncated CALLF immediate") - section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) - - if section_id >= len(types): - raise ValidationException("invalid section id") - elif opcode == 0xb2: - if pos + 2 > len(code): - raise ValidationException("truncated JUMPF immediate") - section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) - - if section_id >= len(types): - raise ValidationException("invalid section id") - - if types[section_id].outputs != types[func_id].outputs: - raise ValidationException("incompatible function type for JUMPF") - - # Save immediate value positions - immediates.update(range(pos, pos + immediate_sizes[opcode])) - # Skip immediates - pos += immediate_sizes[opcode] - - # Ensure last opcode's immediate doesn't go over code end - if pos != len(code): - raise ValidationException("truncated immediate") - - # opcode is the *last opcode* - if not opcode in terminating_opcodes: - raise ValidationException("no terminating instruction") - - # Ensure relative jump destinations don't target immediates - if not rjumpdests.isdisjoint(immediates): - raise ValidationException("relative jump destination targets immediate") - +from vyper.evm.eof import EOFReader def _parse_args(argv): parser = argparse.ArgumentParser( @@ -241,7 +20,7 @@ def _parse_args(argv): if args.input_file: with open(args.input_file, "r") as f: code = bytes.fromhex(f.read()) - validate_eof(code) + EOFReader(code) if __name__ == "__main__": _parse_args(sys.argv[1:]) \ No newline at end of file From 8114b8a404dfd6d85f6be024e0879c5f2de68e55 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 11:00:00 +0200 Subject: [PATCH 41/82] remove size --- vyper/evm/eof.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index f16ebb6f17..c6f1d1907d 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -14,7 +14,6 @@ class ValidationException(VyperInternalException): class FunctionType: def __init__(self, inputs, outputs, max_stack_height) -> None: self.offset = 0 - self.size = 0 self.code = bytes() self.inputs = inputs self.outputs = outputs @@ -122,6 +121,7 @@ def _verify_header(self) -> bool: if (pos + section_size) > len(code): raise ValidationException("truncated CODE section size") self.code_sections[i].code = code[pos:pos + section_size] + self.code_sections[i].offset = pos pos += section_size self.validate_code_section(i) From caca9b323e851d1edd2631d7387b1d8dd0d8736e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 15:12:10 +0200 Subject: [PATCH 42/82] use eof constants --- vyper/ir/compile_ir.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 0820ebf98d..6ce90edffc 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -4,6 +4,7 @@ from vyper.codegen.ir_node import IRnode from vyper.evm.opcodes import get_opcodes, get_opcode, version_check +from vyper.evm import eof from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions from vyper.version import version_tuple @@ -993,19 +994,19 @@ def _optimize_assembly(assembly): def decorateWithEOFHeader(bytecode: bytes) -> bytes: code_sections_len = 1 # temporary, will calculate eventually header = b"" - header += bytes([0xef, 0x00]) # EOFv1 signature - header += bytes([0x01]) # version 1 + header += eof.MAGIC # EOFv1 signature + header += bytes([eof.VERSION]) - header += bytes([0x01]) # kind=type + header += bytes([eof.S_TYPE]) header += (code_sections_len * 4).to_bytes(2, "big") - header += bytes([0x02]) # kind=code + header += bytes([eof.S_CODE]) header += code_sections_len.to_bytes(2, "big") header += bytes([0x01]) # single code section header += len(bytecode).to_bytes(2, "big") - header += bytes([0x02]) # kind=data + header += bytes([eof.S_DATA]) header += bytes([0x0, 0x0]) header += bytes([0x0]) # Terminator From 6ec3d51dbc3826a7da5d624d40291ecb7a106b10 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 16:12:19 +0200 Subject: [PATCH 43/82] remove double size --- vyper/ir/compile_ir.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 6ce90edffc..4d82d710cb 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1003,7 +1003,6 @@ def decorateWithEOFHeader(bytecode: bytes) -> bytes: header += bytes([eof.S_CODE]) header += code_sections_len.to_bytes(2, "big") - header += bytes([0x01]) # single code section header += len(bytecode).to_bytes(2, "big") header += bytes([eof.S_DATA]) From e50a8d5fd732cf04aeaf40314aefdea09d658d6e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 2 Jan 2023 18:04:41 +0200 Subject: [PATCH 44/82] remove JUMPF and PC for eof --- vyper/compiler/output.py | 2 +- vyper/evm/eof.py | 10 ---------- vyper/evm/opcodes.py | 5 ++--- vyper/ir/compile_ir.py | 6 +++--- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index c7b88a9af3..d5bbe47e76 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -297,7 +297,7 @@ def _build_legacy_opcodes(bytecode: bytes) -> str: push_len = int(mnemonic[4:]) push_values = [hex(bytecode_sequence.popleft())[2:] for i in range(push_len)] opcode_output.append(f"0x{''.join(push_values).upper()}") - elif mnemonic in ['RJUMP', 'RJUMPI', 'JUMPF', 'CALLF']: + elif mnemonic in ['RJUMP', 'RJUMPI', 'CALLF']: offset = int.from_bytes([bytecode_sequence.popleft() for _i in range(2)], 'big', signed=True) opcode_output.append(hex(offset)) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index c6f1d1907d..3a84c0d6ac 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -177,16 +177,6 @@ def validate_code_section(self, func_id: int): if section_id >= len(self.code_sections): raise ValidationException("invalid section id") - elif opcode == 0xb2: - if pos + 2 > len(code): - raise ValidationException("truncated JUMPF immediate") - section_id = int.from_bytes(code[pos:pos+2], byteorder = "big", signed = False) - - if section_id >= len(self.code_sections): - raise ValidationException("invalid section id") - - if self.code_sections[section_id].outputs != self.code_sections[func_id].outputs: - raise ValidationException("incompatible function type for JUMPF") # Save immediate value positions immediates.update(range(pos, pos + immediate_size(opcode))) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index eb032bc0dd..26de9694f7 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -100,7 +100,7 @@ "SSTORE": (0x55, 2, 0, 20000), "JUMP": (0x56, 1, 0, 8), "JUMPI": (0x57, 2, 0, 10), - "PC": (0x58, 0, 1, 2), + "PC": (0x58, 0, 1, (2, 2, 2, 2, None)), "MSIZE": (0x59, 0, 1, 2), "GAS": (0x5A, 0, 1, 2), "JUMPDEST": (0x5B, 0, 0, 1), @@ -190,7 +190,6 @@ "BREAKPOINT": (0xA6, 0, 0, 0), "CALLF": (0xB0, 0, 0, (None, None, None, None, 5)), "RETF": (0xB1, 0, 0, (None, None, None, None, 4)), - "JUMPF": (0xB2, 0, 0, (None, None, None, None, 4)), } PSEUDO_OPCODES: OpcodeMap = { @@ -225,7 +224,7 @@ IR_OPCODES: OpcodeMap = {**OPCODES, **PSEUDO_OPCODES} # Terminating opcodes for EOFv1 support -TERMINATING_OPCODES = ["STOP", "RETF", "JUMPF", "RETURN", "REVERT", "INVALID"] +TERMINATING_OPCODES = ["STOP", "RETF", "RETURN", "REVERT", "INVALID"] def evm_wrapper(fn, *args, **kwargs): def _wrapper(*args, **kwargs): diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 4d82d710cb..4cd3526972 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1106,7 +1106,7 @@ def assembly_to_evm( continue # skip debug # update pc_jump_map - if item in ("RJUMP", "JUMP", "JUMPF", "CALLF"): + if item in ("RJUMP", "JUMP", "CALLF"): last = assembly[i - 1] if is_symbol(last) and last.startswith("_sym_internal"): if last.endswith("cleanup"): @@ -1129,7 +1129,7 @@ def assembly_to_evm( raise CompilerPanic(f"duplicate jumpdest {item}") symbol_map[item] = pc - elif assembly[i + 1] in ("RJUMP", "RJUMPI", "JUMPF", "CALLF"): + elif assembly[i + 1] in ("RJUMP", "RJUMPI", "CALLF"): pc += CODE_OFST_SIZE # highbyte lowbyte only else: pc += CODE_OFST_SIZE + 1 # PUSH2 highbits lowbits @@ -1194,7 +1194,7 @@ def assembly_to_evm( continue elif is_symbol(item): - if EOFv1_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "JUMPF", "CALLF"]: + if EOFv1_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "CALLF"]: sym = item assert is_symbol(sym), f"Internal compiler error: {assembly[i + 1]} not preceded by symbol" pc_post_instruction = instr_offsets[i] + 3 From 949ea14e68be194bf8fbcec2c4d8e968459f412f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 3 Jan 2023 18:31:44 +0200 Subject: [PATCH 45/82] convert callf offsets to function ids --- vyper/ir/compile_ir.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 4cd3526972..97a512aa3d 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1053,6 +1053,7 @@ def assembly_to_evm( pc = 0 symbol_map = {} + call_offsets = {} runtime_code, runtime_code_start, runtime_code_end = None, None, None @@ -1118,6 +1119,9 @@ def assembly_to_evm( else: # everything else line_number_map["pc_jump_map"][pc] = "-" + + if item == "CALLF": + call_offsets[last] = True elif item in ("RJUMPI", "JUMPI", "JUMPDEST"): line_number_map["pc_jump_map"][pc] = "-" @@ -1168,6 +1172,8 @@ def assembly_to_evm( if runtime_code is not None: symbol_map["_sym_subcode_size"] = len(runtime_code) + function_breaks = {symbol_map[offset_symbol]:i+1 for i,offset_symbol in enumerate(call_offsets.keys())} + # (NOTE CMC 2022-06-17 this way of generating bytecode did not # seem to be a perf hotspot. if it is, may want to use bytearray() # instead). @@ -1179,11 +1185,17 @@ def assembly_to_evm( # now that all symbols have been resolved, generate bytecode # using the symbol map to_skip = 0 + # current_function = 0 + # current_function_offset = 0 for i, item in enumerate(assembly): if to_skip > 0: to_skip -= 1 continue + # if len(function_breaks) > current_function and instr_offsets[i] > function_breaks[current_function]: + # current_function_offset = function_breaks[current_function] + # current_function += 1 + if item in ("DEBUG", "BLANK"): continue # skippable opcodes # When EOFv1 enabled skip emiting JUMPDESTs @@ -1197,13 +1209,17 @@ def assembly_to_evm( if EOFv1_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "CALLF"]: sym = item assert is_symbol(sym), f"Internal compiler error: {assembly[i + 1]} not preceded by symbol" - pc_post_instruction = instr_offsets[i] + 3 - offset = symbol_map[sym] - pc_post_instruction - # TODO: fallback to dynamic jumps? - assert offset > -32767 and offset <= 32767, "Offset too big for relative jump" o += bytes([get_opcode(assembly[i + 1])]) - o += bytes(offset.to_bytes(2, 'big', signed=True)) - to_skip = 1 + + if assembly[i + 1] == "CALLF": + function_id = function_breaks[symbol_map[sym]] + o += bytes(function_id.to_bytes(2, 'big', signed=True)) + else: + pc_post_instruction = instr_offsets[i] + 3 + offset = symbol_map[sym] - pc_post_instruction + assert offset > -32767 and offset <= 32767, "Offset too big for relative jump" + o += bytes(offset.to_bytes(2, 'big', signed=True)) + to_skip = 1 elif assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK": bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE)) o += bytecode @@ -1239,6 +1255,7 @@ def assembly_to_evm( if not EOFv1_ENABLED: o += bytecode_suffix + line_number_map["function_breaks"] = function_breaks line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) return o, line_number_map From 9a4da0aaa069d3077041b2f9f1c1015654025d4a Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 3 Jan 2023 19:32:46 +0200 Subject: [PATCH 46/82] compute and emit code section sizes --- vyper/compiler/phases.py | 8 ++++---- vyper/ir/compile_ir.py | 31 +++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index d76427fce8..f5c7a072fc 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -346,8 +346,8 @@ def generate_bytecode( )[0] def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadata: bool = False) -> bytes: - bytecode = compile_ir.assembly_to_evm( + bytecode, _, function_breaks = compile_ir.assembly_to_evm( assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata - )[0] - - return compile_ir.decorateWithEOFHeader(bytecode) \ No newline at end of file + ) + + return compile_ir.decorateWithEOFHeader(bytecode, function_breaks) \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 97a512aa3d..1dd81610b9 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -991,8 +991,8 @@ def _optimize_assembly(assembly): raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: notest -def decorateWithEOFHeader(bytecode: bytes) -> bytes: - code_sections_len = 1 # temporary, will calculate eventually +def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: + code_sections_len = len(function_sizes) header = b"" header += eof.MAGIC # EOFv1 signature header += bytes([eof.VERSION]) @@ -1003,7 +1003,8 @@ def decorateWithEOFHeader(bytecode: bytes) -> bytes: header += bytes([eof.S_CODE]) header += code_sections_len.to_bytes(2, "big") - header += len(bytecode).to_bytes(2, "big") + for size in function_sizes: + header += size.to_bytes(2, "big") header += bytes([eof.S_DATA]) header += bytes([0x0, 0x0]) @@ -1051,6 +1052,9 @@ def assembly_to_evm( "error_map": {}, } + function_breaks = {} + function_sizes = [] + pc = 0 symbol_map = {} call_offsets = {} @@ -1075,7 +1079,7 @@ def assembly_to_evm( for i, item in enumerate(assembly): if isinstance(item, list): assert runtime_code is None, "Multiple subcodes" - runtime_code, runtime_map = assembly_to_evm( + runtime_code, runtime_map, runtime_function_sizes = assembly_to_evm( item, insert_vyper_signature=True, disable_bytecode_metadata=disable_bytecode_metadata, @@ -1090,6 +1094,7 @@ def assembly_to_evm( ) assert runtime_code_end - runtime_code_start == len(runtime_code) + function_sizes = runtime_function_sizes if is_ofst(item) and is_mem_sym(assembly[i + 1]): max_mem_ofst = max(assembly[i + 2], max_mem_ofst) @@ -1221,18 +1226,18 @@ def assembly_to_evm( o += bytes(offset.to_bytes(2, 'big', signed=True)) to_skip = 1 elif assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK": - bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE)) + bytecode, _, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE)) o += bytecode elif is_mem_sym(item): - bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) + bytecode, _, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) o += bytecode elif is_ofst(item): # _OFST _sym_foo 32 ofst = symbol_map[assembly[i + 1]] + assembly[i + 2] n = mem_ofst_size if is_mem_sym(assembly[i + 1]) else CODE_OFST_SIZE - bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) + bytecode, _, _ = assembly_to_evm(PUSH_N(ofst, n)) o += bytecode to_skip = 2 @@ -1252,10 +1257,16 @@ def assembly_to_evm( # Should never reach because, assembly is create in _compile_to_assembly. raise Exception("Weird symbol in assembly: " + str(item)) # pragma: no cover - if not EOFv1_ENABLED: + if EOFv1_ENABLED: + last_offset = 0 + if len(function_breaks) > 0: # hack: distinguises runtime from deploy + for _, offset in enumerate(function_breaks): + function_sizes.append(offset - last_offset) + last_offset = offset + function_sizes.append(pc - last_offset) + else: o += bytecode_suffix - line_number_map["function_breaks"] = function_breaks line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) - return o, line_number_map + return o, line_number_map, function_sizes From 951d8cc003038fe37058b2631d51af233d5d4984 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 3 Jan 2023 19:48:30 +0200 Subject: [PATCH 47/82] update type section --- vyper/ir/compile_ir.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 1dd81610b9..cfc0133237 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -998,7 +998,11 @@ def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: header += bytes([eof.VERSION]) header += bytes([eof.S_TYPE]) - header += (code_sections_len * 4).to_bytes(2, "big") + # Type section + for _ in range(code_sections_len): + header += bytes([0x0]) # inputs + header += bytes([0x0]) # outputs + header += (1024).to_bytes(2, "big") # max stack header += bytes([eof.S_CODE]) header += code_sections_len.to_bytes(2, "big") @@ -1011,11 +1015,6 @@ def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: header += bytes([0x0]) # Terminator - # Type section - header += bytes([0x0]) # inputs - header += bytes([0x0]) # outputs - header += (1024).to_bytes(2, "big") # max stack - return header + bytecode def adjust_pc_maps(pc_maps, ofst): From aa0f24b2562e628a49c9bd57cedc3c588f2c1bef Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 3 Jan 2023 19:48:50 +0200 Subject: [PATCH 48/82] disable code validation for now --- vyper/evm/eof.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 3a84c0d6ac..c0764767a3 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -124,7 +124,7 @@ def _verify_header(self) -> bool: self.code_sections[i].offset = pos pos += section_size - self.validate_code_section(i) + # self.validate_code_section(i) # Read DATA sections for section_size in section_sizes[S_DATA]: From 92964fb18f7c02e70792dd338f56d24ff764a6f6 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 3 Jan 2023 21:56:44 +0200 Subject: [PATCH 49/82] emit types --- vyper/ir/compile_ir.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index cfc0133237..b5115a66e8 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -998,11 +998,7 @@ def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: header += bytes([eof.VERSION]) header += bytes([eof.S_TYPE]) - # Type section - for _ in range(code_sections_len): - header += bytes([0x0]) # inputs - header += bytes([0x0]) # outputs - header += (1024).to_bytes(2, "big") # max stack + header += (code_sections_len * 4).to_bytes(2, "big") header += bytes([eof.S_CODE]) header += code_sections_len.to_bytes(2, "big") @@ -1015,6 +1011,12 @@ def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: header += bytes([0x0]) # Terminator + # Type section + for _ in range(code_sections_len): + header += bytes([0x0]) # inputs + header += bytes([0x0]) # outputs + header += (1024).to_bytes(2, "big") # max stack + return header + bytecode def adjust_pc_maps(pc_maps, ofst): From e04ac075304347fe8e328c1d3e2cc1b4bd14c6a6 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 08:33:21 +0200 Subject: [PATCH 50/82] wip --- vyper/ir/compile_ir.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index b5115a66e8..98eba6d9b2 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -3,7 +3,7 @@ import math from vyper.codegen.ir_node import IRnode -from vyper.evm.opcodes import get_opcodes, get_opcode, version_check +from vyper.evm.opcodes import get_opcodes, get_opcode, version_check, immediate_size from vyper.evm import eof from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions @@ -1170,7 +1170,8 @@ def assembly_to_evm( else: pc += 1 - pc += len(bytecode_suffix) + if not EOFv1_ENABLED: + pc += len(bytecode_suffix) symbol_map["_sym_code_end"] = pc symbol_map["_mem_deploy_start"] = runtime_code_start From 8e293f0419deec4baf3fb8d57f3d6b1eb85b5638 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 08:36:15 +0200 Subject: [PATCH 51/82] remove decorateEOFHeader --- vyper/compiler/phases.py | 2 +- vyper/ir/compile_ir.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index f5c7a072fc..96f5189517 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -350,4 +350,4 @@ def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadat assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata ) - return compile_ir.decorateWithEOFHeader(bytecode, function_breaks) \ No newline at end of file + return bytecode # compile_ir.decorateWithEOFHeader(bytecode, function_breaks) \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 98eba6d9b2..85e7751223 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -991,7 +991,7 @@ def _optimize_assembly(assembly): raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: notest -def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: +def generateEOFHeader(function_sizes) -> bytes: code_sections_len = len(function_sizes) header = b"" header += eof.MAGIC # EOFv1 signature @@ -1017,7 +1017,7 @@ def decorateWithEOFHeader(bytecode: bytes, function_sizes) -> bytes: header += bytes([0x0]) # outputs header += (1024).to_bytes(2, "big") # max stack - return header + bytecode + return header def adjust_pc_maps(pc_maps, ofst): assert ofst >= 0 From 69e10dae56ef1f725153d764df9b755e975bc610 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 09:26:06 +0200 Subject: [PATCH 52/82] append headers at bytecode start for deploy and runtime --- vyper/compiler/phases.py | 4 ++-- vyper/ir/compile_ir.py | 37 ++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 96f5189517..a518a9f831 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -342,12 +342,12 @@ def generate_bytecode( Final compiled bytecode. """ return compile_ir.assembly_to_evm( - assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata + assembly, emit_headers=is_runtime, disable_bytecode_metadata=no_bytecode_metadata )[0] def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadata: bool = False) -> bytes: bytecode, _, function_breaks = compile_ir.assembly_to_evm( - assembly, insert_vyper_signature=is_runtime, disable_bytecode_metadata=no_bytecode_metadata + assembly, emit_headers=is_runtime, disable_bytecode_metadata=no_bytecode_metadata ) return bytecode # compile_ir.decorateWithEOFHeader(bytecode, function_breaks) \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 85e7751223..11fa436127 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1034,7 +1034,7 @@ def adjust_pc_maps(pc_maps, ofst): def assembly_to_evm( - assembly, pc_ofst=0, insert_vyper_signature=False, disable_bytecode_metadata=False + assembly, pc_ofst=0, emit_headers=False, disable_bytecode_metadata=False ): """ Assembles assembly into EVM @@ -1042,8 +1042,7 @@ def assembly_to_evm( assembly: list of asm instructions pc_ofst: when constructing the source map, the amount to offset all pcs by (no effect until we add deploy code source map) - insert_vyper_signature: whether to append vyper metadata to output - (should be true for runtime code) + emit_headers: whether to generate EOFv1 headers. In legacy mode it will generate vyper version suffix """ line_number_map = { "breakpoints": set(), @@ -1063,7 +1062,7 @@ def assembly_to_evm( runtime_code, runtime_code_start, runtime_code_end = None, None, None bytecode_suffix = b"" - if (not disable_bytecode_metadata) and insert_vyper_signature: + if (not disable_bytecode_metadata) and emit_headers and (not EOFv1_ENABLED): # CBOR encoded: {"vyper": [major,minor,patch]} bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple)) bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big") @@ -1082,7 +1081,7 @@ def assembly_to_evm( assert runtime_code is None, "Multiple subcodes" runtime_code, runtime_map, runtime_function_sizes = assembly_to_evm( item, - insert_vyper_signature=True, + emit_headers=True, disable_bytecode_metadata=disable_bytecode_metadata, ) @@ -1095,7 +1094,7 @@ def assembly_to_evm( ) assert runtime_code_end - runtime_code_start == len(runtime_code) - function_sizes = runtime_function_sizes + # function_sizes = runtime_function_sizes if is_ofst(item) and is_mem_sym(assembly[i + 1]): max_mem_ofst = max(assembly[i + 2], max_mem_ofst) @@ -1187,7 +1186,12 @@ def assembly_to_evm( # TODO refactor into two functions, create posmap and assemble - o = b"" + o = b"" + + if EOFv1_ENABLED and emit_headers: + # generate header with placeholder function sizes + header = generateEOFHeader([0] * (len(function_breaks) + 1)) + o += header # now that all symbols have been resolved, generate bytecode # using the symbol map @@ -1259,15 +1263,18 @@ def assembly_to_evm( # Should never reach because, assembly is create in _compile_to_assembly. raise Exception("Weird symbol in assembly: " + str(item)) # pragma: no cover - if EOFv1_ENABLED: + if EOFv1_ENABLED and emit_headers: last_offset = 0 - if len(function_breaks) > 0: # hack: distinguises runtime from deploy - for _, offset in enumerate(function_breaks): - function_sizes.append(offset - last_offset) - last_offset = offset - function_sizes.append(pc - last_offset) - else: - o += bytecode_suffix + for _, offset in enumerate(function_breaks): + function_sizes.append(offset - last_offset) + last_offset = offset + function_sizes.append(pc - last_offset) + + # Generate the final header and replace the placeholder + header = generateEOFHeader(function_sizes) + o = header + o[len(header):] + + o += bytecode_suffix line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) From 0fd53d13a3fa04b78109669252fb8c22d49180bd Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 09:32:10 +0200 Subject: [PATCH 53/82] proper size for deploy code --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 11fa436127..e4a89a37c2 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1268,7 +1268,7 @@ def assembly_to_evm( for _, offset in enumerate(function_breaks): function_sizes.append(offset - last_offset) last_offset = offset - function_sizes.append(pc - last_offset) + function_sizes.append(symbol_map.get("_sym_runtime_begin2", pc) - last_offset) # Generate the final header and replace the placeholder header = generateEOFHeader(function_sizes) From 9abb708518eb076ce05adfd18c83a8e889b0810c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 09:43:25 +0200 Subject: [PATCH 54/82] handle consecutive eofv1 containers --- vyper/compiler/output.py | 2 ++ vyper/evm/eof.py | 5 ++++- vyper/ir/compile_ir.py | 8 +------- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index d5bbe47e76..194d858462 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -305,6 +305,8 @@ def _build_legacy_opcodes(bytecode: bytes) -> str: def _build_eof_opcodes(bytecode: bytes) -> str: eofReader = eof.EOFReader(bytecode) + if (eofReader.bytecode_size != len(bytecode)): + runtimeEofReader = eof.EOFReader(bytecode[eofReader.bytecode_size:]) return "" def _build_opcodes(bytecode: bytes) -> str: diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index c0764767a3..be6437443d 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -24,6 +24,7 @@ class EOFReader: def __init__(self, bytecode: bytes): self.bytecode = bytecode + self.bytecode_size = 0 self.code_sections = [] self.data_sections = [] self._verify_header() @@ -134,13 +135,15 @@ def _verify_header(self) -> bool: self.data_sections.append(code[pos:pos + section_size]) pos += section_size - if (pos) != len(code): + # Check if we have a second EOF header attached (the runtime container) + if (pos) != len(code) and (self.bytecode[pos:pos+2] != MAGIC or self.bytecode[pos+2] != VERSION): raise ValidationException("Bad file size") # First code section should have zero inputs and outputs if self.code_sections[0].inputs != 0 or self.code_sections[0].outputs != 0: raise ValidationException("invalid input/output count for code section 0") + self.bytecode_size = pos # Raises ValidationException on invalid code def validate_code_section(self, func_id: int): diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e4a89a37c2..5a8ce67c92 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1079,7 +1079,7 @@ def assembly_to_evm( for i, item in enumerate(assembly): if isinstance(item, list): assert runtime_code is None, "Multiple subcodes" - runtime_code, runtime_map, runtime_function_sizes = assembly_to_evm( + runtime_code, runtime_map, _ = assembly_to_evm( item, emit_headers=True, disable_bytecode_metadata=disable_bytecode_metadata, @@ -1093,8 +1093,6 @@ def assembly_to_evm( ctor_mem_size, len(runtime_code) ) assert runtime_code_end - runtime_code_start == len(runtime_code) - - # function_sizes = runtime_function_sizes if is_ofst(item) and is_mem_sym(assembly[i + 1]): max_mem_ofst = max(assembly[i + 2], max_mem_ofst) @@ -1203,10 +1201,6 @@ def assembly_to_evm( to_skip -= 1 continue - # if len(function_breaks) > current_function and instr_offsets[i] > function_breaks[current_function]: - # current_function_offset = function_breaks[current_function] - # current_function += 1 - if item in ("DEBUG", "BLANK"): continue # skippable opcodes # When EOFv1 enabled skip emiting JUMPDESTs From d3ce4a60392952b8443d0c186a637a90194da29c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 10:17:42 +0200 Subject: [PATCH 55/82] eofv1 disassemble debuging support --- vyper/compiler/output.py | 1 + vyper/evm/eof.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 194d858462..d5af38bc41 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -307,6 +307,7 @@ def _build_eof_opcodes(bytecode: bytes) -> str: eofReader = eof.EOFReader(bytecode) if (eofReader.bytecode_size != len(bytecode)): runtimeEofReader = eof.EOFReader(bytecode[eofReader.bytecode_size:]) + print(runtimeEofReader.disassemble()) return "" def _build_opcodes(bytecode: bytes) -> str: diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index be6437443d..4fe252823c 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -1,3 +1,4 @@ +from collections import deque from vyper.exceptions import VyperInternalException from vyper.evm.opcodes import TERMINATING_OPCODES, VALID_OPCODES, immediate_size, get_mnemonic @@ -19,6 +20,22 @@ def __init__(self, inputs, outputs, max_stack_height) -> None: self.outputs = outputs self.max_stack_height = max_stack_height + def disassemble(self): + output = f"Code segment offset: {self.offset} inputs: {self.inputs} outputs: {self.outputs} max stack height: {self.max_stack_height}\n" + code = deque(self.code) + while code: + pc = len(self.code) - len(code) + op = code.popleft() + mnemonic = get_mnemonic(op) + immediates_len = immediate_size(mnemonic) + immediates = "0x" + "".join([f"{code.popleft():02x}" for _ in range(immediates_len)]) + output += f"{pc:04x}: {mnemonic}" + if immediates_len > 0: + output += f" {immediates}" + output += "\n" + + return output + "\n" + class EOFReader: bytecode: bytes @@ -197,3 +214,10 @@ def validate_code_section(self, func_id: int): # Ensure relative jump destinations don't target immediates if not rjumpdests.isdisjoint(immediates): raise ValidationException("relative jump destination targets immediate") + + def disassemble(self): + output = "" + for code in self.code_sections: + output += code.disassemble() + + return output From 282a88fa91e3d90b8ecde0d09ae806de5d85583c Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 10:25:48 +0200 Subject: [PATCH 56/82] temp --- vyper/compiler/phases.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index a518a9f831..02a1e3901a 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -148,7 +148,7 @@ def assembly_runtime(self) -> list: def bytecode(self) -> bytes: if version_check("shanghai"): return generate_EOFv1( - self.assembly, is_runtime=False, no_bytecode_metadata=self.no_bytecode_metadata + self.assembly, is_runtime=True, no_bytecode_metadata=self.no_bytecode_metadata ) else: return generate_bytecode( @@ -350,4 +350,4 @@ def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadat assembly, emit_headers=is_runtime, disable_bytecode_metadata=no_bytecode_metadata ) - return bytecode # compile_ir.decorateWithEOFHeader(bytecode, function_breaks) \ No newline at end of file + return bytecode \ No newline at end of file From 3feaba29e9453d76c5820f3b2749467fcefb7a17 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 10:35:01 +0200 Subject: [PATCH 57/82] print separators --- vyper/compiler/output.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index d5af38bc41..1c1977f14b 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -305,8 +305,11 @@ def _build_legacy_opcodes(bytecode: bytes) -> str: def _build_eof_opcodes(bytecode: bytes) -> str: eofReader = eof.EOFReader(bytecode) + print("----DEPLOY----") + print(eofReader.disassemble()) if (eofReader.bytecode_size != len(bytecode)): runtimeEofReader = eof.EOFReader(bytecode[eofReader.bytecode_size:]) + print("----RUNTIME----") print(runtimeEofReader.disassemble()) return "" From 09e72379e2708eafbeedd066343297b03013a461 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 10:37:24 +0200 Subject: [PATCH 58/82] remove unused return value --- vyper/compiler/phases.py | 10 +++++----- vyper/ir/compile_ir.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 02a1e3901a..41032491d3 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -148,7 +148,7 @@ def assembly_runtime(self) -> list: def bytecode(self) -> bytes: if version_check("shanghai"): return generate_EOFv1( - self.assembly, is_runtime=True, no_bytecode_metadata=self.no_bytecode_metadata + self.assembly, no_bytecode_metadata=self.no_bytecode_metadata ) else: return generate_bytecode( @@ -159,7 +159,7 @@ def bytecode(self) -> bytes: def bytecode_runtime(self) -> bytes: if version_check("shanghai"): return generate_EOFv1( - self.assembly, is_runtime=True, no_bytecode_metadata=self.no_bytecode_metadata + self.assembly_runtime, no_bytecode_metadata=self.no_bytecode_metadata ) else: return generate_bytecode( @@ -345,9 +345,9 @@ def generate_bytecode( assembly, emit_headers=is_runtime, disable_bytecode_metadata=no_bytecode_metadata )[0] -def generate_EOFv1(assembly: list, is_runtime: bool = False, no_bytecode_metadata: bool = False) -> bytes: - bytecode, _, function_breaks = compile_ir.assembly_to_evm( - assembly, emit_headers=is_runtime, disable_bytecode_metadata=no_bytecode_metadata +def generate_EOFv1(assembly: list, no_bytecode_metadata: bool = False) -> bytes: + bytecode, _ = compile_ir.assembly_to_evm( + assembly, emit_headers=True, disable_bytecode_metadata=no_bytecode_metadata ) return bytecode \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 5a8ce67c92..e68364fc7c 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1079,7 +1079,7 @@ def assembly_to_evm( for i, item in enumerate(assembly): if isinstance(item, list): assert runtime_code is None, "Multiple subcodes" - runtime_code, runtime_map, _ = assembly_to_evm( + runtime_code, runtime_map = assembly_to_evm( item, emit_headers=True, disable_bytecode_metadata=disable_bytecode_metadata, @@ -1226,18 +1226,18 @@ def assembly_to_evm( o += bytes(offset.to_bytes(2, 'big', signed=True)) to_skip = 1 elif assembly[i + 1] != "JUMPDEST" and assembly[i + 1] != "BLANK": - bytecode, _, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE)) + bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=CODE_OFST_SIZE)) o += bytecode elif is_mem_sym(item): - bytecode, _, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) + bytecode, _ = assembly_to_evm(PUSH_N(symbol_map[item], n=mem_ofst_size)) o += bytecode elif is_ofst(item): # _OFST _sym_foo 32 ofst = symbol_map[assembly[i + 1]] + assembly[i + 2] n = mem_ofst_size if is_mem_sym(assembly[i + 1]) else CODE_OFST_SIZE - bytecode, _, _ = assembly_to_evm(PUSH_N(ofst, n)) + bytecode, _ = assembly_to_evm(PUSH_N(ofst, n)) o += bytecode to_skip = 2 @@ -1272,4 +1272,4 @@ def assembly_to_evm( line_number_map["breakpoints"] = list(line_number_map["breakpoints"]) line_number_map["pc_breakpoints"] = list(line_number_map["pc_breakpoints"]) - return o, line_number_map, function_sizes + return o, line_number_map From 75660a9fb9163c082090b88e2c32e5889aecb1e7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 10:49:14 +0200 Subject: [PATCH 59/82] output update --- vyper/evm/eof.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 4fe252823c..01fd82241d 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -13,7 +13,8 @@ class ValidationException(VyperInternalException): """Validation exception.""" class FunctionType: - def __init__(self, inputs, outputs, max_stack_height) -> None: + def __init__(self, function_id, inputs, outputs, max_stack_height) -> None: + self.function_id = function_id self.offset = 0 self.code = bytes() self.inputs = inputs @@ -21,7 +22,7 @@ def __init__(self, inputs, outputs, max_stack_height) -> None: self.max_stack_height = max_stack_height def disassemble(self): - output = f"Code segment offset: {self.offset} inputs: {self.inputs} outputs: {self.outputs} max stack height: {self.max_stack_height}\n" + output = f"Func {self.function_id}:\nCode segment offset:{self.offset} inputs:{self.inputs} outputs:{self.outputs} max stack height:{self.max_stack_height}\n" code = deque(self.code) while code: pc = len(self.code) - len(code) @@ -129,7 +130,7 @@ def _verify_header(self) -> bool: input_count = code[pos] output_count = code[pos + 1] max_stack_height = (code[pos + 2] << 8) | code[pos + 3] - type = FunctionType(input_count, output_count, max_stack_height) + type = FunctionType(i, input_count, output_count, max_stack_height) self.code_sections.append(type) pos += 4 From f5c34c1e48a1322089597d78b44fdbd9e234e896 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 4 Jan 2023 12:57:37 +0200 Subject: [PATCH 60/82] fix return opcodes --- vyper/compiler/output.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 1c1977f14b..95f8a626f3 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -315,6 +315,6 @@ def _build_eof_opcodes(bytecode: bytes) -> str: def _build_opcodes(bytecode: bytes) -> str: if version_check("shanghai"): - _build_eof_opcodes(bytecode) + return _build_eof_opcodes(bytecode) else: - _build_legacy_opcodes(bytecode) + return _build_legacy_opcodes(bytecode) From 8f133ddefc28c843e39fa0060b9fe3e0b2bd5dc6 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 5 Jan 2023 08:12:56 +0200 Subject: [PATCH 61/82] convert revert jump to function call --- vyper/ir/compile_ir.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e68364fc7c..c8d9df9eae 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -152,7 +152,10 @@ def _assert_false(): # use a shared failure block for common case of assert(x). # in the future we might want to change the code # at _sym_revert0 to: INVALID - return [_revert_label, JUMPI()] + if EOFv1_ENABLED: + return [_revert_label, "CALLF"] + else: + return [_revert_label, JUMPI()] def _add_postambles(asm_ops): @@ -1259,7 +1262,7 @@ def assembly_to_evm( if EOFv1_ENABLED and emit_headers: last_offset = 0 - for _, offset in enumerate(function_breaks): + for offset in sorted(function_breaks.keys()): function_sizes.append(offset - last_offset) last_offset = offset function_sizes.append(symbol_map.get("_sym_runtime_begin2", pc) - last_offset) From 1af9d010acc100fd8bea4406b505f59830ac9170 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 5 Jan 2023 08:13:06 +0200 Subject: [PATCH 62/82] enable code validation --- vyper/evm/eof.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 01fd82241d..63ebd77153 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -143,7 +143,7 @@ def _verify_header(self) -> bool: self.code_sections[i].offset = pos pos += section_size - # self.validate_code_section(i) + self.validate_code_section(i) # Read DATA sections for section_size in section_sizes[S_DATA]: From fb431f6d169a9b8a9eedfc4da6d52bc22433ee7d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 5 Jan 2023 08:25:48 +0200 Subject: [PATCH 63/82] add ending new lines --- vyper/cli/validate_eof.py | 2 +- vyper/compiler/phases.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/cli/validate_eof.py b/vyper/cli/validate_eof.py index 4a64bcc235..e4959351c6 100755 --- a/vyper/cli/validate_eof.py +++ b/vyper/cli/validate_eof.py @@ -23,4 +23,4 @@ def _parse_args(argv): EOFReader(code) if __name__ == "__main__": - _parse_args(sys.argv[1:]) \ No newline at end of file + _parse_args(sys.argv[1:]) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 41032491d3..4455b055de 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -350,4 +350,4 @@ def generate_EOFv1(assembly: list, no_bytecode_metadata: bool = False) -> bytes: assembly, emit_headers=True, disable_bytecode_metadata=no_bytecode_metadata ) - return bytecode \ No newline at end of file + return bytecode From b0050fd81c0653dae342884325e8cccc825de91e Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 9 Jan 2023 15:15:35 +0200 Subject: [PATCH 64/82] runnable --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index c8d9df9eae..3dc6cc6312 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1018,7 +1018,7 @@ def generateEOFHeader(function_sizes) -> bytes: for _ in range(code_sections_len): header += bytes([0x0]) # inputs header += bytes([0x0]) # outputs - header += (1024).to_bytes(2, "big") # max stack + header += (64).to_bytes(2, "big") # max stack return header From e9b11c7847958796ef413ca3667dc501a72d3822 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 9 Jan 2023 15:46:45 +0200 Subject: [PATCH 65/82] properly order functions breaks --- vyper/ir/compile_ir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 3dc6cc6312..c15a5b89fb 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1179,7 +1179,8 @@ def assembly_to_evm( if runtime_code is not None: symbol_map["_sym_subcode_size"] = len(runtime_code) - function_breaks = {symbol_map[offset_symbol]:i+1 for i,offset_symbol in enumerate(call_offsets.keys())} + breaks = sorted([symbol_map[offset_symbol] for offset_symbol in call_offsets.keys()]) + function_breaks = {br:i+1 for i,br in enumerate(breaks)} # (NOTE CMC 2022-06-17 this way of generating bytecode did not # seem to be a perf hotspot. if it is, may want to use bytearray() From af1f1f67f0f6fff7a5b52dbd9ecee6c36d7b1e67 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 9 Jan 2023 18:00:50 +0200 Subject: [PATCH 66/82] fix revert call --- vyper/ir/compile_ir.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index c15a5b89fb..8f8d5bc445 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -153,7 +153,8 @@ def _assert_false(): # in the future we might want to change the code # at _sym_revert0 to: INVALID if EOFv1_ENABLED: - return [_revert_label, "CALLF"] + _no_revert_symbol = mksymbol("no_revert") + return ["ISZERO", _no_revert_symbol, "RJUMPI", _revert_label, "CALLF", _no_revert_symbol, "JUMPDEST"] else: return [_revert_label, JUMPI()] From 1817f42b75d11c34fc4265c663e7a5c5e94f8635 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Mon, 9 Jan 2023 23:45:03 +0200 Subject: [PATCH 67/82] remove return pc from stack --- vyper/codegen/self_call.py | 11 +++++++++-- vyper/ir/compile_ir.py | 6 ++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/vyper/codegen/self_call.py b/vyper/codegen/self_call.py index 54a39e6488..3c264b7de4 100644 --- a/vyper/codegen/self_call.py +++ b/vyper/codegen/self_call.py @@ -3,6 +3,9 @@ from vyper.codegen.ir_node import IRnode, push_label_to_stack from vyper.codegen.types import TupleType from vyper.exceptions import StateAccessViolation, StructureException +from vyper.evm.opcodes import version_check + +EOFv1_ENABLED = version_check("shanghai") _label_counter = 0 @@ -89,11 +92,15 @@ def ir_for_self_call(stmt_expr, context): if return_buffer is not None: goto_op += [return_buffer] # pass return label to subroutine - goto_op += [push_label_to_stack(return_label)] + if not EOFv1_ENABLED: + goto_op += [push_label_to_stack(return_label)] call_sequence = ["seq"] call_sequence.append(eval_once_check(_freshname(stmt_expr.node_source_code))) - call_sequence.extend([copy_args, goto_op, ["label", return_label, ["var_list"], "pass"]]) + if EOFv1_ENABLED: + call_sequence.extend([copy_args, goto_op]) + else: + call_sequence.extend([copy_args, goto_op, ["label", return_label, ["var_list"], "pass"]]) if return_buffer is not None: # push return buffer location to stack call_sequence += [return_buffer] diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 8f8d5bc445..e3ad429f45 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -3,7 +3,7 @@ import math from vyper.codegen.ir_node import IRnode -from vyper.evm.opcodes import get_opcodes, get_opcode, version_check, immediate_size +from vyper.evm.opcodes import get_opcodes, get_opcode, version_check from vyper.evm import eof from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions @@ -699,9 +699,11 @@ def _height_of(witharg): raise CodegenPanic("exit_to not implemented on non EOFv1") o = [] - args = code.args[1:] + args = code.args[1:] for i, c in enumerate(reversed(args)): + if c.value == "return_pc": + continue o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) if str(code.args[0]) == "return_pc": From fb2d62ddf43412548064d367a89afa8f771284a0 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 09:42:55 +0200 Subject: [PATCH 68/82] pop arg buffer address on retf --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index e3ad429f45..8881987bdb 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -707,7 +707,7 @@ def _height_of(witharg): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) if str(code.args[0]) == "return_pc": - o.extend(["RETF"]) + o.extend(["POP", "RETF"]) else: o.extend([str(code.args[0]), "RJUMP"]) From 2f77eac83ef4fec6bb71fcdcc06dfb513c8bcdef Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 09:53:29 +0200 Subject: [PATCH 69/82] outout 1 input for all functions exept main --- vyper/ir/compile_ir.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 8881987bdb..08df1b2227 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1018,8 +1018,11 @@ def generateEOFHeader(function_sizes) -> bytes: header += bytes([0x0]) # Terminator # Type section - for _ in range(code_sections_len): - header += bytes([0x0]) # inputs + for i in range(code_sections_len): + if i == 0: + header += bytes([0x0]) # inputs + else: + header += bytes([0x1]) # inputs header += bytes([0x0]) # outputs header += (64).to_bytes(2, "big") # max stack From 4332d3f72cc53218112e2f06970a91db103f451d Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 10:39:40 +0200 Subject: [PATCH 70/82] refactoring, add eof enable parameter, proper output --- vyper/cli/vyper_compile.py | 4 ++++ vyper/codegen/self_call.py | 9 +++------ vyper/compiler/__init__.py | 13 ++++++++++++- vyper/compiler/output.py | 13 ++++++------- vyper/compiler/phases.py | 8 +++++--- vyper/evm/opcodes.py | 8 ++++++++ vyper/ir/compile_ir.py | 34 ++++++++++++++++------------------ 7 files changed, 54 insertions(+), 35 deletions(-) diff --git a/vyper/cli/vyper_compile.py b/vyper/cli/vyper_compile.py index 40f87d860a..1f3b6ecd13 100755 --- a/vyper/cli/vyper_compile.py +++ b/vyper/cli/vyper_compile.py @@ -131,6 +131,7 @@ def _parse_args(argv): "-p", help="Set the root path for contract imports", default=".", dest="root_folder" ) parser.add_argument("-o", help="Set the output path", dest="output_path") + parser.add_argument("--experimental-eof", help="The compiler will emit EOFv1 formated bytecode", action="store_true") args = parser.parse_args(argv) @@ -160,6 +161,7 @@ def _parse_args(argv): args.no_optimize, args.storage_layout, args.no_bytecode_metadata, + args.experimental_eof, ) if args.output_path: @@ -256,6 +258,7 @@ def compile_files( no_optimize: bool = False, storage_layout: Iterable[str] = None, no_bytecode_metadata: bool = False, + experimental_eof: bool = False, ) -> OrderedDict: root_path = Path(root_folder).resolve() @@ -301,6 +304,7 @@ def compile_files( storage_layouts=storage_layouts, show_gas_estimates=show_gas_estimates, no_bytecode_metadata=no_bytecode_metadata, + experimental_eof=experimental_eof, ) if show_version: compiler_data["version"] = vyper.__version__ diff --git a/vyper/codegen/self_call.py b/vyper/codegen/self_call.py index 3c264b7de4..a28be80c12 100644 --- a/vyper/codegen/self_call.py +++ b/vyper/codegen/self_call.py @@ -3,13 +3,10 @@ from vyper.codegen.ir_node import IRnode, push_label_to_stack from vyper.codegen.types import TupleType from vyper.exceptions import StateAccessViolation, StructureException -from vyper.evm.opcodes import version_check - -EOFv1_ENABLED = version_check("shanghai") +from vyper.evm.opcodes import is_eof_enabled _label_counter = 0 - # TODO a more general way of doing this def _generate_label(name: str) -> str: global _label_counter @@ -92,12 +89,12 @@ def ir_for_self_call(stmt_expr, context): if return_buffer is not None: goto_op += [return_buffer] # pass return label to subroutine - if not EOFv1_ENABLED: + if not is_eof_enabled(): goto_op += [push_label_to_stack(return_label)] call_sequence = ["seq"] call_sequence.append(eval_once_check(_freshname(stmt_expr.node_source_code))) - if EOFv1_ENABLED: + if is_eof_enabled(): call_sequence.extend([copy_args, goto_op]) else: call_sequence.extend([copy_args, goto_op, ["label", return_label, ["var_list"], "pass"]]) diff --git a/vyper/compiler/__init__.py b/vyper/compiler/__init__.py index 7f2b9bc68d..e3687c862a 100644 --- a/vyper/compiler/__init__.py +++ b/vyper/compiler/__init__.py @@ -5,7 +5,7 @@ import vyper.codegen.core as codegen import vyper.compiler.output as output from vyper.compiler.phases import CompilerData -from vyper.evm.opcodes import DEFAULT_EVM_VERSION, evm_wrapper +from vyper.evm.opcodes import DEFAULT_EVM_VERSION, evm_wrapper, version_check, set_eof_enabled from vyper.typing import ( ContractCodes, ContractPath, @@ -56,6 +56,7 @@ def compile_codes( storage_layouts: Dict[ContractPath, StorageLayout] = None, show_gas_estimates: bool = False, no_bytecode_metadata: bool = False, + experimental_eof: bool = False, ) -> OrderedDict: """ Generate compiler output(s) from one or more contract source codes. @@ -91,6 +92,8 @@ def compile_codes( * JSON interfaces are given as lists, vyper interfaces as strings no_bytecode_metadata: bool, optional Do not add metadata to bytecode. Defaults to False + experimental_eof: bool, optional + Enables the experimental support for EOFv1. Defaults to False Returns ------- @@ -98,6 +101,11 @@ def compile_codes( Compiler output as `{'contract name': {'output key': "output data"}}` """ + if experimental_eof and not version_check('shanghai'): + raise ValueError(f"Enabling EOFv1 requires evm version of shanghai or greater") + + set_eof_enabled(experimental_eof) + if output_formats is None: output_formats = ("bytecode",) if isinstance(output_formats, Sequence): @@ -129,6 +137,7 @@ def compile_codes( storage_layout_override, show_gas_estimates, no_bytecode_metadata, + experimental_eof, ) for output_format in output_formats[contract_name]: if output_format not in OUTPUT_FORMATS: @@ -156,6 +165,7 @@ def compile_code( no_optimize: bool = False, storage_layout_override: StorageLayout = None, show_gas_estimates: bool = False, + experimental_eof: bool = False, ) -> dict: """ Generate compiler output(s) from a single contract source code. @@ -197,4 +207,5 @@ def compile_code( no_optimize=no_optimize, storage_layouts=storage_layouts, show_gas_estimates=show_gas_estimates, + experimental_eof=experimental_eof, )[UNKNOWN_CONTRACT_NAME] diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 95f8a626f3..358fe14c72 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -9,7 +9,7 @@ from vyper.compiler.phases import CompilerData from vyper.compiler.utils import build_gas_estimates from vyper.evm import opcodes -from vyper.evm.opcodes import version_check +from vyper.evm.opcodes import is_eof_enabled from vyper.evm import eof from vyper.ir import compile_ir from vyper.semantics.types.function import FunctionVisibility, StateMutability @@ -305,16 +305,15 @@ def _build_legacy_opcodes(bytecode: bytes) -> str: def _build_eof_opcodes(bytecode: bytes) -> str: eofReader = eof.EOFReader(bytecode) - print("----DEPLOY----") - print(eofReader.disassemble()) + output = eofReader.disassemble() if (eofReader.bytecode_size != len(bytecode)): runtimeEofReader = eof.EOFReader(bytecode[eofReader.bytecode_size:]) - print("----RUNTIME----") - print(runtimeEofReader.disassemble()) - return "" + output = "--- DEPLOY CODE ---\n" + output + output += "--- RUNTIME CODE ---\n" + runtimeEofReader.disassemble() + return output def _build_opcodes(bytecode: bytes) -> str: - if version_check("shanghai"): + if is_eof_enabled(): return _build_eof_opcodes(bytecode) else: return _build_legacy_opcodes(bytecode) diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index 4455b055de..5b7dfe9097 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -53,6 +53,7 @@ def __init__( storage_layout: StorageLayout = None, show_gas_estimates: bool = False, no_bytecode_metadata: bool = False, + experimental_eof: bool = False, ) -> None: """ Initialization method. @@ -84,6 +85,7 @@ def __init__( self.storage_layout_override = storage_layout self.show_gas_estimates = show_gas_estimates self.no_bytecode_metadata = no_bytecode_metadata + self.experimental_eof = experimental_eof @cached_property def vyper_module(self) -> vy_ast.Module: @@ -146,7 +148,7 @@ def assembly_runtime(self) -> list: @cached_property def bytecode(self) -> bytes: - if version_check("shanghai"): + if self.experimental_eof: return generate_EOFv1( self.assembly, no_bytecode_metadata=self.no_bytecode_metadata ) @@ -157,7 +159,7 @@ def bytecode(self) -> bytes: @cached_property def bytecode_runtime(self) -> bytes: - if version_check("shanghai"): + if self.experimental_eof: return generate_EOFv1( self.assembly_runtime, no_bytecode_metadata=self.no_bytecode_metadata ) @@ -347,7 +349,7 @@ def generate_bytecode( def generate_EOFv1(assembly: list, no_bytecode_metadata: bool = False) -> bytes: bytecode, _ = compile_ir.assembly_to_evm( - assembly, emit_headers=True, disable_bytecode_metadata=no_bytecode_metadata + assembly, emit_headers=True, disable_bytecode_metadata=no_bytecode_metadata, eof_enabled=True ) return bytecode diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 26de9694f7..0194c39c96 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -33,6 +33,7 @@ } DEFAULT_EVM_VERSION: str = "shanghai" +_eof_enabled = False # opcode as hex value # number of values removed from stack @@ -299,3 +300,10 @@ def version_check(begin: Optional[str] = None, end: Optional[str] = None) -> boo begin_idx = EVM_VERSIONS[begin] end_idx = max(EVM_VERSIONS.values()) if end is None else EVM_VERSIONS[end] return begin_idx <= active_evm_version <= end_idx + +def set_eof_enabled(e: bool): + global _eof_enabled + _eof_enabled = e + +def is_eof_enabled() -> bool: + return _eof_enabled \ No newline at end of file diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 08df1b2227..1bc0bd70ef 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -3,7 +3,7 @@ import math from vyper.codegen.ir_node import IRnode -from vyper.evm.opcodes import get_opcodes, get_opcode, version_check +from vyper.evm.opcodes import get_opcodes, get_opcode, is_eof_enabled from vyper.evm import eof from vyper.exceptions import CodegenPanic, CompilerPanic from vyper.utils import MemoryPositions @@ -13,13 +13,11 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F -EOFv1_ENABLED = version_check("shanghai") - def JUMPI() -> str: - return "RJUMPI" if EOFv1_ENABLED else "JUMPI" + return "RJUMPI" if is_eof_enabled() else "JUMPI" def JUMP() -> str: - return "RJUMP" if EOFv1_ENABLED else "JUMP" + return "RJUMP" if is_eof_enabled() else "JUMP" def num_to_bytearray(x): o = [] @@ -112,7 +110,7 @@ def calc_mem_ofst_size(ctor_mem_size): # by better liveness analysis. # NOTE: modifies input in-place def _rewrite_return_sequences(ir_node, label_params=None): - if EOFv1_ENABLED: + if is_eof_enabled(): return args = ir_node.args @@ -152,7 +150,7 @@ def _assert_false(): # use a shared failure block for common case of assert(x). # in the future we might want to change the code # at _sym_revert0 to: INVALID - if EOFv1_ENABLED: + if is_eof_enabled(): _no_revert_symbol = mksymbol("no_revert") return ["ISZERO", _no_revert_symbol, "RJUMPI", _revert_label, "CALLF", _no_revert_symbol, "JUMPDEST"] else: @@ -683,7 +681,7 @@ def _height_of(witharg): # jump to a symbol, and push variable # of arguments onto stack elif code.value == "goto": o = [] - args = code.args[1:] # if EOFv1_ENABLED and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] + args = code.args[1:] # if is_eof_enabled() and len(code.args) >= 2 and is_symbol(code.args[1].value) else code.args[1:] for i, c in enumerate(reversed(args)): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) @@ -695,7 +693,7 @@ def _height_of(witharg): o.extend(["_sym_" + symbol, JUMP()]) return o elif code.value == "exit_to": - if not EOFv1_ENABLED: + if not is_eof_enabled(): raise CodegenPanic("exit_to not implemented on non EOFv1") o = [] @@ -764,7 +762,7 @@ def _height_of(witharg): return [] elif code.value == "exit_to": - if not EOFv1_ENABLED: + if not is_eof_enabled(): raise CodegenPanic("exit_to not implemented yet!") if code.args[0].value == "return_pc": @@ -1043,7 +1041,7 @@ def adjust_pc_maps(pc_maps, ofst): def assembly_to_evm( - assembly, pc_ofst=0, emit_headers=False, disable_bytecode_metadata=False + assembly, pc_ofst=0, emit_headers=False, disable_bytecode_metadata=False, eof_enabled=False ): """ Assembles assembly into EVM @@ -1071,7 +1069,7 @@ def assembly_to_evm( runtime_code, runtime_code_start, runtime_code_end = None, None, None bytecode_suffix = b"" - if (not disable_bytecode_metadata) and emit_headers and (not EOFv1_ENABLED): + if (not disable_bytecode_metadata) and emit_headers and (not is_eof_enabled()): # CBOR encoded: {"vyper": [major,minor,patch]} bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple)) bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big") @@ -1160,7 +1158,7 @@ def assembly_to_evm( pc -= 1 elif item == "BLANK": pc += 0 - elif item == "JUMPDEST" and EOFv1_ENABLED: + elif item == "JUMPDEST" and is_eof_enabled(): pc += 0 elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"): # _DEPLOY_MEM_OFST is assembly magic which will @@ -1176,7 +1174,7 @@ def assembly_to_evm( else: pc += 1 - if not EOFv1_ENABLED: + if not is_eof_enabled(): pc += len(bytecode_suffix) symbol_map["_sym_code_end"] = pc @@ -1196,7 +1194,7 @@ def assembly_to_evm( o = b"" - if EOFv1_ENABLED and emit_headers: + if is_eof_enabled() and emit_headers: # generate header with placeholder function sizes header = generateEOFHeader([0] * (len(function_breaks) + 1)) o += header @@ -1214,14 +1212,14 @@ def assembly_to_evm( if item in ("DEBUG", "BLANK"): continue # skippable opcodes # When EOFv1 enabled skip emiting JUMPDESTs - elif item == "JUMPDEST" and EOFv1_ENABLED: + elif item == "JUMPDEST" and is_eof_enabled(): continue elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"): continue elif is_symbol(item): - if EOFv1_ENABLED and assembly[i + 1] in ["RJUMP", "RJUMPI", "CALLF"]: + if is_eof_enabled() and assembly[i + 1] in ["RJUMP", "RJUMPI", "CALLF"]: sym = item assert is_symbol(sym), f"Internal compiler error: {assembly[i + 1]} not preceded by symbol" o += bytes([get_opcode(assembly[i + 1])]) @@ -1267,7 +1265,7 @@ def assembly_to_evm( # Should never reach because, assembly is create in _compile_to_assembly. raise Exception("Weird symbol in assembly: " + str(item)) # pragma: no cover - if EOFv1_ENABLED and emit_headers: + if is_eof_enabled() and emit_headers: last_offset = 0 for offset in sorted(function_breaks.keys()): function_sizes.append(offset - last_offset) From 1c30bc5a64422141daaa8cc8a27c545d80aeecfc Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 13:34:56 +0200 Subject: [PATCH 71/82] default to paris --- vyper/evm/opcodes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 0194c39c96..101a57ce8e 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -3,7 +3,7 @@ from vyper.exceptions import CompilerPanic from vyper.typing import OpcodeGasCost, OpcodeMap, OpcodeRulesetMap, OpcodeRulesetValue, OpcodeValue -active_evm_version: int = 5 +active_evm_version: int = 4 # EVM version rules work as follows: # 1. Fork rules go from oldest (lowest value) to newest (highest value). @@ -31,7 +31,7 @@ "atlantis": 0, "agharta": 1, } -DEFAULT_EVM_VERSION: str = "shanghai" +DEFAULT_EVM_VERSION: str = "paris" _eof_enabled = False From e46ce90e9a7fd128b1a0fbb182dc6e7f42433d81 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 13:35:08 +0200 Subject: [PATCH 72/82] add in old parameter for tests --- vyper/ir/compile_ir.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 1bc0bd70ef..405d4945bd 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -1041,7 +1041,7 @@ def adjust_pc_maps(pc_maps, ofst): def assembly_to_evm( - assembly, pc_ofst=0, emit_headers=False, disable_bytecode_metadata=False, eof_enabled=False + assembly, pc_ofst=0, insert_vyper_signature=False, emit_headers=False, disable_bytecode_metadata=False, eof_enabled=False ): """ Assembles assembly into EVM @@ -1069,7 +1069,7 @@ def assembly_to_evm( runtime_code, runtime_code_start, runtime_code_end = None, None, None bytecode_suffix = b"" - if (not disable_bytecode_metadata) and emit_headers and (not is_eof_enabled()): + if (not disable_bytecode_metadata) and insert_vyper_signature and (not is_eof_enabled()): # CBOR encoded: {"vyper": [major,minor,patch]} bytecode_suffix += b"\xa1\x65vyper\x83" + bytes(list(version_tuple)) bytecode_suffix += len(bytecode_suffix).to_bytes(2, "big") @@ -1174,7 +1174,7 @@ def assembly_to_evm( else: pc += 1 - if not is_eof_enabled(): + if not is_eof_enabled() and insert_vyper_signature: pc += len(bytecode_suffix) symbol_map["_sym_code_end"] = pc From 61202e363da98aa041148075575fb145f7e61c49 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 14:08:04 +0200 Subject: [PATCH 73/82] fix legacy case --- vyper/ir/compile_ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 405d4945bd..5361c8361e 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -687,7 +687,7 @@ def _height_of(witharg): o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i)) symbol = str(code.args[0]) - if symbol.startswith("internal"): + if symbol.startswith("internal") and is_eof_enabled(): o.extend(["_sym_" + symbol, "CALLF"]) else: o.extend(["_sym_" + symbol, JUMP()]) From b08330bbb359c8893a05f099cf473a5eaed64cb7 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Tue, 10 Jan 2023 14:15:54 +0200 Subject: [PATCH 74/82] update readme --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index f17e693bf5..61a4c3ea52 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,19 @@ make dev-init python setup.py test ``` +## Experimental EOFv1 support + +This version of the Vyper compiler supports the upcoming EOFv1 bytecode format. This is currently under +development and in alpha state. To enable the experimental EOFv1 support you use the +``--experimental-eof`` command line option. Additionally a greater than ``paris`` evm version should +be used. + +Usage example: +```bash +vyper --experimental-eof --evm-version=shanghai -f opcodes_runtime contract.vy +``` + + # Contributing * See Issues tab, and feel free to submit your own issues * Add PRs if you discover a solution to an existing issue From 511df2b4de6e7b3e6ad3622c34339ba5cfeaa978 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jan 2023 13:30:51 +0200 Subject: [PATCH 75/82] more detail in the exception --- vyper/compiler/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 358fe14c72..1b164023bd 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -290,7 +290,7 @@ def _build_legacy_opcodes(bytecode: bytes) -> str: mnemonic = opcode_map.get(op) if mnemonic == None: - raise CompilerPanic(f"Unsupported opcode {hex(op)}") + raise CompilerPanic(f"Unsupported opcode {hex(op)} after {opcode_output}") opcode_output.append(mnemonic) if "PUSH" in opcode_output[-1]: From 51ce2d808711dfe806b57a3f778fe9217c9dfec5 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 11 Jan 2023 13:30:55 +0200 Subject: [PATCH 76/82] assert push size gt 0 --- vyper/ir/compile_ir.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 5361c8361e..500b90ef30 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -101,7 +101,7 @@ def _runtime_code_offsets(ctor_mem_size, runtime_codelen): # mem offsets in the code. For instance, if we only see mem symbols # up to size 256, we can use PUSH1. def calc_mem_ofst_size(ctor_mem_size): - return math.ceil(math.log(ctor_mem_size + 1, 256)) + return max(math.ceil(math.log(ctor_mem_size + 1, 256)), 1) # temporary optimization to handle stack items for return sequences @@ -1254,7 +1254,9 @@ def assembly_to_evm( elif isinstance(item, str) and item.upper() in get_opcodes(): o += bytes([get_opcodes()[item.upper()][0]]) elif item[:4] == "PUSH": - o += bytes([PUSH_OFFSET + int(item[4:])]) + push_size = int(item[4:]) + assert push_size > 0, f"Bad PUSH size for {item}" + o += bytes([PUSH_OFFSET + push_size]) elif item[:3] == "DUP": o += bytes([DUP_OFFSET + int(item[3:])]) elif item[:4] == "SWAP": From 5ee0a46665269125ce39fd83672374abb46b53cc Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 12 Jan 2023 12:32:01 +0200 Subject: [PATCH 77/82] update shanghai to cancun --- README.md | 2 +- vyper/compiler/__init__.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 61a4c3ea52..6504028ad4 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ be used. Usage example: ```bash -vyper --experimental-eof --evm-version=shanghai -f opcodes_runtime contract.vy +vyper --experimental-eof --evm-version=cancun -f opcodes_runtime contract.vy ``` diff --git a/vyper/compiler/__init__.py b/vyper/compiler/__init__.py index e3687c862a..0256370e80 100644 --- a/vyper/compiler/__init__.py +++ b/vyper/compiler/__init__.py @@ -101,8 +101,8 @@ def compile_codes( Compiler output as `{'contract name': {'output key': "output data"}}` """ - if experimental_eof and not version_check('shanghai'): - raise ValueError(f"Enabling EOFv1 requires evm version of shanghai or greater") + if experimental_eof and not version_check('cancun'): + raise ValueError(f"Enabling EOFv1 requires evm version of cancun or greater") set_eof_enabled(experimental_eof) From 63ccde9cc0a45aa6eea101d8199a38c77848acde Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Thu, 12 Jan 2023 15:34:30 +0200 Subject: [PATCH 78/82] shanghai -> cancun --- README.md | 2 +- tests/compiler/test_opcodes.py | 2 +- vyper/evm/opcodes.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6504028ad4..45762811bc 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ python setup.py test This version of the Vyper compiler supports the upcoming EOFv1 bytecode format. This is currently under development and in alpha state. To enable the experimental EOFv1 support you use the -``--experimental-eof`` command line option. Additionally a greater than ``paris`` evm version should +``--experimental-eof`` command line option. Additionally a greater or equal to ``cancun`` evm version should be used. Usage example: diff --git a/tests/compiler/test_opcodes.py b/tests/compiler/test_opcodes.py index de62e28bc0..cdc92ff4b5 100644 --- a/tests/compiler/test_opcodes.py +++ b/tests/compiler/test_opcodes.py @@ -43,7 +43,7 @@ def test_version_check(evm_version): def test_get_opcodes(evm_version): op = opcodes.get_opcodes() - if evm_version in ("shanghai", "paris", "berlin"): + if evm_version in ("cancun", "shanghai", "paris", "berlin"): assert "CHAINID" in op assert op["SLOAD"][-1] == 2100 elif evm_version == "istanbul": diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 101a57ce8e..37d1c9590b 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -26,6 +26,7 @@ "berlin": 3, "paris": 4, "shanghai": 5, + "cancun": 6, # ETC Forks "atlantis": 0, From e7f0671d5f3b1fd7f97fd365f32ca1c841805379 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 18 Jan 2023 14:56:27 +0200 Subject: [PATCH 79/82] get opcode metadata helper --- vyper/evm/opcodes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vyper/evm/opcodes.py b/vyper/evm/opcodes.py index 37d1c9590b..4ec1117f77 100644 --- a/vyper/evm/opcodes.py +++ b/vyper/evm/opcodes.py @@ -292,6 +292,12 @@ def immediate_size(op): else: return 0 +def get_opcode_metadata(mnem_or_op): + if isinstance(mnem_or_op, int): + mnem_or_op = get_mnemonic(mnem_or_op) + + return get_opcodes()[mnem_or_op] + def version_check(begin: Optional[str] = None, end: Optional[str] = None) -> bool: if begin is None and end is None: raise CompilerPanic("Either beginning or end fork ruleset must be set.") From ee1a62a8003fc4204b34ebcc004f92404dbcb0af Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 18 Jan 2023 14:59:01 +0200 Subject: [PATCH 80/82] calculate_max_stack_height() implementation --- vyper/evm/eof.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 63ebd77153..645d8be9e2 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -2,16 +2,18 @@ from vyper.exceptions import VyperInternalException from vyper.evm.opcodes import TERMINATING_OPCODES, VALID_OPCODES, immediate_size, get_mnemonic -MAGIC = b'\xEF\x00' +MAGIC = b"\xEF\x00" VERSION = 0x01 S_TERMINATOR = 0x00 S_TYPE = 0x01 S_CODE = 0x02 S_DATA = 0x03 + class ValidationException(VyperInternalException): """Validation exception.""" + class FunctionType: def __init__(self, function_id, inputs, outputs, max_stack_height) -> None: self.function_id = function_id @@ -34,9 +36,10 @@ def disassemble(self): if immediates_len > 0: output += f" {immediates}" output += "\n" - + return output + "\n" + class EOFReader: bytecode: bytes @@ -222,3 +225,54 @@ def disassemble(self): output += code.disassemble() return output + +# Calculates the max stack height for the given code block. Meanwhile calculates the stack height at every instruction +# to be later used to validate jump destination stack validity. Currently disabled. +def calculate_max_stack_height(bytecode: bytes, start_pc: int = 0, stack_height: int = 0, stack_heights = []) -> int: + max_stack_height = 0 + + if len(stack_heights) == 0: + stack_heights = [-1] * len(bytecode) + + pc = start_pc + while pc < len(bytecode): + op = bytecode[pc] + meta = get_opcode_metadata(op) + mnemonic = get_mnemonic(meta[0]) + pop_size = meta[1] + push_size = meta[2] + + if mnemonic == "CALLF": + pop_size = 0 + push_size = 1 + + stack_height -= pop_size + if stack_height < 0: + raise ValidationException("Stack underflow") + stack_height += push_size + max_stack_height = max(max_stack_height, stack_height) + + # fill the stack height buffer + stack_heights[pc:pc+immediate_size(op)+1] = [stack_height] * (immediate_size(op) + 1) + #print(pc, mnemonic, stack_heights, max_stack_height) + + if mnemonic == "RJUMP": + jump_offset = int.from_bytes(bytecode[pc + 1 : pc + 3], byteorder="big", signed=True) + # if stack_heights[pc+jump_offset] != -1 and stack_heights[pc+jump_offset] != stack_height: + # raise ValidationException("Stack height missmatch at jump target") + if stack_heights[pc+jump_offset] != -1: + return max_stack_height + pc += jump_offset + elif mnemonic == "RJUMPI": + jump_offset = int.from_bytes(bytecode[pc + 1 : pc + 3], byteorder="big", signed=True) + return max( + max_stack_height, + calculate_max_stack_height(bytecode, pc + 3, stack_height, stack_heights), + calculate_max_stack_height(bytecode, pc + 3 + jump_offset, stack_height, stack_heights), + ) + elif mnemonic in TERMINATING_OPCODES: + return max_stack_height + + pc += 1 + immediate_size(op) + + return max_stack_height From a96f172dc92cadcfd28324b2f94b9f151a7d086f Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 18 Jan 2023 14:59:27 +0200 Subject: [PATCH 81/82] append proper max stack heights to eof header --- vyper/ir/compile_ir.py | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py index 500b90ef30..80e40688ba 100644 --- a/vyper/ir/compile_ir.py +++ b/vyper/ir/compile_ir.py @@ -13,12 +13,15 @@ DUP_OFFSET = 0x7F SWAP_OFFSET = 0x8F + def JUMPI() -> str: return "RJUMPI" if is_eof_enabled() else "JUMPI" + def JUMP() -> str: return "RJUMP" if is_eof_enabled() else "JUMP" + def num_to_bytearray(x): o = [] while x > 0: @@ -995,10 +998,11 @@ def _optimize_assembly(assembly): raise CompilerPanic("infinite loop detected during assembly reduction") # pragma: notest -def generateEOFHeader(function_sizes) -> bytes: + +def generateEOFHeader(function_sizes, max_stack_heights) -> bytes: code_sections_len = len(function_sizes) header = b"" - header += eof.MAGIC # EOFv1 signature + header += eof.MAGIC # EOFv1 signature header += bytes([eof.VERSION]) header += bytes([eof.S_TYPE]) @@ -1013,19 +1017,20 @@ def generateEOFHeader(function_sizes) -> bytes: header += bytes([eof.S_DATA]) header += bytes([0x0, 0x0]) - header += bytes([0x0]) # Terminator + header += bytes([0x0]) # Terminator # Type section for i in range(code_sections_len): if i == 0: - header += bytes([0x0]) # inputs + header += bytes([0x0]) # inputs else: - header += bytes([0x1]) # inputs - header += bytes([0x0]) # outputs - header += (64).to_bytes(2, "big") # max stack + header += bytes([0x1]) # inputs + header += bytes([0x0]) # outputs + header += (max_stack_heights[i]).to_bytes(2, "big") # max stack return header + def adjust_pc_maps(pc_maps, ofst): assert ofst >= 0 @@ -1041,7 +1046,12 @@ def adjust_pc_maps(pc_maps, ofst): def assembly_to_evm( - assembly, pc_ofst=0, insert_vyper_signature=False, emit_headers=False, disable_bytecode_metadata=False, eof_enabled=False + assembly, + pc_ofst=0, + insert_vyper_signature=False, + emit_headers=False, + disable_bytecode_metadata=False, + eof_enabled=False, ): """ Assembles assembly into EVM @@ -1196,7 +1206,8 @@ def assembly_to_evm( if is_eof_enabled() and emit_headers: # generate header with placeholder function sizes - header = generateEOFHeader([0] * (len(function_breaks) + 1)) + dummy_placeholder_data = [0] * (len(function_breaks) + 1) + header = generateEOFHeader(dummy_placeholder_data, dummy_placeholder_data) o += header # now that all symbols have been resolved, generate bytecode @@ -1213,7 +1224,7 @@ def assembly_to_evm( continue # skippable opcodes # When EOFv1 enabled skip emiting JUMPDESTs elif item == "JUMPDEST" and is_eof_enabled(): - continue + continue elif isinstance(item, str) and item.startswith("_DEPLOY_MEM_OFST_"): continue @@ -1224,7 +1235,7 @@ def assembly_to_evm( assert is_symbol(sym), f"Internal compiler error: {assembly[i + 1]} not preceded by symbol" o += bytes([get_opcode(assembly[i + 1])]) - if assembly[i + 1] == "CALLF": + if assembly[i + 1] == "CALLF": function_id = function_breaks[symbol_map[sym]] o += bytes(function_id.to_bytes(2, 'big', signed=True)) else: @@ -1269,13 +1280,23 @@ def assembly_to_evm( if is_eof_enabled() and emit_headers: last_offset = 0 + function_offsets = [] for offset in sorted(function_breaks.keys()): + function_offsets.append(offset) function_sizes.append(offset - last_offset) last_offset = offset function_sizes.append(symbol_map.get("_sym_runtime_begin2", pc) - last_offset) + + max_stack_heights = [] + offset = len(header) + for i, size in enumerate(function_sizes): + max_stack_heights.append( + eof.calculate_max_stack_height(o[offset : offset + size], stack_height=0 if i == 0 else 1) + ) + offset += size # Generate the final header and replace the placeholder - header = generateEOFHeader(function_sizes) + header = generateEOFHeader(function_sizes, max_stack_heights) o = header + o[len(header):] o += bytecode_suffix From 693535176707253121c3f6bca64cfb072833b756 Mon Sep 17 00:00:00 2001 From: Harry Kalogirou Date: Wed, 18 Jan 2023 18:32:16 +0200 Subject: [PATCH 82/82] leftout import --- vyper/evm/eof.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vyper/evm/eof.py b/vyper/evm/eof.py index 645d8be9e2..89cd543199 100644 --- a/vyper/evm/eof.py +++ b/vyper/evm/eof.py @@ -1,6 +1,6 @@ from collections import deque from vyper.exceptions import VyperInternalException -from vyper.evm.opcodes import TERMINATING_OPCODES, VALID_OPCODES, immediate_size, get_mnemonic +from vyper.evm.opcodes import TERMINATING_OPCODES, VALID_OPCODES, immediate_size, get_mnemonic, get_opcode_metadata MAGIC = b"\xEF\x00" VERSION = 0x01