From 2250154bb17ef9cf573f88c52664ad35861bcfa0 Mon Sep 17 00:00:00 2001 From: Yonatan Schachter Date: Sun, 1 Oct 2023 17:25:23 +0300 Subject: [PATCH] wip: POC of checksums using bindesc Signed-off-by: Yonatan Schachter --- CMakeLists.txt | 8 +++ include/zephyr/bindesc.h | 6 ++ samples/subsys/bindesc/hello_bindesc/prj.conf | 6 ++ scripts/build/gen_bindesc_checksums.py | 72 +++++++++++++++++++ scripts/west_commands/bindesc.py | 2 + subsys/bindesc/CMakeLists.txt | 2 + subsys/bindesc/Kconfig | 19 +++++ subsys/bindesc/Kconfig.checksums | 21 ++++++ subsys/bindesc/bindesc.ld | 6 ++ subsys/bindesc/bindesc_checksums.c | 18 +++++ 10 files changed, 160 insertions(+) create mode 100644 scripts/build/gen_bindesc_checksums.py create mode 100644 subsys/bindesc/Kconfig.checksums create mode 100644 subsys/bindesc/bindesc_checksums.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d5c643f3e4911..8a0a4e437c19bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1504,6 +1504,14 @@ set_target_properties(${logical_target_for_zephyr_elf} PROPERTIES OUTPUT_NAME ${ set(post_build_commands "") set(post_build_byproducts "") +if(CONFIG_BINDESC_DEFINE_CHECKSUMS) + list(APPEND + post_build_commands + COMMAND + ${Python3_EXECUTABLE} ${ZEPHYR_BASE}/scripts/build/gen_bindesc_checksums.py ${KERNEL_ELF_NAME} + ) +endif() + list(APPEND post_build_commands COMMAND diff --git a/include/zephyr/bindesc.h b/include/zephyr/bindesc.h index 4280cf91d21f23..ba3ed957e44976 100644 --- a/include/zephyr/bindesc.h +++ b/include/zephyr/bindesc.h @@ -108,6 +108,12 @@ extern "C" { /** The C++ compiler version */ #define BINDESC_ID_CXX_COMPILER_VERSION 0xb04 +/** The MD5 checksum as bytes */ +#define BINDESC_ID_CHECKSUM_MD5_BYTES 0xc00 + +/** The MD5 checksum as a string */ +#define BINDESC_ID_CHECKSUM_MD5_STRING 0xc01 + #define BINDESC_TAG_DESCRIPTORS_END BINDESC_TAG(DESCRIPTORS_END, 0x0fff) /** diff --git a/samples/subsys/bindesc/hello_bindesc/prj.conf b/samples/subsys/bindesc/hello_bindesc/prj.conf index 
0b22906d9f552d..caff1c2da33ab7 100644 --- a/samples/subsys/bindesc/hello_bindesc/prj.conf +++ b/samples/subsys/bindesc/hello_bindesc/prj.conf @@ -19,3 +19,9 @@ CONFIG_BINDESC_APP_VERSION_STRING=y CONFIG_BINDESC_DEFINE_HOST_INFO=y CONFIG_BINDESC_C_COMPILER_NAME=y CONFIG_BINDESC_C_COMPILER_VERSION=y + + +CONFIG_BINDESC_DEFINE_CHECKSUMS=y +CONFIG_BINDESC_CHECKSUM_MD5_STRING=y +CONFIG_BINDESC_CHECKSUM_MD5_BYTES=y +CONFIG_BINDESC_DEFINE_DESCRIPTORS_SIZE=512 diff --git a/scripts/build/gen_bindesc_checksums.py b/scripts/build/gen_bindesc_checksums.py new file mode 100644 index 00000000000000..134c0b5d8f0f40 --- /dev/null +++ b/scripts/build/gen_bindesc_checksums.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +import sys +import hashlib +from elftools.elf.elffile import ELFFile +from elftools.elf.constants import SH_FLAGS + + +BINDESC_OFFSETOF_DATA = 4 +HASH_ALGORITHMS = { + "md5": hashlib.md5, +} + + +def set_checksums(filename): + with open(filename, 'rb+') as file_stream: + elffile = ELFFile(file_stream) + + # Variable for storing the raw bytes of the image + image_data = b"" + + # Iterate over the segments to produce the image to calculate the + # checksums over. 
+ for segment in elffile.iter_segments(): + if segment.header.p_type != "PT_LOAD": + continue + + if segment.header.p_filesz == 0: + continue + + for section in elffile.iter_sections(): + if segment.section_in_segment(section): + if not (section.name == "rom_start" and segment.section_in_segment(section)): + # Don't include rom_start in the checksum calculation + image_data += section.data() + + symbol_table = elffile.get_section_by_name(".symtab") + + # The binary descriptors to modify are found in the rom_start section + rom_start = elffile.get_section_by_name("rom_start") + + for symbol in symbol_table.iter_symbols(): + # Iterate over all the symbols, modify the checksums + if symbol.name.startswith("bindesc_entry_checksum"): + # Determine the algorithm and data type + _, _, _, algorithm, data_type = symbol.name.split("_") + + # Calculate the checksum + checksum = HASH_ALGORITHMS[algorithm](image_data) + if data_type == "bytes": + to_write = checksum.digest() + else: + to_write = checksum.hexdigest().encode() + + # Calculate the offset in the ELF file to write the checksum to + # The formula is: offset of rom_start + address of the symbol minus + # the base address of rom_start + offset of the data inside the descriptor + # entry. 
+ offset = rom_start.header.sh_offset + \ + symbol.entry.st_value - rom_start.header.sh_addr + \ + BINDESC_OFFSETOF_DATA + + # Write the hash to the file + elffile.stream.seek(offset) + elffile.stream.write(to_write) + + +def main(): + set_checksums(sys.argv[1]) + +if __name__ == "__main__": + main() diff --git a/scripts/west_commands/bindesc.py b/scripts/west_commands/bindesc.py index c56555484e6698..2a7ea067a435df 100644 --- a/scripts/west_commands/bindesc.py +++ b/scripts/west_commands/bindesc.py @@ -95,6 +95,8 @@ def __init__(self): self.bindesc_gen_tag(self.TYPE_STR, 0xb02): 'C_COMPILER_VERSION', self.bindesc_gen_tag(self.TYPE_STR, 0xb03): 'CXX_COMPILER_NAME', self.bindesc_gen_tag(self.TYPE_STR, 0xb04): 'CXX_COMPILER_VERSION', + self.bindesc_gen_tag(self.TYPE_BYTES, 0xc00): 'CHECKSUM_MD5_BYTES', + self.bindesc_gen_tag(self.TYPE_STR, 0xc01): 'CHECKSUM_MD5_STRING', } self.NAME_TO_TAG = {v: k for k, v in self.TAG_TO_NAME.items()} diff --git a/subsys/bindesc/CMakeLists.txt b/subsys/bindesc/CMakeLists.txt index 72634be12e6db5..cc37f8fb9fa8f0 100644 --- a/subsys/bindesc/CMakeLists.txt +++ b/subsys/bindesc/CMakeLists.txt @@ -76,3 +76,5 @@ if(CONFIG_BINDESC_DEFINE_HOST_INFO) gen_str_definition(CXX_COMPILER_NAME ${CMAKE_CXX_COMPILER_ID}) gen_str_definition(CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION}) endif() + +zephyr_library_sources_ifdef(CONFIG_BINDESC_DEFINE_CHECKSUMS bindesc_checksums.c) diff --git a/subsys/bindesc/Kconfig b/subsys/bindesc/Kconfig index 0f55564d46063e..d410277e5d79a3 100644 --- a/subsys/bindesc/Kconfig +++ b/subsys/bindesc/Kconfig @@ -16,9 +16,28 @@ config BINDESC_DEFINE if BINDESC_DEFINE +config BINDESC_DEFINE_DESCRIPTORS_SIZE + int "Binary descriptors descriptors size" + default 0 + help + Determines the total amount of bytes allocated to the descriptors. + If the size is larger than the actual size of the descriptors, padding + will be added to ensure that the total amount is equal to this symbol. 
+ If the size is smaller, it will have no effect. + This might be useful to ensure that the checksum of the image remains + the same even if the descriptors change in size (the descriptors themselves + are not part of the checksum calculation). + For example, if an app uses the host name descriptor and an md5 checksum descriptor, + compiling on machines with names of different sizes will result in images + with different checksums. By allocating a constant size for the descriptors, the + descriptors have room to grow. + The size should be set by the app to a value slightly larger than the size of the + descriptors. Setting this to zero will disable this feature. + source "subsys/bindesc/Kconfig.version" source "subsys/bindesc/Kconfig.build_time" source "subsys/bindesc/Kconfig.host_info" +source "subsys/bindesc/Kconfig.checksums" endif # BINDESC_DEFINE diff --git a/subsys/bindesc/Kconfig.checksums b/subsys/bindesc/Kconfig.checksums new file mode 100644 index 00000000000000..05b2f0a0596eb0 --- /dev/null +++ b/subsys/bindesc/Kconfig.checksums @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Yonatan Schachter +# SPDX-License-Identifier: Apache-2.0 + +menuconfig BINDESC_DEFINE_CHECKSUMS + bool "Checksum binary descriptors" + help + Add checksum binary descriptors + +if BINDESC_DEFINE_CHECKSUMS + +config BINDESC_CHECKSUM_MD5_BYTES + bool "MD5 checksum as bytes" + help + MD5 checksum as bytes + +config BINDESC_CHECKSUM_MD5_STRING + bool "MD5 checksum as a string" + help + MD5 checksum as a string + +endif # BINDESC_DEFINE_CHECKSUMS diff --git a/subsys/bindesc/bindesc.ld b/subsys/bindesc/bindesc.ld index e07222d5f10461..02f1d9f22e9e6c 100644 --- a/subsys/bindesc/bindesc.ld +++ b/subsys/bindesc/bindesc.ld @@ -7,7 +7,13 @@ #include #include +#define BINDESC_MAX(a, b) (((a) > (b)) ? (a) : (b)) + +bindesc_start = .; SQUAD(BINDESC_MAGIC); Z_LINK_ITERABLE(bindesc_entry); . = ALIGN(BINDESC_ALIGNMENT); LONG(BINDESC_TAG_DESCRIPTORS_END) +. 
= BINDESC_MAX(bindesc_start + CONFIG_BINDESC_DEFINE_DESCRIPTORS_SIZE, .); +. = ALIGN(BINDESC_ALIGNMENT); +bindesc_end = .; diff --git a/subsys/bindesc/bindesc_checksums.c b/subsys/bindesc/bindesc_checksums.c new file mode 100644 index 00000000000000..ebc026371bdf5d --- /dev/null +++ b/subsys/bindesc/bindesc_checksums.c @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2023 Yonatan Schachter + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#if IS_ENABLED(CONFIG_BINDESC_CHECKSUM_MD5_BYTES) +BINDESC_BYTES_DEFINE(checksum_md5_bytes, BINDESC_ID_CHECKSUM_MD5_BYTES, + ({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); +#endif /* IS_ENABLED(CONFIG_BINDESC_CHECKSUM_MD5_BYTES) */ + +#if IS_ENABLED(CONFIG_BINDESC_CHECKSUM_MD5_STRING) +BINDESC_STR_DEFINE(checksum_md5_string, BINDESC_ID_CHECKSUM_MD5_STRING, + "00000000000000000000000000000000"); +#endif /* IS_ENABLED(CONFIG_BINDESC_CHECKSUM_MD5_STRING) */