From 20bce37deb20ff5771aa5167c8ae9734fcbcd926 Mon Sep 17 00:00:00 2001 From: Spydr <58859306+Spydr06@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:58:17 +0200 Subject: [PATCH] native: parse dll files to resolve extern symbols (#19433) * add simple parsing of dll files to resolve extern symbols * use enums instead of string values in offsetof functions to reduce bug risks * add `-d no_backtrace` to native test options --- vlib/v/gen/native/dos.v | 7 +- vlib/v/gen/native/pe.v | 164 ++++++++--------- vlib/v/gen/native/readdll.v | 244 ++++++++++++++++++++++++++ vlib/v/gen/native/tests/native_test.v | 2 +- 4 files changed, 326 insertions(+), 91 deletions(-) create mode 100644 vlib/v/gen/native/readdll.v diff --git a/vlib/v/gen/native/dos.v b/vlib/v/gen/native/dos.v index 10a9226f04e22b..76d357837d7f28 100644 --- a/vlib/v/gen/native/dos.v +++ b/vlib/v/gen/native/dos.v @@ -3,6 +3,11 @@ // that can be found in the LICENSE file. module native +const ( + dos_header_size = 0x40 + dos_header_lfanew_offset = 0x3c +) + pub fn (mut g Gen) gen_dos_header() { dos_header := [ int(PeMagic.mz), @@ -89,7 +94,7 @@ pub fn (mut g Gen) gen_dos_header() { g.println('; ' + dos_header_description[i]) } } - if g.pos() != 0x40 { + if g.pos() != native.dos_header_size { g.n_error('Invalid dos header size') } diff --git a/vlib/v/gen/native/pe.v b/vlib/v/gen/native/pe.v index 244cd3ef272416..43211e0a7640ff 100644 --- a/vlib/v/gen/native/pe.v +++ b/vlib/v/gen/native/pe.v @@ -20,8 +20,11 @@ const ( pe_section_align = 0x1000 pe_file_align = 0x0200 + pe_coff_hdr_size = 0x18 pe_opt_hdr_size = 0xf0 + pe32_plus_opt_hdr_size = 0x70 pe_header_size = pe_file_align + pe_section_header_size = 0x28 pe_stack_size = 0x200000 // gcc default on windows pe_heap_size = 0x100000 // gcc default on windows // tcc defaults @@ -32,6 +35,9 @@ const ( pe_major_subsystem_version = 4 pe_minor_subsystem_version = 0 + pe_header_machine_offset = 4 + pe_number_of_sections_offset = 6 + pe_num_data_dirs = 0x10 dos_stub_end = 0x80 @@ -160,7 +166,7 @@ pub fn (mut g Gen) gen_pe_header() { 0x16: '; mSizeOfOptionalHeader' 0x18: '; mCharacteristics' } - assert 0x18 == pe_header.len * 2 + assert native.pe_coff_hdr_size == pe_header.len * 2 } g.pe_coff_hdr_pos = g.pos() @@ -247,28 +253,19 @@ fn (mut g Gen) get_pe32_plus_optional_header() Pe32PlusOptionalHeader { } } +enum Pe32PlusOPtionalHeaderField { + size_of_code = 4 + size_of_initialized_data = 8 + address_of_entry_point = 16 + base_of_code = 20 + size_of_image = 56 + number_of_rva_and_sizes = 108 +} + // implemented because __offsetof() + [packed] structs wasn't consistend across OSs -fn pe32_plus_optional_header_offsetof(field string) i64 { - return match field { - 'size_of_code' { - 4 - } - 'size_of_initialized_data' { - 8 - } - 'address_of_entry_point' { - 16 - } - 'base_of_code' { - 20 - } - 'size_of_image' { - 56 - } - else { - panic('pe32_plus_optional_header_offsetof("${field}") not implemented') - } - } +[inline] +fn pe32_plus_optional_header_offsetof(field Pe32PlusOPtionalHeaderField) i64 { + return i64(field) } // for later expandability @@ -445,25 +442,17 @@ mut: characteristics int } +enum PeSectionHeaderField { + virtual_size = 8 + virtual_address = 12 + size_of_raw_data = 16 + pointer_to_raw_data = 20 +} + // implemented because __offsetof() + [packed] structs wasn't consistend across OSs -fn pe_section_header_offsetof(field string) i64 { - return match field { - 'virtual_size' { - 8 - } - 'virtual_address' { - 12 - } - 'size_of_raw_data' { - 16 - } - 'pointer_to_raw_data' { - 20 - } - else { - panic('PeSectionHeader.offsetof("${field}") not implemented') - } - } +[inline] +fn pe_section_header_offsetof(field PeSectionHeaderField) i64 { + return i64(field) } struct PeSection { @@ -475,7 +464,7 @@ mut: fn (mut s PeSection) set_pointer_to_raw_data(mut g Gen, pointer int) { s.header.pointer_to_raw_data = pointer - g.write32_at(s.header_pos + pe_section_header_offsetof('pointer_to_raw_data'), pointer) + g.write32_at(s.header_pos + pe_section_header_offsetof(.pointer_to_raw_data), pointer) } fn (mut s PeSection) set_size_of_raw_data(mut g Gen, size int) { @@ -484,21 +473,21 @@ fn (mut s PeSection) set_size_of_raw_data(mut g Gen, size int) { } s.header.pointer_to_raw_data = size - g.write32_at(s.header_pos + pe_section_header_offsetof('size_of_raw_data'), size) + g.write32_at(s.header_pos + pe_section_header_offsetof(.size_of_raw_data), size) } fn (mut s PeSection) set_virtual_address(mut g Gen, addr int) { aligned := (addr + native.pe_section_align - 1) & ~(native.pe_section_align - 1) s.header.virtual_address = aligned - g.write32_at(s.header_pos + pe_section_header_offsetof('virtual_address'), aligned) + g.write32_at(s.header_pos + pe_section_header_offsetof(.virtual_address), aligned) } fn (mut s PeSection) set_virtual_size(mut g Gen, size int) { aligned := (size + native.pe_section_align - 1) & ~(native.pe_section_align - 1) s.header.virtual_size = aligned - g.write32_at(s.header_pos + pe_section_header_offsetof('virtual_size'), aligned) + g.write32_at(s.header_pos + pe_section_header_offsetof(.virtual_size), aligned) } fn (mut g Gen) create_pe_section(name string, header PeSectionHeader) PeSection { @@ -566,19 +555,15 @@ mut: import_address_table_rva int } +enum PeImportDirectoryTableField { + name_rva = 12 + import_address_table_rva = 16 +} + // implemented because __offsetof() + [packed] structs wasn't consistend across OSs -fn pe_idt_offsetof(field string) i64 { - return match field { - 'import_address_table_rva' { - 16 - } - 'name_rva' { - 12 - } - else { - panic('pe_import_table_offsetof("${field}") not implemented') - } - } +[inline] +fn pe_idt_offsetof(field PeImportDirectoryTableField) i64 { + return i64(field) } fn default_pe_idt() PeImportDirectoryTable { @@ -623,34 +608,35 @@ fn (mut g Gen) gen_pe_idata() { idata_pos := g.pos() idata_section.set_pointer_to_raw_data(mut g, int(idata_pos)) - mut imports := [ - PeDllImport{ - name: 'KERNEL32.DLL' - functions: [ - 'GetStdHandle', - 'ExitProcess', - 'WriteFile', - // winapi functions - ] - }, - PeDllImport{ - name: 'USER32.DLL' - }, - PeDllImport{ - name: 'msvcrt.dll' - functions: [ - 'malloc', - 'free', - 'printf', - 'puts', - 'isdigit', - 'isalpha', - 'memset', - // etc... - ] - }, + dll_files := ['KERNEL32.DLL', 'USER32.DLL', 'msvcrt.dll'] + mut dlls := dll_files + .map(lookup_system_dll(it) or { g.n_error('${it}: ${err}') }) + .map(index_dll(it) or { g.n_error('${it}: ${err}') }) + + g.extern_symbols << [ + 'GetStdHandle', + 'ExitProcess', + 'WriteFile', ] + for symbol in g.extern_symbols { + sym := symbol.trim_left('C.') + mut found := false + for mut dll in dlls { + if sym in dll.exports { + found = true + dll.exports[sym] = true + break + } + } + + if !found { + eprintln('could not find symbol `${sym}` in ${dll_files}') + } + } + + mut imports := dlls.map(it.to_import()) + // import directory table for mut imp in imports { // generate idt @@ -661,7 +647,7 @@ fn (mut g Gen) gen_pe_idata() { g.gen_pe_idt(&PeImportDirectoryTable{}, 'null entry') // null entry for imp in imports { - g.write32_at(imp.idt_pos + pe_idt_offsetof('import_address_table_rva'), + g.write32_at(imp.idt_pos + pe_idt_offsetof(.import_address_table_rva), int(g.pos() - idata_pos) + idata_section.header.virtual_address + 4) for func in imp.functions { @@ -680,7 +666,7 @@ fn (mut g Gen) gen_pe_idata() { // dll names for imp in imports { - g.write32_at(imp.idt_pos + pe_idt_offsetof('name_rva'), int(g.pos() - idata_pos) + + g.write32_at(imp.idt_pos + pe_idt_offsetof(.name_rva), int(g.pos() - idata_pos) + idata_section.header.virtual_address) g.write_string(imp.name) g.println('"${imp.name}"') @@ -761,10 +747,10 @@ fn (mut g Gen) patch_section_virtual_addrs() { match section.name { '.text' { - g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof('base_of_code'), + g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof(.base_of_code), section.header.virtual_address) g.write32_at(g.pe_opt_hdr_pos + - pe32_plus_optional_header_offsetof('address_of_entry_point'), section.header.virtual_address) + pe32_plus_optional_header_offsetof(.address_of_entry_point), section.header.virtual_address) } else {} } @@ -774,9 +760,9 @@ fn (mut g Gen) patch_section_virtual_addrs() { fn (mut g Gen) patch_pe_code_size() { code_size := int(g.file_size_pos - g.code_start_pos) - g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof('size_of_code'), + g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof(.size_of_code), code_size) - g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof('size_of_initialized_data'), + g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof(.size_of_initialized_data), code_size) text_section_index := g.get_pe_section_index('.text') or { @@ -791,7 +777,7 @@ fn (mut g Gen) patch_pe_image_size() { last_section := g.pe_sections.last() image_size := (last_section.header.virtual_address + last_section.header.virtual_size + native.pe_section_align - 1) & ~(native.pe_section_align - 1) - g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof('size_of_image'), + g.write32_at(g.pe_opt_hdr_pos + pe32_plus_optional_header_offsetof(.size_of_image), image_size) } diff --git a/vlib/v/gen/native/readdll.v b/vlib/v/gen/native/readdll.v new file mode 100644 index 00000000000000..6744a1aed4448d --- /dev/null +++ b/vlib/v/gen/native/readdll.v @@ -0,0 +1,244 @@ +// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module native + +import maps +import os +import encoding.binary + +const ( + pe_dword_size = 4 + pe_export_data_dir_index = 0 + pe_export_directory_size = 0x28 +) + +struct SystemDll { + name string + full_path string +} + +fn C.SearchPathA(lp_path &char, lp_file_name &char, lp_extension &char, n_buffer_length u32, lp_buffer &char, lp_file_part &&char) u32 +fn C.GetLastError() u32 + +fn lookup_system_dll(dll string) !SystemDll { + $if windows { + unsafe { + buffer := malloc(1024) + len := C.SearchPathA(nil, dll.str, '.dll'.str, 1024, buffer, nil) + if len == 0 { + err_code := C.GetLastError() + err_msg := cstring_to_vstring(C.strerror(err_code)) + return error('could not find dll: ${err_msg}') + } + + full_path := cstring_to_vstring(buffer) + free(buffer) + return SystemDll{ + name: dll + full_path: full_path + } + } + } $else { + // todo look into librarys dirs + return SystemDll{ + name: dll + } + } +} + +struct DllIndex { + dllname string +mut: + exports map[string]bool +} + +fn (di DllIndex) to_import() PeDllImport { + return PeDllImport{ + name: di.dllname + functions: maps.filter(di.exports, fn (_ string, val bool) bool { + return val + }).keys() + } +} + +fn index_dll(dll SystemDll) !DllIndex { + mut file := os.open(dll.full_path)! + index := DllIndex{ + dllname: dll.name + exports: get_dllexports(mut file)! + } + file.close() + return index +} + +fn get_dllexports(mut file os.File) !map[string]bool { + dos_header := read_dos_header(mut file)! + if dos_header.magic != u16(PeMagic.mz) { + return error('wrong magic bytes: `${dos_header.magic.hex()}`, want: `${u16(PeMagic.mz).hex()}`') + } + + pe_header := read_pe_header(mut file, dos_header.lfanew)! + if pe_header.magic != u16(PeMagic.pe) { + return error('wrong magic bytes: `${pe_header.magic.hex()}`, want: `${u16(PeMagic.pe).hex()}`') + } + opt_hdroffset := dos_header.lfanew + pe_coff_hdr_size + + mut sec_hdroffset := u32(0) + export_data_dir := match pe_header.machine { + u16(PeMachine.amd64), u16(PeMachine.arm64) { + optional_header := read_pe32plus_optional_header(mut file, opt_hdroffset)! + if optional_header.magic != u16(PeMagic.pe32plus) { + return error('wrong magic bytes: `${optional_header.magic.hex()}`, want: `${u16(PeMagic.pe32plus).hex()}`') + } + if optional_header.number_of_rva_and_sizes <= native.pe_export_data_dir_index { + return map[string]bool{} // no exports in this file + } + + sec_hdroffset = opt_hdroffset + u32(pe_opt_hdr_size) + read_pe_data_dir(mut file, opt_hdroffset + pe32_plus_opt_hdr_size, native.pe_export_data_dir_index)! + } + u16(PeMachine.i386) { + return error('32-bit (i386) dlls not supported yet') + } + else { + return error('unknown machine `${pe_header.machine.hex()}`') + } + } + + for i in 0 .. pe_header.number_of_sections { + section_header := read_pe_section_header(mut file, sec_hdroffset + + i * pe_section_header_size)! + if export_data_dir.rva >= section_header.virtual_address + && export_data_dir.rva < section_header.virtual_address + section_header.size_of_raw_data { + // found the right section + return parse_export_section(mut file, export_data_dir, section_header) + } + } + + return map[string]bool{} +} + +fn parse_export_section(mut file os.File, export_data_dir PeDataDir, section_header PeSectionHeaderRead) !map[string]bool { + ref := section_header.virtual_address - section_header.pointer_to_raw_data + export_directory := read_pe_export_directory(mut file, u64(export_data_dir.rva) - ref)! + + mut exports := map[string]bool{} + exports.reserve(u32(export_data_dir.size)) + + mut name_ptr := export_directory.name_ptr_rva - ref + mut buf := []u8{} + for _ in 0 .. export_directory.number_of_name_ptrs { + ptr := binary.little_endian_u32(file.read_bytes_at(native.pe_dword_size, name_ptr)) + name_ptr += native.pe_dword_size + + mut j := u32(0) + buf.clear() + for { + buf << file.read_bytes_at(1, ptr - ref + j)[0] + if buf[j] == 0 { + buf.delete_last() + exports[buf.bytestr()] = false + break + } + j++ + } + } + return exports +} + +struct DosHeaderRead { + magic u16 + lfanew u32 + // address of the new exe header +} + +fn read_dos_header(mut file os.File) !DosHeaderRead { + buf := file.read_bytes(dos_header_size) + if buf.len != dos_header_size { + return error('error reading dos header (${dos_header_size} bytes)') + } + + return DosHeaderRead{ + magic: binary.little_endian_u16(buf) + lfanew: binary.little_endian_u32(buf[dos_header_lfanew_offset..]) + } +} + +struct PeHeaderRead { + magic u16 + machine u16 + number_of_sections u16 +} + +fn read_pe_header(mut file os.File, offset u64) !PeHeaderRead { + buf := file.read_bytes_at(pe_header_size, offset) + if buf.len != pe_header_size { + return error('error reading pe header (${pe_header_size} bytes)') + } + + return PeHeaderRead{ + magic: binary.little_endian_u16(buf) + machine: binary.little_endian_u16(buf[pe_header_machine_offset..]) + number_of_sections: binary.little_endian_u16(buf[pe_number_of_sections_offset..]) + } +} + +struct Pe32PlusOptionalHeaderRead { + magic u16 + number_of_rva_and_sizes u32 +} + +fn read_pe32plus_optional_header(mut file os.File, offset u64) !Pe32PlusOptionalHeaderRead { + buf := file.read_bytes_at(pe_opt_hdr_size, offset) + if buf.len != pe_opt_hdr_size { + return error('error reading pe32+ optional header (${pe_opt_hdr_size} bytes)') + } + + return Pe32PlusOptionalHeaderRead{ + magic: binary.little_endian_u16(buf) + number_of_rva_and_sizes: binary.little_endian_u32(buf[pe32_plus_optional_header_offsetof(.number_of_rva_and_sizes)..]) + } +} + +fn read_pe_data_dir(mut file os.File, offset u64, index u64) !PeDataDir { + mut data_dir := PeDataDir{} + file.read_struct_at(mut data_dir, offset + index * sizeof(PeDataDir))! + return data_dir +} + +struct PeSectionHeaderRead { + virtual_address u32 + size_of_raw_data u32 + pointer_to_raw_data u32 +} + +fn read_pe_section_header(mut file os.File, offset u64) !PeSectionHeaderRead { + buf := file.read_bytes_at(pe_section_header_size, offset) + if buf.len != pe_section_header_size { + return error('error reading section header (${pe_section_header_size} bytes)') + } + + return PeSectionHeaderRead{ + virtual_address: binary.little_endian_u32(buf[pe_section_header_offsetof(.virtual_address)..]) + size_of_raw_data: binary.little_endian_u32(buf[pe_section_header_offsetof(.size_of_raw_data)..20]) + pointer_to_raw_data: binary.little_endian_u32(buf[pe_section_header_offsetof(.pointer_to_raw_data)..24]) + } +} + +struct PeExportDirectoryRead { + number_of_name_ptrs u32 + name_ptr_rva u32 +} + +fn read_pe_export_directory(mut file os.File, offset u64) !PeExportDirectoryRead { + buf := file.read_bytes_at(native.pe_export_directory_size, offset) + if buf.len != native.pe_export_directory_size { + return error('error reading export directory (${native.pe_export_directory_size} bytes)') + } + + return PeExportDirectoryRead{ + number_of_name_ptrs: binary.little_endian_u32(buf[24..28]) + name_ptr_rva: binary.little_endian_u32(buf[32..36]) + } +} diff --git a/vlib/v/gen/native/tests/native_test.v b/vlib/v/gen/native/tests/native_test.v index cd898da9f754a3..21da26036d6d25 100644 --- a/vlib/v/gen/native/tests/native_test.v +++ b/vlib/v/gen/native/tests/native_test.v @@ -41,7 +41,7 @@ fn test_native() { work_test_path := os.join_path(wrkdir, test_file_name) exe_test_path := os.join_path(wrkdir, test_file_name + '.exe') tmperrfile := os.join_path(dir, test + '.tmperr') - cmd := '${os.quoted_path(vexe)} -o ${os.quoted_path(exe_test_path)} -b native -skip-unused ${os.quoted_path(full_test_path)} -d custom_define 2> ${os.quoted_path(tmperrfile)}' + cmd := '${os.quoted_path(vexe)} -o ${os.quoted_path(exe_test_path)} -b native -skip-unused ${os.quoted_path(full_test_path)} -d no_backtrace -d custom_define 2> ${os.quoted_path(tmperrfile)}' if is_verbose { println(cmd) }