diff --git a/lib/build-web/fuzz.zig b/lib/build-web/fuzz.zig index c694e9e69ea8..44f143451752 100644 --- a/lib/build-web/fuzz.zig +++ b/lib/build-web/fuzz.zig @@ -228,20 +228,21 @@ fn unpackSourcesInner(tar_bytes: []u8) !void { if (std.mem.endsWith(u8, tar_file.name, ".zig")) { log.debug("found file: '{s}'", .{tar_file.name}); const file_name = try gpa.dupe(u8, tar_file.name); - if (std.mem.indexOfScalar(u8, file_name, '/')) |pkg_name_end| { - const pkg_name = file_name[0..pkg_name_end]; - const gop = try Walk.modules.getOrPut(gpa, pkg_name); - const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len); - if (!gop.found_existing or - std.mem.eql(u8, file_name[pkg_name_end..], "/root.zig") or - std.mem.eql(u8, file_name[pkg_name_end + 1 .. file_name.len - ".zig".len], pkg_name)) - { - gop.value_ptr.* = file; - } - const file_bytes = tar_reader.take(@intCast(tar_file.size)) catch unreachable; - it.unread_file_bytes = 0; // we have read the whole thing - assert(file == try Walk.add_file(file_name, file_bytes)); - } + // This is a hack to guess modules from the tar file contents. To handle modules + // properly, the build system will need to change the structure here to have one + // directory per module. This in turn requires compiler enhancements to allow + // the build system to actually discover the required information. 
+ const mod_name, const is_module_root = p: { + if (std.mem.find(u8, file_name, "std/")) |i| break :p .{ "std", std.mem.eql(u8, file_name[i + 4 ..], "std.zig") }; + if (std.mem.endsWith(u8, file_name, "/builtin.zig")) break :p .{ "builtin", true }; + break :p .{ "root", std.mem.endsWith(u8, file_name, "/root.zig") }; + }; + const gop = try Walk.modules.getOrPut(gpa, mod_name); + const file: Walk.File.Index = @enumFromInt(Walk.files.entries.len); + if (!gop.found_existing or is_module_root) gop.value_ptr.* = file; + const file_bytes = tar_reader.take(@intCast(tar_file.size)) catch unreachable; + it.unread_file_bytes = 0; // we have read the whole thing + assert(file == try Walk.add_file(file_name, file_bytes)); } else { log.warn("skipping: '{s}' - the tar creation should have done that", .{tar_file.name}); } diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig index 0d6f45194748..054fe1eb277a 100644 --- a/lib/compiler/test_runner.zig +++ b/lib/compiler/test_runner.zig @@ -184,7 +184,7 @@ fn mainServer() !void { const test_fn = builtin.test_functions[index]; const entry_addr = @intFromPtr(test_fn.func); - try server.serveU64Message(.fuzz_start_addr, entry_addr); + try server.serveU64Message(.fuzz_start_addr, fuzz_abi.fuzzer_unslide_address(entry_addr)); defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1); is_fuzz_test = false; fuzz_test_index = index; diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index 6b7a846e4ccd..5c452340f60e 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -116,13 +116,18 @@ const Executable = struct { "failed to init memory map for coverage file '{s}': {t}", .{ &coverage_file_name, e }, ); - map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{ + map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{ .n_runs = 0, .unique_runs = 0, .pcs_len = pcs.len, })); map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize)); - map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs)); + // 
Relocations have been applied to `pcs` so it contains runtime addresses (with slide + // applied). We need to translate these to the virtual addresses as on disk. + for (pcs) |pc| { + const pc_vaddr = fuzzer_unslide_address(pc); + map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr)); + } return map; } else { const size = coverage_file.getEndPos() catch |e| panic( @@ -215,7 +220,16 @@ const Executable = struct { .{ self.pc_counters.len, pcs.len }, ); - self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs)); + self.pc_digest = digest: { + // Relocations have been applied to `pcs` so it contains runtime addresses (with slide + // applied). We need to translate these to the virtual addresses as on disk. + var h: std.hash.Wyhash = .init(0); + for (pcs) |pc| { + const pc_vaddr = fuzzer_unslide_address(pc); + h.update(@ptrCast(&pc_vaddr)); + } + break :digest h.final(); + }; self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest); return self; @@ -622,6 +636,14 @@ export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void { } } +export fn fuzzer_unslide_address(addr: usize) usize { + const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported"); + const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| { + std.debug.panic("failed to find virtual address slide: {t}", .{err}); + }; + return addr - slide; +} + /// Helps determine run uniqueness in the face of recursion. /// Currently not used by the fuzzer. 
export threadlocal var __sancov_lowest_stack: usize = 0; @@ -1185,13 +1207,13 @@ const Mutation = enum { const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len); out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]); }, - .@"const" => out.appendSliceAssumeCapacity(mem.asBytes( + .@"const" => out.appendSliceAssumeCapacity(@ptrCast( &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)], )), - .small => out.appendSliceAssumeCapacity(mem.asBytes( + .small => out.appendSliceAssumeCapacity(@ptrCast( &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]), )), - .few => out.appendSliceAssumeCapacity(mem.asBytes( + .few => out.appendSliceAssumeCapacity(@ptrCast( &fewValue(rng, data_ctx[0], data_ctx[1]), )), } diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 6dd4f70f9fed..7a22522d6b02 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -383,7 +383,14 @@ fn prepareTables(fuzz: *Fuzz, run_step: *Step.Run, coverage_id: u64) error{ OutO errdefer gop.value_ptr.coverage.deinit(fuzz.gpa); const rebuilt_exe_path = run_step.rebuilt_executable.?; - var debug_info = std.debug.Info.load(fuzz.gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + const target = run_step.producer.?.rootModuleTarget(); + var debug_info = std.debug.Info.load( + fuzz.gpa, + rebuilt_exe_path, + &gop.value_ptr.coverage, + target.ofmt, + target.cpu.arch, + ) catch |err| { log.err("step '{s}': failed to load debug information for '{f}': {s}", .{ run_step.step.name, rebuilt_exe_path, @errorName(err), }); @@ -479,9 +486,23 @@ fn addEntryPoint(fuzz: *Fuzz, coverage_id: u64, addr: u64) error{ AlreadyReporte if (false) { const sl = coverage_map.source_locations[index]; const file_name = coverage_map.coverage.stringAt(coverage_map.coverage.fileAt(sl.file).basename); - log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{ - addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1], - }); + if 
(pcs.len == 1) { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index 0 (final)", .{ + addr, file_name, sl.line, sl.column, + }); + } else if (index == 0) { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index 0 before {x}", .{ + addr, file_name, sl.line, sl.column, pcs[index + 1], + }); + } else if (index == pcs.len - 1) { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} (final) after {x}", .{ + addr, file_name, sl.line, sl.column, index, pcs[index - 1], + }); + } else { + log.debug("server found entry point for 0x{x} at {s}:{d}:{d} - index {d} between {x} and {x}", .{ + addr, file_name, sl.line, sl.column, index, pcs[index - 1], pcs[index + 1], + }); + } } try coverage_map.entry_points.append(fuzz.gpa, @intCast(index)); } diff --git a/lib/std/Build/Step/CheckObject.zig b/lib/std/Build/Step/CheckObject.zig index d9935d4f3df3..ab10d368b20e 100644 --- a/lib/std/Build/Step/CheckObject.zig +++ b/lib/std/Build/Step/CheckObject.zig @@ -729,10 +729,10 @@ const MachODumper = struct { imports: std.ArrayListUnmanaged([]const u8) = .empty, fn parse(ctx: *ObjectContext) !void { - var it = ctx.getLoadCommandIterator(); + var it = try ctx.getLoadCommandIterator(); var i: usize = 0; - while (it.next()) |cmd| { - switch (cmd.cmd()) { + while (try it.next()) |cmd| { + switch (cmd.hdr.cmd) { .SEGMENT_64 => { const seg = cmd.cast(macho.segment_command_64).?; try ctx.segments.append(ctx.gpa, seg); @@ -771,14 +771,13 @@ const MachODumper = struct { return mem.sliceTo(@as([*:0]const u8, @ptrCast(ctx.strtab.items.ptr + off)), 0); } - fn getLoadCommandIterator(ctx: ObjectContext) macho.LoadCommandIterator { - const data = ctx.data[@sizeOf(macho.mach_header_64)..][0..ctx.header.sizeofcmds]; - return .{ .ncmds = ctx.header.ncmds, .buffer = data }; + fn getLoadCommandIterator(ctx: ObjectContext) !macho.LoadCommandIterator { + return .init(&ctx.header, ctx.data[@sizeOf(macho.mach_header_64)..]); } - fn getLoadCommand(ctx: 
ObjectContext, cmd: macho.LC) ?macho.LoadCommandIterator.LoadCommand { - var it = ctx.getLoadCommandIterator(); - while (it.next()) |lc| if (lc.cmd() == cmd) { + fn getLoadCommand(ctx: ObjectContext, cmd: macho.LC) !?macho.LoadCommandIterator.LoadCommand { + var it = try ctx.getLoadCommandIterator(); + while (try it.next()) |lc| if (lc.hdr.cmd == cmd) { return lc; }; return null; @@ -872,9 +871,9 @@ const MachODumper = struct { \\LC {d} \\cmd {s} \\cmdsize {d} - , .{ index, @tagName(lc.cmd()), lc.cmdsize() }); + , .{ index, @tagName(lc.hdr.cmd), lc.hdr.cmdsize }); - switch (lc.cmd()) { + switch (lc.hdr.cmd) { .SEGMENT_64 => { const seg = lc.cast(macho.segment_command_64).?; try writer.writeByte('\n'); @@ -1592,9 +1591,9 @@ const MachODumper = struct { .headers => { try ObjectContext.dumpHeader(ctx.header, writer); - var it = ctx.getLoadCommandIterator(); + var it = try ctx.getLoadCommandIterator(); var i: usize = 0; - while (it.next()) |cmd| { + while (try it.next()) |cmd| { try ObjectContext.dumpLoadCommand(cmd, i, writer); try writer.writeByte('\n'); @@ -1615,7 +1614,7 @@ const MachODumper = struct { .dyld_weak_bind, .dyld_lazy_bind, => { - const cmd = ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse + const cmd = try ctx.getLoadCommand(.DYLD_INFO_ONLY) orelse return step.fail("no dyld info found", .{}); const lc = cmd.cast(macho.dyld_info_command).?; @@ -1649,7 +1648,7 @@ const MachODumper = struct { }, .exports => blk: { - if (ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { + if (try ctx.getLoadCommand(.DYLD_INFO_ONLY)) |cmd| { const lc = cmd.cast(macho.dyld_info_command).?; if (lc.export_size > 0) { const data = ctx.data[lc.export_off..][0..lc.export_size]; diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 7dd9c17273f1..4ea09538c2ca 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -1932,6 +1932,11 @@ pub fn rebuildInFuzzMode(c: *Compile, gpa: Allocator, progress_node: std.Progres 
c.step.result_error_bundle.deinit(gpa); c.step.result_error_bundle = std.zig.ErrorBundle.empty; + if (c.step.result_failed_command) |cmd| { + gpa.free(cmd); + c.step.result_failed_command = null; + } + const zig_args = try getZigArgs(c, true); const maybe_output_bin_path = try c.step.evalZigProcess(zig_args, progress_node, false, null, gpa); return maybe_output_bin_path.?; diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index 52a690ef94a7..dfae77ffb54a 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -1140,6 +1140,12 @@ pub fn rerunInFuzzMode( .output_file, .output_directory => unreachable, } } + + if (run.step.result_failed_command) |cmd| { + fuzz.gpa.free(cmd); + run.step.result_failed_command = null; + } + const has_side_effects = false; const rand_int = std.crypto.random.int(u64); const tmp_dir_path = "tmp" ++ fs.path.sep_str ++ std.fmt.hex(rand_int); @@ -1150,7 +1156,7 @@ pub fn rerunInFuzzMode( .web_server = null, // only needed for time reports .ttyconf = fuzz.ttyconf, .unit_test_timeout_ns = null, // don't time out fuzz tests for now - .gpa = undefined, // not used by `runCommand` + .gpa = fuzz.gpa, }, .{ .unit_test_index = unit_test_index, .fuzz = fuzz, @@ -1870,7 +1876,10 @@ fn pollZigTest( // test. For instance, if the test runner leaves this much time between us requesting a test to // start and it acknowledging the test starting, we terminate the child and raise an error. This // *should* never happen, but could in theory be caused by some very unlucky IB in a test. 
- const response_timeout_ns = @max(options.unit_test_timeout_ns orelse 0, 60 * std.time.ns_per_s); + const response_timeout_ns: ?u64 = ns: { + if (fuzz_context != null) break :ns null; // don't timeout fuzz tests + break :ns @max(options.unit_test_timeout_ns orelse 0, 60 * std.time.ns_per_s); + }; const stdout = poller.reader(.stdout); const stderr = poller.reader(.stderr); diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig index eb8f6cb1beba..b7c1e7379d37 100644 --- a/lib/std/Build/abi.zig +++ b/lib/std/Build/abi.zig @@ -145,6 +145,7 @@ pub const fuzz = struct { pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void; pub extern fn fuzzer_new_input(bytes: Slice) void; pub extern fn fuzzer_main(limit_kind: LimitKind, amount: u64) void; + pub extern fn fuzzer_unslide_address(addr: usize) usize; pub const Slice = extern struct { ptr: [*]const u8, diff --git a/lib/std/Io/Writer.zig b/lib/std/Io/Writer.zig index d0721307d49d..d8468083ebea 100644 --- a/lib/std/Io/Writer.zig +++ b/lib/std/Io/Writer.zig @@ -270,16 +270,17 @@ fn writeSplatHeaderLimitFinish( remaining -= copy_len; if (remaining == 0) break :v; } - for (data[0 .. data.len - 1]) |buf| if (buf.len != 0) { - const copy_len = @min(header.len, remaining); - vecs[i] = buf; + for (data[0 .. 
data.len - 1]) |buf| { + if (buf.len == 0) continue; + const copy_len = @min(buf.len, remaining); + vecs[i] = buf[0..copy_len]; i += 1; remaining -= copy_len; if (remaining == 0) break :v; if (vecs.len - i == 0) break :v; - }; + } const pattern = data[data.len - 1]; - if (splat == 1) { + if (splat == 1 or remaining < pattern.len) { vecs[i] = pattern[0..@min(remaining, pattern.len)]; i += 1; break :v; @@ -915,7 +916,16 @@ pub fn sendFileHeader( if (new_end <= w.buffer.len) { @memcpy(w.buffer[w.end..][0..header.len], header); w.end = new_end; - return header.len + try w.vtable.sendFile(w, file_reader, limit); + const file_bytes = w.vtable.sendFile(w, file_reader, limit) catch |err| switch (err) { + error.ReadFailed, error.WriteFailed => |e| return e, + error.EndOfStream, error.Unimplemented => |e| { + // These errors are non-fatal, so if we wrote any header bytes, we will report that + // and suppress this error. Only if there was no header may we return the error. + if (header.len != 0) return header.len; + return e; + }, + }; + return header.len + file_bytes; } const buffered_contents = limit.slice(file_reader.interface.buffered()); const n = try w.vtable.drain(w, &.{ header, buffered_contents }, 1); diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 3f1982070c20..29c0731f4e6e 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -21,6 +21,7 @@ const root = @import("root"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const ElfFile = @import("debug/ElfFile.zig"); +pub const MachOFile = @import("debug/MachOFile.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); pub const cpu_context = @import("debug/cpu_context.zig"); @@ -1366,7 +1367,7 @@ test printLineFromFile { /// The returned allocator should be thread-safe if the compilation is multi-threaded, because /// multiple threads could capture and/or print stack traces simultaneously. 
-fn getDebugInfoAllocator() Allocator { +pub fn getDebugInfoAllocator() Allocator { // Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`. if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) { return root.debug.getDebugInfoAllocator(); diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 74119a3ea40c..921cd36ab811 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -9,49 +9,67 @@ const std = @import("../std.zig"); const Allocator = std.mem.Allocator; const Path = std.Build.Cache.Path; -const ElfFile = std.debug.ElfFile; const assert = std.debug.assert; const Coverage = std.debug.Coverage; const SourceLocation = std.debug.Coverage.SourceLocation; +const ElfFile = std.debug.ElfFile; +const MachOFile = std.debug.MachOFile; + const Info = @This(); -/// Sorted by key, ascending. -address_map: std.AutoArrayHashMapUnmanaged(u64, ElfFile), +impl: union(enum) { + elf: ElfFile, + macho: MachOFile, +}, /// Externally managed, outlives this `Info` instance. 
coverage: *Coverage, -pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || std.debug.Dwarf.ScanError || error{MissingDebugInfo}; +pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || MachOFile.Error || std.debug.Dwarf.ScanError || error{ MissingDebugInfo, UnsupportedDebugInfo }; + +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage, format: std.Target.ObjectFormat, arch: std.Target.Cpu.Arch) LoadError!Info { + switch (format) { + .elf => { + var file = try path.root_dir.handle.openFile(path.sub_path, .{}); + defer file.close(); -pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { - var file = try path.root_dir.handle.openFile(path.sub_path, .{}); - defer file.close(); + var elf_file: ElfFile = try .load(gpa, file, null, &.none); + errdefer elf_file.deinit(gpa); - var elf_file: ElfFile = try .load(gpa, file, null, &.none); - errdefer elf_file.deinit(gpa); + if (elf_file.dwarf == null) return error.MissingDebugInfo; + try elf_file.dwarf.?.open(gpa, elf_file.endian); + try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); - if (elf_file.dwarf == null) return error.MissingDebugInfo; - try elf_file.dwarf.?.open(gpa, elf_file.endian); - try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); + return .{ + .impl = .{ .elf = elf_file }, + .coverage = coverage, + }; + }, + .macho => { + const path_str = try path.toString(gpa); + defer gpa.free(path_str); - var info: Info = .{ - .address_map = .{}, - .coverage = coverage, - }; - try info.address_map.put(gpa, 0, elf_file); - errdefer comptime unreachable; // elf_file is owned by the map now - return info; + var macho_file: MachOFile = try .load(gpa, path_str, arch); + errdefer macho_file.deinit(gpa); + + return .{ + .impl = .{ .macho = macho_file }, + .coverage = coverage, + }; + }, + else => return error.UnsupportedDebugInfo, + } } pub fn deinit(info: *Info, gpa: Allocator) void { - for (info.address_map.values()) |*elf_file| { - 
elf_file.dwarf.?.deinit(gpa); + switch (info.impl) { + .elf => |*ef| ef.deinit(gpa), + .macho => |*mf| mf.deinit(gpa), } - info.address_map.deinit(gpa); info.* = undefined; } -pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError || error{UnsupportedDebugInfo}; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations. @@ -64,7 +82,28 @@ pub fn resolveAddresses( output: []SourceLocation, ) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); - if (info.address_map.entries.len != 1) @panic("TODO"); - const elf_file = &info.address_map.values()[0]; - return info.coverage.resolveAddressesDwarf(gpa, elf_file.endian, sorted_pc_addrs, output, &elf_file.dwarf.?); + switch (info.impl) { + .elf => |*ef| return info.coverage.resolveAddressesDwarf(gpa, ef.endian, sorted_pc_addrs, output, &ef.dwarf.?), + .macho => |*mf| { + // Resolving all of the addresses at once unfortunately isn't so easy in Mach-O binaries + // due to split debug information. For now, we'll just resolve the addresses one by one.
+ for (sorted_pc_addrs, output) |pc_addr, *src_loc| { + const dwarf, const dwarf_pc_addr = mf.getDwarfForAddress(gpa, pc_addr) catch |err| switch (err) { + error.InvalidMachO, error.InvalidDwarf => return error.InvalidDebugInfo, + else => |e| return e, + }; + if (dwarf.ranges.items.len == 0) { + dwarf.populateRanges(gpa, .little) catch |err| switch (err) { + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.ReadFailed, + => return error.InvalidDebugInfo, + else => |e| return e, + }; + } + try info.coverage.resolveAddressesDwarf(gpa, .little, &.{dwarf_pc_addr}, src_loc[0..1], dwarf); + } + }, + } } diff --git a/lib/std/debug/MachOFile.zig b/lib/std/debug/MachOFile.zig new file mode 100644 index 000000000000..3be1b1daff8f --- /dev/null +++ b/lib/std/debug/MachOFile.zig @@ -0,0 +1,548 @@ +mapped_memory: []align(std.heap.page_size_min) const u8, +symbols: []const Symbol, +strings: []const u8, +text_vmaddr: u64, + +/// Key is index into `strings` of the file path. +ofiles: std.AutoArrayHashMapUnmanaged(u32, Error!OFile), + +pub const Error = error{ + InvalidMachO, + InvalidDwarf, + MissingDebugInfo, + UnsupportedDebugInfo, + ReadFailed, + OutOfMemory, +}; + +pub fn deinit(mf: *MachOFile, gpa: Allocator) void { + for (mf.ofiles.values()) |*maybe_of| { + const of = &(maybe_of.* catch continue); + posix.munmap(of.mapped_memory); + of.dwarf.deinit(gpa); + of.symbols_by_name.deinit(gpa); + } + mf.ofiles.deinit(gpa); + gpa.free(mf.symbols); + posix.munmap(mf.mapped_memory); +} + +pub fn load(gpa: Allocator, path: []const u8, arch: std.Target.Cpu.Arch) Error!MachOFile { + switch (arch) { + .x86_64, .aarch64 => {}, + else => unreachable, + } + + const all_mapped_memory = try mapDebugInfoFile(path); + errdefer posix.munmap(all_mapped_memory); + + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. 
For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 macOS and x86_64 macOS. + if (all_mapped_memory.len < 4) return error.InvalidMachO; + const magic = std.mem.readInt(u32, all_mapped_memory.ptr[0..4], .little); + + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. + const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). + var fat_r: Io.Reader = .fixed(all_mapped_memory); + const hdr = fat_r.takeStruct(macho.fat_header, .big) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + const want_cpu_type = switch (arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => unreachable, + }; + for (0..hdr.nfat_arch) |_| { + const fat_arch = fat_r.takeStruct(macho.fat_arch, .big) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (fat_arch.cputype != want_cpu_type) continue; + if (fat_arch.offset + fat_arch.size > all_mapped_memory.len) return error.InvalidMachO; + break :mapped_macho all_mapped_memory[fat_arch.offset..][0..fat_arch.size]; + } + // `arch` was not present in the fat binary. + return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. 
+ macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, + + else => return error.InvalidMachO, + }; + + var r: Io.Reader = .fixed(mapped_macho); + const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidMachO; + + const symtab: macho.symtab_command, const text_vmaddr: u64 = lcs: { + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_macho[@sizeOf(macho.mach_header_64)..]); + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; + while (try it.next()) |cmd| switch (cmd.hdr.cmd) { + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidMachO, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, + else => {}, + }; + break :lcs .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; + }; + + const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; + + var symbols: std.ArrayList(Symbol) = try .initCapacity(gpa, symtab.nsyms); + defer symbols.deinit(gpa); + + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. 
+ var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, symtab.nsyms); + + var ofile: u32 = undefined; + var last_sym: Symbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + var sym_r: Io.Reader = .fixed(mapped_macho[symtab.symoff..]); + for (0..symtab.nsyms) |_| { + const sym = sym_r.takeStruct(macho.nlist_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = Symbol.unknown_ofile, + }); + } + }, + } + continue; + } + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidMachO, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .ofile = ofile, + }; + }, + else => return error.InvalidMachO, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + }, + else => return error.InvalidMachO, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + 
symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } + }, + else => return error.InvalidMachO, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidMachO, + }, + else => {}, + } + } + + switch (state) { + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, + .oso_close => {}, + else => return error.InvalidMachO, // corrupted STAB entries in symtab + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(Symbol, symbols_slice, {}, Symbol.addressLessThan); + + return .{ + .mapped_memory = all_mapped_memory, + .symbols = symbols_slice, + .strings = strings, + .ofiles = .empty, + .text_vmaddr = text_vmaddr, + }; +} +pub fn getDwarfForAddress(mf: *MachOFile, gpa: Allocator, vaddr: u64) !struct { *Dwarf, u64 } { + const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; + + if (symbol.ofile == Symbol.unknown_ofile) return error.MissingDebugInfo; + + // offset of `vaddr` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if we fail to find the DWARF infos + const stab_symbol = mem.sliceTo(mf.strings[symbol.strx..], 0); + + const gop = try mf.ofiles.getOrPut(gpa, symbol.ofile); + if (!gop.found_existing) { + const name = mem.sliceTo(mf.strings[symbol.ofile..], 0); + gop.value_ptr.* = loadOFile(gpa, name); + } + const of = &(gop.value_ptr.* catch |err| return err); + + const symbol_index = of.symbols_by_name.getKeyAdapted( + @as([]const u8, stab_symbol), +
@as(OFile.SymbolAdapter, .{ .strtab = of.strtab, .symtab_raw = of.symtab_raw }), + ) orelse return error.MissingDebugInfo; + + const symbol_ofile_vaddr = vaddr: { + var sym = of.symtab_raw[symbol_index]; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); + break :vaddr sym.n_value; + }; + + return .{ &of.dwarf, symbol_ofile_vaddr + address_symbol_offset }; +} +pub fn lookupSymbolName(mf: *MachOFile, vaddr: u64) error{MissingDebugInfo}![]const u8 { + const symbol = Symbol.find(mf.symbols, vaddr) orelse return error.MissingDebugInfo; + return mem.sliceTo(mf.strings[symbol.strx..], 0); +} + +const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + dwarf: Dwarf, + strtab: []const u8, + symtab_raw: []align(1) const macho.nlist_64, + /// All named symbols in `symtab_raw`. Stored `u32` key is the index into `symtab_raw`. Accessed + /// through `SymbolAdapter`, so that the symbol name is used as the logical key. + symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), + + const SymbolAdapter = struct { + strtab: []const u8, + symtab_raw: []align(1) const macho.nlist_64, + pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, sym_name)); + } + pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { + _ = b_index; + var b_sym = ctx.symtab_raw[b_sym_index]; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &b_sym); + const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); + return mem.eql(u8, a_sym_name, b_sym_name); + } + }; +}; + +const Symbol = struct { + strx: u32, + addr: u64, + /// Value may be `unknown_ofile`. 
+ ofile: u32, + const unknown_ofile = std.math.maxInt(u32); + fn addressLessThan(context: void, lhs: Symbol, rhs: Symbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const Symbol, address: usize) ?*const Symbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const Symbol = &.{ + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = Symbol; +} + +fn loadOFile(gpa: Allocator, o_file_name: []const u8) !OFile { + const all_mapped_memory, const mapped_ofile = map: { + const open_paren = paren: { + if (std.mem.endsWith(u8, o_file_name, ")")) { + if (std.mem.findScalarLast(u8, o_file_name, '(')) |i| { + break :paren i; + } + } + // Not an archive, just a normal path to a .o file + const m = try 
mapDebugInfoFile(o_file_name); + break :map .{ m, m }; + }; + + // We have the form 'path/to/archive.a(entry.o)'. Map the archive and find the object file in question. + + const archive_path = o_file_name[0..open_paren]; + const target_name_in_archive = o_file_name[open_paren + 1 .. o_file_name.len - 1]; + const mapped_archive = try mapDebugInfoFile(archive_path); + errdefer posix.munmap(mapped_archive); + + var ar_reader: Io.Reader = .fixed(mapped_archive); + const ar_magic = ar_reader.take(8) catch return error.InvalidMachO; + if (!std.mem.eql(u8, ar_magic, "!<arch>\n")) return error.InvalidMachO; + while (true) { + if (ar_reader.seek == ar_reader.buffer.len) return error.MissingDebugInfo; + + const raw_name = ar_reader.takeArray(16) catch return error.InvalidMachO; + ar_reader.discardAll(12 + 6 + 6 + 8) catch return error.InvalidMachO; + const raw_size = ar_reader.takeArray(10) catch return error.InvalidMachO; + const file_magic = ar_reader.takeArray(2) catch return error.InvalidMachO; + if (!std.mem.eql(u8, file_magic, "`\n")) return error.InvalidMachO; + + const size = std.fmt.parseInt(u32, mem.sliceTo(raw_size, ' '), 10) catch return error.InvalidMachO; + const raw_data = ar_reader.take(size) catch return error.InvalidMachO; + + const entry_name: []const u8, const entry_contents: []const u8 = entry: { + if (!std.mem.startsWith(u8, raw_name, "#1/")) { + break :entry .{ mem.sliceTo(raw_name, '/'), raw_data }; + } + const len = std.fmt.parseInt(u32, mem.sliceTo(raw_name[3..], ' '), 10) catch return error.InvalidMachO; + if (len > size) return error.InvalidMachO; + break :entry .{ mem.sliceTo(raw_data[0..len], 0), raw_data[len..] 
}; + }; + + if (std.mem.eql(u8, entry_name, target_name_in_archive)) { + break :map .{ mapped_archive, entry_contents }; + } + } + }; + errdefer posix.munmap(all_mapped_memory); + + var r: Io.Reader = .fixed(mapped_ofile); + const hdr = r.takeStruct(macho.mach_header_64, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidMachO; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = try .init(&hdr, mapped_ofile[@sizeOf(macho.mach_header_64)..]); + while (try it.next()) |lc| switch (lc.hdr.cmd) { + .SEGMENT_64 => seg_cmd = lc, + .SYMTAB => symtab_cmd = lc.cast(macho.symtab_command) orelse return error.InvalidMachO, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_ofile.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidMachO; + if (mapped_ofile[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidMachO; + const strtab = mapped_ofile[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_ofile.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidMachO; + const symtab_raw: []align(1) const macho.nlist_64 = @ptrCast(mapped_ofile[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; + defer symbols_by_name.deinit(gpa); + try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab_raw.len)); + for (symtab_raw, 0..) 
|sym_raw, sym_index| { + var sym = sym_raw; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.nlist_64, &sym); + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( + @as([]const u8, sym_name), + @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab_raw = symtab_raw }), + ); + if (gop.found_existing) return error.InvalidMachO; + gop.key_ptr.* = @intCast(sym_index); + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect_raw| { + var sect = sect_raw; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(macho.section_64, &sect); + + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_ofile.len < sect.offset + sect.size) return error.InvalidMachO; + const section_bytes = mapped_ofile[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + if (sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null) + { + return error.MissingDebugInfo; + } + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + dwarf.open(gpa, .little) catch |err| switch (err) { + error.InvalidDebugInfo, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDwarf, + + error.MissingDebugInfo, + error.ReadFailed, + error.OutOfMemory, + => |e| return e, + }; + + return .{ + .mapped_memory 
= all_mapped_memory, + .dwarf = dwarf, + .strtab = strtab, + .symtab_raw = symtab_raw, + .symbols_by_name = symbols_by_name.move(), + }; +} + +/// Uses `mmap` to map the file at `path` into memory. +fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return error.ReadFailed, + }; + defer file.close(); + + const file_len = std.math.cast( + usize, + file.getEndPos() catch return error.ReadFailed, + ) orelse return error.ReadFailed; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ) catch return error.ReadFailed; +} + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Io = std.Io; +const assert = std.debug.assert; +const posix = std.posix; +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; + +const builtin = @import("builtin"); + +const MachOFile = @This(); diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig index 5036d401977f..59c0b4245173 100644 --- a/lib/std/debug/SelfInfo/Elf.zig +++ b/lib/std/debug/SelfInfo/Elf.zig @@ -80,6 +80,11 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons if (module.name.len == 0) return error.MissingDebugInfo; return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + const module = try si.findModule(gpa, address, .shared); + defer si.rwlock.unlockShared(); + return module.load_offset; +} pub const can_unwind: bool = s: { // The DWARF code can't deal with ILP32 ABIs yet: https://github.com/ziglang/zig/issues/25447 diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig index f7eb4465c52e..94d50bbf7707 100644 --- a/lib/std/debug/SelfInfo/MachO.zig +++ b/lib/std/debug/SelfInfo/MachO.zig @@ -1,12 +1,10 @@ 
mutex: std.Thread.Mutex, /// Accessed through `Module.Adapter`. modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false), -ofiles: std.StringArrayHashMapUnmanaged(?OFile), pub const init: SelfInfo = .{ .mutex = .{}, .modules = .empty, - .ofiles = .empty, }; pub fn deinit(si: *SelfInfo, gpa: Allocator) void { for (si.modules.keys()) |*module| { @@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void { const u = &(module.unwind orelse break :unwind catch break :unwind); if (u.dwarf) |*dwarf| dwarf.deinit(gpa); } - loaded: { - const l = &(module.loaded_macho orelse break :loaded catch break :loaded); - gpa.free(l.symbols); - posix.munmap(l.mapped_memory); + file: { + const f = &(module.file orelse break :file catch break :file); + f.deinit(gpa); } } - for (si.ofiles.values()) |*opt_ofile| { - const ofile = &(opt_ofile.* orelse continue); - ofile.dwarf.deinit(gpa); - ofile.symbols_by_name.deinit(gpa); - posix.munmap(ofile.mapped_memory); - } si.modules.deinit(gpa); - si.ofiles.deinit(gpa); } pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol { @@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st const module = try si.findModule(gpa, address); defer si.mutex.unlock(); - const loaded_macho = try module.getLoadedMachO(gpa); - - const vaddr = address - loaded_macho.vaddr_offset; - const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; + const file = try module.getFile(gpa); - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; + // This is not necessarily the same as the vmaddr_slide that dyld would report. This is + // because the segments in the file on disk might differ from the ones in memory. 
Normally + // we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + // it exists on disk (necessarily, because the kernel needs to load it!), but is also in + // the dyld cache (dyld actually restarts itself from cache after loading it), and the two + // versions have (very) different segment base addresses. It's sort of like a large slide + // has been applied to all addresses in memory. For an optimal experience, we consider the + // on-disk vmaddr instead of the in-memory one. + const vaddr_offset = module.text_base - file.text_vmaddr; - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); + const vaddr = address - vaddr_offset; - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, + const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch { + // Return at least the symbol name if available. + return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - if (symbol.ofile == MachoSymbol.unknown_ofile) { - // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. - return sym_only_result; - } - - const o_file: *OFile = of: { - const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); - const gop = try si.ofiles.getOrPut(gpa, path); - if (!gop.found_existing) { - gop.value_ptr.* = loadOFile(gpa, path) catch null; - } - if (gop.value_ptr.*) |*o_file| { - break :of o_file; - } else { - return sym_only_result; - } + const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch { + // Return at least the symbol name if available. 
+ return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - const symbol_index = o_file.symbols_by_name.getKeyAdapted( - @as([]const u8, stab_symbol), - @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), - ) orelse return sym_only_result; - const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, + .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse + try file.lookupSymbolName(vaddr), .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, + ofile_dwarf, native_endian, std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), + ofile_dwarf.section(.debug_str), compile_unit, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, }, - .source_location = o_file.dwarf.getLineNumberInfo( + .source_location = ofile_dwarf.getLineNumberInfo( gpa, native_endian, compile_unit, - symbol_ofile_vaddr + address_symbol_offset, + ofile_vaddr, ) catch null, }; } @@ -104,6 +82,20 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons defer si.mutex.unlock(); return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + const module = try si.findModule(gpa, address); + defer si.mutex.unlock(); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, 
segment_cmd.segName(), "__TEXT")) continue; + break segment_cmd.vmaddr; + } else unreachable; + return module.text_base - text_vmaddr; +} pub const can_unwind: bool = true; pub const UnwindContext = std.debug.Dwarf.SelfUnwinder; @@ -447,7 +439,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module { .text_base = @intFromPtr(info.fbase), .name = std.mem.span(info.fname), .unwind = null, - .loaded_macho = null, + .file = null, }; } return gop.key_ptr; @@ -457,7 +449,7 @@ const Module = struct { text_base: usize, name: []const u8, unwind: ?(Error!Unwind), - loaded_macho: ?(Error!LoadedMachO), + file: ?(Error!MachOFile), const Adapter = struct { pub fn hash(_: Adapter, text_base: usize) u32 { @@ -488,34 +480,17 @@ const Module = struct { dwarf: ?Dwarf.Unwind, }; - const LoadedMachO = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: []const u8, - /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is - /// because the segments in the file on disk might differ from the ones in memory. Normally - /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: - /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in - /// the dyld cache (dyld actually restart itself from cache after loading it), and the two - /// versions have (very) different segment base addresses. It's sort of like a large slide - /// has been applied to all addresses in memory. For an optimal experience, we consider the - /// on-disk vmaddr instead of the in-memory one. - vaddr_offset: usize, - }; - fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind { if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa); return if (module.unwind.?) 
|*unwind| unwind else |err| err; } fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections, const text_vmaddr = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; break .{ load_cmd.getSections(), segment_cmd.vmaddr }; @@ -568,237 +543,15 @@ const Module = struct { }; } - fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO { - if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, - else => error.ReadFailed, - }; - return if (module.loaded_macho.?) |*lm| lm else |err| err; - } - fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO { - const all_mapped_memory = try mapDebugInfoFile(module.name); - errdefer posix.munmap(all_mapped_memory); - - // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal - // binary": a simple file format which contains Mach-O binaries for multiple targets. For - // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images - // for both ARM64 macOS and x86_64 macOS. 
- if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; - const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; - // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. - const mapped_macho = switch (magic) { - macho.MH_MAGIC_64 => all_mapped_memory, - - macho.FAT_CIGAM => mapped_macho: { - // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing - // is big-endian, so we'll be swapping some bytes. - if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; - const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); - const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); - const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; - const native_cpu_type = switch (builtin.cpu.arch) { - .x86_64 => macho.CPU_TYPE_X86_64, - .aarch64 => macho.CPU_TYPE_ARM64, - else => comptime unreachable, - }; - for (archs) |*arch| { - if (@byteSwap(arch.cputype) != native_cpu_type) continue; - const offset = @byteSwap(arch.offset); - const size = @byteSwap(arch.size); - break :mapped_macho all_mapped_memory[offset..][0..size]; - } - // Our native architecture was not present in the fat binary. - return error.MissingDebugInfo; - }, - - // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It - // will be fairly easy to add support here if necessary; it's very similar to above. 
- macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - - else => return error.InvalidDebugInfo, - }; - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var symtab: ?macho.symtab_command = null; - var text_vmaddr: ?u64 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { - if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; - text_vmaddr = seg_cmd.vmaddr; - }, - else => {}, - }; - break :lc_iter .{ - symtab orelse return error.MissingDebugInfo, - text_vmaddr orelse return error.MissingDebugInfo, - }; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - // This map is temporary; it is used only to detect duplicates here. This is - // necessary because we prefer to use STAB ("symbolic debugging table") symbols, - // but they might not be present, so we track normal symbols too. - // Indices match 1-1 with those of `symbols`. 
- var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; - defer symbol_names.deinit(gpa); - try symbol_names.ensureUnusedCapacity(gpa, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf, .pbud, .indr, .abs, _ => continue, - .sect => { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(.{ - .strx = sym.n_strx, - .addr = sym.n_value, - .ofile = MachoSymbol.unknown_ofile, - }); - } - }, - } - continue; - } - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[last_sym.strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(last_sym); - } else { - symbols.items[gop.index] = last_sym; - } - } - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = 
.oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => { - // Missing STAB symtab entries is still okay, unless there were also no normal symbols. - if (symbols.items.len == 0) return error.MissingDebugInfo; - }, - .oso_close => {}, - else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - return .{ - .mapped_memory = all_mapped_memory, - .symbols = symbols_slice, - .strings = strings, - .vaddr_offset = module.text_base - text_vmaddr, + fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile { + if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) { + error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo, + error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e, }; + return if (module.file.?) |*f| f else |err| err; } }; -const OFile = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - dwarf: Dwarf, - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed - /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
- symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), - - const SymbolAdapter = struct { - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { - _ = ctx; - return @truncate(std.hash.Wyhash.hash(0, sym_name)); - } - pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { - _ = b_index; - const b_sym = ctx.symtab[b_sym_index]; - const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); - return mem.eql(u8, a_sym_name, b_sym_name); - } - }; -}; - const MachoSymbol = struct { strx: u32, addr: u64, @@ -880,101 +633,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 }; } -fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return 
error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; - defer symbols_by_name.deinit(gpa); - try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab, 0..) |sym, sym_index| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( - @as([]const u8, sym_name), - @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), - ); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.key_ptr.* = @intCast(sym_index); - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .mapped_memory = mapped_mem, - .dwarf = dwarf, - .strtab = strtab, - .symtab = symtab, - .symbols_by_name = symbols_by_name.move(), - }; -} - const std = @import("std"); const Io = std.Io; const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const Error = std.debug.SelfInfoError; +const MachOFile = std.debug.MachOFile; const assert = std.debug.assert; const posix = std.posix; const macho = std.macho; diff --git a/lib/std/debug/SelfInfo/Windows.zig b/lib/std/debug/SelfInfo/Windows.zig index 70009217db53..306287a9e78e 100644 --- a/lib/std/debug/SelfInfo/Windows.zig +++ b/lib/std/debug/SelfInfo/Windows.zig @@ -33,6 +33,12 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons const module = try si.findModule(gpa, address); return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + si.mutex.lock(); + defer si.mutex.unlock(); + const module = try si.findModule(gpa, address); + return module.base_address; +} pub const can_unwind: bool = switch (builtin.cpu.arch) { else => true, diff --git a/lib/std/http.zig b/lib/std/http.zig index dcc96ba741ce..eedd729576e0 100644 --- 
a/lib/std/http.zig +++ b/lib/std/http.zig @@ -962,6 +962,7 @@ pub const BodyWriter = struct { // have to flush the chunk header before knowing the chunk length. return error.Unimplemented; }; + if (data_len == 0) return error.EndOfStream; const out = bw.http_protocol_output; l: switch (bw.state.chunk_len) { 0 => { @@ -975,8 +976,7 @@ pub const BodyWriter = struct { 2 => { try out.writeAll("\r\n"); bw.state.chunk_len = 0; - assert(file_reader.atEnd()); - return error.EndOfStream; + continue :l 0; }, else => { const chunk_limit: std.Io.Limit = .limited(bw.state.chunk_len - 2); @@ -985,8 +985,7 @@ pub const BodyWriter = struct { else try out.write(chunk_limit.slice(w.buffered())); bw.state.chunk_len -= n; - const ret = w.consume(n); - return ret; + return w.consume(n); }, } } diff --git a/lib/std/macho.zig b/lib/std/macho.zig index d541e2d13e77..7b8894e981b6 100644 --- a/lib/std/macho.zig +++ b/lib/std/macho.zig @@ -1902,74 +1902,76 @@ pub const data_in_code_entry = extern struct { }; pub const LoadCommandIterator = struct { + next_index: usize, ncmds: usize, - buffer: []const u8, - index: usize = 0, + r: std.Io.Reader, pub const LoadCommand = struct { hdr: load_command, data: []const u8, - pub fn cmd(lc: LoadCommand) LC { - return lc.hdr.cmd; - } - - pub fn cmdsize(lc: LoadCommand) u32 { - return lc.hdr.cmdsize; - } - pub fn cast(lc: LoadCommand, comptime Cmd: type) ?Cmd { if (lc.data.len < @sizeOf(Cmd)) return null; - return @as(*align(1) const Cmd, @ptrCast(lc.data.ptr)).*; + const ptr: *align(1) const Cmd = @ptrCast(lc.data.ptr); + var cmd = ptr.*; + if (builtin.cpu.arch.endian() != .little) std.mem.byteSwapAllFields(Cmd, &cmd); + return cmd; } /// Asserts LoadCommand is of type segment_command_64. + /// If the native endian is not `.little`, the `section_64` values must be byte-swapped by the caller. 
pub fn getSections(lc: LoadCommand) []align(1) const section_64 { const segment_lc = lc.cast(segment_command_64).?; - if (segment_lc.nsects == 0) return &[0]section_64{}; - const data = lc.data[@sizeOf(segment_command_64)..]; - const sections = @as([*]align(1) const section_64, @ptrCast(data.ptr))[0..segment_lc.nsects]; - return sections; + const sects_ptr: [*]align(1) const section_64 = @ptrCast(lc.data[@sizeOf(segment_command_64)..]); + return sects_ptr[0..segment_lc.nsects]; } /// Asserts LoadCommand is of type dylib_command. pub fn getDylibPathName(lc: LoadCommand) []const u8 { const dylib_lc = lc.cast(dylib_command).?; - const data = lc.data[dylib_lc.dylib.name..]; - return mem.sliceTo(data, 0); + return mem.sliceTo(lc.data[dylib_lc.dylib.name..], 0); } /// Asserts LoadCommand is of type rpath_command. pub fn getRpathPathName(lc: LoadCommand) []const u8 { const rpath_lc = lc.cast(rpath_command).?; - const data = lc.data[rpath_lc.path..]; - return mem.sliceTo(data, 0); + return mem.sliceTo(lc.data[rpath_lc.path..], 0); } /// Asserts LoadCommand is of type build_version_command. + /// If the native endian is not `.little`, the `build_tool_version` values must be byte-swapped by the caller. 
pub fn getBuildVersionTools(lc: LoadCommand) []align(1) const build_tool_version { const build_lc = lc.cast(build_version_command).?; - const ntools = build_lc.ntools; - if (ntools == 0) return &[0]build_tool_version{}; - const data = lc.data[@sizeOf(build_version_command)..]; - const tools = @as([*]align(1) const build_tool_version, @ptrCast(data.ptr))[0..ntools]; - return tools; + const tools_ptr: [*]align(1) const build_tool_version = @ptrCast(lc.data[@sizeOf(build_version_command)..]); + return tools_ptr[0..build_lc.ntools]; } }; - pub fn next(it: *LoadCommandIterator) ?LoadCommand { - if (it.index >= it.ncmds) return null; + pub fn next(it: *LoadCommandIterator) error{InvalidMachO}!?LoadCommand { + if (it.next_index >= it.ncmds) return null; - const hdr = @as(*align(1) const load_command, @ptrCast(it.buffer.ptr)).*; - const cmd = LoadCommand{ - .hdr = hdr, - .data = it.buffer[0..hdr.cmdsize], + const hdr = it.r.peekStruct(load_command, .little) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, + }; + const data = it.r.take(hdr.cmdsize) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.InvalidMachO, }; - it.buffer = it.buffer[hdr.cmdsize..]; - it.index += 1; + it.next_index += 1; + return .{ .hdr = hdr, .data = data }; + } - return cmd; + pub fn init(hdr: *const mach_header_64, cmds_buf_overlong: []const u8) error{InvalidMachO}!LoadCommandIterator { + if (cmds_buf_overlong.len < hdr.sizeofcmds) return error.InvalidMachO; + if (hdr.ncmds > 0 and hdr.sizeofcmds < @sizeOf(load_command)) return error.InvalidMachO; + const cmds_buf = cmds_buf_overlong[0..hdr.sizeofcmds]; + return .{ + .next_index = 0, + .ncmds = hdr.ncmds, + .r = .fixed(cmds_buf), + }; } }; diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 8a3ee073154c..7c6708983cd3 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4167,7 +4167,7 @@ pub const Platform = struct { /// Using 
Apple's ld64 as our blueprint, `min_version` as well as `sdk_version` are set to /// the extracted minimum platform version. pub fn fromLoadCommand(lc: macho.LoadCommandIterator.LoadCommand) Platform { - switch (lc.cmd()) { + switch (lc.hdr.cmd) { .BUILD_VERSION => { const cmd = lc.cast(macho.build_version_command).?; return .{ @@ -4200,7 +4200,7 @@ pub const Platform = struct { // We can't distinguish Mac Catalyst here, but this is legacy stuff anyway. const cmd = lc.cast(macho.version_min_command).?; return .{ - .os_tag = switch (lc.cmd()) { + .os_tag = switch (lc.hdr.cmd) { .VERSION_MIN_IPHONEOS => .ios, .VERSION_MIN_MACOSX => .macos, .VERSION_MIN_TVOS => .tvos, diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index c78d52f81594..64817ac4335c 100644 --- a/src/link/MachO/Dylib.zig +++ b/src/link/MachO/Dylib.zig @@ -90,11 +90,8 @@ fn parseBinary(self: *Dylib, macho_file: *MachO) !void { if (amt != lc_buffer.len) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |cmd| switch (cmd.cmd()) { + var it = LoadCommandIterator.init(&header, lc_buffer) catch |err| std.debug.panic("bad dylib: {t}", .{err}); + while (it.next() catch |err| std.debug.panic("bad dylib: {t}", .{err})) |cmd| switch (cmd.hdr.cmd) { .ID_DYLIB => { self.id = try Id.fromLoadCommand(gpa, cmd.cast(macho.dylib_command).?, cmd.getDylibPathName()); }, diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 7cec09ba9114..5f28d3dfdaf2 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -109,11 +109,8 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { if (amt != self.header.?.sizeofcmds) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |lc| switch (lc.cmd()) { + var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err}); 
+ while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) { .SEGMENT_64 => { const sections = lc.getSections(); try self.sections.ensureUnusedCapacity(gpa, sections.len); @@ -1644,11 +1641,8 @@ pub fn parseAr(self: *Object, macho_file: *MachO) !void { if (amt != self.header.?.sizeofcmds) return error.InputOutput; } - var it = LoadCommandIterator{ - .ncmds = self.header.?.ncmds, - .buffer = lc_buffer, - }; - while (it.next()) |lc| switch (lc.cmd()) { + var it = LoadCommandIterator.init(&self.header.?, lc_buffer) catch |err| std.debug.panic("bad object: {t}", .{err}); + while (it.next() catch |err| std.debug.panic("bad object: {t}", .{err})) |lc| switch (lc.hdr.cmd) { .SYMTAB => { const cmd = lc.cast(macho.symtab_command).?; try self.strtab.resize(gpa, cmd.strsize); diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index 249783b92741..3dd91de6122a 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -8,31 +8,50 @@ const assert = std.debug.assert; const SeenPcsHeader = std.Build.abi.fuzz.SeenPcsHeader; pub fn main() !void { - var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .init; - defer _ = general_purpose_allocator.deinit(); - const gpa = general_purpose_allocator.allocator(); + var debug_allocator: std.heap.DebugAllocator(.{}) = .init; + defer _ = debug_allocator.deinit(); + const gpa = debug_allocator.allocator(); - var arena_instance = std.heap.ArenaAllocator.init(gpa); + var arena_instance: std.heap.ArenaAllocator = .init(gpa); defer arena_instance.deinit(); const arena = arena_instance.allocator(); + var threaded: std.Io.Threaded = .init(gpa); + defer threaded.deinit(); + const io = threaded.io(); + const args = try std.process.argsAlloc(arena); + + const target_query_str = switch (args.len) { + 3 => "native", + 4 => args[3], + else => return fatal( + \\usage: {0s} path/to/exe path/to/coverage [target] + \\ if omitted, 'target' defaults to 'native' + \\ example: {0s} zig-out/test 
.zig-cache/v/xxxxxxxx x86_64-linux + , .{if (args.len == 0) "dump-cov" else args[0]}), + }; + + const target = std.zig.resolveTargetQueryOrFatal(io, try .parse(.{ + .arch_os_abi = target_query_str, + })); + const exe_file_name = args[1]; const cov_file_name = args[2]; const exe_path: Path = .{ - .root_dir = std.Build.Cache.Directory.cwd(), + .root_dir = .cwd(), .sub_path = exe_file_name, }; const cov_path: Path = .{ - .root_dir = std.Build.Cache.Directory.cwd(), + .root_dir = .cwd(), .sub_path = cov_file_name, }; - var coverage = std.debug.Coverage.init; + var coverage: std.debug.Coverage = .init; defer coverage.deinit(gpa); - var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { + var debug_info = std.debug.Info.load(gpa, exe_path, &coverage, target.ofmt, target.cpu.arch) catch |err| { fatal("failed to load debug info for {f}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa);