diff --git a/CMakeLists.txt b/CMakeLists.txt index 690e2e35b1b6..1b19e728d2ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -561,7 +561,6 @@ set(ZIG_STAGE2_SOURCES src/libs/libunwind.zig src/link.zig src/link/C.zig - src/link/Coff.zig src/link/Dwarf.zig src/link/Elf.zig src/link/Elf/Archive.zig diff --git a/lib/compiler/resinator/cvtres.zig b/lib/compiler/resinator/cvtres.zig index d0fb4c2d1c9a..50d2c6e96a54 100644 --- a/lib/compiler/resinator/cvtres.zig +++ b/lib/compiler/resinator/cvtres.zig @@ -168,7 +168,7 @@ pub fn parseNameOrOrdinal(allocator: Allocator, reader: *std.Io.Reader) !NameOrO } pub const CoffOptions = struct { - target: std.coff.MachineType = .X64, + target: std.coff.IMAGE.FILE.MACHINE = .AMD64, /// If true, zeroes will be written to all timestamp fields reproducible: bool = true, /// If true, the MEM_WRITE flag will not be set in the .rsrc section header @@ -210,19 +210,19 @@ pub fn writeCoff(allocator: Allocator, writer: *std.Io.Writer, resources: []cons const lengths = resource_tree.dataLengths(); const byte_size_of_relocation = 10; const relocations_len: u32 = @intCast(byte_size_of_relocation * resources.len); - const pointer_to_rsrc01_data = @sizeOf(std.coff.CoffHeader) + (@sizeOf(std.coff.SectionHeader) * 2); + const pointer_to_rsrc01_data = @sizeOf(std.coff.Header) + (@sizeOf(std.coff.SectionHeader) * 2); const pointer_to_relocations = pointer_to_rsrc01_data + lengths.rsrc01; const pointer_to_rsrc02_data = pointer_to_relocations + relocations_len; const pointer_to_symbol_table = pointer_to_rsrc02_data + lengths.rsrc02; const timestamp: i64 = if (options.reproducible) 0 else std.time.timestamp(); const size_of_optional_header = 0; - const machine_type: std.coff.MachineType = options.target; - const flags = std.coff.CoffHeaderFlags{ - .@"32BIT_MACHINE" = 1, + const machine_type: std.coff.IMAGE.FILE.MACHINE = options.target; + const flags = std.coff.Header.Flags{ + .@"32BIT_MACHINE" = true, }; const number_of_symbols = 5 + @as(u32, 
@intCast(resources.len)) + @intFromBool(options.define_external_symbol != null); - const coff_header = std.coff.CoffHeader{ + const coff_header = std.coff.Header{ .machine = machine_type, .number_of_sections = 2, .time_date_stamp = @as(u32, @truncate(@as(u64, @bitCast(timestamp)))), @@ -245,9 +245,9 @@ pub fn writeCoff(allocator: Allocator, writer: *std.Io.Writer, resources: []cons .number_of_relocations = @intCast(resources.len), .number_of_linenumbers = 0, .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_WRITE = @intFromBool(!options.read_only), - .MEM_READ = 1, + .CNT_INITIALIZED_DATA = true, + .MEM_WRITE = !options.read_only, + .MEM_READ = true, }, }; try writer.writeStruct(rsrc01_header, .little); @@ -263,9 +263,9 @@ pub fn writeCoff(allocator: Allocator, writer: *std.Io.Writer, resources: []cons .number_of_relocations = 0, .number_of_linenumbers = 0, .flags = .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_WRITE = @intFromBool(!options.read_only), - .MEM_READ = 1, + .CNT_INITIALIZED_DATA = true, + .MEM_WRITE = !options.read_only, + .MEM_READ = true, }, }; try writer.writeStruct(rsrc02_header, .little); @@ -1005,9 +1005,9 @@ pub const supported_targets = struct { x86_64, aarch64, - pub fn toCoffMachineType(arch: Arch) std.coff.MachineType { + pub fn toCoffMachineType(arch: Arch) std.coff.IMAGE.FILE.MACHINE { return switch (arch) { - .x64, .amd64, .x86_64 => .X64, + .x64, .amd64, .x86_64 => .AMD64, .x86, .i386 => .I386, .arm, .armnt => .ARMNT, .arm64, .aarch64 => .ARM64, @@ -1079,26 +1079,26 @@ pub const supported_targets = struct { }; // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#type-indicators - pub fn rvaRelocationTypeIndicator(target: std.coff.MachineType) ?u16 { + pub fn rvaRelocationTypeIndicator(target: std.coff.IMAGE.FILE.MACHINE) ?u16 { return switch (target) { - .X64 => 0x3, // IMAGE_REL_AMD64_ADDR32NB - .I386 => 0x7, // IMAGE_REL_I386_DIR32NB - .ARMNT => 0x2, // IMAGE_REL_ARM_ADDR32NB - .ARM64, .ARM64EC, .ARM64X => 0x2, // 
IMAGE_REL_ARM64_ADDR32NB - .IA64 => 0x10, // IMAGE_REL_IA64_DIR32NB + .AMD64 => @intFromEnum(std.coff.IMAGE.REL.AMD64.ADDR32NB), + .I386 => @intFromEnum(std.coff.IMAGE.REL.I386.DIR32NB), + .ARMNT => @intFromEnum(std.coff.IMAGE.REL.ARM.ADDR32NB), + .ARM64, .ARM64EC, .ARM64X => @intFromEnum(std.coff.IMAGE.REL.ARM64.ADDR32NB), + .IA64 => @intFromEnum(std.coff.IMAGE.REL.IA64.DIR32NB), .EBC => 0x1, // This is what cvtres.exe writes for this target, unsure where it comes from else => null, }; } - pub fn isSupported(target: std.coff.MachineType) bool { + pub fn isSupported(target: std.coff.IMAGE.FILE.MACHINE) bool { return rvaRelocationTypeIndicator(target) != null; } comptime { // Enforce two things: // 1. Arch enum field names are all lowercase (necessary for how fromStringIgnoreCase is implemented) - // 2. All enum fields in Arch have an associated RVA relocation type when converted to a coff.MachineType + // 2. All enum fields in Arch have an associated RVA relocation type when converted to a coff.IMAGE.FILE.MACHINE for (@typeInfo(Arch).@"enum".fields) |enum_field| { const all_lower = all_lower: for (enum_field.name) |c| { if (std.ascii.isUpper(c)) break :all_lower false; diff --git a/lib/compiler/resinator/main.zig b/lib/compiler/resinator/main.zig index 08e6f7d4ce12..4d0182bf75c9 100644 --- a/lib/compiler/resinator/main.zig +++ b/lib/compiler/resinator/main.zig @@ -527,7 +527,7 @@ const LazyIncludePaths = struct { arena: std.mem.Allocator, auto_includes_option: cli.Options.AutoIncludes, zig_lib_dir: []const u8, - target_machine_type: std.coff.MachineType, + target_machine_type: std.coff.IMAGE.FILE.MACHINE, resolved_include_paths: ?[]const []const u8 = null, pub fn get(self: *LazyIncludePaths, error_handler: *ErrorHandler) ![]const []const u8 { @@ -555,11 +555,11 @@ const LazyIncludePaths = struct { } }; -fn getIncludePaths(arena: std.mem.Allocator, auto_includes_option: cli.Options.AutoIncludes, zig_lib_dir: []const u8, target_machine_type: std.coff.MachineType) 
![]const []const u8 { +fn getIncludePaths(arena: std.mem.Allocator, auto_includes_option: cli.Options.AutoIncludes, zig_lib_dir: []const u8, target_machine_type: std.coff.IMAGE.FILE.MACHINE) ![]const []const u8 { if (auto_includes_option == .none) return &[_][]const u8{}; const includes_arch: std.Target.Cpu.Arch = switch (target_machine_type) { - .X64 => .x86_64, + .AMD64 => .x86_64, .I386 => .x86, .ARMNT => .thumb, .ARM64 => .aarch64, diff --git a/lib/std/Target.zig b/lib/std/Target.zig index 027872de54d4..83b6739747e0 100644 --- a/lib/std/Target.zig +++ b/lib/std/Target.zig @@ -1082,7 +1082,7 @@ pub fn toElfMachine(target: *const Target) std.elf.EM { }; } -pub fn toCoffMachine(target: *const Target) std.coff.MachineType { +pub fn toCoffMachine(target: *const Target) std.coff.IMAGE.FILE.MACHINE { return switch (target.cpu.arch) { .arm => .ARM, .thumb => .ARMNT, @@ -1092,7 +1092,7 @@ pub fn toCoffMachine(target: *const Target) std.coff.MachineType { .riscv32 => .RISCV32, .riscv64 => .RISCV64, .x86 => .I386, - .x86_64 => .X64, + .x86_64 => .AMD64, .amdgcn, .arc, diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig index e1007ff27e5e..2550e0aebec9 100644 --- a/lib/std/array_hash_map.zig +++ b/lib/std/array_hash_map.zig @@ -50,7 +50,7 @@ pub fn eqlString(a: []const u8, b: []const u8) bool { } pub fn hashString(s: []const u8) u32 { - return @as(u32, @truncate(std.hash.Wyhash.hash(0, s))); + return @truncate(std.hash.Wyhash.hash(0, s)); } /// Deprecated in favor of `ArrayHashMapWithAllocator` (no code changes needed) diff --git a/lib/std/coff.zig b/lib/std/coff.zig index cb4112339d90..27f3fbc0994a 100644 --- a/lib/std/coff.zig +++ b/lib/std/coff.zig @@ -2,70 +2,9 @@ const std = @import("std.zig"); const assert = std.debug.assert; const mem = std.mem; -pub const CoffHeaderFlags = packed struct { - /// Image only, Windows CE, and Microsoft Windows NT and later. 
- /// This indicates that the file does not contain base relocations - /// and must therefore be loaded at its preferred base address. - /// If the base address is not available, the loader reports an error. - /// The default behavior of the linker is to strip base relocations - /// from executable (EXE) files. - RELOCS_STRIPPED: u1 = 0, - - /// Image only. This indicates that the image file is valid and can be run. - /// If this flag is not set, it indicates a linker error. - EXECUTABLE_IMAGE: u1 = 0, - - /// COFF line numbers have been removed. This flag is deprecated and should be zero. - LINE_NUMS_STRIPPED: u1 = 0, - - /// COFF symbol table entries for local symbols have been removed. - /// This flag is deprecated and should be zero. - LOCAL_SYMS_STRIPPED: u1 = 0, - - /// Obsolete. Aggressively trim working set. - /// This flag is deprecated for Windows 2000 and later and must be zero. - AGGRESSIVE_WS_TRIM: u1 = 0, - - /// Application can handle > 2-GB addresses. - LARGE_ADDRESS_AWARE: u1 = 0, - - /// This flag is reserved for future use. - RESERVED: u1 = 0, - - /// Little endian: the least significant bit (LSB) precedes the - /// most significant bit (MSB) in memory. This flag is deprecated and should be zero. - BYTES_REVERSED_LO: u1 = 0, - - /// Machine is based on a 32-bit-word architecture. - @"32BIT_MACHINE": u1 = 0, - - /// Debugging information is removed from the image file. - DEBUG_STRIPPED: u1 = 0, - - /// If the image is on removable media, fully load it and copy it to the swap file. - REMOVABLE_RUN_FROM_SWAP: u1 = 0, - - /// If the image is on network media, fully load it and copy it to the swap file. - NET_RUN_FROM_SWAP: u1 = 0, - - /// The image file is a system file, not a user program. - SYSTEM: u1 = 0, - - /// The image file is a dynamic-link library (DLL). - /// Such files are considered executable files for almost all purposes, - /// although they cannot be directly run. 
- DLL: u1 = 0, - - /// The file should be run only on a uniprocessor machine. - UP_SYSTEM_ONLY: u1 = 0, - - /// Big endian: the MSB precedes the LSB in memory. This flag is deprecated and should be zero. - BYTES_REVERSED_HI: u1 = 0, -}; - -pub const CoffHeader = extern struct { +pub const Header = extern struct { /// The number that identifies the type of target machine. - machine: MachineType, + machine: IMAGE.FILE.MACHINE, /// The number of sections. This indicates the size of the section table, which immediately follows the headers. number_of_sections: u16, @@ -88,49 +27,110 @@ pub const CoffHeader = extern struct { size_of_optional_header: u16, /// The flags that indicate the attributes of the file. - flags: CoffHeaderFlags, + flags: Header.Flags, + + pub const Flags = packed struct(u16) { + /// Image only, Windows CE, and Microsoft Windows NT and later. + /// This indicates that the file does not contain base relocations + /// and must therefore be loaded at its preferred base address. + /// If the base address is not available, the loader reports an error. + /// The default behavior of the linker is to strip base relocations + /// from executable (EXE) files. + RELOCS_STRIPPED: bool = false, + + /// Image only. This indicates that the image file is valid and can be run. + /// If this flag is not set, it indicates a linker error. + EXECUTABLE_IMAGE: bool = false, + + /// COFF line numbers have been removed. This flag is deprecated and should be zero. + LINE_NUMS_STRIPPED: bool = false, + + /// COFF symbol table entries for local symbols have been removed. + /// This flag is deprecated and should be zero. + LOCAL_SYMS_STRIPPED: bool = false, + + /// Obsolete. Aggressively trim working set. + /// This flag is deprecated for Windows 2000 and later and must be zero. + AGGRESSIVE_WS_TRIM: bool = false, + + /// Application can handle > 2-GB addresses. + LARGE_ADDRESS_AWARE: bool = false, + + /// This flag is reserved for future use. 
+ RESERVED: bool = false, + + /// Little endian: the least significant bit (LSB) precedes the + /// most significant bit (MSB) in memory. This flag is deprecated and should be zero. + BYTES_REVERSED_LO: bool = false, + + /// Machine is based on a 32-bit-word architecture. + @"32BIT_MACHINE": bool = false, + + /// Debugging information is removed from the image file. + DEBUG_STRIPPED: bool = false, + + /// If the image is on removable media, fully load it and copy it to the swap file. + REMOVABLE_RUN_FROM_SWAP: bool = false, + + /// If the image is on network media, fully load it and copy it to the swap file. + NET_RUN_FROM_SWAP: bool = false, + + /// The image file is a system file, not a user program. + SYSTEM: bool = false, + + /// The image file is a dynamic-link library (DLL). + /// Such files are considered executable files for almost all purposes, + /// although they cannot be directly run. + DLL: bool = false, + + /// The file should be run only on a uniprocessor machine. + UP_SYSTEM_ONLY: bool = false, + + /// Big endian: the MSB precedes the LSB in memory. This flag is deprecated and should be zero. + BYTES_REVERSED_HI: bool = false, + }; }; // OptionalHeader.magic values // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx -pub const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b; -pub const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b; +pub const IMAGE_NT_OPTIONAL_HDR32_MAGIC = @intFromEnum(OptionalHeader.Magic.PE32); +pub const IMAGE_NT_OPTIONAL_HDR64_MAGIC = @intFromEnum(OptionalHeader.Magic.@"PE32+"); -pub const DllFlags = packed struct { +pub const DllFlags = packed struct(u16) { _reserved_0: u5 = 0, /// Image can handle a high entropy 64-bit virtual address space. - HIGH_ENTROPY_VA: u1 = 0, + HIGH_ENTROPY_VA: bool = false, /// DLL can be relocated at load time. - DYNAMIC_BASE: u1 = 0, + DYNAMIC_BASE: bool = false, /// Code Integrity checks are enforced. 
- FORCE_INTEGRITY: u1 = 0, + FORCE_INTEGRITY: bool = false, /// Image is NX compatible. - NX_COMPAT: u1 = 0, + NX_COMPAT: bool = false, /// Isolation aware, but do not isolate the image. - NO_ISOLATION: u1 = 0, + NO_ISOLATION: bool = false, /// Does not use structured exception (SE) handling. No SE handler may be called in this image. - NO_SEH: u1 = 0, + NO_SEH: bool = false, /// Do not bind the image. - NO_BIND: u1 = 0, + NO_BIND: bool = false, /// Image must execute in an AppContainer. - APPCONTAINER: u1 = 0, + APPCONTAINER: bool = false, /// A WDM driver. - WDM_DRIVER: u1 = 0, + WDM_DRIVER: bool = false, /// Image supports Control Flow Guard. - GUARD_CF: u1 = 0, + GUARD_CF: bool = false, /// Terminal Server aware. - TERMINAL_SERVER_AWARE: u1 = 0, + TERMINAL_SERVER_AWARE: bool = false, }; pub const Subsystem = enum(u16) { @@ -180,7 +180,7 @@ pub const Subsystem = enum(u16) { }; pub const OptionalHeader = extern struct { - magic: u16, + magic: OptionalHeader.Magic, major_linker_version: u8, minor_linker_version: u8, size_of_code: u32, @@ -188,71 +188,63 @@ pub const OptionalHeader = extern struct { size_of_uninitialized_data: u32, address_of_entry_point: u32, base_of_code: u32, -}; -pub const OptionalHeaderPE32 = extern struct { - magic: u16, - major_linker_version: u8, - minor_linker_version: u8, - size_of_code: u32, - size_of_initialized_data: u32, - size_of_uninitialized_data: u32, - address_of_entry_point: u32, - base_of_code: u32, - base_of_data: u32, - image_base: u32, - section_alignment: u32, - file_alignment: u32, - major_operating_system_version: u16, - minor_operating_system_version: u16, - major_image_version: u16, - minor_image_version: u16, - major_subsystem_version: u16, - minor_subsystem_version: u16, - win32_version_value: u32, - size_of_image: u32, - size_of_headers: u32, - checksum: u32, - subsystem: Subsystem, - dll_flags: DllFlags, - size_of_stack_reserve: u32, - size_of_stack_commit: u32, - size_of_heap_reserve: u32, - size_of_heap_commit: 
u32, - loader_flags: u32, - number_of_rva_and_sizes: u32, -}; + pub const Magic = enum(u16) { + PE32 = 0x10b, + @"PE32+" = 0x20b, + _, + }; -pub const OptionalHeaderPE64 = extern struct { - magic: u16, - major_linker_version: u8, - minor_linker_version: u8, - size_of_code: u32, - size_of_initialized_data: u32, - size_of_uninitialized_data: u32, - address_of_entry_point: u32, - base_of_code: u32, - image_base: u64, - section_alignment: u32, - file_alignment: u32, - major_operating_system_version: u16, - minor_operating_system_version: u16, - major_image_version: u16, - minor_image_version: u16, - major_subsystem_version: u16, - minor_subsystem_version: u16, - win32_version_value: u32, - size_of_image: u32, - size_of_headers: u32, - checksum: u32, - subsystem: Subsystem, - dll_flags: DllFlags, - size_of_stack_reserve: u64, - size_of_stack_commit: u64, - size_of_heap_reserve: u64, - size_of_heap_commit: u64, - loader_flags: u32, - number_of_rva_and_sizes: u32, + pub const PE32 = extern struct { + standard: OptionalHeader, + base_of_data: u32, + image_base: u32, + section_alignment: u32, + file_alignment: u32, + major_operating_system_version: u16, + minor_operating_system_version: u16, + major_image_version: u16, + minor_image_version: u16, + major_subsystem_version: u16, + minor_subsystem_version: u16, + win32_version_value: u32, + size_of_image: u32, + size_of_headers: u32, + checksum: u32, + subsystem: Subsystem, + dll_flags: DllFlags, + size_of_stack_reserve: u32, + size_of_stack_commit: u32, + size_of_heap_reserve: u32, + size_of_heap_commit: u32, + loader_flags: u32, + number_of_rva_and_sizes: u32, + }; + + pub const @"PE32+" = extern struct { + standard: OptionalHeader, + image_base: u64, + section_alignment: u32, + file_alignment: u32, + major_operating_system_version: u16, + minor_operating_system_version: u16, + major_image_version: u16, + minor_image_version: u16, + major_subsystem_version: u16, + minor_subsystem_version: u16, + win32_version_value: u32, + 
size_of_image: u32, + size_of_headers: u32, + checksum: u32, + subsystem: Subsystem, + dll_flags: DllFlags, + size_of_stack_reserve: u64, + size_of_stack_commit: u64, + size_of_heap_reserve: u64, + size_of_heap_commit: u64, + loader_flags: u32, + number_of_rva_and_sizes: u32, + }; }; pub const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16; @@ -319,7 +311,7 @@ pub const BaseRelocationDirectoryEntry = extern struct { block_size: u32, }; -pub const BaseRelocation = packed struct { +pub const BaseRelocation = packed struct(u16) { /// Stored in the remaining 12 bits of the WORD, an offset from the starting address that was specified in the Page RVA field for the block. /// This offset specifies where the base relocation is to be applied. offset: u12, @@ -447,12 +439,12 @@ pub const ImportDirectoryEntry = extern struct { }; pub const ImportLookupEntry32 = struct { - pub const ByName = packed struct { + pub const ByName = packed struct(u32) { name_table_rva: u31, flag: u1 = 0, }; - pub const ByOrdinal = packed struct { + pub const ByOrdinal = packed struct(u32) { ordinal_number: u16, unused: u15 = 0, flag: u1 = 1, @@ -472,13 +464,13 @@ pub const ImportLookupEntry32 = struct { }; pub const ImportLookupEntry64 = struct { - pub const ByName = packed struct { + pub const ByName = packed struct(u64) { name_table_rva: u31, unused: u32 = 0, flag: u1 = 0, }; - pub const ByOrdinal = packed struct { + pub const ByOrdinal = packed struct(u64) { ordinal_number: u16, unused: u47 = 0, flag: u1 = 1, @@ -519,7 +511,7 @@ pub const SectionHeader = extern struct { pointer_to_linenumbers: u32, number_of_relocations: u16, number_of_linenumbers: u16, - flags: SectionHeaderFlags, + flags: SectionHeader.Flags, pub fn getName(self: *align(1) const SectionHeader) ?[]const u8 { if (self.name[0] == '/') return null; @@ -546,109 +538,121 @@ pub const SectionHeader = extern struct { } pub fn isCode(self: SectionHeader) bool { - return self.flags.CNT_CODE == 0b1; + return self.flags.CNT_CODE; } pub fn 
isComdat(self: SectionHeader) bool { - return self.flags.LNK_COMDAT == 0b1; + return self.flags.LNK_COMDAT; } -}; -pub const SectionHeaderFlags = packed struct { - _reserved_0: u3 = 0, + pub const Flags = packed struct(u32) { + SCALE_INDEX: bool = false, + + unused1: u2 = 0, - /// The section should not be padded to the next boundary. - /// This flag is obsolete and is replaced by IMAGE_SCN_ALIGN_1BYTES. - /// This is valid only for object files. - TYPE_NO_PAD: u1 = 0, + /// The section should not be padded to the next boundary. + /// This flag is obsolete and is replaced by `.ALIGN = .@"1BYTES"`. + /// This is valid only for object files. + TYPE_NO_PAD: bool = false, - _reserved_1: u1 = 0, + unused4: u1 = 0, - /// The section contains executable code. - CNT_CODE: u1 = 0, + /// The section contains executable code. + CNT_CODE: bool = false, - /// The section contains initialized data. - CNT_INITIALIZED_DATA: u1 = 0, + /// The section contains initialized data. + CNT_INITIALIZED_DATA: bool = false, - /// The section contains uninitialized data. - CNT_UNINITIALIZED_DATA: u1 = 0, + /// The section contains uninitialized data. + CNT_UNINITIALIZED_DATA: bool = false, - /// Reserved for future use. - LNK_OTHER: u1 = 0, + /// Reserved for future use. + LNK_OTHER: bool = false, - /// The section contains comments or other information. - /// The .drectve section has this type. - /// This is valid for object files only. - LNK_INFO: u1 = 0, + /// The section contains comments or other information. + /// The .drectve section has this type. + /// This is valid for object files only. + LNK_INFO: bool = false, - _reserved_2: u1 = 0, + unused10: u1 = 0, - /// The section will not become part of the image. - /// This is valid only for object files. - LNK_REMOVE: u1 = 0, + /// The section will not become part of the image. + /// This is valid only for object files. + LNK_REMOVE: bool = false, - /// The section contains COMDAT data. 
- /// For more information, see COMDAT Sections (Object Only). - /// This is valid only for object files. - LNK_COMDAT: u1 = 0, + /// The section contains COMDAT data. + /// For more information, see COMDAT Sections (Object Only). + /// This is valid only for object files. + LNK_COMDAT: bool = false, - _reserved_3: u2 = 0, + unused13: u2 = 0, - /// The section contains data referenced through the global pointer (GP). - GPREL: u1 = 0, + union14: packed union { + mask: u1, + /// The section contains data referenced through the global pointer (GP). + GPREL: bool, + MEM_FARDATA: bool, + } = .{ .mask = 0 }, - /// Reserved for future use. - MEM_PURGEABLE: u1 = 0, + unused15: u1 = 0, - /// Reserved for future use. - MEM_16BIT: u1 = 0, + union16: packed union { + mask: u1, + MEM_PURGEABLE: bool, + MEM_16BIT: bool, + } = .{ .mask = 0 }, - /// Reserved for future use. - MEM_LOCKED: u1 = 0, + /// Reserved for future use. + MEM_LOCKED: bool = false, - /// Reserved for future use. - MEM_PRELOAD: u1 = 0, + /// Reserved for future use. 
+ MEM_PRELOAD: bool = false, - /// Takes on multiple values according to flags: - /// pub const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x100000; - /// pub const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x200000; - /// pub const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x300000; - /// pub const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x400000; - /// pub const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x500000; - /// pub const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x600000; - /// pub const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x700000; - /// pub const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x800000; - /// pub const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x900000; - /// pub const IMAGE_SCN_ALIGN_512BYTES: u32 = 0xA00000; - /// pub const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0xB00000; - /// pub const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0xC00000; - /// pub const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0xD00000; - /// pub const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0xE00000; - ALIGN: u4 = 0, + ALIGN: SectionHeader.Flags.Align = .NONE, - /// The section contains extended relocations. - LNK_NRELOC_OVFL: u1 = 0, + /// The section contains extended relocations. + LNK_NRELOC_OVFL: bool = false, - /// The section can be discarded as needed. - MEM_DISCARDABLE: u1 = 0, + /// The section can be discarded as needed. + MEM_DISCARDABLE: bool = false, - /// The section cannot be cached. - MEM_NOT_CACHED: u1 = 0, + /// The section cannot be cached. + MEM_NOT_CACHED: bool = false, - /// The section is not pageable. - MEM_NOT_PAGED: u1 = 0, + /// The section is not pageable. + MEM_NOT_PAGED: bool = false, - /// The section can be shared in memory. - MEM_SHARED: u1 = 0, + /// The section can be shared in memory. + MEM_SHARED: bool = false, - /// The section can be executed as code. - MEM_EXECUTE: u1 = 0, + /// The section can be executed as code. + MEM_EXECUTE: bool = false, - /// The section can be read. - MEM_READ: u1 = 0, + /// The section can be read. + MEM_READ: bool = false, - /// The section can be written to. - MEM_WRITE: u1 = 0, + /// The section can be written to. 
+ MEM_WRITE: bool = false, + + pub const Align = enum(u4) { + NONE = 0, + @"1BYTES" = 1, + @"2BYTES" = 2, + @"4BYTES" = 3, + @"8BYTES" = 4, + @"16BYTES" = 5, + @"32BYTES" = 6, + @"64BYTES" = 7, + @"128BYTES" = 8, + @"256BYTES" = 9, + @"512BYTES" = 10, + @"1024BYTES" = 11, + @"2048BYTES" = 12, + @"4096BYTES" = 13, + @"8192BYTES" = 14, + _, + }; + }; }; pub const Symbol = struct { @@ -691,7 +695,7 @@ pub const SectionNumber = enum(u16) { _, }; -pub const SymType = packed struct { +pub const SymType = packed struct(u16) { complex_type: ComplexType, base_type: BaseType, }; @@ -982,87 +986,7 @@ pub const DebugInfoDefinition = struct { unused_3: [2]u8, }; -pub const MachineType = enum(u16) { - UNKNOWN = 0x0, - /// Alpha AXP, 32-bit address space - ALPHA = 0x184, - /// Alpha 64, 64-bit address space - ALPHA64 = 0x284, - /// Matsushita AM33 - AM33 = 0x1d3, - /// x64 - X64 = 0x8664, - /// ARM little endian - ARM = 0x1c0, - /// ARM64 little endian - ARM64 = 0xaa64, - /// ARM64EC - ARM64EC = 0xa641, - /// ARM64X - ARM64X = 0xa64e, - /// ARM Thumb-2 little endian - ARMNT = 0x1c4, - /// CEE - CEE = 0xc0ee, - /// CEF - CEF = 0xcef, - /// Hybrid PE - CHPE_X86 = 0x3a64, - /// EFI byte code - EBC = 0xebc, - /// Intel 386 or later processors and compatible processors - I386 = 0x14c, - /// Intel Itanium processor family - IA64 = 0x200, - /// LoongArch32 - LOONGARCH32 = 0x6232, - /// LoongArch64 - LOONGARCH64 = 0x6264, - /// Mitsubishi M32R little endian - M32R = 0x9041, - /// MIPS16 - MIPS16 = 0x266, - /// MIPS with FPU - MIPSFPU = 0x366, - /// MIPS16 with FPU - MIPSFPU16 = 0x466, - /// Power PC little endian - POWERPC = 0x1f0, - /// Power PC with floating point support - POWERPCFP = 0x1f1, - /// MIPS little endian - R3000 = 0x162, - /// MIPS little endian - R4000 = 0x166, - /// MIPS little endian - R10000 = 0x168, - /// RISC-V 32-bit address space - RISCV32 = 0x5032, - /// RISC-V 64-bit address space - RISCV64 = 0x5064, - /// RISC-V 128-bit address space - RISCV128 = 0x5128, - /// 
Hitachi SH3 - SH3 = 0x1a2, - /// Hitachi SH3 DSP - SH3DSP = 0x1a3, - /// SH3E little-endian - SH3E = 0x1a4, - /// Hitachi SH4 - SH4 = 0x1a6, - /// Hitachi SH5 - SH5 = 0x1a8, - /// Thumb - THUMB = 0x1c2, - /// Infineon - TRICORE = 0x520, - /// MIPS little-endian WCE v2 - WCEMIPSV2 = 0x169, - - _, -}; - -pub const CoffError = error{ +pub const Error = error{ InvalidPEMagic, InvalidPEHeader, InvalidMachine, @@ -1104,7 +1028,7 @@ pub const Coff = struct { // Do some basic validation upfront if (is_image) { - const coff_header = coff.getCoffHeader(); + const coff_header = coff.getHeader(); if (coff_header.size_of_optional_header == 0) return error.MissingPEHeader; } @@ -1161,31 +1085,31 @@ pub const Coff = struct { return self.data[start .. start + len]; } - pub fn getCoffHeader(self: Coff) CoffHeader { - return @as(*align(1) const CoffHeader, @ptrCast(self.data[self.coff_header_offset..][0..@sizeOf(CoffHeader)])).*; + pub fn getHeader(self: Coff) Header { + return @as(*align(1) const Header, @ptrCast(self.data[self.coff_header_offset..][0..@sizeOf(Header)])).*; } pub fn getOptionalHeader(self: Coff) OptionalHeader { assert(self.is_image); - const offset = self.coff_header_offset + @sizeOf(CoffHeader); + const offset = self.coff_header_offset + @sizeOf(Header); return @as(*align(1) const OptionalHeader, @ptrCast(self.data[offset..][0..@sizeOf(OptionalHeader)])).*; } - pub fn getOptionalHeader32(self: Coff) OptionalHeaderPE32 { + pub fn getOptionalHeader32(self: Coff) OptionalHeader.PE32 { assert(self.is_image); - const offset = self.coff_header_offset + @sizeOf(CoffHeader); - return @as(*align(1) const OptionalHeaderPE32, @ptrCast(self.data[offset..][0..@sizeOf(OptionalHeaderPE32)])).*; + const offset = self.coff_header_offset + @sizeOf(Header); + return @as(*align(1) const OptionalHeader.PE32, @ptrCast(self.data[offset..][0..@sizeOf(OptionalHeader.PE32)])).*; } - pub fn getOptionalHeader64(self: Coff) OptionalHeaderPE64 { + pub fn getOptionalHeader64(self: Coff) 
OptionalHeader.@"PE32+" { assert(self.is_image); - const offset = self.coff_header_offset + @sizeOf(CoffHeader); - return @as(*align(1) const OptionalHeaderPE64, @ptrCast(self.data[offset..][0..@sizeOf(OptionalHeaderPE64)])).*; + const offset = self.coff_header_offset + @sizeOf(Header); + return @as(*align(1) const OptionalHeader.@"PE32+", @ptrCast(self.data[offset..][0..@sizeOf(OptionalHeader.@"PE32+")])).*; } pub fn getImageBase(self: Coff) u64 { const hdr = self.getOptionalHeader(); - return switch (hdr.magic) { + return switch (@intFromEnum(hdr.magic)) { IMAGE_NT_OPTIONAL_HDR32_MAGIC => self.getOptionalHeader32().image_base, IMAGE_NT_OPTIONAL_HDR64_MAGIC => self.getOptionalHeader64().image_base, else => unreachable, // We assume we have validated the header already @@ -1194,7 +1118,7 @@ pub const Coff = struct { pub fn getNumberOfDataDirectories(self: Coff) u32 { const hdr = self.getOptionalHeader(); - return switch (hdr.magic) { + return switch (@intFromEnum(hdr.magic)) { IMAGE_NT_OPTIONAL_HDR32_MAGIC => self.getOptionalHeader32().number_of_rva_and_sizes, IMAGE_NT_OPTIONAL_HDR64_MAGIC => self.getOptionalHeader64().number_of_rva_and_sizes, else => unreachable, // We assume we have validated the header already @@ -1203,17 +1127,17 @@ pub const Coff = struct { pub fn getDataDirectories(self: *const Coff) []align(1) const ImageDataDirectory { const hdr = self.getOptionalHeader(); - const size: usize = switch (hdr.magic) { - IMAGE_NT_OPTIONAL_HDR32_MAGIC => @sizeOf(OptionalHeaderPE32), - IMAGE_NT_OPTIONAL_HDR64_MAGIC => @sizeOf(OptionalHeaderPE64), + const size: usize = switch (@intFromEnum(hdr.magic)) { + IMAGE_NT_OPTIONAL_HDR32_MAGIC => @sizeOf(OptionalHeader.PE32), + IMAGE_NT_OPTIONAL_HDR64_MAGIC => @sizeOf(OptionalHeader.@"PE32+"), else => unreachable, // We assume we have validated the header already }; - const offset = self.coff_header_offset + @sizeOf(CoffHeader) + size; + const offset = self.coff_header_offset + @sizeOf(Header) + size; return 
@as([*]align(1) const ImageDataDirectory, @ptrCast(self.data[offset..]))[0..self.getNumberOfDataDirectories()];
     }
 
     pub fn getSymtab(self: *const Coff) ?Symtab {
-        const coff_header = self.getCoffHeader();
+        const coff_header = self.getHeader();
         if (coff_header.pointer_to_symbol_table == 0) return null;
 
         const offset = coff_header.pointer_to_symbol_table;
@@ -1222,7 +1146,7 @@ pub const Coff = struct {
     }
 
     pub fn getStrtab(self: *const Coff) error{InvalidStrtabSize}!?Strtab {
-        const coff_header = self.getCoffHeader();
+        const coff_header = self.getHeader();
         if (coff_header.pointer_to_symbol_table == 0) return null;
 
         const offset = coff_header.pointer_to_symbol_table + Symbol.sizeOf() * coff_header.number_of_symbols;
@@ -1238,8 +1162,8 @@ pub const Coff = struct {
     }
 
     pub fn getSectionHeaders(self: *const Coff) []align(1) const SectionHeader {
-        const coff_header = self.getCoffHeader();
-        const offset = self.coff_header_offset + @sizeOf(CoffHeader) + coff_header.size_of_optional_header;
+        const coff_header = self.getHeader();
+        const offset = self.coff_header_offset + @sizeOf(Header) + coff_header.size_of_optional_header;
         return @as([*]align(1) const SectionHeader, @ptrCast(self.data.ptr + offset))[0..coff_header.number_of_sections];
     }
 
@@ -1414,14 +1338,14 @@ pub const Strtab = struct {
 };
 
 pub const ImportHeader = extern struct {
-    sig1: MachineType,
+    sig1: IMAGE.FILE.MACHINE,
     sig2: u16,
     version: u16,
-    machine: MachineType,
+    machine: IMAGE.FILE.MACHINE,
     time_date_stamp: u32,
     size_of_data: u32,
     hint: u16,
-    types: packed struct {
+    types: packed struct(u16) {
         type: ImportType,
         name_type: ImportNameType,
         reserved: u11,
@@ -1461,119 +1385,534 @@ pub const Relocation = extern struct {
     type: u16,
 };
 
-pub const ImageRelAmd64 = enum(u16) {
-    /// The relocation is ignored.
-    absolute = 0,
-
-    /// The 64-bit VA of the relocation target.
-    addr64 = 1,
-
-    /// The 32-bit VA of the relocation target.
-    addr32 = 2,
-
-    /// The 32-bit address without an image base.
- addr32nb = 3, - - /// The 32-bit relative address from the byte following the relocation. - rel32 = 4, - - /// The 32-bit address relative to byte distance 1 from the relocation. - rel32_1 = 5, - - /// The 32-bit address relative to byte distance 2 from the relocation. - rel32_2 = 6, - - /// The 32-bit address relative to byte distance 3 from the relocation. - rel32_3 = 7, - - /// The 32-bit address relative to byte distance 4 from the relocation. - rel32_4 = 8, - - /// The 32-bit address relative to byte distance 5 from the relocation. - rel32_5 = 9, - - /// The 16-bit section index of the section that contains the target. - /// This is used to support debugging information. - section = 10, - - /// The 32-bit offset of the target from the beginning of its section. - /// This is used to support debugging information and static thread local storage. - secrel = 11, - - /// A 7-bit unsigned offset from the base of the section that contains the target. - secrel7 = 12, - - /// CLR tokens. - token = 13, - - /// A 32-bit signed span-dependent value emitted into the object. - srel32 = 14, - - /// A pair that must immediately follow every span-dependent value. - pair = 15, - - /// A 32-bit signed span-dependent value that is applied at link time. - sspan32 = 16, - - _, -}; - -pub const ImageRelArm64 = enum(u16) { - /// The relocation is ignored. - absolute = 0, - - /// The 32-bit VA of the target. - addr32 = 1, - - /// The 32-bit RVA of the target. - addr32nb = 2, - - /// The 26-bit relative displacement to the target, for B and BL instructions. - branch26 = 3, - - /// The page base of the target, for ADRP instruction. - pagebase_rel21 = 4, - - /// The 21-bit relative displacement to the target, for instruction ADR. - rel21 = 5, - - /// The 12-bit page offset of the target, for instructions ADD/ADDS (immediate) with zero shift. - pageoffset_12a = 6, - - /// The 12-bit page offset of the target, for instruction LDR (indexed, unsigned immediate). 
- pageoffset_12l = 7, - - /// The 32-bit offset of the target from the beginning of its section. - /// This is used to support debugging information and static thread local storage. - secrel = 8, - - /// Bit 0:11 of section offset of the target for instructions ADD/ADDS (immediate) with zero shift. - low12a = 9, +pub const IMAGE = struct { + pub const FILE = struct { + /// Machine Types + /// The Machine field has one of the following values, which specify the CPU type. + /// An image file can be run only on the specified machine or on a system that emulates the specified machine. + pub const MACHINE = enum(u16) { + /// The content of this field is assumed to be applicable to any machine type + UNKNOWN = 0x0, + /// Alpha AXP, 32-bit address space + ALPHA = 0x184, + /// Alpha 64, 64-bit address space + ALPHA64 = 0x284, + /// Matsushita AM33 + AM33 = 0x1d3, + /// x64 + AMD64 = 0x8664, + /// ARM little endian + ARM = 0x1c0, + /// ARM64 little endian + ARM64 = 0xaa64, + /// ABI that enables interoperability between native ARM64 and emulated x64 code. + ARM64EC = 0xA641, + /// Binary format that allows both native ARM64 and ARM64EC code to coexist in the same file. 
+ ARM64X = 0xA64E, + /// ARM Thumb-2 little endian + ARMNT = 0x1c4, + /// EFI byte code + EBC = 0xebc, + /// Intel 386 or later processors and compatible processors + I386 = 0x14c, + /// Intel Itanium processor family + IA64 = 0x200, + /// LoongArch 32-bit processor family + LOONGARCH32 = 0x6232, + /// LoongArch 64-bit processor family + LOONGARCH64 = 0x6264, + /// Mitsubishi M32R little endian + M32R = 0x9041, + /// MIPS16 + MIPS16 = 0x266, + /// MIPS with FPU + MIPSFPU = 0x366, + /// MIPS16 with FPU + MIPSFPU16 = 0x466, + /// Power PC little endian + POWERPC = 0x1f0, + /// Power PC with floating point support + POWERPCFP = 0x1f1, + /// MIPS I compatible 32-bit big endian + R3000BE = 0x160, + /// MIPS I compatible 32-bit little endian + R3000 = 0x162, + /// MIPS III compatible 64-bit little endian + R4000 = 0x166, + /// MIPS IV compatible 64-bit little endian + R10000 = 0x168, + /// RISC-V 32-bit address space + RISCV32 = 0x5032, + /// RISC-V 64-bit address space + RISCV64 = 0x5064, + /// RISC-V 128-bit address space + RISCV128 = 0x5128, + /// Hitachi SH3 + SH3 = 0x1a2, + /// Hitachi SH3 DSP + SH3DSP = 0x1a3, + /// Hitachi SH4 + SH4 = 0x1a6, + /// Hitachi SH5 + SH5 = 0x1a8, + /// Thumb + THUMB = 0x1c2, + /// MIPS little-endian WCE v2 + WCEMIPSV2 = 0x169, + _, + /// AXP 64 (Same as Alpha 64) + pub const AXP64: IMAGE.FILE.MACHINE = .ALPHA64; + }; + }; - /// Bit 12:23 of section offset of the target, for instructions ADD/ADDS (immediate) with zero shift. - high12a = 10, + pub const REL = struct { + /// x64 Processors + /// The following relocation type indicators are defined for x64 and compatible processors. + pub const AMD64 = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The 64-bit VA of the relocation target. + ADDR64 = 0x0001, + /// The 32-bit VA of the relocation target. + ADDR32 = 0x0002, + /// The 32-bit address without an image base (RVA). + ADDR32NB = 0x0003, + /// The 32-bit relative address from the byte following the relocation. 
+ REL32 = 0x0004, + /// The 32-bit address relative to byte distance 1 from the relocation. + REL32_1 = 0x0005, + /// The 32-bit address relative to byte distance 2 from the relocation. + REL32_2 = 0x0006, + /// The 32-bit address relative to byte distance 3 from the relocation. + REL32_3 = 0x0007, + /// The 32-bit address relative to byte distance 4 from the relocation. + REL32_4 = 0x0008, + /// The 32-bit address relative to byte distance 5 from the relocation. + REL32_5 = 0x0009, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. + SECTION = 0x000A, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x000B, + /// A 7-bit unsigned offset from the base of the section that contains the target. + SECREL7 = 0x000C, + /// CLR tokens. + TOKEN = 0x000D, + /// A 32-bit signed span-dependent value emitted into the object. + SREL32 = 0x000E, + /// A pair that must immediately follow every span-dependent value. + PAIR = 0x000F, + /// A 32-bit signed span-dependent value that is applied at link time. + SSPAN32 = 0x0010, + _, + }; - /// Bit 0:11 of section offset of the target, for instruction LDR (indexed, unsigned immediate). - low12l = 11, + /// ARM Processors + /// The following relocation type indicators are defined for ARM processors. + pub const ARM = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The 32-bit VA of the target. + ADDR32 = 0x0001, + /// The 32-bit RVA of the target. + ADDR32NB = 0x0002, + /// The 24-bit relative displacement to the target. + BRANCH24 = 0x0003, + /// The reference to a subroutine call. + /// The reference consists of two 16-bit instructions with 11-bit offsets. + BRANCH11 = 0x0004, + /// The 32-bit relative address from the byte following the relocation. 
+ REL32 = 0x000A, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. + SECTION = 0x000E, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x000F, + /// The 32-bit VA of the target. + /// This relocation is applied using a MOVW instruction for the low 16 bits followed by a MOVT for the high 16 bits. + MOV32 = 0x0010, + /// The 32-bit VA of the target. + /// This relocation is applied using a MOVW instruction for the low 16 bits followed by a MOVT for the high 16 bits. + THUMB_MOV32 = 0x0011, + /// The instruction is fixed up with the 21-bit relative displacement to the 2-byte aligned target. + /// The least significant bit of the displacement is always zero and is not stored. + /// This relocation corresponds to a Thumb-2 32-bit conditional B instruction. + THUMB_BRANCH20 = 0x0012, + Unused = 0x0013, + /// The instruction is fixed up with the 25-bit relative displacement to the 2-byte aligned target. + /// The least significant bit of the displacement is zero and is not stored.This relocation corresponds to a Thumb-2 B instruction. + THUMB_BRANCH24 = 0x0014, + /// The instruction is fixed up with the 25-bit relative displacement to the 4-byte aligned target. + /// The low 2 bits of the displacement are zero and are not stored. + /// This relocation corresponds to a Thumb-2 BLX instruction. + THUMB_BLX23 = 0x0015, + /// The relocation is valid only when it immediately follows a ARM_REFHI or THUMB_REFHI. + /// Its SymbolTableIndex contains a displacement and not an index into the symbol table. + PAIR = 0x0016, + _, + }; - /// CLR token. - token = 12, + /// ARM64 Processors + /// The following relocation type indicators are defined for ARM64 processors. + pub const ARM64 = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The 32-bit VA of the target. 
+ ADDR32 = 0x0001, + /// The 32-bit RVA of the target. + ADDR32NB = 0x0002, + /// The 26-bit relative displacement to the target, for B and BL instructions. + BRANCH26 = 0x0003, + /// The page base of the target, for ADRP instruction. + PAGEBASE_REL21 = 0x0004, + /// The 12-bit relative displacement to the target, for instruction ADR + REL21 = 0x0005, + /// The 12-bit page offset of the target, for instructions ADD/ADDS (immediate) with zero shift. + PAGEOFFSET_12A = 0x0006, + /// The 12-bit page offset of the target, for instruction LDR (indexed, unsigned immediate). + PAGEOFFSET_12L = 0x0007, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x0008, + /// Bit 0:11 of section offset of the target, for instructions ADD/ADDS (immediate) with zero shift. + SECREL_LOW12A = 0x0009, + /// Bit 12:23 of section offset of the target, for instructions ADD/ADDS (immediate) with zero shift. + SECREL_HIGH12A = 0x000A, + /// Bit 0:11 of section offset of the target, for instruction LDR (indexed, unsigned immediate). + SECREL_LOW12L = 0x000B, + /// CLR token. + TOKEN = 0x000C, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. + SECTION = 0x000D, + /// The 64-bit VA of the relocation target. + ADDR64 = 0x000E, + /// The 19-bit offset to the relocation target, for conditional B instruction. + BRANCH19 = 0x000F, + /// The 14-bit offset to the relocation target, for instructions TBZ and TBNZ. + BRANCH14 = 0x0010, + /// The 32-bit relative address from the byte following the relocation. + REL32 = 0x0011, + _, + }; - /// The 16-bit section index of the section that contains the target. - /// This is used to support debugging information. - section = 13, + /// Hitachi SuperH Processors + /// The following relocation type indicators are defined for SH3 and SH4 processors. 
+ /// SH5-specific relocations are noted as SHM (SH Media). + pub const SH = enum(u16) { + /// The relocation is ignored. + @"3_ABSOLUTE" = 0x0000, + /// A reference to the 16-bit location that contains the VA of the target symbol. + @"3_DIRECT16" = 0x0001, + /// The 32-bit VA of the target symbol. + @"3_DIRECT32" = 0x0002, + /// A reference to the 8-bit location that contains the VA of the target symbol. + @"3_DIRECT8" = 0x0003, + /// A reference to the 8-bit instruction that contains the effective 16-bit VA of the target symbol. + @"3_DIRECT8_WORD" = 0x0004, + /// A reference to the 8-bit instruction that contains the effective 32-bit VA of the target symbol. + @"3_DIRECT8_LONG" = 0x0005, + /// A reference to the 8-bit location whose low 4 bits contain the VA of the target symbol. + @"3_DIRECT4" = 0x0006, + /// A reference to the 8-bit instruction whose low 4 bits contain the effective 16-bit VA of the target symbol. + @"3_DIRECT4_WORD" = 0x0007, + /// A reference to the 8-bit instruction whose low 4 bits contain the effective 32-bit VA of the target symbol. + @"3_DIRECT4_LONG" = 0x0008, + /// A reference to the 8-bit instruction that contains the effective 16-bit relative offset of the target symbol. + @"3_PCREL8_WORD" = 0x0009, + /// A reference to the 8-bit instruction that contains the effective 32-bit relative offset of the target symbol. + @"3_PCREL8_LONG" = 0x000A, + /// A reference to the 16-bit instruction whose low 12 bits contain the effective 16-bit relative offset of the target symbol. + @"3_PCREL12_WORD" = 0x000B, + /// A reference to a 32-bit location that is the VA of the section that contains the target symbol. + @"3_STARTOF_SECTION" = 0x000C, + /// A reference to the 32-bit location that is the size of the section that contains the target symbol. + @"3_SIZEOF_SECTION" = 0x000D, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. 
+ @"3_SECTION" = 0x000E, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + @"3_SECREL" = 0x000F, + /// The 32-bit RVA of the target symbol. + @"3_DIRECT32_NB" = 0x0010, + /// GP relative. + @"3_GPREL4_LONG" = 0x0011, + /// CLR token. + @"3_TOKEN" = 0x0012, + /// The offset from the current instruction in longwords. + /// If the NOMODE bit is not set, insert the inverse of the low bit at bit 32 to select PTA or PTB. + M_PCRELPT = 0x0013, + /// The low 16 bits of the 32-bit address. + M_REFLO = 0x0014, + /// The high 16 bits of the 32-bit address. + M_REFHALF = 0x0015, + /// The low 16 bits of the relative address. + M_RELLO = 0x0016, + /// The high 16 bits of the relative address. + M_RELHALF = 0x0017, + /// The relocation is valid only when it immediately follows a REFHALF, RELHALF, or RELLO relocation. + /// The SymbolTableIndex field of the relocation contains a displacement and not an index into the symbol table. + M_PAIR = 0x0018, + /// The relocation ignores section mode. + M_NOMODE = 0x8000, + _, + }; - /// The 64-bit VA of the relocation target. - addr64 = 14, + /// IBM PowerPC Processors + /// The following relocation type indicators are defined for PowerPC processors. + pub const PPC = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The 64-bit VA of the target. + ADDR64 = 0x0001, + /// The 32-bit VA of the target. + ADDR32 = 0x0002, + /// The low 24 bits of the VA of the target. + /// This is valid only when the target symbol is absolute and can be sign-extended to its original value. + ADDR24 = 0x0003, + /// The low 16 bits of the target's VA. + ADDR16 = 0x0004, + /// The low 14 bits of the target's VA. + /// This is valid only when the target symbol is absolute and can be sign-extended to its original value. + ADDR14 = 0x0005, + /// A 24-bit PC-relative offset to the symbol's location. 
+ REL24 = 0x0006, + /// A 14-bit PC-relative offset to the symbol's location. + REL14 = 0x0007, + /// The 32-bit RVA of the target. + ADDR32NB = 0x000A, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x000B, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. + SECTION = 0x000C, + /// The 16-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL16 = 0x000F, + /// The high 16 bits of the target's 32-bit VA. + /// This is used for the first instruction in a two-instruction sequence that loads a full address. + /// This relocation must be immediately followed by a PAIR relocation whose SymbolTableIndex contains a signed 16-bit displacement that is added to the upper 16 bits that was taken from the location that is being relocated. + REFHI = 0x0010, + /// The low 16 bits of the target's VA. + REFLO = 0x0011, + /// A relocation that is valid only when it immediately follows a REFHI or SECRELHI relocation. + /// Its SymbolTableIndex contains a displacement and not an index into the symbol table. + PAIR = 0x0012, + /// The low 16 bits of the 32-bit offset of the target from the beginning of its section. + SECRELLO = 0x0013, + /// The 16-bit signed displacement of the target relative to the GP register. + GPREL = 0x0015, + /// The CLR token. + TOKEN = 0x0016, + _, + }; - /// The 19-bit offset to the relocation target, for conditional B instruction. - branch19 = 15, + /// Intel 386 Processors + /// The following relocation type indicators are defined for Intel 386 and compatible processors. + pub const I386 = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// Not supported. + DIR16 = 0x0001, + /// Not supported. + REL16 = 0x0002, + /// The target's 32-bit VA. 
+ DIR32 = 0x0006, + /// The target's 32-bit RVA. + DIR32NB = 0x0007, + /// Not supported. + SEG12 = 0x0009, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. + SECTION = 0x000A, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x000B, + /// The CLR token. + TOKEN = 0x000C, + /// A 7-bit offset from the base of the section that contains the target. + SECREL7 = 0x000D, + /// The 32-bit relative displacement to the target. + /// This supports the x86 relative branch and call instructions. + REL32 = 0x0014, + _, + }; - /// The 14-bit offset to the relocation target, for instructions TBZ and TBNZ. - branch14 = 16, + /// Intel Itanium Processor Family (IPF) + /// The following relocation type indicators are defined for the Intel Itanium processor family and compatible processors. + /// Note that relocations on instructions use the bundle's offset and slot number for the relocation offset. + pub const IA64 = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The instruction relocation can be followed by an ADDEND relocation whose value is added to the target address before it is inserted into the specified slot in the IMM14 bundle. + /// The relocation target must be absolute or the image must be fixed. + IMM14 = 0x0001, + /// The instruction relocation can be followed by an ADDEND relocation whose value is added to the target address before it is inserted into the specified slot in the IMM22 bundle. + /// The relocation target must be absolute or the image must be fixed. + IMM22 = 0x0002, + /// The slot number of this relocation must be one (1). + /// The relocation can be followed by an ADDEND relocation whose value is added to the target address before it is stored in all three slots of the IMM64 bundle. + IMM64 = 0x0003, + /// The target's 32-bit VA. 
+ /// This is supported only for /LARGEADDRESSAWARE:NO images. + DIR32 = 0x0004, + /// The target's 64-bit VA. + DIR64 = 0x0005, + /// The instruction is fixed up with the 25-bit relative displacement to the 16-bit aligned target. + /// The low 4 bits of the displacement are zero and are not stored. + PCREL21B = 0x0006, + /// The instruction is fixed up with the 25-bit relative displacement to the 16-bit aligned target. + /// The low 4 bits of the displacement, which are zero, are not stored. + PCREL21M = 0x0007, + /// The LSBs of this relocation's offset must contain the slot number whereas the rest is the bundle address. + /// The bundle is fixed up with the 25-bit relative displacement to the 16-bit aligned target. + /// The low 4 bits of the displacement are zero and are not stored. + PCREL21F = 0x0008, + /// The instruction relocation can be followed by an ADDEND relocation whose value is added to the target address and then a 22-bit GP-relative offset that is calculated and applied to the GPREL22 bundle. + GPREL22 = 0x0009, + /// The instruction is fixed up with the 22-bit GP-relative offset to the target symbol's literal table entry. + /// The linker creates this literal table entry based on this relocation and the ADDEND relocation that might follow. + LTOFF22 = 0x000A, + /// The 16-bit section index of the section contains the target. + /// This is used to support debugging information. + SECTION = 0x000B, + /// The instruction is fixed up with the 22-bit offset of the target from the beginning of its section. + /// This relocation can be followed immediately by an ADDEND relocation, whose Value field contains the 32-bit unsigned offset of the target from the beginning of the section. + SECREL22 = 0x000C, + /// The slot number for this relocation must be one (1). + /// The instruction is fixed up with the 64-bit offset of the target from the beginning of its section. 
+ /// This relocation can be followed immediately by an ADDEND relocation whose Value field contains the 32-bit unsigned offset of the target from the beginning of the section. + SECREL64I = 0x000D, + /// The address of data to be fixed up with the 32-bit offset of the target from the beginning of its section. + SECREL32 = 0x000E, + /// The target's 32-bit RVA. + DIR32NB = 0x0010, + /// This is applied to a signed 14-bit immediate that contains the difference between two relocatable targets. + /// This is a declarative field for the linker that indicates that the compiler has already emitted this value. + SREL14 = 0x0011, + /// This is applied to a signed 22-bit immediate that contains the difference between two relocatable targets. + /// This is a declarative field for the linker that indicates that the compiler has already emitted this value. + SREL22 = 0x0012, + /// This is applied to a signed 32-bit immediate that contains the difference between two relocatable values. + /// This is a declarative field for the linker that indicates that the compiler has already emitted this value. + SREL32 = 0x0013, + /// This is applied to an unsigned 32-bit immediate that contains the difference between two relocatable values. + /// This is a declarative field for the linker that indicates that the compiler has already emitted this value. + UREL32 = 0x0014, + /// A 60-bit PC-relative fixup that always stays as a BRL instruction of an MLX bundle. + PCREL60X = 0x0015, + /// A 60-bit PC-relative fixup. + /// If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MBB bundle with NOP.B in slot 1 and a 25-bit BR instruction (with the 4 lowest bits all zero and dropped) in slot 2. + PCREL60B = 0x0016, + /// A 60-bit PC-relative fixup. + /// If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MFB bundle with NOP.F in slot 1 and a 25-bit (4 lowest bits all zero and dropped) BR instruction in slot 2. 
+ PCREL60F = 0x0017, + /// A 60-bit PC-relative fixup. + /// If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MIB bundle with NOP.I in slot 1 and a 25-bit (4 lowest bits all zero and dropped) BR instruction in slot 2. + PCREL60I = 0x0018, + /// A 60-bit PC-relative fixup. + /// If the target displacement fits in a signed 25-bit field, convert the entire bundle to an MMB bundle with NOP.M in slot 1 and a 25-bit (4 lowest bits all zero and dropped) BR instruction in slot 2. + PCREL60M = 0x0019, + /// A 64-bit GP-relative fixup. + IMMGPREL64 = 0x001a, + /// A CLR token. + TOKEN = 0x001b, + /// A 32-bit GP-relative fixup. + GPREL32 = 0x001c, + /// The relocation is valid only when it immediately follows one of the following relocations: IMM14, IMM22, IMM64, GPREL22, LTOFF22, LTOFF64, SECREL22, SECREL64I, or SECREL32. + /// Its value contains the addend to apply to instructions within a bundle, not for data. + ADDEND = 0x001F, + _, + }; - /// The 32-bit relative address from the byte following the relocation. - rel32 = 17, + /// MIPS Processors + /// The following relocation type indicators are defined for MIPS processors. + pub const MIPS = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The high 16 bits of the target's 32-bit VA. + REFHALF = 0x0001, + /// The target's 32-bit VA. + REFWORD = 0x0002, + /// The low 26 bits of the target's VA. + /// This supports the MIPS J and JAL instructions. + JMPADDR = 0x0003, + /// The high 16 bits of the target's 32-bit VA. + /// This is used for the first instruction in a two-instruction sequence that loads a full address. + /// This relocation must be immediately followed by a PAIR relocation whose SymbolTableIndex contains a signed 16-bit displacement that is added to the upper 16 bits that are taken from the location that is being relocated. + REFHI = 0x0004, + /// The low 16 bits of the target's VA. 
+ REFLO = 0x0005, + /// A 16-bit signed displacement of the target relative to the GP register. + GPREL = 0x0006, + /// The same as IMAGE_REL_MIPS_GPREL. + LITERAL = 0x0007, + /// The 16-bit section index of the section contains the target. + /// This is used to support debugging information. + SECTION = 0x000A, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x000B, + /// The low 16 bits of the 32-bit offset of the target from the beginning of its section. + SECRELLO = 0x000C, + /// The high 16 bits of the 32-bit offset of the target from the beginning of its section. + /// An IMAGE_REL_MIPS_PAIR relocation must immediately follow this one. + /// The SymbolTableIndex of the PAIR relocation contains a signed 16-bit displacement that is added to the upper 16 bits that are taken from the location that is being relocated. + SECRELHI = 0x000D, + /// The low 26 bits of the target's VA. + /// This supports the MIPS16 JAL instruction. + JMPADDR16 = 0x0010, + /// The target's 32-bit RVA. + REFWORDNB = 0x0022, + /// The relocation is valid only when it immediately follows a REFHI or SECRELHI relocation. + /// Its SymbolTableIndex contains a displacement and not an index into the symbol table. + PAIR = 0x0025, + _, + }; - _, + /// Mitsubishi M32R + /// The following relocation type indicators are defined for the Mitsubishi M32R processors. + pub const M32R = enum(u16) { + /// The relocation is ignored. + ABSOLUTE = 0x0000, + /// The target's 32-bit VA. + ADDR32 = 0x0001, + /// The target's 32-bit RVA. + ADDR32NB = 0x0002, + /// The target's 24-bit VA. + ADDR24 = 0x0003, + /// The target's 16-bit offset from the GP register. 
+ GPREL16 = 0x0004, + /// The target's 24-bit offset from the program counter (PC), shifted left by 2 bits and sign-extended + PCREL24 = 0x0005, + /// The target's 16-bit offset from the PC, shifted left by 2 bits and sign-extended + PCREL16 = 0x0006, + /// The target's 8-bit offset from the PC, shifted left by 2 bits and sign-extended + PCREL8 = 0x0007, + /// The 16 MSBs of the target VA. + REFHALF = 0x0008, + /// The 16 MSBs of the target VA, adjusted for LSB sign extension. + /// This is used for the first instruction in a two-instruction sequence that loads a full 32-bit address. + /// This relocation must be immediately followed by a PAIR relocation whose SymbolTableIndex contains a signed 16-bit displacement that is added to the upper 16 bits that are taken from the location that is being relocated. + REFHI = 0x0009, + /// The 16 LSBs of the target VA. + REFLO = 0x000A, + /// The relocation must follow the REFHI relocation. + /// Its SymbolTableIndex contains a displacement and not an index into the symbol table. + PAIR = 0x000B, + /// The 16-bit section index of the section that contains the target. + /// This is used to support debugging information. + SECTION = 0x000C, + /// The 32-bit offset of the target from the beginning of its section. + /// This is used to support debugging information and static thread local storage. + SECREL = 0x000D, + /// The CLR token. 
+ TOKEN = 0x000E, + _, + }; + }; }; diff --git a/lib/std/heap.zig b/lib/std/heap.zig index 84f119d3f978..b5fd8229597b 100644 --- a/lib/std/heap.zig +++ b/lib/std/heap.zig @@ -78,13 +78,15 @@ pub fn defaultQueryPageSize() usize { }; var size = global.cached_result.load(.unordered); if (size > 0) return size; - size = switch (builtin.os.tag) { - .linux => if (builtin.link_libc) @intCast(std.c.sysconf(@intFromEnum(std.c._SC.PAGESIZE))) else std.os.linux.getauxval(std.elf.AT_PAGESZ), - .driverkit, .ios, .macos, .tvos, .visionos, .watchos => blk: { + size = size: switch (builtin.os.tag) { + .linux => if (builtin.link_libc) + @max(std.c.sysconf(@intFromEnum(std.c._SC.PAGESIZE)), 0) + else + std.os.linux.getauxval(std.elf.AT_PAGESZ), + .driverkit, .ios, .macos, .tvos, .visionos, .watchos => { const task_port = std.c.mach_task_self(); // mach_task_self may fail "if there are any resource failures or other errors". - if (task_port == std.c.TASK.NULL) - break :blk 0; + if (task_port == std.c.TASK.NULL) break :size 0; var info_count = std.c.TASK.VM.INFO_COUNT; var vm_info: std.c.task_vm_info_data_t = undefined; vm_info.page_size = 0; @@ -94,21 +96,28 @@ pub fn defaultQueryPageSize() usize { @as(std.c.task_info_t, @ptrCast(&vm_info)), &info_count, ); - assert(vm_info.page_size != 0); - break :blk @intCast(vm_info.page_size); + break :size @intCast(vm_info.page_size); }, - .windows => blk: { - var info: std.os.windows.SYSTEM_INFO = undefined; - std.os.windows.kernel32.GetSystemInfo(&info); - break :blk info.dwPageSize; + .windows => { + var sbi: windows.SYSTEM_BASIC_INFORMATION = undefined; + switch (windows.ntdll.NtQuerySystemInformation( + .SystemBasicInformation, + &sbi, + @sizeOf(windows.SYSTEM_BASIC_INFORMATION), + null, + )) { + .SUCCESS => break :size sbi.PageSize, + else => break :size 0, + } }, else => if (builtin.link_libc) - @intCast(std.c.sysconf(@intFromEnum(std.c._SC.PAGESIZE))) + @max(std.c.sysconf(@intFromEnum(std.c._SC.PAGESIZE)), 0) else if (builtin.os.tag == 
.freestanding or builtin.os.tag == .other) @compileError("unsupported target: freestanding/other") else @compileError("pageSize on " ++ @tagName(builtin.cpu.arch) ++ "-" ++ @tagName(builtin.os.tag) ++ " is not supported without linking libc, using the default implementation"), }; + if (size == 0) size = page_size_max; assert(size >= page_size_min); assert(size <= page_size_max); diff --git a/src/Compilation.zig b/src/Compilation.zig index 86b1356a3f11..434a79069545 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -256,8 +256,8 @@ test_filters: []const []const u8, link_task_wait_group: WaitGroup = .{}, link_prog_node: std.Progress.Node = .none, -link_uav_prog_node: std.Progress.Node = .none, -link_lazy_prog_node: std.Progress.Node = .none, +link_const_prog_node: std.Progress.Node = .none, +link_synth_prog_node: std.Progress.Node = .none, llvm_opt_bisect_limit: c_int, @@ -1982,13 +1982,13 @@ pub fn create(gpa: Allocator, arena: Allocator, diag: *CreateDiagnostic, options }; if (have_zcu and (!need_llvm or use_llvm)) { if (output_mode == .Obj) break :s .zcu; - if (options.config.use_new_linker) break :s .zcu; switch (target_util.zigBackend(target, use_llvm)) { else => {}, .stage2_aarch64, .stage2_x86_64 => if (target.ofmt == .coff) { break :s if (is_exe_or_dyn_lib) .dyn_lib else .zcu; }, } + if (options.config.use_new_linker) break :s .zcu; } if (need_llvm and !build_options.have_llvm) break :s .none; // impossible to build without llvm if (is_exe_or_dyn_lib) break :s .lib; @@ -3081,22 +3081,30 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) UpdateE comp.link_prog_node = main_progress_node.start("Linking", 0); if (lf.cast(.elf2)) |elf| { comp.link_prog_node.increaseEstimatedTotalItems(3); - comp.link_uav_prog_node = comp.link_prog_node.start("Constants", 0); - comp.link_lazy_prog_node = comp.link_prog_node.start("Synthetics", 0); + comp.link_const_prog_node = comp.link_prog_node.start("Constants", 0); + 
comp.link_synth_prog_node = comp.link_prog_node.start("Synthetics", 0); elf.mf.update_prog_node = comp.link_prog_node.start("Relocations", elf.mf.updates.items.len); + } else if (lf.cast(.coff2)) |coff| { + comp.link_prog_node.increaseEstimatedTotalItems(3); + comp.link_const_prog_node = comp.link_prog_node.start("Constants", 0); + comp.link_synth_prog_node = comp.link_prog_node.start("Synthetics", 0); + coff.mf.update_prog_node = comp.link_prog_node.start("Relocations", coff.mf.updates.items.len); } } defer { comp.link_prog_node.end(); comp.link_prog_node = .none; - comp.link_uav_prog_node.end(); - comp.link_uav_prog_node = .none; - comp.link_lazy_prog_node.end(); - comp.link_lazy_prog_node = .none; + comp.link_const_prog_node.end(); + comp.link_const_prog_node = .none; + comp.link_synth_prog_node.end(); + comp.link_synth_prog_node = .none; if (comp.bin_file) |lf| { if (lf.cast(.elf2)) |elf| { elf.mf.update_prog_node.end(); elf.mf.update_prog_node = .none; + } else if (lf.cast(.coff2)) |coff| { + coff.mf.update_prog_node.end(); + coff.mf.update_prog_node = .none; } } } @@ -3218,7 +3226,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) UpdateE .root_dir = comp.dirs.local_cache, .sub_path = try fs.path.join(arena, &.{ o_sub_path, comp.emit_bin.? 
}), }; - const result: link.File.OpenError!void = switch (need_writable_dance) { + const result: (link.File.OpenError || error{HotSwapUnavailableOnHostOperatingSystem})!void = switch (need_writable_dance) { .no => {}, .lf_only => lf.makeWritable(), .lf_and_debug => res: { diff --git a/src/InternPool.zig b/src/InternPool.zig index e19fc075ee09..5e2d8c4b5c1a 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -11919,10 +11919,10 @@ pub fn getString(ip: *InternPool, key: []const u8) OptionalNullTerminatedString var map_index = hash; while (true) : (map_index += 1) { map_index &= map_mask; - const entry = map.at(map_index); - const index = entry.acquire().unwrap() orelse return null; + const entry = &map.entries[map_index]; + const index = entry.value.unwrap() orelse return .none; if (entry.hash != hash) continue; - if (index.eqlSlice(key, ip)) return index; + if (index.eqlSlice(key, ip)) return index.toOptional(); } } diff --git a/src/codegen.zig b/src/codegen.zig index ed046e966cd5..caff954e0778 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -978,21 +978,8 @@ pub fn genNavRef( }, .link_once => unreachable, } - } else if (lf.cast(.coff)) |coff_file| { - // TODO audit this - switch (linkage) { - .internal => { - const atom_index = try coff_file.getOrCreateAtomForNav(nav_index); - const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - return .{ .sym_index = sym_index }; - }, - .strong, .weak => { - const global_index = try coff_file.getGlobalSymbol(nav.name.toSlice(ip), lib_name.toSlice(ip)); - try coff_file.need_got_table.put(zcu.gpa, global_index, {}); // needs GOT - return .{ .sym_index = global_index }; - }, - .link_once => unreachable, - } + } else if (lf.cast(.coff2)) |coff| { + return .{ .sym_index = @intFromEnum(try coff.navSymbol(zcu, nav_index)) }; } else { const msg = try ErrorMsg.create(zcu.gpa, src_loc, "TODO genNavRef for target {}", .{target}); return .{ .fail = msg }; diff --git a/src/codegen/aarch64/Mir.zig 
b/src/codegen/aarch64/Mir.zig index be6478eae896..3e89e2882558 100644 --- a/src/codegen/aarch64/Mir.zig +++ b/src/codegen/aarch64/Mir.zig @@ -135,11 +135,6 @@ pub fn emit( else if (lf.cast(.macho)) |mf| mf.getZigObject().?.getOrCreateMetadataForLazySymbol(mf, pt, lazy_reloc.symbol) catch |err| return zcu.codegenFail(func.owner_nav, "{s} creating lazy symbol", .{@errorName(err)}) - else if (lf.cast(.coff)) |cf| - if (cf.getOrCreateAtomForLazySymbol(pt, lazy_reloc.symbol)) |atom| - cf.getAtom(atom).getSymbolIndex().? - else |err| - return zcu.codegenFail(func.owner_nav, "{s} creating lazy symbol", .{@errorName(err)}) else return zcu.codegenFail(func.owner_nav, "external symbols unimplemented for {s}", .{@tagName(lf.tag)}), mir.body[lazy_reloc.reloc.label], @@ -154,8 +149,6 @@ pub fn emit( try ef.getGlobalSymbol(std.mem.span(global_reloc.name), null) else if (lf.cast(.macho)) |mf| try mf.getGlobalSymbol(std.mem.span(global_reloc.name), null) - else if (lf.cast(.coff)) |cf| - try cf.getGlobalSymbol(std.mem.span(global_reloc.name), "compiler_rt") else return zcu.codegenFail(func.owner_nav, "external symbols unimplemented for {s}", .{@tagName(lf.tag)}), mir.body[global_reloc.reloc.label], diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 04c231c5ec07..6a33557e0725 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -12103,7 +12103,7 @@ fn firstParamSRet(fn_info: InternPool.Key.FuncType, zcu: *Zcu, target: *const st return switch (fn_info.cc) { .auto => returnTypeByRef(zcu, target, return_type), .x86_64_sysv => firstParamSRetSystemV(return_type, zcu, target), - .x86_64_win => x86_64_abi.classifyWindows(return_type, zcu, target) == .memory, + .x86_64_win => x86_64_abi.classifyWindows(return_type, zcu, target, .ret) == .memory, .x86_sysv, .x86_win => isByRef(return_type, zcu), .x86_stdcall => !isScalar(zcu, return_type), .wasm_mvp => wasm_c_abi.classifyType(return_type, zcu) == .indirect, @@ -12205,7 +12205,7 @@ fn lowerFnRetTy(o: *Object, pt: 
Zcu.PerThread, fn_info: InternPool.Key.FuncType) fn lowerWin64FnRetTy(o: *Object, pt: Zcu.PerThread, fn_info: InternPool.Key.FuncType) Allocator.Error!Builder.Type { const zcu = pt.zcu; const return_type = Type.fromInterned(fn_info.return_type); - switch (x86_64_abi.classifyWindows(return_type, zcu, zcu.getTarget())) { + switch (x86_64_abi.classifyWindows(return_type, zcu, zcu.getTarget(), .ret)) { .integer => { if (isScalar(zcu, return_type)) { return o.lowerType(pt, return_type); @@ -12476,7 +12476,7 @@ const ParamTypeIterator = struct { fn nextWin64(it: *ParamTypeIterator, ty: Type) ?Lowering { const zcu = it.pt.zcu; - switch (x86_64_abi.classifyWindows(ty, zcu, zcu.getTarget())) { + switch (x86_64_abi.classifyWindows(ty, zcu, zcu.getTarget(), .arg)) { .integer => { if (isScalar(zcu, ty)) { it.zig_index += 1; diff --git a/src/codegen/x86_64/CodeGen.zig b/src/codegen/x86_64/CodeGen.zig index cf6f91e0c609..18e65765149f 100644 --- a/src/codegen/x86_64/CodeGen.zig +++ b/src/codegen/x86_64/CodeGen.zig @@ -2292,7 +2292,7 @@ fn genBodyBlock(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { - @setEvalBranchQuota(29_600); + @setEvalBranchQuota(31_000); const pt = cg.pt; const zcu = pt.zcu; const ip = &zcu.intern_pool; @@ -4168,6 +4168,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -4201,6 +4202,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, 
+ .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -4212,7 +4246,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, @@ -4227,15 +4261,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", 
.v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -4247,7 +4282,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, @@ -4262,15 +4297,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, 
._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -4282,7 +4318,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, @@ -4297,13 +4333,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ 
+ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } 
}, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) catch |err| switch (err) { error.SelectFailed => return 
cg.fail("failed to select {s} {f} {f} {f}", .{ @@ -14775,6 +14919,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -14808,6 +14953,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -14819,7 +14997,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = 
.{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } }, @@ -14834,15 +15012,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -14854,7 +15033,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } }, @@ -14869,15 +15048,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { 
.dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -14889,7 +15069,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } }, @@ -14904,13 +15084,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, 
.tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + 
.required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__subtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ 
.extern_func = "__subtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ @@ -24415,6 +24703,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -24448,6 +24737,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, 
.after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -24459,7 +24781,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, @@ -24474,15 +24796,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + 
.required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -24494,7 +24817,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, @@ -24509,15 +24832,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -24529,7 +24853,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = 
.@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, @@ -24544,13 +24868,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after 
= 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), 
.tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ @@ -26350,18 +26782,53 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .add, .tmp0p, .sa(.src0, .add_elem_size), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, - }, .{ - .required_features = .{ .f16c, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .word, .is = .word } }, - .{ .scalar_float = .{ .of = .word, .is = .word } }, - .any, + } }) catch |err| 
switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ + @tagName(air_tag), + ty.fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else => |e| return e, + }; + res[0].wrapInt(cg) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} wrap {f} {f}", .{ + @tagName(air_tag), + cg.typeOf(bin_op.lhs).fmt(pt), + res[0].tracking(cg), + }), + else => |e| return e, + }; + try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); + }, + .mul_sat => |air_tag| { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{ + .src_constraints = .{ .{ .exact_signed_int = 8 }, .{ .exact_signed_int = 8 }, .any }, + .patterns = &.{ + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, + .each = .{ .once = &.{ + .{ ._, .i_, .mul, .src1b, ._, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ }, + } }, + }, .{ + .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any }, .patterns = &.{ - .{ .src = .{ .to_sse, .to_sse, .none } }, + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .f32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .i8, .kind = .{ .rc = .gphi } }, .unused, .unused, .unused, @@ -26373,30 +26840,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - 
.dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, - .{ ._, .v_ss, .mul, .dst0x, .dst0x, .tmp0d, ._ }, - .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + .{ ._, .i_, .mul, .src1b, ._, ._, ._ }, + .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, + .{ ._, ._r, .sa, .tmp0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp0b, .dst0h, ._, ._ }, + .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, + .{ .@"1:", ._r, .sa, .dst0w, .ui(15), ._, ._ }, + .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .word, .is = .word } }, - .{ .scalar_float = .{ .of = .word, .is = .word } }, - .any, - }, + .src_constraints = .{ .{ .exact_unsigned_int = 8 }, .{ .exact_unsigned_int = 8 }, .any }, .patterns = &.{ - .{ .src = .{ - .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, - .{ .to_param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } }, - .none, - } }, + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } }, + .{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26409,25 +26873,22 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, 
._, .mul, .src1b, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .@"or", .dst0b, .tmp0b, ._, ._ }, } }, }, .{ - .required_features = .{ .f16c, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .qword, .is = .word } }, - .{ .scalar_float = .{ .of = .qword, .is = .word } }, - .any, - }, + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, .patterns = &.{ - .{ .src = .{ .mem, .mem, .none } }, - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .vector_4_f32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26439,28 +26900,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .dst0x, .src0q, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp0x, .src1q, ._, ._ }, - .{ ._, .v_ps, .mul, .dst0x, .dst0x, .tmp0x, ._ }, - .{ ._, .v_, .cvtps2ph, .dst0q, .dst0x, .rm(.{}), ._ }, + .{ ._, ._, .mul, .src1b, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .cmp, .dst0w, .tmp0w, ._, ._ }, + .{ ._, ._a, .cmov, .dst0d, .tmp0d, ._, ._ }, } }, }, .{ - .required_features = .{ .f16c, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .word } }, - .{ .scalar_float = .{ .of = .xword, .is = .word } }, - .any, - }, + .src_constraints = .{ .{ 
.unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, .patterns = &.{ - .{ .src = .{ .mem, .mem, .none } }, - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, + .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .vector_8_f32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .sse } } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26472,27 +26928,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, .v_ps, .cvtph2, .dst0y, .src0x, ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp0y, .src1x, ._, ._ }, - .{ ._, .v_ps, .mul, .dst0y, .dst0y, .tmp0y, ._ }, - .{ ._, .v_, .cvtps2ph, .dst0x, .dst0y, .rm(.{}), ._ }, + .{ ._, ._, .mul, .src1b, ._, ._, ._ }, + .{ ._, ._, .cmp, .dst0w, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._na, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, } }, }, .{ - .required_features = .{ .f16c, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .word } }, - .any, - }, + .required_features = .{ .fast_imm16, null, null, null }, + .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ 
.type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, - .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .i16, .kind = .{ .reg = .dx } }, + .unused, + .unused, .unused, .unused, .unused, @@ -26502,33 +26957,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_ps, .cvtph2, .tmp1y, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_ps, .cvtph2, .tmp2y, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_ps, .mul, .tmp1y, .tmp1y, .tmp2y, ._ }, - .{ ._, .v_, .cvtps2ph, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1y, .rm(.{}), ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, + .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .any, - }, + .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f16, .kind = .{ .param_sse 
= .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } }, + .{ .type = .i16, .kind = .{ .reg = .dx } }, + .unused, + .unused, + .unused, .unused, .unused, .unused, @@ -26537,34 +26987,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .vp_, .xor, .tmp2x, .tmp2x, .tmp2x, ._ }, - .{ ._, .vp_w, .insr, .tmp1x, .tmp2x, .memia(.src0w, .tmp0, .add_unaligned_size), .ui(0) }, - .{ ._, .vp_w, .insr, .tmp2x, .tmp2x, .memia(.src1w, .tmp0, .add_unaligned_size), .ui(0) }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .vp_w, .extr, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1x, .ui(0), ._ }, - .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - .required_features = .{ .sse4_1, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .any, - }, + .required_features = .{ .fast_imm16, null, null, null }, + .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ 
.{ .to_reg = .ax }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } }, + .{ .type = .i16, .kind = .{ .reg = .dx } }, + .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, + .unused, + .unused, .unused, .unused, .unused, @@ -26573,36 +27018,33 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, - .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, - .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), .ui(0), ._ }, - .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_unaligned_size), .ui(0), ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .p_w, .extr, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1x, .ui(0), ._ }, - .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ }, + .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, + .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, + .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null, null, null 
}, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .any, - }, + .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } }, - .{ .type = .f16, .kind = .{ .reg = .ax } }, + .{ .type = .i16, .kind = .{ .reg = .dx } }, + .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, + .unused, + .unused, + .unused, .unused, .unused, .unused, @@ -26610,154 +27052,90 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .p_, .xor, .tmp1x, .tmp1x, ._, ._ }, - .{ ._, .p_, .xor, .tmp2x, .tmp2x, ._, ._ }, - .{ ._, .p_w, .insr, .tmp1x, .memia(.src0w, .tmp0, .add_unaligned_size), .ui(0), ._ }, - .{ ._, .p_w, .insr, .tmp2x, .memia(.src1w, .tmp0, .add_unaligned_size), .ui(0), ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .p_w, .extr, .tmp4d, .tmp1x, .ui(0), ._ }, - .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp4w, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", 
._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ }, + .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, + .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, - .any, - }, + .src_constraints = .{ .{ .exact_unsigned_int = 16 }, .{ .exact_unsigned_int = 16 }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f16, .kind = .{ .reg = .ax } }, - .{ .type = .f32, .kind = .mem }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__mulhf3" } }, + .{ .type = .u16, .kind = .{ .reg = .dx } }, + .unused, + .unused, + .unused, + .unused, + .unused, .unused, .unused, .unused, .unused, .unused, - }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._, .movzx, .tmp1d, .memia(.src0w, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, - .{ ._, ._ss, .mov, 
.tmp3x, .mem(.tmp2d), ._, ._ }, - .{ ._, ._, .movzx, .tmp1d, .memia(.src1w, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._, .mov, .mem(.tmp2d), .tmp1d, ._, ._ }, - .{ ._, ._ss, .mov, .tmp4x, .mem(.tmp2d), ._, ._ }, - .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, - .{ ._, ._ss, .mov, .mem(.tmp2d), .tmp3x, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .mem(.tmp2d), ._, ._ }, - .{ ._, ._, .mov, .memia(.dst0w, .tmp0, .add_unaligned_size), .tmp1w, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(2), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .dword, .is = .dword } }, - .{ .scalar_float = .{ .of = .dword, .is = .dword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, - }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, - .each = .{ .once = &.{ - .{ ._, .v_ss, .mul, .dst0x, .src0x, .src1d, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .dword, .is = .dword } }, - .{ .scalar_float = .{ .of = .dword, .is = .dword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_mut_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_sse, .to_sse, .none } }, }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._ss, .mul, .dst0x, .src1d, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .dword } }, - .{ .scalar_float = .{ .of = .xword, .is = .dword } }, - 
.any, - }, + .required_features = .{ .bmi, .cmov, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, - }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, - .each = .{ .once = &.{ - .{ ._, .v_ps, .mul, .dst0x, .src0x, .src1x, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .dword } }, - .{ .scalar_float = .{ .of = .xword, .is = .dword } }, - .any, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, - .patterns = &.{ - .{ .src = .{ .to_mut_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_sse, .to_sse, .none } }, + .extra_temps = .{ + .{ .type = .u16, .kind = .{ .reg = .dx } }, + .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._ps, .mul, .dst0x, .src1x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .yword, .is = .dword } }, - .{ .scalar_float = .{ .of = .yword, .is = .dword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, - }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, - .each = .{ .once = &.{ - .{ ._, 
.v_ps, .mul, .dst0y, .src0y, .src1y, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ }, + .{ ._, ._, .@"or", .tmp2w, .tmp0w, ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, - .{ .multiple_scalar_float = .{ .of = .yword, .is = .dword } }, - .any, - }, + .required_features = .{ .cmov, .fast_imm16, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .vector_8_f32, .kind = .{ .rc = .sse } }, + .{ .type = .u16, .kind = .{ .reg = .dx } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26768,29 +27146,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_ps, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_ps, .mul, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_unaligned_size), ._ }, - .{ ._, .v_ps, .mova, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mul, .src1w, 
._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ }, + .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .dword } }, - .any, - }, + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .vector_4_f32, .kind = .{ .rc = .sse } }, + .{ .type = .u16, .kind = .{ .reg = .dx } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26801,61 +27178,28 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mul, .tmp1x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .qword, .is = .qword } }, - .{ .scalar_float = .{ .of = .qword, 
.is = .qword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, - }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, - .each = .{ .once = &.{ - .{ ._, .v_sd, .mul, .dst0x, .src0x, .src1q, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .qword, .is = .qword } }, - .{ .scalar_float = .{ .of = .qword, .is = .qword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_mut_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_sse, .to_sse, .none } }, - }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._sd, .mul, .dst0x, .src1q, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, + .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ }, } }, }, .{ - .required_features = .{ .x87, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .qword, .is = .qword } }, - .{ .scalar_float = .{ .of = .qword, .is = .qword } }, - .any, - }, + .required_features = .{ .fast_imm16, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .mem, .mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .f64, .kind = .{ .reg = .st6 } }, - .{ .type = .f64, .kind = .{ .reg = .st7 } }, + .{ .type = .u16, .kind = .{ .reg 
= .dx } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26866,73 +27210,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, - .each = .{ .once = &.{ - .{ ._, .f_, .ld, .src0q, ._, ._, ._ }, - .{ ._, .f_, .mul, .src1q, ._, ._, ._ }, - .{ ._, .f_p, .st, .dst0q, ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .qword } }, - .{ .scalar_float = .{ .of = .xword, .is = .qword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, - }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, - .each = .{ .once = &.{ - .{ ._, .v_pd, .mul, .dst0x, .src0x, .src1x, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .qword } }, - .{ .scalar_float = .{ .of = .xword, .is = .qword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_mut_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_mut_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_mut_sse, .to_sse, .none } }, - }, .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._pd, .mul, .dst0x, .src1x, ._, ._ }, - } }, - }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .yword, .is = .qword } }, - .{ .scalar_float = .{ .of = .yword, .is = .qword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_sse, .mem, .none } }, - .{ .src = .{ .mem, .to_sse, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .to_sse, .to_sse, .none } }, - }, - .dst_temps = .{ .{ .mut_rc = .{ .ref = .src0, .rc = .sse } }, .unused }, - .each = .{ 
.once = &.{ - .{ ._, .v_pd, .mul, .dst0y, .src0y, .src1y, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .mul, .src1w, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ }, + .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, - .{ .multiple_scalar_float = .{ .of = .yword, .is = .qword } }, - .any, - }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .vector_4_f64, .kind = .{ .rc = .sse } }, + .{ .type = .u16, .kind = .{ .reg = .dx } }, + .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -26943,29 +27241,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_pd, .mova, .tmp1y, .memia(.src0y, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_pd, .mul, .tmp1y, .tmp1y, .memia(.src1y, .tmp0, .add_unaligned_size), ._ }, - .{ ._, .v_pd, .mova, .memia(.dst0y, .tmp0, .add_unaligned_size), .tmp1y, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(32), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, 
.mul, .src1w, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, + .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, } }, }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .qword } }, - .any, - }, + .src_constraints = .{ .{ .exact_signed_int = 32 }, .{ .exact_signed_int = 32 }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .vector_2_f64, .kind = .{ .rc = .sse } }, + .{ .type = .i32, .kind = .{ .reg = .edx } }, + .unused, .unused, .unused, .unused, @@ -26976,30 +27272,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._pd, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._pd, .mul, .tmp1x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._pd, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .i_, .mul, .src1d, ._, ._, ._ }, + .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - 
.required_features = .{ .x87, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, - .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, - .any, - }, + .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f64, .kind = .{ .reg = .st6 } }, - .{ .type = .f64, .kind = .{ .reg = .st7 } }, + .{ .type = .i32, .kind = .{ .reg = .edx } }, + .{ .type = .i32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, + .unused, .unused, .unused, .unused, @@ -27009,29 +27301,29 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .f_, .ld, .memia(.src0q, .tmp0, .add_unaligned_size), ._, ._, ._ }, - .{ ._, .f_, .mul, .memia(.src1q, .tmp0, .add_unaligned_size), ._, ._, ._ }, - .{ ._, .f_p, .st, .memia(.dst0q, .tmp0, .add_unaligned_size), ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(8), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .i_, .mul, .src1d, ._, ._, ._ }, + .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._r, .sa, .tmp1d, .sia(-1, .src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .cmp, .tmp1d, .tmp0d, ._, ._ }, + .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, + .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ }, + .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ }, + .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, } }, }, .{ - 
.required_features = .{ .x87, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, - .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, - .any, - }, + .src_constraints = .{ .{ .exact_unsigned_int = 32 }, .{ .exact_unsigned_int = 32 }, .any }, .patterns = &.{ - .{ .src = .{ .mem, .mem, .none } }, + .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .f80, .kind = .{ .reg = .st6 } }, - .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u32, .kind = .{ .reg = .edx } }, + .unused, .unused, .unused, .unused, @@ -27042,29 +27334,25 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .rc = .x87 }, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, .f_, .ld, .src0t, ._, ._, ._ }, - .{ ._, .f_, .ld, .src1t, ._, ._, ._ }, - .{ ._, .f_p, .mul, ._, ._, ._, ._ }, - .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, + .{ ._, ._, .mul, .src1d, ._, ._, ._ }, + .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ }, + .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ }, } }, }, .{ - .required_features = .{ .x87, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, - .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, - .any, - }, + .required_features = .{ .bmi, .cmov, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, .patterns = &.{ - .{ .src = .{ .to_x87, .mem, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .mem, .to_x87, .none } }, - .{ .src = .{ .to_x87, .to_x87, .none } }, + .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } 
}, }, .extra_temps = .{ - .{ .type = .f80, .kind = .{ .reg = .st7 } }, - .unused, - .unused, + .{ .type = .u32, .kind = .{ .reg = .edx } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -27074,26 +27362,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .rc = .x87 }, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, .f_, .ld, .src0t, ._, ._, ._ }, - .{ ._, .f_, .mul, .tmp0t, .src1t, ._, ._ }, - .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._, .mul, .src1d, ._, ._, ._ }, + .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ }, + .{ ._, ._, .@"or", .tmp2d, .tmp0d, ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ }, } }, }, .{ - .required_features = .{ .x87, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, - .any, - }, + .required_features = .{ .cmov, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f80, .kind = .{ .reg = .st6 } }, - .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .u32, .kind = .{ .reg = .edx } }, + .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, + .unused, .unused, .unused, .unused, @@ -27103,35 +27392,26 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .ref = .src0 }, .unused }, .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .f_, .ld, .memia(.src0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, - .{ ._, .f_, .ld, .memia(.src1t, .tmp0, .add_unaligned_size), ._, ._, ._ }, - .{ ._, .f_p, .mul, ._, ._, ._, ._ }, - .{ ._, .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .mul, .src1d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, + .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, + .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ }, } }, }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .scalar_float = .{ .of = .xword, .is = .xword } }, - .{ .scalar_float = .{ .of = .xword, .is = .xword } }, - .any, - }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, .patterns = &.{ - .{ .src = .{ - .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, - .{ .to_param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } }, - .none, - } }, + .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, - .unused, + .{ .type = .u32, .kind = .{ .reg = .edx } }, + .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, .unused, .unused, .unused, @@ -27143,775 +27423,25 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, }, 
.dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .clobbers = .{ .eflags = true }, .each = .{ .once = &.{ - .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mul, .src1d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, + .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, + .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ }, + .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, } }, }, .{ - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, - .any, - }, + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .exact_signed_int = 64 }, .{ .exact_signed_int = 64 }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, + .{ .src = .{ .{ .to_reg = .rax }, .mem, .none } }, + .{ .src = .{ .mem, .{ .to_reg = .rax }, .none }, .commute = .{ 0, 1 } }, + .{ .src = .{ .{ .to_reg = .rax }, .to_gpr, .none } }, }, - .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, 
.memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse2, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, - }, - .call_frame = .{ .alignment = .@"16" }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - }, .{ - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ - .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, - .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, - .any, - }, - .patterns = &.{ - .{ .src = .{ .to_mem, .to_mem, .none } }, - }, - .call_frame = .{ .alignment = .@"16" }, - .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f128, 
.kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .mem, .unused }, - .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, - } }, - } }) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ - @tagName(air_tag), - ty.fmt(pt), - ops[0].tracking(cg), - ops[1].tracking(cg), - }), - else => |e| return e, - }; - res[0].wrapInt(cg) catch |err| switch (err) { - error.SelectFailed => return cg.fail("failed to select {s} wrap {f} {f}", .{ - @tagName(air_tag), - cg.typeOf(bin_op.lhs).fmt(pt), - res[0].tracking(cg), - }), - else => |e| return e, - }; - try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); - }, - .mul_sat => |air_tag| { - const bin_op = air_datas[@intFromEnum(inst)].bin_op; - var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); - var res: [1]Temp = undefined; - cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, comptime &.{ .{ - .src_constraints = .{ .{ .exact_signed_int = 8 }, .{ .exact_signed_int = 8 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .i_, 
.mul, .src1b, ._, ._, ._ }, - .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, - .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .signed_int = .byte }, .{ .signed_int = .byte }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i8, .kind = .{ .rc = .gphi } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .i_, .mul, .src1b, ._, ._, ._ }, - .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp0d, .dst0d, ._, ._ }, - .{ ._, ._r, .sa, .tmp0b, .sia(-1, .src0, .add_bit_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp0b, .dst0h, ._, ._ }, - .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, - .{ .@"1:", ._r, .sa, .dst0w, .ui(15), ._, ._ }, - .{ ._, ._, .xor, .dst0b, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .exact_unsigned_int = 8 }, .{ .exact_unsigned_int = 8 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u8, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mul, .src1b, ._, ._, ._ }, - .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .@"or", .dst0b, .tmp0b, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, 
- .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mul, .src1b, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._, .cmp, .dst0w, .tmp0w, ._, ._ }, - .{ ._, ._a, .cmov, .dst0d, .tmp0d, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .al }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .al }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .al }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mul, .src1b, ._, ._, ._ }, - .{ ._, ._, .cmp, .dst0w, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._na, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, - } }, - }, .{ - .required_features = .{ .fast_imm16, null, null, null }, - .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, 
.to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i16, .kind = .{ .reg = .dx } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, - .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, - .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .exact_signed_int = 16 }, .{ .exact_signed_int = 16 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i16, .kind = .{ .reg = .dx } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, - .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, - .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .required_features = .{ .fast_imm16, null, null, null }, - .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i16, .kind = .{ .reg = .dx } }, - .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - 
.unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ }, - .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, - .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ }, - .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, - .{ ._, ._, .xor, .dst0w, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .signed_int = .word }, .{ .signed_int = .word }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i16, .kind = .{ .reg = .dx } }, - .{ .type = .i16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, .i_, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._r, .sa, .tmp1w, .sia(-1, .src0, .add_bit_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp1w, .tmp0w, ._, ._ }, - .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, - .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ }, - .{ ._, ._r, .sa, .dst0w, .ui(15), ._, ._ }, - .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .exact_unsigned_int = 16 }, .{ .exact_unsigned_int = 16 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, 
.none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .reg = .dx } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .bmi, .cmov, null, null }, - .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .reg = .dx } }, - .{ .type = .u16, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ }, - .{ ._, ._, .@"or", .tmp2w, .tmp0w, ._, ._ }, - .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, .fast_imm16, null, null }, - .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ 
.to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .reg = .dx } }, - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ }, - .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, - .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, - .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .reg = .dx } }, - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, - .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, - .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .fast_imm16, null, null, null }, - .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, - 
.patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .reg = .dx } }, - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._, .@"and", .tmp1w, .sa(.src0, .add_2_smin), ._, ._ }, - .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, - .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .ax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .ax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .ax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u16, .kind = .{ .reg = .dx } }, - .{ .type = .u16, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .xor, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .mul, .src1w, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, - .{ ._, ._, .@"or", .tmp1w, .tmp0w, ._, ._ }, - .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .exact_signed_int = 32 }, .{ 
.exact_signed_int = 32 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i32, .kind = .{ .reg = .edx } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .i_, .mul, .src1d, ._, ._, ._ }, - .{ ._, ._nc, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .tmp0d, ._, ._ }, - .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ }, - .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .signed_int = .dword }, .{ .signed_int = .dword }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i32, .kind = .{ .reg = .edx } }, - .{ .type = .i32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, .i_, .mul, .src1d, ._, ._, ._ }, - .{ ._, ._c, .j, .@"1f", ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._r, .sa, .tmp1d, .sia(-1, .src0, .add_bit_size), ._, ._ }, - .{ ._, ._, .cmp, .tmp1d, .tmp0d, ._, ._ }, - .{ ._, ._e, .j, .@"0f", ._, ._, ._ }, - .{ .@"1:", ._, .mov, .dst0d, .tmp0d, ._, ._ }, - .{ ._, ._r, .sa, .dst0d, .ui(31), ._, ._ }, - .{ ._, ._, .xor, .dst0d, .sa(.src0, .add_smax), ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .exact_unsigned_int = 32 }, .{ .exact_unsigned_int = 32 }, .any }, - .patterns = 
&.{ - .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .reg = .edx } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mul, .src1d, ._, ._, ._ }, - .{ ._, ._, .sbb, .tmp0d, .tmp0d, ._, ._ }, - .{ ._, ._, .@"or", .dst0d, .tmp0d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .bmi, .cmov, null, null }, - .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .reg = .edx } }, - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp1d, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._, .mul, .src1d, ._, ._, ._ }, - .{ ._, ._, .andn, .tmp2d, .tmp1d, .dst0d, ._ }, - .{ ._, ._, .@"or", .tmp2d, .tmp0d, ._, ._ }, - .{ ._, ._nz, .cmov, .dst0d, .tmp1d, ._, ._ }, - } }, - }, .{ - .required_features = .{ .cmov, null, null, null }, - .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, 
- }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .reg = .edx } }, - .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mul, .src1d, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, - .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ }, - .{ ._, ._, .mov, .tmp0d, .ua(.src0, .add_umax), ._, ._ }, - .{ ._, ._nz, .cmov, .dst0d, .tmp0d, ._, ._ }, - } }, - }, .{ - .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .eax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .eax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .eax }, .to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .u32, .kind = .{ .reg = .edx } }, - .{ .type = .u32, .kind = .{ .mut_rc = .{ .ref = .src1, .rc = .general_purpose } } }, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - .unused, - }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, - .clobbers = .{ .eflags = true }, - .each = .{ .once = &.{ - .{ ._, ._, .mul, .src1d, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp1d, .dst0d, ._, ._ }, - .{ ._, ._, .@"and", .tmp1d, .sa(.src0, .add_2_smin), ._, ._ }, - .{ ._, ._, .@"or", .tmp1d, .tmp0d, ._, ._ }, - .{ ._, ._z, .j, .@"0f", ._, ._, ._ }, - .{ ._, ._, .mov, .dst0d, .ua(.src0, .add_umax), ._, ._ }, - } }, - }, .{ - .required_features = .{ .@"64bit", null, null, null }, - .src_constraints = .{ .{ .exact_signed_int = 64 }, .{ .exact_signed_int = 64 }, .any }, - .patterns = &.{ - .{ .src = .{ .{ .to_reg = .rax }, .mem, .none } }, - .{ .src = .{ .mem, .{ .to_reg = .rax }, .none }, .commute = .{ 0, 1 } }, - .{ .src = .{ .{ .to_reg = .rax }, 
.to_gpr, .none } }, - }, - .extra_temps = .{ - .{ .type = .i64, .kind = .{ .reg = .rdx } }, + .{ .type = .i64, .kind = .{ .reg = .rdx } }, .unused, .unused, .unused, @@ -33431,6 +32961,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -33464,6 +32995,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -33475,7 +33039,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = 
.general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -33490,15 +33054,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -33510,7 +33075,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func 
= "__divtf3" } }, @@ -33525,15 +33090,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -33545,7 +33111,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -33560,13 +33126,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, 
._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, 
._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = 
.usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) else err: { assert(air_tag == .div_exact); @@ -34659,6 +34333,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -34693,6 +34368,112 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = "truncq" } 
}, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = "truncq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } 
}, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = "truncq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -34704,7 +34485,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -34719,16 +34500,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, 
.mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -34740,7 +34522,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -34755,16 +34537,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, 
.mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -34776,7 +34559,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -34791,14 +34574,131 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), 
.tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "truncq" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, 
.is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "truncq" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = 
.ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "truncq" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) else err: { res[0] = ops[0].divTruncInts(&ops[1], cg) catch |err| break :err err; @@ -35955,6 +35855,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -35993,6 +35894,124 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, 
.kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .zero => "truncq", + .down => "floorq", + } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .zero => "truncq", + .down => "floorq", + } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, 
._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .zero => "truncq", + .down => "floorq", + } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -36004,7 +36023,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, 
.at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -36023,16 +36042,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -36044,7 +36064,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -36063,16 +36083,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -36084,7 +36105,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -36103,14 +36124,143 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, 
.tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .zero => "truncq", + .down => "floorq", + } } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, 
.lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .zero => "truncq", + .down => "floorq", + } } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, 
+ .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .zero => "truncq", + .down => "floorq", + } } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }, }) catch |err| switch (err) { @@ -37438,6 +37588,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ 
.scalar_float = .{ .of = .xword, .is = .xword } }, @@ -37472,6 +37623,112 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = "floorq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind 
= .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = "floorq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ .extern_func = "floorq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, 
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -37483,7 +37740,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -37498,16 +37755,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -37519,7 +37777,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ 
.type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -37534,16 +37792,17 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -37555,7 +37814,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, 
.after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, @@ -37570,14 +37829,131 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ 
.extern_func = "floorq" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "floorq" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, 
.tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__divtf3" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "floorq" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .tmp5x, ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } })) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} 
{f}", .{ @@ -39080,6 +39456,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -39113,6 +39490,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -39124,7 +39534,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, 
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, @@ -39139,15 +39549,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -39159,7 +39570,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, @@ -39174,15 +39585,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ 
.eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -39194,7 +39606,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, @@ -39209,13 +39621,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memia(.src1x, .tmp0, .add_unaligned_size), 
._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, 
null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ 
.cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ @@ -39525,7 +40045,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_cc_abi = .sysv64, - .required_features = .{ .cmov, null, null, null }, + .required_features = .{ .@"64bit", .cmov, null, null }, .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, .patterns = &.{ .{ .src = .{ .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .to_mem, .none } }, @@ -39565,6 +40085,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } }, }, .{ .required_cc_abi = .sysv64, + .required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, .patterns = &.{ .{ .src = .{ .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .to_mem, .none } }, @@ -39601,70 +40122,344 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .adc, .dst0q1, .src0q0, ._, ._ }, } }, }, .{ - .required_cc_abi = .sysv64, - .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any }, + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .cmov, .avx, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = 
.xword }, .any }, .patterns = &.{ - .{ .src = .{ - .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, - .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 2, .at = 2 } }, - .none, - } }, + .{ .src = .{ .to_mem, .to_mem, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .extern_func = "__umodti3" } }, + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .r10 } }, .unused, .unused, .unused, .unused, .unused, + }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, .v_q, .mov, .dst0q0, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ }, + .{ ._, .vp_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nae, .cmov, .tmp2q, .tmp5q, ._, ._ }, + .{ ._, ._ae, .cmov, .tmp5q, .mem(.src1q), ._, ._ }, + .{ ._, ._, .add, .dst0q0, .tmp5q, ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .cmov, .sse4_1, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, 
.none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .r10 } }, .unused, .unused, .unused, .unused, .unused, }, - .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ }, + .{ ._, .p_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nae, .cmov, .tmp2q, .tmp5q, ._, ._ }, + .{ ._, ._ae, .cmov, .tmp5q, .mem(.src1q), ._, ._ }, + .{ ._, ._, .add, .dst0q0, .tmp5q, ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ }, } }, }, .{ .required_cc_abi = .win64, - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any }, + .required_features = .{ .@"64bit", .cmov, .sse2, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .to_mem, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ 
.type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__umodti3" } }, + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .r10 } }, .unused, .unused, .unused, .unused, .unused, + }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ }, + .{ ._, .p_d, .shuf, .tmp1x, .tmp1x, .ui(0b11_10_11_10), ._ }, + .{ ._, ._q, .mov, .dst0q1, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ }, + .{ ._, ._, .xor, .tmp5d, .tmp5d, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nae, .cmov, .tmp2q, .tmp5q, ._, ._ }, + .{ ._, ._ae, .cmov, .tmp5q, .mem(.src1q), ._, ._ }, + .{ ._, ._, .add, .dst0q0, .tmp5q, ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .cmov, .sse, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + 
.{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .i128, .kind = .mem }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .r10 } }, + .unused, .unused, .unused, .unused, }, - .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, - .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, - .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .mem(.tmp2x), .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .mov, .dst0q0, .mem(.tmp2q), ._, ._ }, + .{ ._, ._, .mov, .dst0q1, .memd(.tmp2q, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, + .{ ._, ._, .xor, .tmp5q, .dst0q1, ._, ._ }, + .{ ._, ._, .xor, .tmp6d, .tmp6d, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5q, .tmp4q, ._, ._ }, + .{ ._, ._nae, .cmov, .tmp3q, .tmp6q, ._, ._ }, + .{ ._, ._ae, .cmov, .tmp6q, .mem(.src1q), ._, ._ }, + .{ ._, ._, .add, .dst0q0, .tmp6q, ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp3q, ._, ._ }, } }, }, .{ .required_cc_abi = .win64, - .required_features = .{ .sse, null, null, null }, + .required_features = .{ .@"64bit", .avx, null, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + 
.call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, .v_q, .mov, .dst0q0, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ }, + .{ ._, .vp_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse4_1, null, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, 
+ .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ }, + .{ ._, .p_q, .extr, .dst0q1, .tmp1x, .ui(1), ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse2, null, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._q, .mov, .dst0q0, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp2q, .memd(.src1q, 8), ._, ._ 
}, + .{ ._, .p_d, .shuf, .tmp1x, .tmp1x, .ui(0b11_10_11_10), ._ }, + .{ ._, ._q, .mov, .dst0q1, .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .dst0q1, ._, ._ }, + .{ ._, ._, .cmp, .dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp3q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp2q, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, null, null }, + .src_constraints = .{ .{ .signed_int = .xword }, .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .extern_func = "__modti3" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .i128, .kind = .mem }, + .{ .type = .u64, .kind = .{ .reg = .r8 } }, + .{ .type = .u64, .kind = .{ .reg = .r9 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .dst0q0, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .dst0q1, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .mem(.tmp2x), .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .tmp3q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .mov, .dst0q0, .mem(.tmp2q), ._, ._ }, + .{ ._, ._, .mov, .dst0q1, .memd(.tmp2q, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp3q, ._, ._ }, + .{ ._, ._, .@"and", .tmp5q, .tmp4q, ._, ._ }, + .{ ._, ._, .xor, .tmp5q, .dst0q1, ._, ._ }, + .{ ._, ._, .cmp, 
.dst0q0, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5q, .tmp4q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .add, .dst0q0, .mem(.src1x), ._, ._ }, + .{ ._, ._, .adc, .dst0q1, .tmp3q, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any }, .patterns = &.{ .{ .src = .{ - .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, - .{ .to_param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } }, + .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, + .{ .to_param_gpr_pair = .{ .cc = .ccc, .after = 2, .at = 2 } }, .none, } }, }, @@ -39682,11 +40477,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .ref = .src0 }, .unused }, + .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__umodti3" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, }, .{ 
.required_features = .{ .@"64bit", null, null, null }, .src_constraints = .{ @@ -41082,8 +41905,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f64, .kind = .{ .reg = .rdx } }, .{ .type = .f64, .kind = .mem }, .{ .type = .f64, .kind = .{ .reg = .rax } }, - .{ .type = .f64, .kind = .{ .reg = .st6 } }, .{ .type = .f64, .kind = .{ .reg = .st7 } }, + .{ .type = .f64, .kind = .{ .reg = .st6 } }, .unused, .unused, }, @@ -41130,13 +41953,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41145,17 +41968,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .v_dqa, .mov, .mem(.tmp1x), .tmp0x, ._, ._ }, .{ ._, .v_dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ }, .{ ._, .v_dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .f_, .ld, .dst0t, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ }, - .{ ._, ._, .movzx, .tmp4d, .memd(.tmp1w, 16 + 8), ._, ._ }, - .{ ._, ._, .@"and", .tmp4w, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ }, + .{ ._, ._, .movzx, .tmp5d, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"0f", 
._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, } }, }, .{ .required_abi = .gnu, @@ -41175,13 +42000,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41191,16 +42016,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .v_dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ }, .{ ._, .v_dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .f_, .ld, .dst0t, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ }, - .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .@"and", .tmp4w, .memd(.tmp1w, 16 + 8), ._, ._ }, - .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, } }, }, .{ .required_abi = .gnu, @@ -41220,13 +42047,13 @@ fn genBody(cg: *CodeGen, body: 
[]const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41236,16 +42063,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ }, .{ ._, ._dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .f_, .ld, .dst0t, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ }, - .{ ._, ._, .movzx, .tmp4d, .memd(.tmp1w, 16 + 8), ._, ._ }, - .{ ._, ._, .@"and", .tmp4w, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ }, + .{ ._, ._, .movzx, .tmp5d, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, } }, }, .{ .required_abi = .gnu, @@ -41265,13 +42094,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = 
.rax } }, .unused, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41281,16 +42110,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._dqa, .mov, .tmp0x, .mem(.src1x), ._, ._ }, .{ ._, ._dqa, .mov, .memd(.tmp1x, 16), .tmp0x, ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .f_, .ld, .dst0t, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ }, - .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .@"and", .tmp4w, .memd(.tmp1w, 16 + 8), ._, ._ }, - .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, } }, }, .{ .required_abi = .gnu, @@ -41310,13 +42141,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41326,16 +42157,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ps, 
.mova, .tmp0x, .mem(.src1x), ._, ._ }, .{ ._, ._ps, .mova, .memd(.tmp1x, 16), .tmp0x, ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .f_, .ld, .dst0t, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp1t), ._, ._, ._ }, - .{ ._, ._, .movzx, .tmp4d, .memd(.tmp1w, 16 + 8), ._, ._ }, - .{ ._, ._, .@"and", .tmp4w, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ }, + .{ ._, ._, .movzx, .tmp5d, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memd(.src1t, 16), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, } }, }, .{ .required_abi = .gnu, @@ -41355,13 +42188,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41371,16 +42204,106 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ps, .mova, .tmp0x, .mem(.src1x), ._, ._ }, .{ ._, ._ps, .mova, .memd(.tmp1x, 16), .tmp0x, ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .f_, .ld, .dst0t, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, + .{ ._, .f_, .ld, .tmp3t, ._, ._, ._ }, .{ ._, .f_p, 
.st, .mem(.tmp1t), ._, ._, ._ }, - .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .@"and", .tmp4w, .memd(.tmp1w, 16 + 8), ._, ._ }, - .{ ._, ._, .xor, .tmp4w, .memd(.tmp1w, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp1w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp1q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp4w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp1t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, + } }, + }, .{ + .required_abi = .gnu, + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .x87, .fast_imm16, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .f80, .kind = .mem }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .rax } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .st0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp1p, .mem(.tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .mem(.src1), ._, 
._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.tmp0t), ._, ._, ._ }, + .{ ._, ._, .movzx, .tmp7d, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp7w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp7w, .memd(.tmp0w, 8), ._, ._ }, + .{ ._, ._, .cmp, .mem(.tmp0q), .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp7w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, + .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, + } }, + }, .{ + .required_abi = .gnu, + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .x87, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, + .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .f80, .kind = .mem }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .{ .type = .f80, .kind = .{ .reg = .rax } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .reg = .st0 }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp1p, .mem(.tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.tmp0t), ._, ._, ._ }, + .{ ._, ._, .mov, .tmp7d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp7w, .memd(.src1w, 8), ._, ._ }, + .{ ._, ._, 
.xor, .tmp7w, .memd(.tmp0w, 8), ._, ._ }, + .{ ._, ._, .cmp, .mem(.tmp0q), .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp7w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, .f_, .ld, .mem(.src1t), ._, ._, ._ }, + .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .in, ._, ._, ._, ._ }, } }, }, .{ .required_abi = .gnu, @@ -41401,12 +42324,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41420,13 +42343,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_, .ld, .tmp4t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ }, - .{ ._, ._, .movzx, .tmp5d, .memd(.tmp2w, 16 + 8), ._, ._ }, - .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .movzx, .tmp6d, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp6w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, @@ -41451,12 +42374,12 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41470,13 +42393,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_, .ld, .tmp4t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ }, - .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .@"and", .tmp5w, .memd(.tmp2w, 16 + 8), ._, ._ }, - .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp6d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp6w, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, @@ -41501,12 +42424,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41520,13 
+42443,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_, .ld, .tmp4t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ }, - .{ ._, ._, .movzx, .tmp5d, .memd(.tmp2w, 16 + 8), ._, ._ }, - .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .movzx, .tmp6d, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp6w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, @@ -41551,12 +42474,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41570,13 +42493,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_, .ld, .tmp4t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ }, - .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .@"and", .tmp5w, .memd(.tmp2w, 16 + 8), ._, ._ }, - .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp6d, .sa(.src0, 
.add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp6w, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, @@ -41601,12 +42524,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41620,13 +42543,13 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_, .ld, .tmp4t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ }, - .{ ._, ._, .movzx, .tmp5d, .memd(.tmp2w, 16 + 8), ._, ._ }, - .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .movzx, .tmp6d, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp6w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, 
.tmp0), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, @@ -41651,12 +42574,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f80, .kind = .{ .frame = .call_frame } }, .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, .{ .type = .f80, .kind = .{ .reg = .rax } }, .unused, .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, @@ -41670,19 +42593,114 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_, .ld, .tmp4t, ._, ._, ._ }, .{ ._, .f_p, .st, .mem(.tmp2t), ._, ._, ._ }, - .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, - .{ ._, ._, .@"and", .tmp5w, .memd(.tmp2w, 16 + 8), ._, ._ }, - .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .mov, .tmp6d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp6w, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp6w, .memd(.tmp2w, 8), ._, ._ }, .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, - .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .sbb, .tmp6w, .sa(.src0, .add_smin), ._, ._ }, .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, - .{ ._, .f_, .ld, .memd(.tmp2t, 16), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, .{ ._, .f_p, .add, ._, ._, ._, ._ }, .{ .@"1:", .f_p, .st, .memia(.dst0t, .tmp0, .add_unaligned_size), ._, ._, ._ }, .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_abi = .gnu, + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, .x87, .fast_imm16 }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + 
.{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, + .{ .type = .f80, .kind = .{ .reg = .rax } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, ._, .movzx, .tmp5d, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.dst0t, .tmp0), ._, ._, ._ }, + .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ }, + .{ .@"1:", ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_abi = .gnu, + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, .x87, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of 
= .xword, .is = .tbyte } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fmodx" } }, + .{ .type = .f80, .kind = .{ .reg = .rax } }, + .{ .type = .f80, .kind = .{ .reg = .st7 } }, + .{ .type = .f80, .kind = .{ .reg = .st6 } }, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp5d, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._, .@"and", .tmp5w, .memid(.src1w, .tmp0, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5w, .memd(.tmp2w, 8), ._, ._ }, + .{ ._, ._, .cmp, .mem(.tmp2q), .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5w, .sa(.src0, .add_smin), ._, ._ }, + .{ ._, ._nae, .j, .@"1f", ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.src1t, .tmp0), ._, ._, ._ }, + .{ ._, .f_, .ld, .memi(.dst0t, .tmp0), ._, ._, ._ }, + .{ ._, .f_p, .add, ._, ._, ._, ._ }, + .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ }, + .{ .@"1:", ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .avx, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, 
.is = .xword } }, @@ -41700,9 +42718,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .f128, .kind = .mem }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, - .{ .type = .f128, .kind = .{ .reg = .rcx } }, - .{ .type = .f128, .kind = .{ .reg = .rdx } }, - .{ .type = .f128, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rcx } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, .unused, .unused, @@ -41728,6 +42746,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse4_1, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -41745,9 +42764,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .f128, .kind = .mem }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, - .{ .type = .f128, .kind = .{ .reg = .rcx } }, - .{ .type = .f128, .kind = .{ .reg = .rdx } }, - .{ .type = .f128, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rcx } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, .unused, .unused, @@ -41773,6 +42792,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse2, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -41790,9 +42810,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .f128, .kind = .mem }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, - .{ .type 
= .f128, .kind = .{ .reg = .rcx } }, - .{ .type = .f128, .kind = .{ .reg = .rdx } }, - .{ .type = .f128, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rcx } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, .unused, .unused, @@ -41805,8 +42825,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .each = .{ .once = &.{ .{ ._, ._dqa, .mov, .mem(.tmp0x), .src1x, ._, ._ }, .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, - .{ ._, ._, .mov, .tmp2q, .ua(.src0, .add_smin), ._, ._ }, .{ ._, .p_d, .shuf, .src1x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, ._, .mov, .tmp2q, .ua(.src0, .add_smin), ._, ._ }, .{ ._, ._q, .mov, .tmp3q, .src1x, ._, ._ }, .{ ._, ._, .mov, .tmp4q, .tmp2q, ._, ._ }, .{ ._, ._, .@"and", .tmp4q, .memd(.tmp0q, 8), ._, ._ }, @@ -41819,6 +42839,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -41836,9 +42857,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .f128, .kind = .mem }, .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, - .{ .type = .f128, .kind = .{ .reg = .rdx } }, + .{ .type = .u64, .kind = .{ .reg = .rdx } }, .{ .type = .f128, .kind = .mem }, - .{ .type = .f128, .kind = .{ .reg = .rax } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, .unused, .unused, @@ -41863,6 +42884,186 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .avx, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = 
.xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, .vp_q, .extr, .tmp1q, .dst0x, .ui(1), ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp0q, ._, ._ }, + .{ ._, ._, .@"and", .tmp5q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5q, .tmp1q, ._, ._ }, + .{ ._, .v_q, .mov, .tmp1q, .dst0x, ._, ._ }, + .{ ._, ._, .cmp, .tmp1q, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5q, .tmp0q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse4_1, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment 
= .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, .p_q, .extr, .tmp1q, .dst0x, .ui(1), ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp0q, ._, ._ }, + .{ ._, ._, .@"and", .tmp5q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5q, .tmp1q, ._, ._ }, + .{ ._, ._q, .mov, .tmp1q, .dst0x, ._, ._ }, + .{ ._, ._, .cmp, .tmp1q, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5q, .tmp0q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse2, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 
1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .f128, .kind = .{ .reg = .xmm1 } }, + .{ .type = .u64, .kind = .{ .reg = .rax } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .p_d, .shuf, .tmp4x, .dst0x, .ui(0b11_10_11_10), ._ }, + .{ ._, ._, .mov, .tmp0q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._q, .mov, .tmp1q, .tmp4x, ._, ._ }, + .{ ._, ._, .mov, .tmp5q, .tmp0q, ._, ._ }, + .{ ._, ._, .@"and", .tmp5q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp5q, .tmp1q, ._, ._ }, + .{ ._, ._q, .mov, .tmp1q, .dst0x, ._, ._ }, + .{ ._, ._, .cmp, .tmp1q, .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp5q, .tmp0q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp6d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmodq" } }, + .{ .type = .f128, .kind = .mem }, + .{ .type = .usize, .kind = .{ 
.reg = .rax } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.tmp3), ._, ._ }, + .{ ._, ._, .mov, .tmp1q, .ua(.src0, .add_smin), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp0x), .dst0x, ._, ._ }, + .{ ._, ._, .mov, .tmp4q, .tmp1q, ._, ._ }, + .{ ._, ._, .@"and", .tmp4q, .memd(.src1q, 8), ._, ._ }, + .{ ._, ._, .xor, .tmp4q, .lead(.tmp0q, 8), ._, ._ }, + .{ ._, ._, .cmp, .lea(.tmp0q), .si(1), ._, ._ }, + .{ ._, ._, .sbb, .tmp4q, .tmp1q, ._, ._ }, + .{ ._, ._nae, .j, .@"0f", ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .avx, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -41909,6 +43110,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse4_1, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -41955,6 +43157,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse2, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -42002,6 +43205,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = 
.sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -46317,6 +47521,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -46350,6 +47555,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -46361,7 +47599,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + 
.{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, @@ -46376,15 +47614,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -46396,7 +47635,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, @@ -46411,15 +47650,16 @@ fn genBody(cg: *CodeGen, 
body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._dqa, .mov, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -46431,7 +47671,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, @@ -46446,13 +47686,121 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_size), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, 
.memia(.src1x, .tmp0, .add_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._ps, .mova, .tmp2x, .memi(.src1x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + 
.required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = 
.f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ @@ -50476,6 +51824,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -50509,6 +51858,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = 
.{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -50544,6 +51926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -50579,6 +51962,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -50613,6 +51997,114 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, 
+ .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + 
.src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp4x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ @tagName(air_tag), @@ -74864,6 +76356,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp0w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -74889,6 +76382,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = 
.xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -74896,7 +76417,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, .unused, @@ -74911,14 +76432,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ 
}, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -74926,7 +76448,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, .unused, @@ -74941,14 +76463,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -74956,7 +76479,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, .unused, @@ -74971,12 +76494,105 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, 
.caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + 
.{ .type = .usize, .kind = .{ .extern_func = "sqrtq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f}", .{ @@ -75589,6 +77205,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -75614,6 +77231,34 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ 
+ .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -75644,6 +77289,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -75674,6 +77320,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -75703,6 +77350,99 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + 
.{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = @tagName(name) ++ "q" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ 
.extern_func = @tagName(name) ++ "q" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, } }, }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f}", .{ @@ -78312,6 +80052,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -78342,6 +80083,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .down => "floorq", + .up => "ceilq", + .zero => "truncq", + } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers 
= .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -78349,7 +80123,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { else => unreachable, @@ -78369,14 +80143,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -78384,7 +80159,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { else => unreachable, @@ -78404,14 +80179,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .patterns = &.{ @@ -78419,7 +80195,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { else => unreachable, @@ -78439,12 +80215,120 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { 
.dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0p, .sa(.src0, .sub_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memia(.src0x, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memia(.dst0x, .tmp0, .add_unaligned_size), .tmp1x, ._, ._ }, - .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, - .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .down => "floorq", + .up => "ceilq", + .zero => "truncq", + } } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, 
.@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .down => "floorq", + .up => "ceilq", + .zero => "truncq", + } } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = switch (direction) { + else => unreachable, + .down => "floorq", + .up => "ceilq", + .zero => "truncq", + } } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + 
.unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, } }, }) catch |err| switch (err) { @@ -79063,7 +80947,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -79398,6 +81282,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any }, .patterns = &.{ @@ -79410,7 +81295,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -79430,6 +81315,38 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } 
}, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .cc = switch (strict) { + true => .l, + false => .le, + } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3d, .tmp3d, ._, ._ }, + } }, } }, }); } else err: { @@ -79575,7 +81492,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -79934,6 +81851,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any }, .patterns = &.{ @@ -79946,7 +81864,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -79963,6 +81881,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ 
._, ._, .call, .tmp0d, ._, ._, ._ }, .{ ._, ._, .@"test", .tmp1d, .tmp1d, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .float = .xword }, .{ .float = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .cc = .z }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .@"test", .tmp3d, .tmp3d, ._, ._ }, + } }, } }, }) catch |err| break :err err; switch (cmp_op) { @@ -80018,14 +81965,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); }, - .cmp_vector, .cmp_vector_optimized => |air_tag| fallback: { + .cmp_vector, .cmp_vector_optimized => |air_tag| { const ty_pl = air_datas[@intFromEnum(inst)].ty_pl; const vector_cmp = cg.air.extraData(Air.VectorCmp, ty_pl.payload).data; - switch (vector_cmp.compareOperator()) { - .eq, .neq => {}, - .lt, .lte, .gte, .gt => if (cg.floatBits(cg.typeOf(vector_cmp.lhs).childType(zcu)) == null) - break :fallback try cg.airCmpVector(inst), - } var ops = try cg.tempsFromOperands(inst, .{ vector_cmp.lhs, vector_cmp.rhs }); var res: [1]Temp = undefined; (err: switch (vector_cmp.compareOperator()) { @@ -80615,7 +82557,7 @@ fn genBody(cg: *CodeGen, body: []const 
Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -80659,7 +82601,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -80703,7 +82645,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -80748,7 +82690,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr 
= .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -80793,7 +82735,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .{ .type = .f32, .kind = .mem }, @@ -80840,7 +82782,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .{ .type = .f32, .kind = .mem }, @@ -80887,7 +82829,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -80940,7 +82882,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, 
.after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -80993,7 +82935,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -81047,7 +82989,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -81101,7 +83043,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ 
.type = .u64, .kind = .{ .reg = .rdx } }, .{ .type = .f32, .kind = .mem }, @@ -81157,7 +83099,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .{ .type = .f32, .kind = .mem }, @@ -81984,7 +83926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -82028,7 +83970,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -82072,7 +84014,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, 
.after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -82116,7 +84058,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -82160,7 +84102,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -82204,7 +84146,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -82248,7 +84190,7 @@ 
fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -82301,7 +84243,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -82354,7 +84296,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -82407,7 +84349,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = 
.eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -82460,7 +84402,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -82513,7 +84455,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -85125,7 +87067,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -85169,7 +87111,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, 
.after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -85213,7 +87155,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -85258,7 +87200,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -85303,7 +87245,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ 
.type = .u32, .kind = .{ .reg = .edx } }, .{ .type = .f32, .kind = .mem }, @@ -85350,7 +87292,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .{ .type = .f32, .kind = .mem }, @@ -85397,7 +87339,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -85450,7 +87392,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -85503,7 +87445,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, 
.at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -85557,7 +87499,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -85611,7 +87553,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .{ .type = .f32, .kind = .mem }, @@ -85667,7 +87609,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f16, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmphf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .{ .type = .f32, .kind = 
.mem }, @@ -86508,7 +88450,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -86552,7 +88494,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -86596,7 +88538,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -86640,7 +88582,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ 
.type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -86684,7 +88626,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -86728,7 +88670,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u32, .kind = .{ .reg = .edx } }, .unused, @@ -86772,7 +88714,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -86825,7 +88767,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, 
.kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -86878,7 +88820,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -86931,7 +88873,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -86984,7 +88926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, 
.{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -87037,7 +88979,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__cmptf2" } }, - .{ .type = .i32, .kind = .{ .reg = .eax } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .u8, .kind = .{ .reg = .cl } }, .{ .type = .u64, .kind = .{ .reg = .rdx } }, .unused, @@ -88690,6 +90632,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_float = .{ .of = .word, .is = .word } }, .any }, @@ -88716,6 +90659,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_float = .{ .of = .word, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ 
.once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, @@ -88747,6 +90719,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse4_1, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, @@ -88778,6 +90751,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, @@ -88810,6 +90784,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, @@ -88819,7 +90794,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f64, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, 
.after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } }, .{ .type = .f32, .kind = .mem }, .{ .type = .f16, .kind = .{ .reg = .ax } }, @@ -88843,6 +90818,138 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } }, + .{ .type = .f16, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .vp_w, .extr, .memi(.dst0w, .tmp0), .tmp3x, .ui(0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse4_1, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, 
.kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } }, + .{ .type = .f16, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .p_w, .extr, .memi(.dst0w, .tmp0), .tmp3x, .ui(0), ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } }, + .{ .type = .f16, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .f16, .kind = .{ .reg = .ax } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .p_w, .extr, .tmp4d, .tmp3x, 
.ui(0), ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp0), .tmp4w, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .word, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfhf2" } }, + .{ .type = .f32, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .f32, .kind = .mem }, + .{ .type = .f16, .kind = .{ .reg = .ax } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._ss, .mov, .mem(.tmp4d), .tmp3x, ._, ._ }, + .{ ._, ._, .mov, .tmp5d, .mem(.tmp4d), ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp0), .tmp5w, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_float = .{ .of = .dword, .is = .dword } }, .any }, @@ -88869,6 +90976,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + 
.required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_float = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfsf2" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any }, @@ -88900,6 +91036,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any }, @@ -88931,6 +91068,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = 
.dword, .is = .dword } }, .any }, @@ -88962,6 +91100,71 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfsf2" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .v_ss, .mov, .memi(.dst0d, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = 
.usize, .kind = .{ .extern_func = "__trunctfsf2" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._ss, .mov, .memi(.dst0d, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_float = .{ .of = .qword, .is = .qword } }, .any }, @@ -88988,6 +91191,35 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_float = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + 
} }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any }, @@ -89019,6 +91251,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any }, @@ -89050,6 +91283,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any }, @@ -89081,46 +91315,83 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .sysv64, - .required_features = .{ .sse, .x87, null, null }, - .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any }, .patterns = &.{ - .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } }, + .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" 
}, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, .unused, .unused, .unused, .unused, .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, .v_sd, .mov, .memi(.dst0q, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, .unused, .unused, .unused, .unused, .unused, }, - .dst_temps = .{ .{ .reg = .st0 }, .unused }, + .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, 
.lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._sd, .mov, .memi(.dst0q, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_cc_abi = .win64, .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .qword, .is = .qword } }, .any }, .patterns = &.{ - .{ .src = .{ .{ .to_reg = .xmm1 }, .none, .none } }, + .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } }, - .unused, - .unused, + .{ .type = .usize, .kind = .{ .extern_func = "__trunctfdf2" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -89132,21 +91403,23 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ }, - .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._ps, .movl, .memi(.dst0q, .tmp0), .tmp3x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_cc_abi = .sysv64, - .required_features = .{ .avx, 
null, null, null }, - .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, + .required_features = .{ .sse, .x87, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, .patterns = &.{ - .{ .src = .{ .to_mem, .none, .none } }, + .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } }, .unused, .unused, @@ -89156,31 +91429,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, .unused, + .unused, + .unused, }, - .dst_temps = .{ .mem, .unused }, + .dst_temps = .{ .{ .reg = .st0 }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, - .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, - .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ }, - .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, - .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ .required_cc_abi = .win64, - .required_features = .{ .avx, null, null, null }, - .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .scalar_float = .{ 
.of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } }, .unused, .unused, @@ -89189,20 +91457,18 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, .unused, + .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ }, - .{ ._, .v_dqa, .mov, .tmp2x, .memi(.src0x, .tmp0), ._, ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, - .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, - .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, } }, }, .{ .required_cc_abi = .sysv64, - .required_features = .{ .sse2, null, null, null }, + .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, .patterns = &.{ @@ -89226,7 +91492,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, 
.mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ }, @@ -89234,7 +91500,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .win64, + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .tbyte } }, .any }, @@ -89244,8 +91510,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } }, .unused, .unused, @@ -89254,14 +91519,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, .unused, + .unused, }, .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ }, - .{ ._, ._dqa, .mov, .tmp2x, .memi(.src0x, .tmp0), ._, ._ }, - .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ .pseudo, .f_cstp, .de, ._, ._, ._, ._ }, + .{ ._, .f_p, .st, .memi(.dst0t, .tmp0), ._, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ 
}, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, @@ -89310,7 +91577,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__trunctfxf2" } }, .unused, .unused, @@ -89323,9 +91590,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .mov, .tmp0d, .sa(.dst0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-16, .dst0, .add_unaligned_size), ._, ._ }, .{ .@"0:", ._, .lea, .tmp1p, .memi(.dst0, .tmp0), ._, ._ }, - .{ ._, ._ps, .mova, .tmp2x, .memi(.src0x, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, @@ -110769,6 +113036,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, .slow_incdec, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, @@ -110802,6 +113070,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ 
.multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, @@ -110835,6 +113104,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, .slow_incdec, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, @@ -110868,6 +113138,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, @@ -110901,6 +113172,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, .slow_incdec, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, @@ -110934,6 +113206,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, @@ -110967,6 +113240,75 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, .slow_incdec, null, null }, + 
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .sia(-1, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2p, .tmp0p, ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp4b, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, -16), ._, ._ }, + .{ ._, ._, .sub, .tmp1d, .si(1), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .byte, .is = .byte } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } }, + .{ 
.type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .sia(-1, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2p, .tmp0p, ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0b, .tmp1), .tmp4b, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .lead(.tmp0, -16), ._, ._ }, + .{ ._, ._c, .de, .tmp1d, ._, ._, ._ }, + .{ ._, ._ns, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any }, @@ -110998,6 +113340,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any }, @@ -111029,6 +113372,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any }, @@ -111060,6 +113404,39 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = 
.win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_int = .{ .of = .word, .is = .word } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-2, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"8", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0w, .tmp0), .tmp3w, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(2), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .signed_int = .dword }, .any }, @@ -111086,6 +113463,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .unsigned_int = .dword }, .any }, @@ -111112,6 +113490,63 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + 
.src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .signed_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfsi" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -111143,6 +113578,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) 
InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -111174,6 +113610,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -111205,6 +113642,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -111236,6 +113674,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -111267,6 +113706,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ 
.multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -111298,6 +113738,71 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfsi" } }, + .{ .type = .i32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0d, .tmp0), .tmp3d, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = 
.ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfsi" } }, + .{ .type = .u32, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-4, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"4", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0d, .tmp0), .tmp3d, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(4), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .signed_int = .qword }, .any }, @@ -111324,6 +113829,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .unsigned_int = .qword }, .any }, @@ -111350,6 +113856,63 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, null, null }, + .src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .signed_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfdi" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + 
.unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, null, null }, + .src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfdi" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .avx, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -111381,6 +113944,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .avx, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -111412,6 +113976,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, 
.@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse2, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -111443,6 +114008,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse2, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -111474,6 +114040,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -111505,6 +114072,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, @@ -111536,16 +114104,19 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .sysv64, - .required_features = .{ .sse, null, null, null }, - .src_constraints = .{ .{ .float = .xword }, .any, .any }, - .dst_constraints = .{ .{ .signed_int = .xword }, .any }, + .required_cc_abi = .win64, + 
.required_features = .{ .@"64bit", .sse, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .qword, .is = .qword } }, .any }, .patterns = &.{ - .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } }, + .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ - .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfdi" } }, + .{ .type = .i64, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111553,17 +114124,51 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", .sse, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .qword, .is = .qword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, 
.at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfdi" } }, + .{ .type = .u64, .kind = .{ .ret_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, .unused, .unused, .unused, }, - .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .dst_temps = .{ .mem, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .mov, .tmp0d, .sia(-8, .dst0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memsi(.src0, .@"2", .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(8), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .win64, + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .signed_int = .xword }, .any }, @@ -111584,7 +114189,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, }, - .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, @@ -111616,16 +114221,45 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .each = .{ .once = &.{ .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .signed_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = 
.{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, + } }, }, .{ .required_cc_abi = .win64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .unsigned_int = .xword }, .any }, .patterns = &.{ - .{ .src = .{ .{ .to_param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .none, .none } }, + .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, .unused, .unused, @@ -111636,12 +114270,12 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .unused, .unused, .unused, - .unused, }, .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ - .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp1d, ._, ._, ._ }, } }, }, .{ .required_cc_abi = .sysv64, @@ -111677,10 +114311,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .win64, + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword 
} }, .any }, + .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -111688,8 +114322,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, - .unused, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, + .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111704,15 +114338,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_cc_abi = .sysv64, - .required_features = .{ .avx, null, null, null }, + .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, + .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -111720,8 +114355,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, 
- .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, - .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, + .{ .type = .i128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111734,7 +114369,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, @@ -111742,8 +114377,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .win64, - .required_features = .{ .avx, null, null, null }, + .required_cc_abi = .sysv64, + .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ @@ -111754,7 +114389,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, - .unused, + .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111767,15 +114402,16 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc 
}, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", .v_dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_cc_abi = .sysv64, - .required_features = .{ .sse2, null, null, null }, + .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ @@ -111799,7 +114435,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, @@ -111807,10 +114443,10 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .win64, - .required_features = .{ .sse2, null, null, null }, + .required_cc_abi = .sysv64, + .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, + .dst_constraints = .{ .{ 
.multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, @@ -111818,8 +114454,8 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, - .unused, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, + .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111832,26 +114468,27 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, + .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .sysv64, - .required_features = .{ .sse2, null, null, null }, + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, + .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - 
.{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, - .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111864,16 +114501,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, - .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_cc_abi = .win64, - .required_features = .{ .sse2, null, null, null }, + .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ @@ -111882,9 +114518,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .f128, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" 
} }, - .unused, + .{ .type = .u128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111897,15 +114533,15 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._dqa, .mov, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .sysv64, - .required_features = .{ .sse, null, null, null }, + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ @@ -111914,9 +114550,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, - .{ .type = .i128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111929,27 +114565,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, 
.tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, - .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ .required_cc_abi = .win64, - .required_features = .{ .sse, null, null, null }, + .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, + .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, - .unused, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, + .{ .type = .u128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111962,26 +114597,26 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._ps, .mova, .memi(.dst0x, 
.tmp0), .tmp1x, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ - .required_cc_abi = .sysv64, + .required_cc_abi = .win64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, - .dst_constraints = .{ .{ .multiple_scalar_unsigned_int = .{ .of = .xword, .is = .xword } }, .any }, + .dst_constraints = .{ .{ .multiple_scalar_signed_int = .{ .of = .xword, .is = .xword } }, .any }, .patterns = &.{ .{ .src = .{ .to_mem, .none, .none } }, }, .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, - .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, - .{ .type = .u128, .kind = .{ .ret_gpr_pair = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfti" } }, + .{ .type = .i128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -111994,10 +114629,9 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, - .{ ._, ._, .mov, .memi(.dst0q, .tmp0), .tmp3q0, ._, ._ }, - .{ ._, ._, .mov, .memid(.dst0q, .tmp0, 8), .tmp3q1, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp3x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, @@ -112012,9 +114646,9 @@ fn 
genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .call_frame = .{ .alignment = .@"16" }, .extra_temps = .{ .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, - .{ .type = .f128, .kind = .{ .param_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfti" } }, - .unused, + .{ .type = .u128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, .unused, .unused, .unused, @@ -112027,13 +114661,14 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, .each = .{ .once = &.{ .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, - .{ .@"0:", ._ps, .mova, .tmp1x, .memi(.src0x, .tmp0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp1x, ._, ._ }, .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112062,6 +114697,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .float = .xword }, .any, .any }, .dst_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112090,6 +114726,67 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp2d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", null, null, null }, + 
.src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfei" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ }, + .{ ._, ._, .mov, .tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .float = .xword }, .any, .any }, + .dst_constraints = .{ .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfei" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.dst0), ._, ._ }, + .{ ._, ._, .mov, 
.tmp1d, .sa(.dst0, .add_bit_size), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .avx, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112124,6 +114821,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .avx, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112158,6 +114856,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse2, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112192,6 +114891,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse2, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112226,6 +114926,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ 
.multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112260,6 +114961,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .@"64bit", .sse, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, .dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, @@ -112293,6 +114995,76 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_remainder_signed_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixtfei" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mema(.dst0, .add_unaligned_size_sub_elem_size), ._, 
._ }, + .{ .@"0:", ._, .mov, .tmp2p, .tmp1p, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size), ._, ._ }, + .{ ._, ._, .lea, .tmp4p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .leaa(.tmp1, .sub_dst0_elem_size), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .dst_constraints = .{ .{ .scalar_remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__fixunstfei" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mema(.dst0, .add_unaligned_size_sub_elem_size), ._, ._ }, + .{ .@"0:", ._, .mov, .tmp2p, .tmp1p, ._, ._ }, + .{ ._, ._, .mov, .tmp3d, .sa(.dst0, .add_bit_size), ._, ._ }, + .{ ._, ._, .lea, .tmp4p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp5d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .leaa(.tmp1, .sub_dst0_elem_size), ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, } }) catch |err| 
switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f}", .{ @tagName(air_tag), @@ -139664,6 +142436,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -139695,6 +142468,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -139726,6 +142500,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -139756,6 +142531,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr 
= .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ 
._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, } }, .Max => comptime &.{ .{ .required_features = .{ .avx, null, null, null }, @@ -149792,6 +152669,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, 
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -149823,6 +152701,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -149854,6 +152733,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -149884,6 +152764,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, 
._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + 
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, } }, .Add => comptime &.{ .{ .required_features = .{ .avx, null, null, null }, @@ -154411,6 +157393,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -154442,6 +157425,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, 
.src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -154473,6 +157457,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -154503,6 +157488,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, 
.si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, 
.kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, } }, .Mul => comptime &.{ .{ .required_features = .{ .avx, null, null, null }, @@ -157989,6 +161076,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -158020,6 +161108,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -158051,6 +161140,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ 
.{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -158081,6 +161171,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment 
= .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .isize, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + 
.{ ._, ._, .mov, .tmp0p, .sia(16, .src0, .sub_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src0), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memia(.src0, .tmp0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .add, .tmp0p, .si(16), ._, ._ }, + .{ ._, ._nc, .j, .@"0b", ._, ._, ._ }, + } }, } }, }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s}.{s} {f} {f}", .{ @@ -159711,6 +162903,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -159742,6 +162935,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -159773,6 +162967,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -159803,6 +162998,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = 
.{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + 
.unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fminq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, 
.sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, } }, .Max => comptime &.{ .{ .required_features = .{ .f16c, null, null, null }, @@ -161403,6 +164700,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_p, .st, .dst0t, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -161434,6 +164732,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -161465,6 +164764,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -161495,6 +164795,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 
0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, 
.lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaxq" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, } }, .Add => comptime &.{ .{ .required_features = .{ .f16c, .fast_hops, null, null }, @@ -163701,6 +167103,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, 
.dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -163732,6 +167135,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -163763,6 +167167,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -163793,6 +167198,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, 
.caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + 
.dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__addtf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, } }, .Mul => comptime &.{ .{ .required_features = .{ .f16c, null, null, null }, @@ -165283,6 +168790,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, .f_cw, .ld, .tmp1w, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -165314,6 +168822,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, 
.dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -165345,6 +168854,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .dst_constraints = .{ .{ .float = .xword }, .any }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, @@ -165375,6 +168885,108 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, .v_dqa, .mov, 
.lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._dqa, .mov, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .dst_constraints = .{ .{ .float = .xword }, .any }, + .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, .any, .any }, + .patterns = &.{ + .{ .src = .{ .to_mem, .none, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = 
.{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "__multf3" } }, + .{ .type = .f128, .kind = .mem }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-32, .src0, .add_unaligned_size), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .memad(.src0, .add_unaligned_size, -16), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp2p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.tmp4), ._, ._ }, + .{ ._, ._ps, .mova, .lea(.tmp1x), .dst0x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._nb, .j, .@"0b", ._, ._, ._ }, + } }, } }, }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s}.{s} {f} {f}", .{ @@ -169007,6 +172619,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .scalar_float = .{ .of = .xword, .is = .xword } }, @@ -169040,6 +172653,40 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = 
.usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp0p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp1p, .mem(.src1), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src2), ._, ._ }, + .{ ._, ._, .call, .tmp3d, ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .avx, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -169076,6 +172723,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse2, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -169112,6 +172760,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, }, .{ + .required_cc_abi = .sysv64, .required_features = .{ .sse, null, null, null }, .src_constraints = .{ .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, @@ -169147,6 +172796,117 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .avx, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ 
.src = .{ .to_mem, .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .memi(.src2, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, .v_dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse2, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ 
.extern_func = "fmaq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .memi(.src2, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, ._dqa, .mov, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, + }, .{ + .required_cc_abi = .win64, + .required_features = .{ .sse, null, null, null }, + .src_constraints = .{ + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + .{ .multiple_scalar_float = .{ .of = .xword, .is = .xword } }, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .to_mem } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .u32, .kind = .{ .rc = .general_purpose } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 1, .at = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .after = 2, .at = 2 } } }, + .{ .type = .usize, .kind = .{ .extern_func = "fmaq" } }, + .{ .type = .f128, .kind = .{ .ret_sse = .{ .cc = .ccc, .after = 0, .at = 0 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .mov, .tmp0d, .sia(-16, .src0, .add_unaligned_size), ._, ._ }, + .{ .@"0:", ._, .lea, .tmp1p, .memi(.src0, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, 
.memi(.src1, .tmp0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .memi(.src2, .tmp0), ._, ._ }, + .{ ._, ._, .call, .tmp4d, ._, ._, ._ }, + .{ ._, ._ps, .mova, .memi(.dst0x, .tmp0), .tmp5x, ._, ._ }, + .{ ._, ._, .sub, .tmp0d, .si(16), ._, ._ }, + .{ ._, ._ae, .j, .@"0b", ._, ._, ._ }, + } }, } }) catch |err| switch (err) { error.SelectFailed => return cg.fail("failed to select {s} {f} {f} {f} {f}", .{ @tagName(air_tag), @@ -170541,4887 +174301,78 @@ fn copyToRegisterWithInstTracking( return MCValue{ .register = reg }; } -fn airAlloc(self: *CodeGen, inst: Air.Inst.Index) !void { - const result = MCValue{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } }; - return self.finishAir(inst, result, .{ .none, .none, .none }); -} - -fn airRetPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const result: MCValue = switch (self.ret_mcv.long) { - else => unreachable, - .none => .{ .lea_frame = .{ .index = try self.allocMemPtr(inst) } }, - .load_frame => .{ .register_offset = .{ - .reg = (try self.copyToRegisterWithInstTracking( - inst, - self.typeOfIndex(inst), - self.ret_mcv.long, - )).register, - .off = self.ret_mcv.short.indirect.off, - } }, - }; - return self.finishAir(inst, result, .{ .none, .none, .none }); -} - -fn airFptrunc(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const dst_ty = self.typeOfIndex(inst); - const dst_bits = dst_ty.floatBits(self.target); - const src_ty = self.typeOf(ty_op.operand); - const src_bits = src_ty.floatBits(self.target); - - const result = result: { - if (switch (dst_bits) { - 16 => switch (src_bits) { - 32 => !self.hasFeature(.f16c), - 64, 80, 128 => true, - else => unreachable, - }, - 32 => switch (src_bits) { - 64 => false, - 80, 128 => true, - else => unreachable, - }, - 64 => switch (src_bits) { - 80, 128 => true, - else => unreachable, - }, - 80 => switch (src_bits) { - 128 => true, - else => unreachable, - }, - else => unreachable, - }) { - var 
sym_buf: ["__trunc?f?f2".len]u8 = undefined; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = self.floatCompilerRtAbiType(dst_ty, src_ty).toIntern(), - .param_types = &.{self.floatCompilerRtAbiType(src_ty, dst_ty).toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "__trunc{c}f{c}f2", .{ - floatCompilerRtAbiName(src_bits), - floatCompilerRtAbiName(dst_bits), - }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); - } - - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?.to128(); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - if (dst_bits == 16) { - assert(self.hasFeature(.f16c)); - switch (src_bits) { - 32 => { - const mat_src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - mat_src_reg.to128(), - bits.RoundMode.imm(.{}), - ); - }, - else => unreachable, - } - } else { - assert(src_bits == 64 and dst_bits == 32); - if (self.hasFeature(.avx)) if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( - .{ .v_ss, .cvtsd2 }, - dst_reg, - dst_reg, - try src_mcv.mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegisterRegister( - .{ .v_ss, .cvtsd2 }, - dst_reg, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv)).to128(), - ) else if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ ._ss, .cvtsd2 }, - dst_reg, - try src_mcv.mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._ss, .cvtsd2 }, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(src_ty, src_mcv)).to128(), - ); - } - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airFpext(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const dst_ty = self.typeOfIndex(inst); - const dst_scalar_ty = dst_ty.scalarType(zcu); - const dst_bits = dst_scalar_ty.floatBits(self.target); - const src_ty = self.typeOf(ty_op.operand); - const src_scalar_ty = src_ty.scalarType(zcu); - const src_bits = src_scalar_ty.floatBits(self.target); - - const result = result: { - if (switch (src_bits) { - 16 => switch (dst_bits) { - 32, 64 => !self.hasFeature(.f16c), - 80, 128 => true, - else => unreachable, - }, - 32 => switch (dst_bits) { - 64 => false, - 80, 128 => true, - else => unreachable, - }, - 64 => switch (dst_bits) { - 80, 128 => true, - else => unreachable, - }, - 80 => switch (dst_bits) { - 128 => true, - else => unreachable, - }, - else => unreachable, - }) { - if (dst_ty.isVector(zcu)) break :result null; - var sym_buf: ["__extend?f?f2".len]u8 = undefined; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = self.floatCompilerRtAbiType(dst_scalar_ty, src_scalar_ty).toIntern(), - .param_types = &.{self.floatCompilerRtAbiType(src_scalar_ty, dst_scalar_ty).toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "__extend{c}f{c}f2", .{ - floatCompilerRtAbiName(src_bits), - floatCompilerRtAbiName(dst_bits), - }) catch unreachable, - } }, &.{src_scalar_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); - } - - const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?; - const 
dst_alias = registerAlias(dst_reg, @intCast(@max(dst_ty.abiSize(zcu), 16))); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const vec_len = if (dst_ty.isVector(zcu)) dst_ty.vectorLen(zcu) else 1; - if (src_bits == 16) { - assert(self.hasFeature(.f16c)); - const mat_src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister( - .{ .v_ps, .cvtph2 }, - dst_alias, - registerAlias(mat_src_reg, src_abi_size), - ); - switch (dst_bits) { - 32 => {}, - 64 => try self.asmRegisterRegisterRegister( - .{ .v_sd, .cvtss2 }, - dst_alias, - dst_alias, - dst_alias, - ), - else => unreachable, - } - } else { - assert(src_bits == 32 and dst_bits == 64); - if (self.hasFeature(.avx)) switch (vec_len) { - 1 => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( - .{ .v_sd, .cvtss2 }, - dst_alias, - dst_alias, - try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), - ) else try self.asmRegisterRegisterRegister( - .{ .v_sd, .cvtss2 }, - dst_alias, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), - ), - 2...4 => if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_pd, .cvtps2 }, - dst_alias, - try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), - ) else try self.asmRegisterRegister( - .{ .v_pd, .cvtps2 }, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), - ), - else => break :result null, - } else if (src_mcv.isBase()) try self.asmRegisterMemory( - switch (vec_len) { - 1 => .{ ._sd, .cvtss2 }, - 2 => .{ ._pd, .cvtps2 }, - else => break :result null, - }, - dst_alias, - try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), - ) else try self.asmRegisterRegister( - switch (vec_len) { - 1 => .{ ._sd, .cvtss2 }, - 2 => .{ ._pd, .cvtps2 }, - else => break :result null, - }, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), - ); - } - break :result dst_mcv; - } orelse return self.fail("TODO implement airFpext from {f} to {f}", .{ - src_ty.fmt(pt), dst_ty.fmt(pt), - }); - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airIntCast(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const src_ty = self.typeOf(ty_op.operand); - const dst_ty = self.typeOfIndex(inst); - - const result = @as(?MCValue, result: { - const src_abi_size: u31 = @intCast(src_ty.abiSize(zcu)); - const dst_abi_size: u31 = @intCast(dst_ty.abiSize(zcu)); - - const src_int_info = src_ty.intInfo(zcu); - const dst_int_info = dst_ty.intInfo(zcu); - const extend = switch (src_int_info.signedness) { - .signed => dst_int_info, - .unsigned => src_int_info, - }.signedness; - - const src_mcv = try self.resolveInst(ty_op.operand); - if (dst_ty.isVector(zcu)) { - const max_abi_size = @max(dst_abi_size, src_abi_size); - const has_avx = self.hasFeature(.avx); - - const dst_elem_abi_size = dst_ty.childType(zcu).abiSize(zcu); - const src_elem_abi_size = src_ty.childType(zcu).abiSize(zcu); - switch (std.math.order(dst_elem_abi_size, src_elem_abi_size)) { - .lt => { - if (max_abi_size > self.vectorSize(.int)) break :result null; - const mir_tag: 
Mir.Inst.FixedTag = switch (dst_elem_abi_size) { - else => break :result null, - 1 => switch (src_elem_abi_size) { - else => break :result null, - 2 => switch (dst_int_info.signedness) { - .signed => if (has_avx) .{ .vp_b, .ackssw } else .{ .p_b, .ackssw }, - .unsigned => if (has_avx) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, - }, - }, - 2 => switch (src_elem_abi_size) { - else => break :result null, - 4 => switch (dst_int_info.signedness) { - .signed => if (has_avx) .{ .vp_w, .ackssd } else .{ .p_w, .ackssd }, - .unsigned => if (has_avx) - .{ .vp_w, .ackusd } - else if (self.hasFeature(.sse4_1)) - .{ .p_w, .ackusd } - else - break :result null, - }, - }, - }; - - const dst_mcv: MCValue = if (src_mcv.isRegister() and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else if (has_avx and src_mcv.isRegister()) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, dst_abi_size); - - if (has_avx) try self.asmRegisterRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - dst_reg, src_abi_size), - dst_alias, - ) else try self.asmRegisterRegister( - mir_tag, - dst_alias, - dst_alias, - ); - break :result dst_mcv; - }, - .eq => if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - break :result src_mcv - else { - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); - break :result dst_mcv; - }, - .gt => if (self.hasFeature(.sse4_1)) { - if (max_abi_size > self.vectorSize(.int)) break :result null; - const mir_tag: Mir.Inst.FixedTag = .{ switch (dst_elem_abi_size) { - else => break :result null, - 2 => if (has_avx) .vp_w else .p_w, - 4 => if (has_avx) .vp_d else .p_d, - 8 => if (has_avx) .vp_q else .p_q, - }, switch (src_elem_abi_size) { - else => break :result null, - 1 => switch (extend) { - .signed => .movsxb, - .unsigned => .movzxb, - }, - 2 => switch (extend) { - .signed => .movsxw, - .unsigned => .movzxw, - }, - 4 => switch (extend) { - .signed => .movsxd, - .unsigned => .movzxd, - }, - } }; - - const dst_mcv: MCValue = if (src_mcv.isRegister() and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) }; - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, dst_abi_size); - - if (src_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_alias, - try src_mcv.mem(self, .{ .size = self.memSize(src_ty) }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(src_ty, src_mcv), src_abi_size), - ); - break :result dst_mcv; - } else { - const mir_tag: Mir.Inst.FixedTag = switch (dst_elem_abi_size) { - else => break :result null, - 2 => switch (src_elem_abi_size) { - else => break :result null, - 1 => .{ .p_, .unpcklbw }, - }, - 4 => switch (src_elem_abi_size) { - else => break :result null, - 2 => .{ .p_, .unpcklwd }, - }, - 8 => switch (src_elem_abi_size) { - else => break :result null, - 2 => .{ .p_, .unpckldq }, - }, - }; - - const dst_mcv: MCValue = if (src_mcv.isRegister() and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?; - - const ext_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); - const ext_alias = registerAlias(ext_reg, src_abi_size); - const ext_lock = self.register_manager.lockRegAssumeUnused(ext_reg); - defer self.register_manager.unlockReg(ext_lock); - - try self.asmRegisterRegister(.{ .p_, .xor }, ext_alias, ext_alias); - switch (extend) { - .signed => try self.asmRegisterRegister( - .{ switch (src_elem_abi_size) { - else => unreachable, - 1 => .p_b, - 2 => .p_w, - 4 => .p_d, - }, .cmpgt }, - ext_alias, - registerAlias(dst_reg, src_abi_size), - ), - .unsigned => {}, - } - try self.asmRegisterRegister( - mir_tag, - registerAlias(dst_reg, dst_abi_size), - registerAlias(ext_reg, dst_abi_size), - ); - break :result dst_mcv; - }, - } - @compileError("unreachable"); - } - - const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty; - - const src_storage_bits: u16 = switch (src_mcv) { - .register, .register_offset => 64, - .register_pair => 128, - .load_frame => |frame_addr| @intCast(self.getFrameAddrSize(frame_addr) * 8), - else => src_int_info.bits, - }; - - const dst_mcv = if ((if (src_mcv.getReg()) |src_reg| src_reg.isClass(.general_purpose) else src_abi_size > 8) and - dst_int_info.bits <= src_storage_bits 
and - std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable == - std.math.divCeil(u32, src_storage_bits, 64) catch unreachable and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(min_ty, dst_mcv, src_mcv, .{}); - break :dst dst_mcv; - }; - - if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister()) - .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) } - else - dst_mcv; - - if (dst_mcv.isRegister()) { - try self.truncateRegister(src_ty, dst_mcv.getReg().?); - break :result .{ .register = registerAlias(dst_mcv.getReg().?, dst_abi_size) }; - } - - const src_limbs_len = std.math.divCeil(u31, src_abi_size, 8) catch unreachable; - const dst_limbs_len = @divExact(dst_abi_size, 8); - - const high_mcv: MCValue = if (dst_mcv.isBase()) - dst_mcv.address().offset((src_limbs_len - 1) * 8).deref() - else - .{ .register = dst_mcv.register_pair[1] }; - const high_reg = if (high_mcv.isRegister()) - high_mcv.getReg().? 
- else - try self.copyToTmpRegister(switch (src_int_info.signedness) { - .signed => .isize, - .unsigned => .usize, - }, high_mcv); - const high_lock = self.register_manager.lockRegAssumeUnused(high_reg); - defer self.register_manager.unlockReg(high_lock); - - const high_bits = src_int_info.bits % 64; - if (high_bits > 0) { - try self.truncateRegister(src_ty, high_reg); - const high_ty: Type = if (dst_int_info.bits >= 64) .usize else dst_ty; - try self.genCopy(high_ty, high_mcv, .{ .register = high_reg }, .{}); - } - - if (dst_limbs_len > src_limbs_len) try self.genInlineMemset( - dst_mcv.address().offset(src_limbs_len * 8), - switch (extend) { - .signed => extend: { - const extend_mcv = MCValue{ .register = high_reg }; - try self.genShiftBinOpMir(.{ ._r, .sa }, .isize, extend_mcv, .u8, .{ .immediate = 63 }); - break :extend extend_mcv; - }, - .unsigned => .{ .immediate = 0 }, - }, - .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 }, - .{}, - ); - - break :result dst_mcv; - }) orelse return self.fail("TODO implement airIntCast from {f} to {f}", .{ - src_ty.fmt(pt), dst_ty.fmt(pt), - }); - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airTrunc(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const dst_ty = self.typeOfIndex(inst); - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - const src_ty = self.typeOf(ty_op.operand); - const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); - - const result = result: { - const src_mcv = try self.resolveInst(ty_op.operand); - const src_lock = - if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_mcv = if (src_mcv.isRegister() and src_mcv.getReg().?.isClass(self.regClassForType(dst_ty)) and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - 
src_mcv - else if (dst_abi_size <= 8) - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv) - else if (dst_abi_size <= 16 and !dst_ty.isVector(zcu)) dst: { - const dst_regs = - try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp); - const dst_mcv: MCValue = .{ .register_pair = dst_regs }; - const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); - defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); - - try self.genCopy(dst_ty, dst_mcv, src_mcv, .{}); - break :dst dst_mcv; - } else dst: { - const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, inst, true); - try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); - break :dst dst_mcv; - }; - - if (dst_ty.zigTypeTag(zcu) == .vector) { - assert(src_ty.zigTypeTag(zcu) == .vector and dst_ty.vectorLen(zcu) == src_ty.vectorLen(zcu)); - const dst_elem_ty = dst_ty.childType(zcu); - const dst_elem_abi_size: u32 = @intCast(dst_elem_ty.abiSize(zcu)); - const src_elem_ty = src_ty.childType(zcu); - const src_elem_abi_size: u32 = @intCast(src_elem_ty.abiSize(zcu)); - - const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_elem_abi_size) { - 1 => switch (src_elem_abi_size) { - 2 => switch (dst_ty.vectorLen(zcu)) { - 1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, - 9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null, - else => null, - }, - else => null, - }, - 2 => switch (src_elem_abi_size) { - 4 => switch (dst_ty.vectorLen(zcu)) { - 1...4 => if (self.hasFeature(.avx)) - .{ .vp_w, .ackusd } - else if (self.hasFeature(.sse4_1)) - .{ .p_w, .ackusd } - else - null, - 5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null, - else => null, - }, - else => null, - }, - else => null, - }) orelse return self.fail("TODO implement airTrunc for {f}", .{dst_ty.fmt(pt)}); - - const dst_info = dst_elem_ty.intInfo(zcu); - const src_info = src_elem_ty.intInfo(zcu); - - const mask_val = try pt.intValue(src_elem_ty, @as(u64, 
std.math.maxInt(u64)) >> @intCast(64 - dst_info.bits)); - - const splat_ty = try pt.vectorType(.{ - .len = @intCast(@divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)), - .child = src_elem_ty.ip_index, - }); - const splat_abi_size: u32 = @intCast(splat_ty.abiSize(zcu)); - - const splat_val = try pt.aggregateSplatValue(splat_ty, mask_val); - - const splat_mcv = try self.lowerValue(splat_val); - const splat_addr_mcv: MCValue = switch (splat_mcv) { - .memory, .indirect, .load_frame => splat_mcv.address(), - else => .{ .register = try self.copyToTmpRegister(.usize, splat_mcv.address()) }, - }; - - const dst_reg = dst_mcv.getReg().?; - const dst_alias = registerAlias(dst_reg, src_abi_size); - if (self.hasFeature(.avx)) { - try self.asmRegisterRegisterMemory( - .{ .vp_, .@"and" }, - dst_alias, - dst_alias, - try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }), - ); - if (src_abi_size > 16) { - const temp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.sse); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer self.register_manager.unlockReg(temp_lock); - - try self.asmRegisterRegisterImmediate( - .{ if (self.hasFeature(.avx2)) .v_i128 else .v_f128, .extract }, - registerAlias(temp_reg, dst_abi_size), - dst_alias, - .u(1), - ); - try self.asmRegisterRegisterRegister( - mir_tag, - registerAlias(dst_reg, dst_abi_size), - registerAlias(dst_reg, dst_abi_size), - registerAlias(temp_reg, dst_abi_size), - ); - } else try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, dst_alias); - } else { - try self.asmRegisterMemory( - .{ .p_, .@"and" }, - dst_alias, - try splat_addr_mcv.deref().mem(self, .{ .size = .fromSize(splat_abi_size) }), - ); - try self.asmRegisterRegister(mir_tag, dst_alias, dst_alias); - } - break :result dst_mcv; - } - - // when truncating a `u16` to `u5`, for example, those top 3 bits in the result - // have to be removed. 
this only happens if the dst if not a power-of-two size. - if (dst_abi_size <= 8) { - if (self.regExtraBits(dst_ty) > 0) { - try self.truncateRegister(dst_ty, dst_mcv.register.to64()); - } - } else if (dst_abi_size <= 16) { - const dst_info = dst_ty.intInfo(zcu); - const high_ty = try pt.intType(dst_info.signedness, dst_info.bits - 64); - if (self.regExtraBits(high_ty) > 0) { - try self.truncateRegister(high_ty, dst_mcv.register_pair[1].to64()); - } - } - - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airSlice(self: *CodeGen, inst: Air.Inst.Index) !void { - const zcu = self.pt.zcu; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - - const slice_ty = self.typeOfIndex(inst); - const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu)); - - const ptr_ty = self.typeOf(bin_op.lhs); - try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, .{ .air_ref = bin_op.lhs }, .{}); - - const len_ty = self.typeOf(bin_op.rhs); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(ptr_ty.abiSize(zcu)), - len_ty, - .{ .air_ref = bin_op.rhs }, - .{}, - ); - - const result = MCValue{ .load_frame = .{ .index = frame_index } }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airUnOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const dst_mcv = try self.genUnOp(inst, tag, ty_op.operand); - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn airBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs); - - const dst_ty = self.typeOfIndex(inst); 
- if (dst_ty.isAbiInt(zcu)) { - const abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - const bit_size: u32 = @intCast(dst_ty.bitSize(zcu)); - if (abi_size * 8 > bit_size) { - const dst_lock = switch (dst_mcv) { - .register => |dst_reg| self.register_manager.lockRegAssumeUnused(dst_reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - if (dst_mcv.isRegister()) { - try self.truncateRegister(dst_ty, dst_mcv.getReg().?); - } else { - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const hi_ty = try pt.intType(.unsigned, @intCast((dst_ty.bitSize(zcu) - 1) % 64 + 1)); - const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref(); - try self.genSetReg(tmp_reg, hi_ty, hi_mcv, .{}); - try self.truncateRegister(dst_ty, tmp_reg); - try self.genCopy(hi_ty, hi_mcv, .{ .register = tmp_reg }, .{}); - } - } - } - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airPtrArithmetic(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const dst_mcv = try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs); - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn activeIntBits(self: *CodeGen, dst_air: Air.Inst.Ref) u16 { - const pt = self.pt; - const zcu = pt.zcu; - const air_tag = self.air.instructions.items(.tag); - const air_data = self.air.instructions.items(.data); - - const dst_ty = self.typeOf(dst_air); - const dst_info = dst_ty.intInfo(zcu); - if (dst_air.toIndex()) |inst| { - switch (air_tag[@intFromEnum(inst)]) { - .intcast => { - const src_ty = self.typeOf(air_data[@intFromEnum(inst)].ty_op.operand); - const src_info = src_ty.intInfo(zcu); - 
return @min(switch (src_info.signedness) { - .signed => switch (dst_info.signedness) { - .signed => src_info.bits, - .unsigned => src_info.bits - 1, - }, - .unsigned => switch (dst_info.signedness) { - .signed => src_info.bits + 1, - .unsigned => src_info.bits, - }, - }, dst_info.bits); - }, - else => {}, - } - } else if (dst_air.toInterned()) |ip_index| { - var space: Value.BigIntSpace = undefined; - const src_int = Value.fromInterned(ip_index).toBigInt(&space, zcu); - return @as(u16, @intCast(src_int.bitCountTwosComp())) + - @intFromBool(src_int.positive and dst_info.signedness == .signed); - } - return dst_info.bits; -} - -fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const result = result: { - const dst_ty = self.typeOfIndex(inst); - switch (dst_ty.zigTypeTag(zcu)) { - .float, .vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs), - else => {}, - } - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - - const dst_info = dst_ty.intInfo(zcu); - const src_ty = try pt.intType(dst_info.signedness, switch (tag) { - else => unreachable, - .mul, .mul_wrap => @max( - self.activeIntBits(bin_op.lhs), - self.activeIntBits(bin_op.rhs), - dst_info.bits / 2, - ), - .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits, - }); - const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); - - if (dst_abi_size == 16 and src_abi_size == 16) switch (tag) { - else => unreachable, - .mul, .mul_wrap => {}, - .div_trunc, .div_floor, .div_exact, .rem, .mod => { - const signed = dst_ty.isSignedInt(zcu); - var sym_buf: ["__udiv?i3".len]u8 = undefined; - const signed_div_floor_state: struct { - frame_index: FrameIndex, - state: State, - reloc: Mir.Inst.Index, - } = if (signed and tag == .div_floor) state: { - const frame_index = try self.allocFrameIndex(.initType(.usize, zcu)); - try 
self.asmMemoryImmediate( - .{ ._, .mov }, - .{ .base = .{ .frame = frame_index }, .mod = .{ .rm = .{ .size = .qword } } }, - .u(0), - ); - - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const mat_lhs_mcv = switch (lhs_mcv) { - .load_nav, .load_uav, .load_lazy_sym => mat_lhs_mcv: { - // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); - break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => lhs_mcv, - }; - const mat_lhs_lock = switch (mat_lhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, - }; - defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .mov }, - tmp_reg, - try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .mov }, - tmp_reg, - mat_lhs_mcv.register_pair[1], - ); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const mat_rhs_mcv = switch (rhs_mcv) { - .load_nav, .load_uav, .load_lazy_sym => mat_rhs_mcv: { - // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); - break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => rhs_mcv, - }; - const mat_rhs_lock = switch (mat_rhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, - }; - defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .xor }, - tmp_reg, - try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .xor }, - tmp_reg, - mat_rhs_mcv.register_pair[1], - ); - const state = try self.saveState(); - const reloc = try self.asmJccReloc(.ns, undefined); - - break :state .{ .frame_index = frame_index, .state = state, .reloc = reloc }; - } else undefined; - const call_mcv = try self.genCall( - .{ .extern_func = .{ - .return_type = dst_ty.toIntern(), - .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() }, - .sym = std.fmt.bufPrint(&sym_buf, "__{s}{s}{c}i3", .{ - if (signed) "" else "u", - switch (tag) { - .div_trunc, .div_exact => "div", - .div_floor => if (signed) "mod" else "div", - .rem, .mod => "mod", - else => unreachable, - }, - intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))), - }) catch unreachable, - } }, - &.{ src_ty, src_ty }, - &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, - .{}, - ); - break :result if (signed) switch (tag) { - .div_floor => { - try self.asmRegisterRegister( - .{ ._, .@"or" }, - call_mcv.register_pair[0], - call_mcv.register_pair[1], - ); - try self.asmSetccMemory(.nz, .{ - .base = .{ .frame = signed_div_floor_state.frame_index }, - .mod = .{ .rm = .{ .size = .byte } }, - }); - try self.restoreState(signed_div_floor_state.state, &.{}, .{ - .emit_instructions = true, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); - self.performReloc(signed_div_floor_state.reloc); - const dst_mcv = try self.genCall( - .{ 
.extern_func = .{ - .return_type = dst_ty.toIntern(), - .param_types = &.{ src_ty.toIntern(), src_ty.toIntern() }, - .sym = std.fmt.bufPrint(&sym_buf, "__div{c}i3", .{ - intCompilerRtAbiName(@intCast(dst_ty.bitSize(zcu))), - }) catch unreachable, - } }, - &.{ src_ty, src_ty }, - &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, - .{}, - ); - try self.asmRegisterMemory( - .{ ._, .sub }, - dst_mcv.register_pair[0], - .{ - .base = .{ .frame = signed_div_floor_state.frame_index }, - .mod = .{ .rm = .{ .size = .qword } }, - }, - ); - try self.asmRegisterImmediate(.{ ._, .sbb }, dst_mcv.register_pair[1], .u(0)); - try self.freeValue( - .{ .load_frame = .{ .index = signed_div_floor_state.frame_index } }, - ); - break :result dst_mcv; - }, - .mod => { - const dst_regs = call_mcv.register_pair; - const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); - defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); - - const tmp_regs = - try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); - const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); - defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const mat_rhs_mcv = switch (rhs_mcv) { - .load_nav, .load_uav, .load_lazy_sym => mat_rhs_mcv: { - // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); - break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => rhs_mcv, - }; - const mat_rhs_lock = switch (mat_rhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, - }; - defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - - for (tmp_regs, dst_regs) |tmp_reg, dst_reg| - try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_reg); - if (mat_rhs_mcv.isBase()) { - try self.asmRegisterMemory( - .{ ._, .add }, - tmp_regs[0], - try mat_rhs_mcv.mem(self, .{ .size = .qword }), - ); - try self.asmRegisterMemory( - .{ ._, .adc }, - tmp_regs[1], - try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ); - } else for ( - [_]Mir.Inst.Tag{ .add, .adc }, - tmp_regs, - mat_rhs_mcv.register_pair, - ) |op, tmp_reg, rhs_reg| - try self.asmRegisterRegister(.{ ._, op }, tmp_reg, rhs_reg); - try self.asmRegisterRegister(.{ ._, .@"test" }, dst_regs[1], dst_regs[1]); - for (dst_regs, tmp_regs) |dst_reg, tmp_reg| - try self.asmCmovccRegisterRegister(.s, dst_reg, tmp_reg); - break :result call_mcv; - }, - else => call_mcv, - } else call_mcv; - }, - }; - - try self.spillEflagsIfOccupied(); - try self.spillRegisters(&.{ .rax, .rcx, .rdx }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const rhs_mcv = try self.resolveInst(bin_op.rhs); - break :result try self.genMulDivBinOp(tag, inst, dst_ty, src_ty, lhs_mcv, rhs_mcv); - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airAddSat(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const ty = self.typeOf(bin_op.lhs); - if (ty.zigTypeTag(zcu) == 
.vector or ty.abiSize(zcu) > 8) return self.fail( - "TODO implement airAddSat for {f}", - .{ty.fmt(pt)}, - ); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) - lhs_mcv - else - try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv); - const dst_reg = dst_mcv.register; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const rhs_lock = switch (rhs_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const limit_mcv = MCValue{ .register = limit_reg }; - const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg); - defer self.register_manager.unlockReg(limit_lock); - - const reg_bits = self.regBitSize(ty); - const reg_extra_bits = self.regExtraBits(ty); - const cc: Condition = if (ty.isSignedInt(zcu)) cc: { - if (reg_extra_bits > 0) { - try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); - } - try self.genSetReg(limit_reg, ty, dst_mcv, .{}); - try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ - .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1, - }); - if (reg_extra_bits > 0) { - const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv); - const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg }; - const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg); - defer self.register_manager.unlockReg(shifted_rhs_lock); - - try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits }); - try self.genBinOpMir(.{ ._, 
.add }, ty, dst_mcv, shifted_rhs_mcv); - } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); - break :cc .o; - } else cc: { - try self.genSetReg(limit_reg, ty, .{ - .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - ty.bitSize(zcu)), - }, .{}); - - try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); - if (reg_extra_bits > 0) { - try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, limit_mcv); - break :cc .a; - } - break :cc .c; - }; - - const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2); - try self.asmCmovccRegisterRegister( - cc, - registerAlias(dst_reg, cmov_abi_size), - registerAlias(limit_reg, cmov_abi_size), - ); - - if (reg_extra_bits > 0 and ty.isSignedInt(zcu)) - try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); - - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airSubSat(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const ty = self.typeOf(bin_op.lhs); - if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail( - "TODO implement airSubSat for {f}", - .{ty.fmt(pt)}, - ); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) - lhs_mcv - else - try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv); - const dst_reg = dst_mcv.register; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const rhs_lock = switch (rhs_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - const limit_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const 
limit_mcv = MCValue{ .register = limit_reg }; - const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg); - defer self.register_manager.unlockReg(limit_lock); - - const reg_bits = self.regBitSize(ty); - const reg_extra_bits = self.regExtraBits(ty); - const cc: Condition = if (ty.isSignedInt(zcu)) cc: { - if (reg_extra_bits > 0) { - try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); - } - try self.genSetReg(limit_reg, ty, dst_mcv, .{}); - try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ - .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1, - }); - if (reg_extra_bits > 0) { - const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv); - const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg }; - const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg); - defer self.register_manager.unlockReg(shifted_rhs_lock); - - try self.genShiftBinOpMir(.{ ._l, .sa }, ty, shifted_rhs_mcv, .u8, .{ .immediate = reg_extra_bits }); - try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv); - } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); - break :cc .o; - } else cc: { - try self.genSetReg(limit_reg, ty, .{ .immediate = 0 }, .{}); - try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); - break :cc .c; - }; - - const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2); - try self.asmCmovccRegisterRegister( - cc, - registerAlias(dst_reg, cmov_abi_size), - registerAlias(limit_reg, cmov_abi_size), - ); - - if (reg_extra_bits > 0 and ty.isSignedInt(zcu)) - try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .u8, .{ .immediate = reg_extra_bits }); - - return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airMulSat(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = 
self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const ty = self.typeOf(bin_op.lhs); - - const result = result: { - if (ty.toIntern() == .i128_type) { - const ptr_c_int = try pt.singleMutPtrType(.c_int); - const overflow = try self.allocTempRegOrMem(.c_int, false); - - const dst_mcv = try self.genCall(.{ .extern_func = .{ - .return_type = .i128_type, - .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() }, - .sym = "__muloti4", - } }, &.{ .i128, .i128, ptr_c_int }, &.{ - .{ .air_ref = bin_op.lhs }, - .{ .air_ref = bin_op.rhs }, - overflow.address(), - }, .{}); - const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_mcv.register_pair); - defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); - - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const mat_lhs_mcv = switch (lhs_mcv) { - .load_nav, .load_uav, .load_lazy_sym => mat_lhs_mcv: { - // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); - break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => lhs_mcv, - }; - const mat_lhs_lock = switch (mat_lhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, - }; - defer if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .mov }, - tmp_reg, - try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .mov }, - tmp_reg, - mat_lhs_mcv.register_pair[1], - ); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const mat_rhs_mcv = switch (rhs_mcv) { - .load_nav, .load_uav, .load_lazy_sym => mat_rhs_mcv: { - // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); - break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => rhs_mcv, - }; - const mat_rhs_lock = switch (mat_rhs_mcv) { - .indirect => |reg_off| self.register_manager.lockReg(reg_off.reg), - else => null, - }; - defer if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .xor }, - tmp_reg, - try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .xor }, - tmp_reg, - mat_rhs_mcv.register_pair[1], - ); - - try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63)); - try self.asmRegister(.{ ._, .not }, tmp_reg); - try self.asmMemoryImmediate(.{ ._, .cmp }, try overflow.mem(self, .{ .size = .dword }), .s(0)); - try self.freeValue(overflow); - try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[0], tmp_reg); - try self.asmRegisterImmediate(.{ ._c, .bt }, tmp_reg, .u(63)); - try self.asmCmovccRegisterRegister(.ne, dst_mcv.register_pair[1], tmp_reg); - break :result dst_mcv; - } - - if (ty.zigTypeTag(zcu) == .vector or ty.abiSize(zcu) > 8) return self.fail( - "TODO implement airMulSat for {f}", - .{ty.fmt(pt)}, - ); - - try self.spillRegisters(&.{ .rax, .rcx, .rdx }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const lhs_lock = switch (lhs_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const rhs_lock = switch (rhs_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - const limit_reg 
= try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const limit_mcv = MCValue{ .register = limit_reg }; - const limit_lock = self.register_manager.lockRegAssumeUnused(limit_reg); - defer self.register_manager.unlockReg(limit_lock); - - const reg_bits = self.regBitSize(ty); - const cc: Condition = if (ty.isSignedInt(zcu)) cc: { - try self.genSetReg(limit_reg, ty, lhs_mcv, .{}); - try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv); - try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .u8, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ - .immediate = (@as(u64, 1) << @intCast(reg_bits - 1)) - 1, - }); - break :cc .o; - } else cc: { - try self.genSetReg(limit_reg, ty, .{ - .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - reg_bits), - }, .{}); - break :cc .c; - }; - - const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv); - const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2); - try self.asmCmovccRegisterRegister( - cc, - registerAlias(dst_mcv.register, cmov_abi_size), - registerAlias(limit_reg, cmov_abi_size), - ); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airAddSubWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const result: MCValue = result: { - const tag = self.air.instructions.items(.tag)[@intFromEnum(inst)]; - const ty = self.typeOf(bin_op.lhs); - switch (ty.zigTypeTag(zcu)) { - .vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}), - .int => { - try self.spillEflagsIfOccupied(); - try self.spillRegisters(&.{ .rcx, .rdi, .rsi }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi }); - defer for (reg_locks) 
|lock| self.register_manager.unlockReg(lock); - - const partial_mcv = try self.genBinOp(null, switch (tag) { - .add_with_overflow => .add, - .sub_with_overflow => .sub, - else => unreachable, - }, bin_op.lhs, bin_op.rhs); - const int_info = ty.intInfo(zcu); - const cc: Condition = switch (int_info.signedness) { - .unsigned => .c, - .signed => .o, - }; - - const tuple_ty = self.typeOfIndex(inst); - if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) { - switch (partial_mcv) { - .register => |reg| { - self.eflags_inst = inst; - break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; - }, - else => {}, - } - - const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - .u1, - .{ .eflags = cc }, - .{}, - ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(0, zcu)), - ty, - partial_mcv, - .{}, - ); - break :result .{ .load_frame = .{ .index = frame_index } }; - } - - const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); - try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - else => unreachable, - } - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airShlWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const result: MCValue = result: { - const lhs_ty = self.typeOf(bin_op.lhs); - const rhs_ty = self.typeOf(bin_op.rhs); - switch (lhs_ty.zigTypeTag(zcu)) { - .vector => return self.fail("TODO implement shl with overflow for Vector type", .{}), - .int => { - try self.spillEflagsIfOccupied(); - try self.spillRegisters(&.{ .rcx, .rdi, .rsi }); - 
const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rcx, .rdi, .rsi }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); - - const int_info = lhs_ty.intInfo(zcu); - - const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty); - const partial_lock = switch (partial_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (partial_lock) |lock| self.register_manager.unlockReg(lock); - - const tmp_mcv = try self.genShiftBinOp(.shr, null, partial_mcv, rhs, lhs_ty, rhs_ty); - const tmp_lock = switch (tmp_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock); - - try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs); - const cc = Condition.ne; - - const tuple_ty = self.typeOfIndex(inst); - if (int_info.bits >= 8 and std.math.isPowerOfTwo(int_info.bits)) { - switch (partial_mcv) { - .register => |reg| { - self.eflags_inst = inst; - break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; - }, - else => {}, - } - - const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - .{ .eflags = cc }, - .{}, - ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(0, zcu)), - tuple_ty.fieldType(0, zcu), - partial_mcv, - .{}, - ); - break :result .{ .load_frame = .{ .index = frame_index } }; - } - - const frame_index = - try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); - try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - else => unreachable, - } - }; - return 
self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn genSetFrameTruncatedOverflowCompare( - self: *CodeGen, - tuple_ty: Type, - frame_index: FrameIndex, - src_mcv: MCValue, - overflow_cc: ?Condition, -) !void { - const pt = self.pt; - const zcu = pt.zcu; - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - - const ty = tuple_ty.fieldType(0, zcu); - const ty_size = ty.abiSize(zcu); - const int_info = ty.intInfo(zcu); - - const hi_bits = (int_info.bits - 1) % 64 + 1; - const hi_ty = try pt.intType(int_info.signedness, hi_bits); - - const limb_bits: u16 = @intCast(if (int_info.bits <= 64) self.regBitSize(ty) else 64); - const limb_ty = try pt.intType(int_info.signedness, limb_bits); - - const rest_ty = try pt.intType(.unsigned, int_info.bits - hi_bits); - - const temp_regs = - try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp); - const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); - defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); - - const overflow_reg = temp_regs[0]; - if (overflow_cc) |cc| try self.asmSetccRegister(cc, overflow_reg.to8()); - - const scratch_reg = temp_regs[1]; - const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8; - const hi_limb_mcv = if (hi_limb_off > 0) - src_mcv.address().offset(int_info.bits / 64 * 8).deref() - else - src_mcv; - try self.genSetReg(scratch_reg, limb_ty, hi_limb_mcv, .{}); - try self.truncateRegister(hi_ty, scratch_reg); - try self.genBinOpMir(.{ ._, .cmp }, limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); - - const eq_reg = temp_regs[2]; - if (overflow_cc) |_| { - try self.asmSetccRegister(.ne, eq_reg.to8()); - try self.genBinOpMir(.{ ._, .@"or" }, .u8, .{ .register = overflow_reg }, .{ .register = eq_reg }); - } - try self.genSetMem( - .{ .frame = frame_index }, - 
@intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, - .{}, - ); - - const payload_off: i32 = @intCast(tuple_ty.structFieldOffset(0, zcu)); - if (hi_limb_off > 0) try self.genSetMem( - .{ .frame = frame_index }, - payload_off, - rest_ty, - src_mcv, - .{}, - ); - try self.genSetMem( - .{ .frame = frame_index }, - payload_off + hi_limb_off, - limb_ty, - .{ .register = scratch_reg }, - .{}, - ); - var ext_off: i32 = hi_limb_off + 8; - if (ext_off < ty_size) { - switch (int_info.signedness) { - .signed => try self.asmRegisterImmediate(.{ ._r, .sa }, scratch_reg.to64(), .s(63)), - .unsigned => try self.asmRegisterRegister(.{ ._, .xor }, scratch_reg.to32(), scratch_reg.to32()), - } - while (ext_off < ty_size) : (ext_off += 8) try self.genSetMem( - .{ .frame = frame_index }, - payload_off + ext_off, - limb_ty, - .{ .register = scratch_reg }, - .{}, - ); - } -} - -fn airMulWithOverflow(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const tuple_ty = self.typeOfIndex(inst); - const dst_ty = self.typeOf(bin_op.lhs); - const result: MCValue = switch (dst_ty.zigTypeTag(zcu)) { - .vector => return self.fail("TODO implement airMulWithOverflow for {f}", .{dst_ty.fmt(pt)}), - .int => result: { - const dst_info = dst_ty.intInfo(zcu); - if (dst_info.bits > 128 and dst_info.signedness == .unsigned) { - const slow_inc = self.hasFeature(.slow_incdec); - const abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - const limb_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; - - try self.spillRegisters(&.{ .rax, .rcx, .rdx }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rax, .rcx, .rdx }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - const 
dst_mcv = try self.allocRegOrMem(inst, false); - try self.genInlineMemset( - dst_mcv.address(), - .{ .immediate = 0 }, - .{ .immediate = tuple_ty.abiSize(zcu) }, - .{}, - ); - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const rhs_mcv = try self.resolveInst(bin_op.rhs); - - const temp_regs = - try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); - const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); - defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); - - try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32()); - - const outer_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterMemory(.{ ._, .mov }, temp_regs[1].to64(), .{ - .base = .{ .frame = rhs_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[0].to64(), - .scale = .@"8", - .disp = rhs_mcv.load_frame.off, - } }, - }); - try self.asmRegisterRegister(.{ ._, .@"test" }, temp_regs[1].to64(), temp_regs[1].to64()); - const skip_inner = try self.asmJccReloc(.z, undefined); - - try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[2].to32(), temp_regs[2].to32()); - try self.asmRegisterRegister(.{ ._, .mov }, temp_regs[3].to32(), temp_regs[0].to32()); - try self.asmRegisterRegister(.{ ._, .xor }, .ecx, .ecx); - try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx); - - const inner_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterImmediate(.{ ._r, .sh }, .cl, .u(1)); - try self.asmMemoryRegister(.{ ._, .adc }, .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[3].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off + - @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))), - } }, - }, .rdx); - try self.asmSetccRegister(.c, .cl); - - try self.asmRegisterMemory(.{ ._, .mov }, .rax, .{ - .base = .{ .frame = lhs_mcv.load_frame.index }, - .mod = .{ .rm = .{ - 
.size = .qword, - .index = temp_regs[2].to64(), - .scale = .@"8", - .disp = lhs_mcv.load_frame.off, - } }, - }); - try self.asmRegister(.{ ._, .mul }, temp_regs[1].to64()); - - try self.asmRegisterImmediate(.{ ._r, .sh }, .ch, .u(1)); - try self.asmMemoryRegister(.{ ._, .adc }, .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[3].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off + - @as(i32, @intCast(tuple_ty.structFieldOffset(0, zcu))), - } }, - }, .rax); - try self.asmSetccRegister(.c, .ch); - - if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[3].to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32()); - try self.asmRegister(.{ ._c, .in }, temp_regs[3].to32()); - } - try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[3].to32(), .u(limb_len)); - _ = try self.asmJccReloc(.b, inner_loop); - - try self.asmRegisterRegister(.{ ._, .@"or" }, .rdx, .rcx); - const overflow = try self.asmJccReloc(.nz, undefined); - const overflow_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[2].to32(), .u(limb_len)); - const no_overflow = try self.asmJccReloc(.nb, undefined); - if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[2].to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .in }, temp_regs[2].to32()); - } - try self.asmMemoryImmediate(.{ ._, .cmp }, .{ - .base = .{ .frame = lhs_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[2].to64(), - .scale = .@"8", - .disp = lhs_mcv.load_frame.off - 8, - } }, - }, .u(0)); - _ = try self.asmJccReloc(.z, overflow_loop); - self.performReloc(overflow); - try self.asmMemoryImmediate(.{ ._, .mov }, .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .byte, - .disp = 
dst_mcv.load_frame.off + - @as(i32, @intCast(tuple_ty.structFieldOffset(1, zcu))), - } }, - }, .u(1)); - self.performReloc(no_overflow); - - self.performReloc(skip_inner); - if (slow_inc) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32()); - } - try self.asmRegisterImmediate(.{ ._, .cmp }, temp_regs[0].to32(), .u(limb_len)); - _ = try self.asmJccReloc(.b, outer_loop); - - break :result dst_mcv; - } - - const lhs_active_bits = self.activeIntBits(bin_op.lhs); - const rhs_active_bits = self.activeIntBits(bin_op.rhs); - const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2); - const src_ty = try pt.intType(dst_info.signedness, src_bits); - if (src_bits > 64 and src_bits <= 128 and - dst_info.bits > 64 and dst_info.bits <= 128) switch (dst_info.signedness) { - .signed => { - const ptr_c_int = try pt.singleMutPtrType(.c_int); - const overflow = try self.allocTempRegOrMem(.c_int, false); - const result = try self.genCall(.{ .extern_func = .{ - .return_type = .i128_type, - .param_types = &.{ .i128_type, .i128_type, ptr_c_int.toIntern() }, - .sym = "__muloti4", - } }, &.{ .i128, .i128, ptr_c_int }, &.{ - .{ .air_ref = bin_op.lhs }, - .{ .air_ref = bin_op.rhs }, - overflow.address(), - }, .{}); - - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - @intCast(tuple_ty.structFieldOffset(0, zcu)), - tuple_ty.fieldType(0, zcu), - result, - .{}, - ); - try self.asmMemoryImmediate( - .{ ._, .cmp }, - try overflow.mem(self, .{ .size = self.memSize(.c_int) }), - .s(0), - ); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - .{ .eflags = .ne }, - .{}, - ); - try self.freeValue(overflow); - break :result dst_mcv; - }, - .unsigned => { - try self.spillEflagsIfOccupied(); - try self.spillRegisters(&.{ .rax, .rdx 
}); - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - const tmp_regs = - try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); - const tmp_locks = self.register_manager.lockRegsAssumeUnused(4, tmp_regs); - defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const mat_lhs_mcv = mat_lhs_mcv: switch (lhs_mcv) { - .register => |lhs_reg| switch (lhs_reg.class()) { - else => lhs_mcv, - .sse => { - const mat_lhs_mcv: MCValue = .{ - .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), - }; - try self.genCopy(dst_ty, mat_lhs_mcv, lhs_mcv, .{}); - break :mat_lhs_mcv mat_lhs_mcv; - }, - }, - .load_nav, .load_uav, .load_lazy_sym => { - // TODO clean this up! - const addr_reg = try self.copyToTmpRegister(.usize, lhs_mcv.address()); - break :mat_lhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => lhs_mcv, - }; - const mat_lhs_locks: [2]?RegisterLock = switch (mat_lhs_mcv) { - .register_pair => |mat_lhs_regs| self.register_manager.lockRegs(2, mat_lhs_regs), - .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, - else => @splat(null), - }; - defer for (mat_lhs_locks) |mat_lhs_lock| if (mat_lhs_lock) |lock| self.register_manager.unlockReg(lock); - const mat_rhs_mcv = mat_rhs_mcv: switch (rhs_mcv) { - .register => |rhs_reg| switch (rhs_reg.class()) { - else => rhs_mcv, - .sse => { - const mat_rhs_mcv: MCValue = .{ - .register_pair = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp), - }; - try self.genCopy(dst_ty, mat_rhs_mcv, rhs_mcv, .{}); - break :mat_rhs_mcv mat_rhs_mcv; - }, - }, - .load_nav, .load_uav, .load_lazy_sym => { - // TODO clean this up! 
- const addr_reg = try self.copyToTmpRegister(.usize, rhs_mcv.address()); - break :mat_rhs_mcv MCValue{ .indirect = .{ .reg = addr_reg } }; - }, - else => rhs_mcv, - }; - const mat_rhs_locks: [2]?RegisterLock = switch (mat_rhs_mcv) { - .register_pair => |mat_rhs_regs| self.register_manager.lockRegs(2, mat_rhs_regs), - .indirect => |reg_off| .{ self.register_manager.lockReg(reg_off.reg), null }, - else => @splat(null), - }; - defer for (mat_rhs_locks) |mat_rhs_lock| if (mat_rhs_lock) |lock| self.register_manager.unlockReg(lock); - - if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .mov }, - .rax, - try mat_lhs_mcv.mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .mov }, - .rax, - mat_lhs_mcv.register_pair[0], - ); - if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .mov }, - tmp_regs[0], - try mat_rhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .mov }, - tmp_regs[0], - mat_rhs_mcv.register_pair[1], - ); - try self.asmRegisterRegister(.{ ._, .@"test" }, tmp_regs[0], tmp_regs[0]); - try self.asmSetccRegister(.nz, tmp_regs[1].to8()); - try self.asmRegisterRegister(.{ .i_, .mul }, tmp_regs[0], .rax); - try self.asmSetccRegister(.o, tmp_regs[2].to8()); - if (mat_rhs_mcv.isBase()) - try self.asmMemory(.{ ._, .mul }, try mat_rhs_mcv.mem(self, .{ .size = .qword })) - else - try self.asmRegister(.{ ._, .mul }, mat_rhs_mcv.register_pair[0]); - try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); - try self.asmSetccRegister(.c, tmp_regs[3].to8()); - try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[2].to8(), tmp_regs[3].to8()); - if (mat_lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .mov }, - tmp_regs[0], - try mat_lhs_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ ._, .mov }, - tmp_regs[0], - mat_lhs_mcv.register_pair[1], - ); - try self.asmRegisterRegister(.{ ._, 
.@"test" }, tmp_regs[0], tmp_regs[0]); - try self.asmSetccRegister(.nz, tmp_regs[3].to8()); - try self.asmRegisterRegister( - .{ ._, .@"and" }, - tmp_regs[1].to8(), - tmp_regs[3].to8(), - ); - try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); - if (mat_rhs_mcv.isBase()) try self.asmRegisterMemory( - .{ .i_, .mul }, - tmp_regs[0], - try mat_rhs_mcv.mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ .i_, .mul }, - tmp_regs[0], - mat_rhs_mcv.register_pair[0], - ); - try self.asmSetccRegister(.o, tmp_regs[2].to8()); - try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); - try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_regs[0]); - try self.asmSetccRegister(.c, tmp_regs[2].to8()); - try self.asmRegisterRegister(.{ ._, .@"or" }, tmp_regs[1].to8(), tmp_regs[2].to8()); - - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - @intCast(tuple_ty.structFieldOffset(0, zcu)), - tuple_ty.fieldType(0, zcu), - .{ .register_pair = .{ .rax, .rdx } }, - .{}, - ); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - .{ .register = tmp_regs[1] }, - .{}, - ); - break :result dst_mcv; - }, - }; - - try self.spillEflagsIfOccupied(); - try self.spillRegisters(&.{ .rax, .rcx, .rdx, .rdi, .rsi }); - const reg_locks = self.register_manager.lockRegsAssumeUnused(5, .{ .rax, .rcx, .rdx, .rdi, .rsi }); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - const cc: Condition = switch (dst_info.signedness) { - .unsigned => .c, - .signed => .o, - }; - - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); - - const extra_bits = if (dst_info.bits <= 64) - self.regExtraBits(dst_ty) - else - dst_info.bits % 64; - const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, 
rhs); - - switch (partial_mcv) { - .register => |reg| if (extra_bits == 0) { - self.eflags_inst = inst; - break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; - } else { - const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); - try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - else => { - // For now, this is the only supported multiply that doesn't fit in a register. - if (dst_info.bits > 128 or src_bits != 64) - return self.fail("TODO implement airWithOverflow from {f} to {f}", .{ - src_ty.fmt(pt), dst_ty.fmt(pt), - }); - - const frame_index = try self.allocFrameIndex(.initSpill(tuple_ty, zcu)); - if (dst_info.bits >= lhs_active_bits + rhs_active_bits) { - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(0, zcu)), - tuple_ty.fieldType(0, zcu), - partial_mcv, - .{}, - ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(tuple_ty.structFieldOffset(1, zcu)), - tuple_ty.fieldType(1, zcu), - .{ .immediate = 0 }, // cc being set is impossible - .{}, - ); - } else try self.genSetFrameTruncatedOverflowCompare( - tuple_ty, - frame_index, - partial_mcv, - null, - ); - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - } - }, - else => unreachable, - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -/// Generates signed or unsigned integer multiplication/division. -/// Clobbers .rax and .rdx registers. -/// Quotient is saved in .rax and remainder in .rdx. 
-fn genIntMulDivOpMir(self: *CodeGen, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void { - const pt = self.pt; - const abi_size: u32 = @intCast(ty.abiSize(pt.zcu)); - const bit_size: u32 = @intCast(self.regBitSize(ty)); - if (abi_size > 8) { - return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); - } - - try self.genSetReg(.rax, ty, lhs, .{}); - switch (tag[1]) { - else => unreachable, - .mul => {}, - .div => switch (tag[0]) { - ._ => { - const hi_reg: Register = - switch (bit_size) { - 8 => .ah, - 16, 32, 64 => .edx, - else => unreachable, - }; - try self.asmRegisterRegister(.{ ._, .xor }, hi_reg, hi_reg); - }, - .i_ => try self.asmOpOnly(.{ ._, switch (bit_size) { - 8 => .cbw, - 16 => .cwd, - 32 => .cdq, - 64 => .cqo, - else => unreachable, - } }), - else => unreachable, - }, - } - - const mat_rhs: MCValue = switch (rhs) { - .register, .indirect, .load_frame => rhs, - else => .{ .register = try self.copyToTmpRegister(ty, rhs) }, - }; - switch (mat_rhs) { - .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), - .memory, .indirect, .load_frame => try self.asmMemory( - tag, - try mat_rhs.mem(self, .{ .size = .fromSize(abi_size) }), - ), - else => unreachable, - } - if (tag[1] == .div and bit_size == 8) try self.asmRegisterRegister(.{ ._, .mov }, .dl, .ah); -} - -/// Always returns a register. -/// Clobbers .rax and .rdx registers. 
-fn genInlineIntDivFloor(self: *CodeGen, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - const abi_size: u32 = @intCast(ty.abiSize(zcu)); - const int_info = ty.intInfo(zcu); - const dividend = switch (lhs) { - .register => |reg| reg, - else => try self.copyToTmpRegister(ty, lhs), - }; - const dividend_lock = self.register_manager.lockReg(dividend); - defer if (dividend_lock) |lock| self.register_manager.unlockReg(lock); - - const divisor = switch (rhs) { - .register => |reg| reg, - else => try self.copyToTmpRegister(ty, rhs), - }; - const divisor_lock = self.register_manager.lockReg(divisor); - defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock); - - try self.genIntMulDivOpMir( - switch (int_info.signedness) { - .signed => .{ .i_, .div }, - .unsigned => .{ ._, .div }, - }, - ty, - .{ .register = dividend }, - .{ .register = divisor }, - ); - - try self.asmRegisterRegister( - .{ ._, .xor }, - registerAlias(divisor, abi_size), - registerAlias(dividend, abi_size), - ); - try self.asmRegisterImmediate( - .{ ._r, .sa }, - registerAlias(divisor, abi_size), - .u(int_info.bits - 1), - ); - try self.asmRegisterRegister( - .{ ._, .@"test" }, - registerAlias(.rdx, abi_size), - registerAlias(.rdx, abi_size), - ); - try self.asmCmovccRegisterRegister( - .z, - registerAlias(divisor, @max(abi_size, 2)), - registerAlias(.rdx, @max(abi_size, 2)), - ); - try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax }); - return MCValue{ .register = divisor }; -} - -fn airShlShrBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - - const air_tags = self.air.instructions.items(.tag); - const tag = air_tags[@intFromEnum(inst)]; - const lhs_ty = self.typeOf(bin_op.lhs); - const rhs_ty = self.typeOf(bin_op.rhs); - const result: MCValue = result: { - switch 
(lhs_ty.zigTypeTag(zcu)) { - .int => { - try self.spillRegisters(&.{.rcx}); - try self.register_manager.getKnownReg(.rcx, null); - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const rhs_mcv = try self.resolveInst(bin_op.rhs); - - const dst_mcv = try self.genShiftBinOp(tag, inst, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty); - switch (tag) { - .shr, .shr_exact, .shl_exact => {}, - .shl => switch (dst_mcv) { - .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg), - .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]), - .load_frame => |frame_addr| { - const tmp_reg = - try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const lhs_bits: u31 = @intCast(lhs_ty.bitSize(zcu)); - const tmp_ty: Type = if (lhs_bits > 64) .usize else lhs_ty; - const off = frame_addr.off + (lhs_bits - 1) / 64 * 8; - try self.genSetReg( - tmp_reg, - tmp_ty, - .{ .load_frame = .{ .index = frame_addr.index, .off = off } }, - .{}, - ); - try self.truncateRegister(lhs_ty, tmp_reg); - try self.genSetMem( - .{ .frame = frame_addr.index }, - off, - tmp_ty, - .{ .register = tmp_reg }, - .{}, - ); - }, - else => {}, - }, - else => unreachable, - } - break :result dst_mcv; - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .int => if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.childType(zcu).intInfo(zcu).bits) { - else => null, - 16 => switch (lhs_ty.vectorLen(zcu)) { - else => null, - 1...8 => switch (tag) { - else => unreachable, - .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_w, .sra } - else - .{ .p_w, .sra }, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_w, .srl } - else - .{ .p_w, .srl }, - }, - .shl, .shl_exact => if (self.hasFeature(.avx)) - .{ .vp_w, .sll } - else - .{ .p_w, .sll }, - }, - 9...16 => switch (tag) { - else => 
unreachable, - .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .sra } else null, - .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .srl } else null, - }, - .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_w, .sll } else null, - }, - }, - 32 => switch (lhs_ty.vectorLen(zcu)) { - else => null, - 1...4 => switch (tag) { - else => unreachable, - .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_d, .sra } - else - .{ .p_d, .sra }, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_d, .srl } - else - .{ .p_d, .srl }, - }, - .shl, .shl_exact => if (self.hasFeature(.avx)) - .{ .vp_d, .sll } - else - .{ .p_d, .sll }, - }, - 5...8 => switch (tag) { - else => unreachable, - .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .sra } else null, - .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .srl } else null, - }, - .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_d, .sll } else null, - }, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - else => null, - 1...2 => switch (tag) { - else => unreachable, - .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_q, .sra } - else - .{ .p_q, .sra }, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_q, .srl } - else - .{ .p_q, .srl }, - }, - .shl, .shl_exact => if (self.hasFeature(.avx)) - .{ .vp_q, .sll } - else - .{ .p_q, .sll }, - }, - 3...4 => switch (tag) { - else => unreachable, - .shr, .shr_exact => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_q, .sra } else null, - .unsigned => if (self.hasFeature(.avx2)) .{ .vp_q, .srl } else null, - }, - .shl, .shl_exact => if (self.hasFeature(.avx2)) .{ .vp_q, .sll } else null, - }, - }, - })) |mir_tag| if (try 
self.air.value(bin_op.rhs, pt)) |rhs_val| { - switch (zcu.intern_pool.indexToKey(rhs_val.toIntern())) { - .aggregate => |rhs_aggregate| switch (rhs_aggregate.storage) { - .repeated_elem => |rhs_elem| { - const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) - .{lhs_mcv.getReg().?} ** 2 - else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{ - try self.register_manager.allocReg(inst, abi.RegisterClass.sse), - lhs_mcv.getReg().?, - } else .{(try self.copyToRegisterWithInstTracking( - inst, - lhs_ty, - lhs_mcv, - )).register} ** 2; - const reg_locks = - self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg }); - defer for (reg_locks) |reg_lock| if (reg_lock) |lock| - self.register_manager.unlockReg(lock); - - const shift_imm: Immediate = - .u(@intCast(Value.fromInterned(rhs_elem).toUnsignedInt(zcu))); - if (self.hasFeature(.avx)) try self.asmRegisterRegisterImmediate( - mir_tag, - registerAlias(dst_reg, abi_size), - registerAlias(lhs_reg, abi_size), - shift_imm, - ) else { - assert(dst_reg.id() == lhs_reg.id()); - try self.asmRegisterImmediate( - mir_tag, - registerAlias(dst_reg, abi_size), - shift_imm, - ); - } - break :result .{ .register = dst_reg }; - }, - else => {}, - }, - else => {}, - } - } else if (bin_op.rhs.toIndex()) |rhs_inst| switch (air_tags[@intFromEnum(rhs_inst)]) { - .splat => { - const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const dst_reg, const lhs_reg = if (lhs_mcv.isRegister() and - self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) - .{lhs_mcv.getReg().?} ** 2 - else if (lhs_mcv.isRegister() and self.hasFeature(.avx)) .{ - try self.register_manager.allocReg(inst, abi.RegisterClass.sse), - lhs_mcv.getReg().?, - } else .{(try self.copyToRegisterWithInstTracking( - inst, - lhs_ty, - lhs_mcv, - )).register} ** 2; - const 
reg_locks = self.register_manager.lockRegs(2, .{ dst_reg, lhs_reg }); - defer for (reg_locks) |reg_lock| if (reg_lock) |lock| - self.register_manager.unlockReg(lock); - - const shift_reg = - try self.copyToTmpRegister(rhs_ty, .{ .air_ref = bin_op.rhs }); - const shift_lock = self.register_manager.lockRegAssumeUnused(shift_reg); - defer self.register_manager.unlockReg(shift_lock); - - const mask_ty = try pt.vectorType(.{ .len = 16, .child = .u8_type }); - const mask_mcv = try self.lowerValue(try pt.aggregateValue( - mask_ty, - &([1]InternPool.Index{ - (try rhs_ty.childType(zcu).maxIntScalar(pt, .u8)).toIntern(), - } ++ [1]InternPool.Index{.zero_u8} ** 15), - )); - const mask_addr_reg = try self.copyToTmpRegister(.usize, mask_mcv.address()); - const mask_addr_lock = self.register_manager.lockRegAssumeUnused(mask_addr_reg); - defer self.register_manager.unlockReg(mask_addr_lock); - - if (self.hasFeature(.avx)) { - try self.asmRegisterRegisterMemory( - .{ .vp_, .@"and" }, - shift_reg.to128(), - shift_reg.to128(), - .{ - .base = .{ .reg = mask_addr_reg }, - .mod = .{ .rm = .{ .size = .xword } }, - }, - ); - try self.asmRegisterRegisterRegister( - mir_tag, - registerAlias(dst_reg, abi_size), - registerAlias(lhs_reg, abi_size), - shift_reg.to128(), - ); - } else { - try self.asmRegisterMemory( - .{ .p_, .@"and" }, - shift_reg.to128(), - .{ - .base = .{ .reg = mask_addr_reg }, - .mod = .{ .rm = .{ .size = .xword } }, - }, - ); - assert(dst_reg.id() == lhs_reg.id()); - try self.asmRegisterRegister( - mir_tag, - registerAlias(dst_reg, abi_size), - shift_reg.to128(), - ); - } - break :result .{ .register = dst_reg }; - }, - else => {}, - }, - else => {}, - }, - else => {}, - } - return self.fail("TODO implement airShlShrBinOp for {f}", .{lhs_ty.fmt(pt)}); - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airShlSat(self: *CodeGen, inst: Air.Inst.Index) !void { - const zcu = self.pt.zcu; - const bin_op = 
self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const lhs_ty = self.typeOf(bin_op.lhs); - const rhs_ty = self.typeOf(bin_op.rhs); - - const result: MCValue = result: { - switch (lhs_ty.zigTypeTag(zcu)) { - .int => { - const lhs_bits = lhs_ty.bitSize(zcu); - const rhs_bits = rhs_ty.bitSize(zcu); - if (!(lhs_bits <= 32 and rhs_bits <= 5) and !(lhs_bits > 32 and lhs_bits <= 64 and rhs_bits <= 6) and !(rhs_bits <= std.math.log2(lhs_bits))) { - return self.fail("TODO implement shl_sat for {} with lhs bits {}, rhs bits {}", .{ self.target.cpu.arch, lhs_bits, rhs_bits }); - } - - // clobberred by genShiftBinOp - try self.spillRegisters(&.{.rcx}); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - var lhs_temp1 = try self.tempInit(lhs_ty, lhs_mcv); - const rhs_mcv = try self.resolveInst(bin_op.rhs); - - const lhs_lock = switch (lhs_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - - // shift left - const dst_mcv = try self.genShiftBinOp(.shl, null, lhs_mcv, rhs_mcv, lhs_ty, rhs_ty); - switch (dst_mcv) { - .register => |dst_reg| try self.truncateRegister(lhs_ty, dst_reg), - .register_pair => |dst_regs| try self.truncateRegister(lhs_ty, dst_regs[1]), - .load_frame => |frame_addr| { - const tmp_reg = - try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const lhs_bits_u31: u31 = @intCast(lhs_bits); - const tmp_ty: Type = if (lhs_bits_u31 > 64) .usize else lhs_ty; - const off = frame_addr.off + (lhs_bits_u31 - 1) / 64 * 8; - try self.genSetReg( - tmp_reg, - tmp_ty, - .{ .load_frame = .{ .index = frame_addr.index, .off = off } }, - .{}, - ); - try self.truncateRegister(lhs_ty, tmp_reg); - try self.genSetMem( - .{ .frame = frame_addr.index }, - off, - tmp_ty, - .{ .register = tmp_reg }, - .{}, - ); - }, - 
else => {}, - } - const dst_lock = switch (dst_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - // shift right - const tmp_mcv = try self.genShiftBinOp(.shr, null, dst_mcv, rhs_mcv, lhs_ty, rhs_ty); - var tmp_temp = try self.tempInit(lhs_ty, tmp_mcv); - - // check if overflow happens - const cc_temp = lhs_temp1.cmpInts(.neq, &tmp_temp, self) catch |err| switch (err) { - error.SelectFailed => unreachable, - else => |e| return e, - }; - try lhs_temp1.die(self); - try tmp_temp.die(self); - const overflow_reloc = try self.genCondBrMir(lhs_ty, cc_temp.tracking(self).short); - try cc_temp.die(self); - - // if overflow, - // for unsigned integers, the saturating result is just its max - // for signed integers, - // if lhs is positive, the result is its max - // if lhs is negative, it is min - switch (lhs_ty.intInfo(zcu).signedness) { - .unsigned => { - const bound_mcv = try self.lowerValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty)); - try self.genCopy(lhs_ty, dst_mcv, bound_mcv, .{}); - }, - .signed => { - // check the sign of lhs - // TODO: optimize this. - // we only need the highest bit so shifting the highest part of lhs_mcv - // is enough to check the signedness. other parts can be skipped here. 
- var lhs_temp2 = try self.tempInit(lhs_ty, lhs_mcv); - var zero_temp = try self.tempInit(lhs_ty, try self.lowerValue(try self.pt.intValue(lhs_ty, 0))); - const sign_cc_temp = lhs_temp2.cmpInts(.lt, &zero_temp, self) catch |err| switch (err) { - error.SelectFailed => unreachable, - else => |e| return e, - }; - try lhs_temp2.die(self); - try zero_temp.die(self); - const sign_reloc_condbr = try self.genCondBrMir(lhs_ty, sign_cc_temp.tracking(self).short); - try sign_cc_temp.die(self); - - // if it is negative - const min_mcv = try self.lowerValue(try lhs_ty.minIntScalar(self.pt, lhs_ty)); - try self.genCopy(lhs_ty, dst_mcv, min_mcv, .{}); - const sign_reloc_br = try self.asmJmpReloc(undefined); - self.performReloc(sign_reloc_condbr); - - // if it is positive - const max_mcv = try self.lowerValue(try lhs_ty.maxIntScalar(self.pt, lhs_ty)); - try self.genCopy(lhs_ty, dst_mcv, max_mcv, .{}); - self.performReloc(sign_reloc_br); - }, - } - - self.performReloc(overflow_reloc); - break :result dst_mcv; - }, - else => { - return self.fail("TODO implement shl_sat for {} op type {}", .{ self.target.cpu.arch, lhs_ty.zigTypeTag(zcu) }); - }, - } - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airOptionalPayload(self: *CodeGen, inst: Air.Inst.Index) !void { - const zcu = self.pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result: MCValue = result: { - const pl_ty = self.typeOfIndex(inst); - if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - - const opt_mcv = try self.resolveInst(ty_op.operand); - if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) { - const pl_mcv: MCValue = switch (opt_mcv) { - .register_overflow => |ro| pl: { - self.eflags_inst = null; // actually stop tracking the overflow part - break :pl .{ .register = ro.reg }; - }, - else => opt_mcv, - }; - switch (pl_mcv) { - .register => |pl_reg| try self.truncateRegister(pl_ty, pl_reg), - else => {}, - } - break 
:result pl_mcv; - } - - const pl_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(pl_ty, pl_mcv, switch (opt_mcv) { - else => opt_mcv, - .register_overflow => |ro| .{ .register = ro.reg }, - }, .{}); - break :result pl_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airOptionalPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const dst_ty = self.typeOfIndex(inst); - const opt_mcv = try self.resolveInst(ty_op.operand); - - const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) - opt_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv); - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn airOptionalPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result = result: { - const dst_ty = self.typeOfIndex(inst); - const src_ty = self.typeOf(ty_op.operand); - const opt_ty = src_ty.childType(zcu); - const src_mcv = try self.resolveInst(ty_op.operand); - - if (opt_ty.optionalReprIsPayload(zcu)) { - break :result if (self.liveness.isUnused(inst)) - .unreach - else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - } - - const dst_mcv: MCValue = if (src_mcv.isRegister() and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else if (self.liveness.isUnused(inst)) - .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) } - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - - const pl_ty = dst_ty.childType(zcu); - const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu)); - try self.genSetMem( - .{ .reg = dst_mcv.getReg().? 
}, - pl_abi_size, - .bool, - .{ .immediate = 1 }, - .{}, - ); - break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airUnwrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const err_union_ty = self.typeOf(ty_op.operand); - const err_ty = err_union_ty.errorUnionSet(zcu); - const payload_ty = err_union_ty.errorUnionPayload(zcu); - const operand = try self.resolveInst(ty_op.operand); - - const result: MCValue = result: { - if (err_ty.errorSetIsEmpty(zcu)) { - break :result MCValue{ .immediate = 0 }; - } - - if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { - break :result try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand); - } - - const err_off = codegen.errUnionErrorOffset(payload_ty, zcu); - switch (operand) { - .register => |reg| { - // TODO reuse operand - const eu_lock = self.register_manager.lockReg(reg); - defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); - - const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand); - if (err_off > 0) try self.genShiftBinOpMir( - .{ ._r, .sh }, - err_union_ty, - result, - .u8, - .{ .immediate = @as(u6, @intCast(err_off * 8)) }, - ) else try self.truncateRegister(.anyerror, result.register); - break :result result; - }, - .load_frame => |frame_addr| break :result .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + @as(i32, @intCast(err_off)), - } }, - else => return self.fail("TODO implement unwrap_err_err for {f}", .{operand}), - } - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airUnwrapErrUnionPayload(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const operand_ty = 
self.typeOf(ty_op.operand); - const operand = try self.resolveInst(ty_op.operand); - const result = try self.genUnwrapErrUnionPayloadMir(inst, operand_ty, operand); - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -// *(E!T) -> E -fn airUnwrapErrUnionErrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const src_ty = self.typeOf(ty_op.operand); - const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = switch (src_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(src_ty, src_mcv), - }; - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); - - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - const eu_ty = src_ty.childType(zcu); - const pl_ty = eu_ty.errorUnionPayload(zcu); - const err_ty = eu_ty.errorUnionSet(zcu); - const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); - const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu)); - try self.asmRegisterMemory( - .{ ._, .mov }, - registerAlias(dst_reg, err_abi_size), - .{ - .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ - .size = .fromSize(err_abi_size), - .disp = err_off, - } }, - }, - ); - - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -// *(E!T) -> *T -fn airUnwrapErrUnionPayloadPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const operand_ty = self.typeOf(ty_op.operand); - const operand = try self.resolveInst(ty_op.operand); - const result = try self.genUnwrapErrUnionPayloadPtrMir(inst, operand_ty, operand); - return 
self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airErrUnionPayloadPtrSet(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result: MCValue = result: { - const src_ty = self.typeOf(ty_op.operand); - const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = switch (src_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(src_ty, src_mcv), - }; - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); - - const eu_ty = src_ty.childType(zcu); - const pl_ty = eu_ty.errorUnionPayload(zcu); - const err_ty = eu_ty.errorUnionSet(zcu); - const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); - const err_abi_size: u32 = @intCast(err_ty.abiSize(zcu)); - try self.asmMemoryImmediate( - .{ ._, .mov }, - .{ - .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ - .size = .fromSize(err_abi_size), - .disp = err_off, - } }, - }, - .u(0), - ); - - if (self.liveness.isUnused(inst)) break :result .unreach; - - const dst_ty = self.typeOfIndex(inst); - const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_reg - else - try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(zcu)); - try self.asmRegisterMemory( - .{ ._, .lea }, - registerAlias(dst_reg, dst_abi_size), - .{ - .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ .disp = pl_off } }, - }, - ); - break :result .{ .register = dst_reg }; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn genUnwrapErrUnionPayloadMir( - self: *CodeGen, - maybe_inst: 
?Air.Inst.Index, - err_union_ty: Type, - err_union: MCValue, -) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - const payload_ty = err_union_ty.errorUnionPayload(zcu); - - const result: MCValue = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - - const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu)); - switch (err_union) { - .load_frame => |frame_addr| break :result .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + payload_off, - } }, - .register => |reg| { - // TODO reuse operand - const eu_lock = self.register_manager.lockReg(reg); - defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); - - const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp); - const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null) - try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union) - else - .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; - if (payload_off > 0) try self.genShiftBinOpMir( - .{ ._r, .sh }, - err_union_ty, - result_mcv, - .u8, - .{ .immediate = @as(u6, @intCast(payload_off * 8)) }, - ) else try self.truncateRegister(payload_ty, result_mcv.register); - break :result if (payload_in_gp) - result_mcv - else if (maybe_inst) |inst| - try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv) - else - .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) }; - }, - else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {f}", .{err_union}), - } - }; - - return result; -} - -fn genUnwrapErrUnionPayloadPtrMir( - self: *CodeGen, - maybe_inst: ?Air.Inst.Index, - ptr_ty: Type, - ptr_mcv: MCValue, -) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - const err_union_ty = ptr_ty.childType(zcu); - const payload_ty = err_union_ty.errorUnionPayload(zcu); - - const result: MCValue = result: { - const payload_off = 
codegen.errUnionPayloadOffset(payload_ty, zcu); - const result_mcv: MCValue = if (maybe_inst) |inst| - try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv) - else - .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }; - try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off }); - break :result result_mcv; - }; - - return result; -} - -fn airWrapOptional(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result: MCValue = result: { - const pl_ty = self.typeOf(ty_op.operand); - if (!pl_ty.hasRuntimeBits(zcu)) break :result .{ .immediate = 1 }; - - const opt_ty = self.typeOfIndex(inst); - const pl_mcv = try self.resolveInst(ty_op.operand); - const same_repr = opt_ty.optionalReprIsPayload(zcu); - if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv; - - const pl_lock: ?RegisterLock = switch (pl_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (pl_lock) |lock| self.register_manager.unlockReg(lock); - - const opt_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(pl_ty, opt_mcv, pl_mcv, .{}); - - if (!same_repr) { - const pl_abi_size: i32 = @intCast(pl_ty.abiSize(zcu)); - switch (opt_mcv) { - else => unreachable, - - .register => |opt_reg| { - try self.truncateRegister(pl_ty, opt_reg); - try self.asmRegisterImmediate( - .{ ._s, .bt }, - opt_reg, - .u(@as(u6, @intCast(pl_abi_size * 8))), - ); - }, - - .load_frame => |frame_addr| try self.asmMemoryImmediate( - .{ ._, .mov }, - .{ - .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ - .size = .byte, - .disp = frame_addr.off + pl_abi_size, - } }, - }, - .u(1), - ), - } - } - break :result opt_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -/// T to E!T -fn airWrapErrUnionPayload(self: *CodeGen, inst: 
Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const eu_ty = ty_op.ty.toType(); - const pl_ty = eu_ty.errorUnionPayload(zcu); - const err_ty = eu_ty.errorUnionSet(zcu); - const operand = try self.resolveInst(ty_op.operand); - - const result: MCValue = result: { - if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .{ .immediate = 0 }; - - const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu)); - const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); - const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand, .{}); - try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 }, .{}); - break :result .{ .load_frame = .{ .index = frame_index } }; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -/// E to E!T -fn airWrapErrUnionErr(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const eu_ty = ty_op.ty.toType(); - const pl_ty = eu_ty.errorUnionPayload(zcu); - const err_ty = eu_ty.errorUnionSet(zcu); - - const result: MCValue = result: { - if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result try self.resolveInst(ty_op.operand); - - const frame_index = try self.allocFrameIndex(.initSpill(eu_ty, zcu)); - const pl_off: i32 = @intCast(codegen.errUnionPayloadOffset(pl_ty, zcu)); - const err_off: i32 = @intCast(codegen.errUnionErrorOffset(pl_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef, .{}); - const operand = try self.resolveInst(ty_op.operand); - try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand, .{}); - break :result .{ .load_frame = .{ .index = frame_index } }; - }; - return self.finishAir(inst, result, .{ 
ty_op.operand, .none, .none }); -} - -fn airSlicePtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result = result: { - const src_mcv = try self.resolveInst(ty_op.operand); - const ptr_mcv: MCValue = switch (src_mcv) { - .register_pair => |regs| .{ .register = regs[0] }, - else => src_mcv, - }; - if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - switch (src_mcv) { - .register_pair => |regs| try self.freeValue(.{ .register = regs[1] }), - else => {}, - } - break :result ptr_mcv; - } - - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(self.typeOfIndex(inst), dst_mcv, ptr_mcv, .{}); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airSliceLen(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result = result: { - const src_mcv = try self.resolveInst(ty_op.operand); - const len_mcv: MCValue = switch (src_mcv) { - .register_pair => |regs| .{ .register = regs[1] }, - .load_frame => |frame_addr| .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 8, - } }, - else => return self.fail("TODO implement slice_len for {f}", .{src_mcv}), - }; - if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - switch (src_mcv) { - .register_pair => |regs| try self.freeValue(.{ .register = regs[0] }), - .load_frame => {}, - else => unreachable, - } - break :result len_mcv; - } - - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(self.typeOfIndex(inst), dst_mcv, len_mcv, .{}); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airPtrSliceLenPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const src_ty = 
self.typeOf(ty_op.operand); - const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = switch (src_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(src_ty, src_mcv), - }; - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); - - const dst_ty = self.typeOfIndex(inst); - const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_reg - else - try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); - try self.asmRegisterMemory( - .{ ._, .lea }, - registerAlias(dst_reg, dst_abi_size), - .{ - .base = .{ .reg = src_reg }, - .mod = .{ .rm = .{ .disp = 8 } }, - }, - ); - - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn airPtrSlicePtrPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const dst_ty = self.typeOfIndex(inst); - const opt_mcv = try self.resolveInst(ty_op.operand); - - const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) - opt_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, opt_mcv); - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn elemOffset(self: *CodeGen, index_ty: Type, index: MCValue, elem_size: u64) !Register { - const reg: Register = blk: { - switch (index) { - .immediate => |imm| { - // Optimisation: if index MCValue is an immediate, we can multiply in `comptime` - // and set the register directly to the scaled offset as an immediate. 
- const reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - try self.genSetReg(reg, index_ty, .{ .immediate = imm * elem_size }, .{}); - break :blk reg; - }, - else => { - const reg = try self.copyToTmpRegister(index_ty, index); - try self.genIntMulComplexOpMir(index_ty, .{ .register = reg }, .{ .immediate = elem_size }); - break :blk reg; - }, - } - }; - return reg; -} - -fn genSliceElemPtr(self: *CodeGen, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - const slice_ty = self.typeOf(lhs); - const slice_mcv = try self.resolveInst(lhs); - const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock); - - const elem_ty = slice_ty.childType(zcu); - const elem_size = elem_ty.abiSize(zcu); - const slice_ptr_field_type = slice_ty.slicePtrFieldType(zcu); - - const index_ty = self.typeOf(rhs); - const index_mcv = try self.resolveInst(rhs); - const index_mcv_lock: ?RegisterLock = switch (index_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (index_mcv_lock) |lock| self.register_manager.unlockReg(lock); - - const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_size); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - - const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - try self.genSetReg(addr_reg, .usize, slice_mcv, .{}); - // TODO we could allocate register here, but need to expect addr register and potentially - // offset register. 
- try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{ - .register = offset_reg, - }); - return MCValue{ .register = addr_reg.to64() }; -} - -fn airSliceElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - - const result: MCValue = result: { - const elem_ty = self.typeOfIndex(inst); - if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - - const slice_ty = self.typeOf(bin_op.lhs); - const slice_ptr_field_type = slice_ty.slicePtrFieldType(zcu); - const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs); - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.load(dst_mcv, slice_ptr_field_type, elem_ptr); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airSliceElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const dst_mcv = try self.genSliceElemPtr(extra.lhs, extra.rhs); - return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); -} - -fn airArrayElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - - const result: MCValue = result: { - const array_ty = self.typeOf(bin_op.lhs); - const elem_ty = array_ty.childType(zcu); - - const array_mcv = try self.resolveInst(bin_op.lhs); - const array_lock: ?RegisterLock = switch (array_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (array_lock) |lock| self.register_manager.unlockReg(lock); - - const index_ty = self.typeOf(bin_op.rhs); - const index_mcv = try self.resolveInst(bin_op.rhs); - const index_lock = switch 
(index_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (index_lock) |lock| self.register_manager.unlockReg(lock); - - try self.spillEflagsIfOccupied(); - if (array_ty.isVector(zcu) and elem_ty.bitSize(zcu) == 1) { - const array_mat_mcv: MCValue = switch (array_mcv) { - else => array_mcv, - .register_mask => .{ .register = try self.copyToTmpRegister(array_ty, array_mcv) }, - }; - const array_mat_lock = switch (array_mat_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (array_mat_lock) |lock| self.register_manager.unlockReg(lock); - - switch (array_mat_mcv) { - .register => |array_reg| switch (array_reg.class()) { - .general_purpose => switch (index_mcv) { - .immediate => |index_imm| try self.asmRegisterImmediate( - .{ ._, .bt }, - array_reg.to64(), - .u(index_imm), - ), - else => try self.asmRegisterRegister( - .{ ._, .bt }, - array_reg.to64(), - switch (index_mcv) { - .register => |index_reg| index_reg, - else => try self.copyToTmpRegister(index_ty, index_mcv), - }.to64(), - ), - }, - .sse => { - const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mat_mcv, .{}); - switch (index_mcv) { - .immediate => |index_imm| try self.asmMemoryImmediate( - .{ ._, .bt }, - .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = @intCast(index_imm / 64 * 8), - } }, - }, - .u(index_imm % 64), - ), - else => try self.asmMemoryRegister( - .{ ._, .bt }, - .{ - .base = .{ .frame = frame_index }, - .mod = .{ .rm = .{ .size = .qword } }, - }, - switch (index_mcv) { - .register => |index_reg| index_reg, - else => try self.copyToTmpRegister(index_ty, index_mcv), - }.to64(), - ), - } - }, - else => unreachable, - }, - .load_frame => switch (index_mcv) { - .immediate => |index_imm| try self.asmMemoryImmediate( - .{ ._, .bt }, - try array_mat_mcv.mem(self, .{ - .size = 
.qword, - .disp = @intCast(index_imm / 64 * 8), - }), - .u(index_imm % 64), - ), - else => try self.asmMemoryRegister( - .{ ._, .bt }, - try array_mat_mcv.mem(self, .{ .size = .qword }), - switch (index_mcv) { - .register => |index_reg| index_reg, - else => try self.copyToTmpRegister(index_ty, index_mcv), - }.to64(), - ), - }, - .memory, - .load_nav, - .load_uav, - .load_lazy_sym, - .load_extern_func, - => switch (index_mcv) { - .immediate => |index_imm| try self.asmMemoryImmediate( - .{ ._, .bt }, - .{ - .base = .{ - .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()), - }, - .mod = .{ .rm = .{ - .size = .qword, - .disp = @intCast(index_imm / 64 * 8), - } }, - }, - .u(index_imm % 64), - ), - else => try self.asmMemoryRegister( - .{ ._, .bt }, - .{ - .base = .{ - .reg = try self.copyToTmpRegister(.usize, array_mat_mcv.address()), - }, - .mod = .{ .rm = .{ .size = .qword } }, - }, - switch (index_mcv) { - .register => |index_reg| index_reg, - else => try self.copyToTmpRegister(index_ty, index_mcv), - }.to64(), - ), - }, - else => return self.fail("TODO airArrayElemVal for {s} of {f}", .{ - @tagName(array_mat_mcv), array_ty.fmt(pt), - }), - } - - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - try self.asmSetccRegister(.c, dst_reg.to8()); - break :result .{ .register = dst_reg }; - } - - const elem_abi_size = elem_ty.abiSize(zcu); - const addr_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); - defer self.register_manager.unlockReg(addr_lock); - - switch (array_mcv) { - .register => { - const frame_index = try self.allocFrameIndex(.initType(array_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array_mcv, .{}); - try self.asmRegisterMemory( - .{ ._, .lea }, - addr_reg, - .{ .base = .{ .frame = frame_index } }, - ); - }, - .load_frame => |frame_addr| try self.asmRegisterMemory( - .{ ._, .lea }, - addr_reg, 
- .{ - .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ .disp = frame_addr.off } }, - }, - ), - .memory, - .load_nav, - .lea_nav, - .load_uav, - .lea_uav, - .load_lazy_sym, - .lea_lazy_sym, - .load_extern_func, - .lea_extern_func, - => try self.genSetReg(addr_reg, .usize, array_mcv.address(), .{}), - else => return self.fail("TODO airArrayElemVal_val for {s} of {f}", .{ - @tagName(array_mcv), array_ty.fmt(pt), - }), - } - - const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size); - const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_lock); - - // TODO we could allocate register here, but need to expect addr register and potentially - // offset register. - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinOpMir(.{ ._, .add }, .usize, .{ .register = addr_reg }, .{ .register = offset_reg }); - try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }, .{}); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airPtrElemVal(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const ptr_ty = self.typeOf(bin_op.lhs); - - // this is identical to the `airPtrElemPtr` codegen expect here an - // additional `mov` is needed at the end to get the actual value - - const result = result: { - const elem_ty = ptr_ty.elemType2(zcu); - if (!elem_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - - const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); - const index_ty = self.typeOf(bin_op.rhs); - const index_mcv = try self.resolveInst(bin_op.rhs); - const index_lock = switch (index_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (index_lock) |lock| self.register_manager.unlockReg(lock); - - const 
offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size); - const offset_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_lock); - - const ptr_mcv = try self.resolveInst(bin_op.lhs); - const elem_ptr_reg = if (ptr_mcv.isRegister() and self.liveness.operandDies(inst, 0)) - ptr_mcv.register - else - try self.copyToTmpRegister(ptr_ty, ptr_mcv); - const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg); - defer self.register_manager.unlockReg(elem_ptr_lock); - try self.asmRegisterRegister( - .{ ._, .add }, - elem_ptr_reg, - offset_reg, - ); - - const dst_mcv = try self.allocRegOrMem(inst, true); - const dst_lock = switch (dst_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.load(dst_mcv, ptr_ty, .{ .register = elem_ptr_reg }); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airPtrElemPtr(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - - const result = result: { - const elem_ptr_ty = self.typeOfIndex(inst); - const base_ptr_ty = self.typeOf(extra.lhs); - - const base_ptr_mcv = try self.resolveInst(extra.lhs); - const base_ptr_lock: ?RegisterLock = switch (base_ptr_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (base_ptr_lock) |lock| self.register_manager.unlockReg(lock); - - if (elem_ptr_ty.ptrInfo(zcu).flags.vector_index != .none) { - break :result if (self.reuseOperand(inst, extra.lhs, 0, base_ptr_mcv)) - base_ptr_mcv - else - try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv); - } - - const elem_ty = 
base_ptr_ty.elemType2(zcu); - const elem_abi_size = elem_ty.abiSize(zcu); - const index_ty = self.typeOf(extra.rhs); - const index_mcv = try self.resolveInst(extra.rhs); - const index_lock: ?RegisterLock = switch (index_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (index_lock) |lock| self.register_manager.unlockReg(lock); - - const offset_reg = try self.elemOffset(index_ty, index_mcv, elem_abi_size); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, elem_ptr_ty, base_ptr_mcv); - try self.genBinOpMir(.{ ._, .add }, elem_ptr_ty, dst_mcv, .{ .register = offset_reg }); - - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); -} - -fn airSetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - const ptr_union_ty = self.typeOf(bin_op.lhs); - const union_ty = ptr_union_ty.childType(zcu); - const tag_ty = self.typeOf(bin_op.rhs); - const layout = union_ty.unionGetLayout(zcu); - - if (layout.tag_size == 0) { - return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); - } - - const ptr = try self.resolveInst(bin_op.lhs); - const ptr_lock: ?RegisterLock = switch (ptr) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); - - const tag = try self.resolveInst(bin_op.rhs); - const tag_lock: ?RegisterLock = switch (tag) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (tag_lock) |lock| self.register_manager.unlockReg(lock); - - const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align.compare(.lt, 
layout.payload_align)) blk: { - // TODO reusing the operand - const reg = try self.copyToTmpRegister(ptr_union_ty, ptr); - try self.genBinOpMir( - .{ ._, .add }, - ptr_union_ty, - .{ .register = reg }, - .{ .immediate = layout.payload_size }, - ); - break :blk MCValue{ .register = reg }; - } else ptr; - - const ptr_tag_ty = try pt.adjustPtrTypeChild(ptr_union_ty, tag_ty); - try self.store(ptr_tag_ty, adjusted_ptr, tag, .{}); - - return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airGetUnionTag(self: *CodeGen, inst: Air.Inst.Index) !void { - const zcu = self.pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const tag_ty = self.typeOfIndex(inst); - const union_ty = self.typeOf(ty_op.operand); - const layout = union_ty.unionGetLayout(zcu); - - if (layout.tag_size == 0) { - return self.finishAir(inst, .none, .{ ty_op.operand, .none, .none }); - } - - // TODO reusing the operand - const operand = try self.resolveInst(ty_op.operand); - const operand_lock: ?RegisterLock = switch (operand) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - - const tag_abi_size = tag_ty.abiSize(zcu); - const dst_mcv: MCValue = blk: { - switch (operand) { - .load_frame => |frame_addr| { - if (tag_abi_size <= 8) { - const off: i32 = @intCast(layout.tagOffset()); - break :blk try self.copyToRegisterWithInstTracking(inst, tag_ty, .{ - .load_frame = .{ .index = frame_addr.index, .off = frame_addr.off + off }, - }); - } - - return self.fail( - "TODO implement get_union_tag for ABI larger than 8 bytes and operand {f}", - .{operand}, - ); - }, - .register => { - const shift: u6 = @intCast(layout.tagOffset() * 8); - const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand); - try self.genShiftBinOpMir(.{ ._r, .sh }, .usize, result, .u8, .{ .immediate = shift }); - break :blk MCValue{ - .register 
= registerAlias(result.register, @intCast(layout.tag_size)), - }; - }, - else => return self.fail("TODO implement get_union_tag for {f}", .{operand}), - } - }; - - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn airClz(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result = result: { - try self.spillEflagsIfOccupied(); - - const dst_ty = self.typeOfIndex(inst); - const src_ty = self.typeOf(ty_op.operand); - if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airClz for {f}", .{ - src_ty.fmt(pt), - }); - - const src_mcv = try self.resolveInst(ty_op.operand); - const mat_src_mcv = switch (src_mcv) { - .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, - else => src_mcv, - }; - const mat_src_lock = switch (mat_src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - const abi_size: u31 = @intCast(src_ty.abiSize(zcu)); - const src_bits: u31 = @intCast(src_ty.bitSize(zcu)); - const has_lzcnt = self.hasFeature(.lzcnt); - if (src_bits > @as(u32, if (has_lzcnt) 128 else 64)) { - const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) { - .load_frame => |src_frame_addr| src_frame_addr, - else => { - const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu)); - try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{}); - break :src_frame_addr .{ .index = src_frame_addr }; - }, - }; - - const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; - const 
extra_bits = abi_size * 8 - src_bits; - - const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); - defer self.register_manager.unlockReg(index_lock); - - try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .u(limbs_len)); - switch (extra_bits) { - 1 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()), - else => try self.asmRegisterImmediate( - .{ ._, .mov }, - dst_reg.to32(), - .s(@as(i32, extra_bits) - 1), - ), - } - const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterRegister(.{ ._, .@"test" }, index_reg.to32(), index_reg.to32()); - const zero = try self.asmJccReloc(.z, undefined); - if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .sub }, index_reg.to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .de }, index_reg.to32()); - } - try self.asmMemoryImmediate(.{ ._, .cmp }, .{ - .base = .{ .frame = src_frame_addr.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = index_reg.to64(), - .scale = .@"8", - .disp = src_frame_addr.off, - } }, - }, .u(0)); - _ = try self.asmJccReloc(.e, loop); - try self.asmRegisterMemory(.{ ._r, .bs }, dst_reg.to64(), .{ - .base = .{ .frame = src_frame_addr.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = index_reg.to64(), - .scale = .@"8", - .disp = src_frame_addr.off, - } }, - }); - self.performReloc(zero); - try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6)); - try self.asmRegisterRegister(.{ ._, .add }, index_reg.to32(), dst_reg.to32()); - try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), .u(src_bits - 1)); - try self.asmRegisterRegister(.{ ._, .sub }, dst_reg.to32(), index_reg.to32()); - break :result dst_mcv; - } - - if (has_lzcnt) { - if (src_bits <= 8) { - const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); - try self.truncateRegister(src_ty, wide_reg); - try 
self.genBinOpMir(.{ ._, .lzcnt }, .u32, dst_mcv, .{ .register = wide_reg }); - try self.genBinOpMir( - .{ ._, .sub }, - dst_ty, - dst_mcv, - .{ .immediate = 32 - src_bits }, - ); - } else if (src_bits <= 64) { - try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv); - const extra_bits = self.regExtraBits(src_ty); - if (extra_bits > 0) { - try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits }); - } - } else { - assert(src_bits <= 128); - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_mcv = MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.genBinOpMir(.{ ._, .lzcnt }, .u64, dst_mcv, if (mat_src_mcv.isBase()) - mat_src_mcv - else - .{ .register = mat_src_mcv.register_pair[0] }); - try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); - try self.genBinOpMir(.{ ._, .lzcnt }, .u64, tmp_mcv, if (mat_src_mcv.isBase()) - mat_src_mcv.address().offset(8).deref() - else - .{ .register = mat_src_mcv.register_pair[1] }); - try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32()); - - if (src_bits < 128) try self.genBinOpMir( - .{ ._, .sub }, - dst_ty, - dst_mcv, - .{ .immediate = 128 - src_bits }, - ); - } - break :result dst_mcv; - } - - assert(src_bits <= 64); - const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2); - if (std.math.isPowerOfTwo(src_bits)) { - const imm_reg = try self.copyToTmpRegister(dst_ty, .{ - .immediate = src_bits ^ (src_bits - 1), - }); - const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); - defer self.register_manager.unlockReg(imm_lock); - - if (src_bits <= 8) { - const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); - const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); - defer self.register_manager.unlockReg(wide_lock); - - try 
self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._r, .bs }, .u16, dst_mcv, .{ .register = wide_reg }); - } else try self.genBinOpMir(.{ ._r, .bs }, src_ty, dst_mcv, mat_src_mcv); - - try self.asmCmovccRegisterRegister( - .z, - registerAlias(dst_reg, cmov_abi_size), - registerAlias(imm_reg, cmov_abi_size), - ); - - try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); - } else { - const imm_reg = try self.copyToTmpRegister(dst_ty, .{ - .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - self.regBitSize(dst_ty)), - }); - const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); - defer self.register_manager.unlockReg(imm_lock); - - const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); - const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); - defer self.register_manager.unlockReg(wide_lock); - - try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir( - .{ ._r, .bs }, - if (src_bits <= 8) .u16 else src_ty, - dst_mcv, - .{ .register = wide_reg }, - ); - - try self.asmCmovccRegisterRegister( - .nz, - registerAlias(imm_reg, cmov_abi_size), - registerAlias(dst_reg, cmov_abi_size), - ); - - try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }, .{}); - try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg }); - } - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airCtz(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result = result: { - try self.spillEflagsIfOccupied(); - - const dst_ty = self.typeOfIndex(inst); - const src_ty = self.typeOf(ty_op.operand); - if (src_ty.zigTypeTag(zcu) == .vector) return self.fail("TODO implement airCtz for {f}", .{ - src_ty.fmt(pt), - }); - - const src_mcv = try self.resolveInst(ty_op.operand); - const 
mat_src_mcv = switch (src_mcv) { - .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, - else => src_mcv, - }; - const mat_src_lock = switch (mat_src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const abi_size: u31 = @intCast(src_ty.abiSize(zcu)); - const src_bits: u31 = @intCast(src_ty.bitSize(zcu)); - const has_bmi = self.hasFeature(.bmi); - if (src_bits > @as(u32, if (has_bmi) 128 else 64)) { - const src_frame_addr: bits.FrameAddr = src_frame_addr: switch (src_mcv) { - .load_frame => |src_frame_addr| src_frame_addr, - else => { - const src_frame_addr = try self.allocFrameIndex(.initSpill(src_ty, zcu)); - try self.genSetMem(.{ .frame = src_frame_addr }, 0, src_ty, src_mcv, .{}); - break :src_frame_addr .{ .index = src_frame_addr }; - }, - }; - - const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; - const extra_bits = abi_size * 8 - src_bits; - - const index_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const index_lock = self.register_manager.lockRegAssumeUnused(index_reg); - defer self.register_manager.unlockReg(index_lock); - - try self.asmRegisterImmediate(.{ ._, .mov }, index_reg.to32(), .s(-1)); - switch (extra_bits) { - 0 => try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()), - 1 => try self.asmRegisterRegister(.{ ._, .mov }, dst_reg.to32(), dst_reg.to32()), - else => try self.asmRegisterImmediate( - .{ ._, .mov }, - dst_reg.to32(), - .s(-@as(i32, extra_bits)), - ), - } - const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - if (self.hasFeature(.slow_incdec)) { - try 
self.asmRegisterImmediate(.{ ._, .add }, index_reg.to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .in }, index_reg.to32()); - } - try self.asmRegisterImmediate(.{ ._, .cmp }, index_reg.to32(), .u(limbs_len)); - const zero = try self.asmJccReloc(.nb, undefined); - try self.asmMemoryImmediate(.{ ._, .cmp }, .{ - .base = .{ .frame = src_frame_addr.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = index_reg.to64(), - .scale = .@"8", - .disp = src_frame_addr.off, - } }, - }, .u(0)); - _ = try self.asmJccReloc(.e, loop); - try self.asmRegisterMemory(.{ ._f, .bs }, dst_reg.to64(), .{ - .base = .{ .frame = src_frame_addr.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = index_reg.to64(), - .scale = .@"8", - .disp = src_frame_addr.off, - } }, - }); - self.performReloc(zero); - try self.asmRegisterImmediate(.{ ._l, .sh }, index_reg.to32(), .u(6)); - try self.asmRegisterRegister(.{ ._, .add }, dst_reg.to32(), index_reg.to32()); - break :result dst_mcv; - } - - const wide_ty: Type = if (src_bits <= 8) .u16 else src_ty; - if (has_bmi) { - if (src_bits <= 64) { - const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0); - const masked_mcv = if (extra_bits > 0) masked: { - const tmp_mcv = tmp: { - if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) - break :tmp src_mcv; - try self.genSetReg(dst_reg, wide_ty, src_mcv, .{}); - break :tmp dst_mcv; - }; - try self.genBinOpMir( - .{ ._, .@"or" }, - wide_ty, - tmp_mcv, - .{ .immediate = (@as(u64, std.math.maxInt(u64)) >> @intCast(64 - extra_bits)) << - @intCast(src_bits) }, - ); - break :masked tmp_mcv; - } else mat_src_mcv; - try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv); - } else { - assert(src_bits <= 128); - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_mcv = MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer 
self.register_manager.unlockReg(tmp_lock); - - const lo_mat_src_mcv: MCValue = if (mat_src_mcv.isBase()) - mat_src_mcv - else - .{ .register = mat_src_mcv.register_pair[0] }; - const hi_mat_src_mcv: MCValue = if (mat_src_mcv.isBase()) - mat_src_mcv.address().offset(8).deref() - else - .{ .register = mat_src_mcv.register_pair[1] }; - const masked_mcv = if (src_bits < 128) masked: { - try self.genCopy(.u64, dst_mcv, hi_mat_src_mcv, .{}); - try self.genBinOpMir( - .{ ._, .@"or" }, - .u64, - dst_mcv, - .{ .immediate = @as(u64, std.math.maxInt(u64)) << @intCast(src_bits - 64) }, - ); - break :masked dst_mcv; - } else hi_mat_src_mcv; - try self.genBinOpMir(.{ ._, .tzcnt }, .u64, dst_mcv, masked_mcv); - try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); - try self.genBinOpMir(.{ ._, .tzcnt }, .u64, tmp_mcv, lo_mat_src_mcv); - try self.asmCmovccRegisterRegister(.nc, dst_reg.to32(), tmp_reg.to32()); - } - break :result dst_mcv; - } - - assert(src_bits <= 64); - const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); - const width_lock = self.register_manager.lockRegAssumeUnused(width_reg); - defer self.register_manager.unlockReg(width_lock); - - if (src_bits <= 8 or !std.math.isPowerOfTwo(src_bits)) { - const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); - const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); - defer self.register_manager.unlockReg(wide_lock); - - try self.truncateRegister(src_ty, wide_reg); - try self.genBinOpMir(.{ ._f, .bs }, wide_ty, dst_mcv, .{ .register = wide_reg }); - } else try self.genBinOpMir(.{ ._f, .bs }, src_ty, dst_mcv, mat_src_mcv); - - const cmov_abi_size = @max(@as(u32, @intCast(dst_ty.abiSize(zcu))), 2); - try self.asmCmovccRegisterRegister( - .z, - registerAlias(dst_reg, cmov_abi_size), - registerAlias(width_reg, cmov_abi_size), - ); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn 
airPopCount(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const result: MCValue = result: { - try self.spillEflagsIfOccupied(); - - const src_ty = self.typeOf(ty_op.operand); - const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); - if (src_ty.zigTypeTag(zcu) == .vector or src_abi_size > 16) - return self.fail("TODO implement airPopCount for {f}", .{src_ty.fmt(pt)}); - const src_mcv = try self.resolveInst(ty_op.operand); - - const mat_src_mcv = switch (src_mcv) { - .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, - else => src_mcv, - }; - const mat_src_lock = switch (mat_src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (mat_src_lock) |lock| self.register_manager.unlockReg(lock); - - if (src_abi_size <= 8) { - const dst_contains_src = - src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv); - const dst_reg = if (dst_contains_src) - src_mcv.getReg().? 
- else - try self.register_manager.allocReg(inst, abi.RegisterClass.gp); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - try self.genPopCount(dst_reg, src_ty, mat_src_mcv, dst_contains_src); - break :result .{ .register = dst_reg }; - } - - assert(src_abi_size > 8 and src_abi_size <= 16); - const tmp_regs = try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp); - const tmp_locks = self.register_manager.lockRegsAssumeUnused(2, tmp_regs); - defer for (tmp_locks) |lock| self.register_manager.unlockReg(lock); - - try self.genPopCount(tmp_regs[0], .usize, if (mat_src_mcv.isBase()) - mat_src_mcv - else - .{ .register = mat_src_mcv.register_pair[0] }, false); - const src_info = src_ty.intInfo(zcu); - const hi_ty = try pt.intType(src_info.signedness, (src_info.bits - 1) % 64 + 1); - try self.genPopCount(tmp_regs[1], hi_ty, if (mat_src_mcv.isBase()) - mat_src_mcv.address().offset(8).deref() - else - .{ .register = mat_src_mcv.register_pair[1] }, false); - try self.asmRegisterRegister(.{ ._, .add }, tmp_regs[0].to8(), tmp_regs[1].to8()); - break :result .{ .register = tmp_regs[0] }; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn genPopCount( - self: *CodeGen, - dst_reg: Register, - src_ty: Type, - src_mcv: MCValue, - dst_contains_src: bool, -) !void { - const pt = self.pt; - - const src_abi_size: u32 = @intCast(src_ty.abiSize(pt.zcu)); - if (self.hasFeature(.popcnt)) return self.genBinOpMir( - .{ ._, .popcnt }, - if (src_abi_size > 1) src_ty else .u32, - .{ .register = dst_reg }, - if (src_abi_size > 1) src_mcv else src: { - if (!dst_contains_src) try self.genSetReg(dst_reg, src_ty, src_mcv, .{}); - try self.truncateRegister(try src_ty.toUnsigned(pt), dst_reg); - break :src .{ .register = dst_reg }; - }, - ); - - const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - src_abi_size * 8); - const imm_0_1: Immediate = .u(mask / 
0b1_1); - const imm_00_11: Immediate = .u(mask / 0b01_01); - const imm_0000_1111: Immediate = .u(mask / 0b0001_0001); - const imm_0000_0001: Immediate = .u(mask / 0b1111_1111); - - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const dst = registerAlias(dst_reg, src_abi_size); - const tmp = registerAlias(tmp_reg, src_abi_size); - const imm = if (src_abi_size > 4) - try self.register_manager.allocReg(null, abi.RegisterClass.gp) - else - undefined; - - if (!dst_contains_src) try self.genSetReg(dst, src_ty, src_mcv, .{}); - // dst = operand - try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); - // tmp = operand - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1)); - // tmp = operand >> 1 - if (src_abi_size > 4) { - try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); - try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); - } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); - // tmp = (operand >> 1) & 0x55...55 - try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp); - // dst = temp1 = operand - ((operand >> 1) & 0x55...55) - try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); - // tmp = temp1 - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2)); - // dst = temp1 >> 2 - if (src_abi_size > 4) { - try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); - try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); - try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); - } else { - try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); - try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); - } - // tmp = temp1 & 0x33...33 - // dst = (temp1 >> 2) & 0x33...33 - try self.asmRegisterRegister(.{ ._, .add }, tmp, dst); - // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33) - try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); - 
// dst = temp2 - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(4)); - // tmp = temp2 >> 4 - try self.asmRegisterRegister(.{ ._, .add }, dst, tmp); - // dst = temp2 + (temp2 >> 4) - if (src_abi_size > 4) { - try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); - try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001); - try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); - try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp); - } else { - try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); - if (src_abi_size > 1) { - try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001); - } - } - // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f - // dst = temp3 * 0x01...01 - if (src_abi_size > 1) { - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u((src_abi_size - 1) * 8)); - } - // dst = (temp3 * 0x01...01) >> (bits - 8) -} - -fn genByteSwap( +fn genUnwrapErrUnionPayloadMir( self: *CodeGen, - inst: Air.Inst.Index, - src_ty: Type, - src_mcv: MCValue, - mem_ok: bool, + maybe_inst: ?Air.Inst.Index, + err_union_ty: Type, + err_union: MCValue, ) !MCValue { const pt = self.pt; const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const has_movbe = self.hasFeature(.movbe); - - if (src_ty.zigTypeTag(zcu) == .vector) return self.fail( - "TODO implement genByteSwap for {f}", - .{src_ty.fmt(pt)}, - ); - - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - - const abi_size: u32 = @intCast(src_ty.abiSize(zcu)); - switch (abi_size) { - 0 => unreachable, - 1 => return if ((mem_ok or src_mcv.isRegister()) and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv), - 2 => if ((mem_ok or src_mcv.isRegister()) and - self.reuseOperand(inst, 
ty_op.operand, 0, src_mcv)) - { - try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 }); - return src_mcv; - }, - 3...8 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genUnOpMir(.{ .b_, .swap }, src_ty, src_mcv); - return src_mcv; - }, - 9...16 => { - const mat_src_mcv: MCValue = mat_src_mcv: switch (src_mcv) { - .register => { - const frame_index = try self.allocFrameIndex(.initSpill(src_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, src_mcv, .{}); - break :mat_src_mcv .{ .load_frame = .{ .index = frame_index } }; - }, - .register_pair => |src_regs| if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - for (src_regs) |src_reg| try self.asmRegister(.{ .b_, .swap }, src_reg.to64()); - return .{ .register_pair = .{ src_regs[1], src_regs[0] } }; - } else src_mcv, - else => src_mcv, - }; - - const dst_regs = - try self.register_manager.allocRegs(2, .{ inst, inst }, abi.RegisterClass.gp); - const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); - defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); - - for (dst_regs, 0..) 
|dst_reg, limb_index| { - if (mat_src_mcv.isBase()) { - try self.asmRegisterMemory( - .{ if (has_movbe) ._be else ._, .mov }, - dst_reg.to64(), - try mat_src_mcv.address().offset(@intCast(limb_index * 8)).deref().mem(self, .{ .size = .qword }), - ); - if (!has_movbe) try self.asmRegister(.{ .b_, .swap }, dst_reg.to64()); - } else { - try self.asmRegisterRegister( - .{ ._, .mov }, - dst_reg.to64(), - mat_src_mcv.register_pair[limb_index].to64(), - ); - try self.asmRegister(.{ .b_, .swap }, dst_reg.to64()); - } - } - return .{ .register_pair = .{ dst_regs[1], dst_regs[0] } }; - }, - else => { - const limbs_len = std.math.divCeil(u32, abi_size, 8) catch unreachable; - - const temp_regs = - try self.register_manager.allocRegs(4, @splat(null), abi.RegisterClass.gp); - const temp_locks = self.register_manager.lockRegsAssumeUnused(4, temp_regs); - defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); - - const dst_mcv = try self.allocRegOrMem(inst, false); - try self.asmRegisterRegister(.{ ._, .xor }, temp_regs[0].to32(), temp_regs[0].to32()); - try self.asmRegisterImmediate(.{ ._, .mov }, temp_regs[1].to32(), .u(limbs_len - 1)); - - const loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterMemory( - .{ if (has_movbe) ._be else ._, .mov }, - temp_regs[2].to64(), - .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[0].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off, - } }, - }, - ); - try self.asmRegisterMemory( - .{ if (has_movbe) ._be else ._, .mov }, - temp_regs[3].to64(), - .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[1].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off, - } }, - }, - ); - if (!has_movbe) { - try self.asmRegister(.{ .b_, .swap }, temp_regs[2].to64()); - try self.asmRegister(.{ .b_, .swap }, temp_regs[3].to64()); - } - try self.asmMemoryRegister(.{ ._, .mov }, 
.{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[0].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off, - } }, - }, temp_regs[3].to64()); - try self.asmMemoryRegister(.{ ._, .mov }, .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = temp_regs[1].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off, - } }, - }, temp_regs[2].to64()); - if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, temp_regs[0].to32(), .u(1)); - try self.asmRegisterImmediate(.{ ._, .sub }, temp_regs[1].to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .in }, temp_regs[0].to32()); - try self.asmRegister(.{ ._c, .de }, temp_regs[1].to32()); - } - try self.asmRegisterRegister(.{ ._, .cmp }, temp_regs[0].to32(), temp_regs[1].to32()); - _ = try self.asmJccReloc(.be, loop); - return dst_mcv; - }, - } - - const dst_mcv: MCValue = if (mem_ok and has_movbe and src_mcv.isRegister()) - try self.allocRegOrMem(inst, true) - else - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.gp) }; - if (dst_mcv.getReg()) |dst_reg| { - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_mcv.register); - defer self.register_manager.unlockReg(dst_lock); - - try self.genSetReg(dst_reg, src_ty, src_mcv, .{}); - switch (abi_size) { - else => unreachable, - 2 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), - 3...8 => try self.genUnOpMir(.{ .b_, .swap }, src_ty, dst_mcv), - } - } else try self.genBinOpMir(.{ ._be, .mov }, src_ty, dst_mcv, src_mcv); - return dst_mcv; -} - -fn airByteSwap(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const src_ty = self.typeOf(ty_op.operand); - const src_bits: u32 = @intCast(src_ty.bitSize(zcu)); - const src_mcv = try 
self.resolveInst(ty_op.operand); - - const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, true); - try self.genShiftBinOpMir( - .{ ._r, switch (if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned) { - .signed => .sa, - .unsigned => .sh, - } }, - src_ty, - dst_mcv, - if (src_bits > 256) .u16 else .u8, - .{ .immediate = src_ty.abiSize(zcu) * 8 - src_bits }, - ); - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn airBitReverse(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const src_ty = self.typeOf(ty_op.operand); - const abi_size: u32 = @intCast(src_ty.abiSize(zcu)); - const bit_size: u32 = @intCast(src_ty.bitSize(zcu)); - const src_mcv = try self.resolveInst(ty_op.operand); - - const dst_mcv = try self.genByteSwap(inst, src_ty, src_mcv, false); - const dst_locks: [2]?RegisterLock = switch (dst_mcv) { - .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null }, - .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs), - else => unreachable, - }; - defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const limb_abi_size: u32 = @min(abi_size, 8); - const tmp = registerAlias(tmp_reg, limb_abi_size); - const imm = if (limb_abi_size > 4) - try self.register_manager.allocReg(null, abi.RegisterClass.gp) - else - undefined; - - const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_abi_size * 8); - const imm_0000_1111: Immediate = .u(mask / 0b0001_0001); - const imm_00_11: Immediate = .u(mask / 0b01_01); - const imm_0_1: Immediate = .u(mask / 0b1_1); - - for (dst_mcv.getRegs()) |dst_reg| { - const dst = 
registerAlias(dst_reg, limb_abi_size); - - // dst = temp1 = bswap(operand) - try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); - // tmp = temp1 - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(4)); - // dst = temp1 >> 4 - if (limb_abi_size > 4) { - try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); - try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); - try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); - } else { - try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111); - try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); - } - // tmp = temp1 & 0x0f...0f - // dst = (temp1 >> 4) & 0x0f...0f - try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, .u(4)); - // tmp = (temp1 & 0x0f...0f) << 4 - try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp); - // dst = temp2 = ((temp1 >> 4) & 0x0f...0f) | ((temp1 & 0x0f...0f) << 4) - try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); - // tmp = temp2 - try self.asmRegisterImmediate(.{ ._r, .sh }, dst, .u(2)); - // dst = temp2 >> 2 - if (limb_abi_size > 4) { - try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); - try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); - try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); - } else { - try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); - try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); - } - // tmp = temp2 & 0x33...33 - // dst = (temp2 >> 2) & 0x33...33 - try self.asmRegisterMemory( - .{ ._, .lea }, - if (limb_abi_size > 4) tmp.to64() else tmp.to32(), - .{ - .base = .{ .reg = dst.to64() }, - .mod = .{ .rm = .{ - .index = tmp.to64(), - .scale = .@"4", - } }, - }, - ); - // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2) - try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); - // dst = temp3 - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, .u(1)); - // tmp = temp3 >> 1 - if (limb_abi_size > 4) { - try self.asmRegisterImmediate(.{ ._, .mov 
}, imm, imm_0_1); - try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); - try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); - } else { - try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1); - try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); - } - // dst = temp3 & 0x55...55 - // tmp = (temp3 >> 1) & 0x55...55 - try self.asmRegisterMemory( - .{ ._, .lea }, - if (limb_abi_size > 4) dst.to64() else dst.to32(), - .{ - .base = .{ .reg = tmp.to64() }, - .mod = .{ .rm = .{ - .index = dst.to64(), - .scale = .@"2", - } }, - }, - ); - // dst = ((temp3 >> 1) & 0x55...55) + ((temp3 & 0x55...55) << 1) - } - - const extra_bits = abi_size * 8 - bit_size; - const signedness: std.builtin.Signedness = - if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned; - if (extra_bits > 0) try self.genShiftBinOpMir(switch (signedness) { - .signed => .{ ._r, .sa }, - .unsigned => .{ ._r, .sh }, - }, src_ty, dst_mcv, .u8, .{ .immediate = extra_bits }); - - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); -} - -fn floatSign(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag, operand: Air.Inst.Ref, ty: Type) !void { - const pt = self.pt; - const zcu = pt.zcu; - - const result = result: { - const scalar_bits = ty.scalarType(zcu).floatBits(self.target); - if (scalar_bits == 80) { - if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement floatSign for {f}", .{ - ty.fmt(pt), - }); - - const src_mcv = try self.resolveInst(operand); - const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_mcv: MCValue = .{ .register = .st0 }; - if (!std.meta.eql(src_mcv, dst_mcv) or !self.reuseOperand(inst, operand, 0, src_mcv)) - try self.register_manager.getKnownReg(.st0, inst); - - try self.genCopy(ty, dst_mcv, src_mcv, .{}); - switch (tag) { - .neg => try self.asmOpOnly(.{ .f_, .chs }), - .abs => try 
self.asmOpOnly(.{ .f_, .abs }), - else => unreachable, - } - break :result dst_mcv; - } - - const abi_size: u32 = switch (ty.abiSize(zcu)) { - 1...16 => 16, - 17...32 => 32, - else => return self.fail("TODO implement floatSign for {f}", .{ - ty.fmt(pt), - }), - }; - - const src_mcv = try self.resolveInst(operand); - const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_mcv: MCValue = if (src_mcv.isRegister() and - self.reuseOperand(inst, operand, 0, src_mcv)) - src_mcv - else if (self.hasFeature(.avx)) - .{ .register = try self.register_manager.allocReg(inst, abi.RegisterClass.sse) } - else - try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const payload_ty = err_union_ty.errorUnionPayload(zcu); - const vec_ty = try pt.vectorType(.{ - .len = @divExact(abi_size * 8, scalar_bits), - .child = (try pt.intType(.signed, scalar_bits)).ip_index, - }); + const result: MCValue = result: { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) break :result .none; - const sign_mcv = try self.lowerValue(switch (tag) { - .neg => try vec_ty.minInt(pt, vec_ty), - .abs => try vec_ty.maxInt(pt, vec_ty), - else => unreachable, - }); - const sign_mem: Memory = if (sign_mcv.isBase()) - try sign_mcv.mem(self, .{ .size = .fromSize(abi_size) }) - else - .{ - .base = .{ .reg = try self.copyToTmpRegister(.usize, sign_mcv.address()) }, - .mod = .{ .rm = .{ .size = .fromSize(abi_size) } }, - }; + const payload_off: u31 = @intCast(codegen.errUnionPayloadOffset(payload_ty, zcu)); + switch (err_union) { + .load_frame => |frame_addr| break :result .{ .load_frame = .{ + .index = frame_addr.index, + .off = frame_addr.off + payload_off, + } }, + .register => |reg| { + // TODO reuse operand + const eu_lock = 
self.register_manager.lockReg(reg); + defer if (eu_lock) |lock| self.register_manager.unlockReg(lock); - if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory( - switch (scalar_bits) { - 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) { - .neg => .{ .vp_, .xor }, - .abs => .{ .vp_, .@"and" }, - else => unreachable, - } else switch (tag) { - .neg => .{ .v_ps, .xor }, - .abs => .{ .v_ps, .@"and" }, - else => unreachable, - }, - 32 => switch (tag) { - .neg => .{ .v_ps, .xor }, - .abs => .{ .v_ps, .@"and" }, - else => unreachable, - }, - 64 => switch (tag) { - .neg => .{ .v_pd, .xor }, - .abs => .{ .v_pd, .@"and" }, - else => unreachable, - }, - 80 => return self.fail("TODO implement floatSign for {f}", .{ty.fmt(pt)}), - else => unreachable, - }, - registerAlias(dst_reg, abi_size), - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv), abi_size), - sign_mem, - ) else try self.asmRegisterMemory( - switch (scalar_bits) { - 16, 128 => switch (tag) { - .neg => .{ .p_, .xor }, - .abs => .{ .p_, .@"and" }, - else => unreachable, - }, - 32 => switch (tag) { - .neg => .{ ._ps, .xor }, - .abs => .{ ._ps, .@"and" }, - else => unreachable, - }, - 64 => switch (tag) { - .neg => .{ ._pd, .xor }, - .abs => .{ ._pd, .@"and" }, - else => unreachable, - }, - 80 => return self.fail("TODO implement floatSign for {f}", .{ty.fmt(pt)}), - else => unreachable, + const payload_in_gp = self.regSetForType(payload_ty).supersetOf(abi.RegisterClass.gp); + const result_mcv: MCValue = if (payload_in_gp and maybe_inst != null) + try self.copyToRegisterWithInstTracking(maybe_inst.?, err_union_ty, err_union) + else + .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; + if (payload_off > 0) try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result_mcv, + .u8, + .{ .immediate = @as(u6, @intCast(payload_off * 8)) }, + ) else try self.truncateRegister(payload_ty, result_mcv.register); + 
break :result if (payload_in_gp) + result_mcv + else if (maybe_inst) |inst| + try self.copyToRegisterWithInstTracking(inst, payload_ty, result_mcv) + else + .{ .register = try self.copyToTmpRegister(payload_ty, result_mcv) }; }, - registerAlias(dst_reg, abi_size), - sign_mem, - ); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ operand, .none, .none }); -} - -fn airFloatSign(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { - const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; - const ty = self.typeOf(un_op); - return self.floatSign(inst, tag, un_op, ty); -} - -fn airRound(self: *CodeGen, inst: Air.Inst.Index, mode: bits.RoundMode) !void { - const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; - const ty = self.typeOf(un_op); - - const result = result: { - switch (try self.genRoundLibcall(ty, .{ .air_ref = un_op }, mode)) { - .none => {}, - else => |dst_mcv| break :result dst_mcv, + else => return self.fail("TODO implement genUnwrapErrUnionPayloadMir for {f}", .{err_union}), } - - const src_mcv = try self.resolveInst(un_op); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - const dst_reg = dst_mcv.getReg().?; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - try self.genRound(ty, dst_reg, src_mcv, mode); - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ un_op, .none, .none }); -} - -fn getRoundTag(self: *CodeGen, ty: Type) ?Mir.Inst.FixedTag { - const pt = self.pt; - const zcu = pt.zcu; - return if (self.hasFeature(.sse4_1)) switch (ty.zigTypeTag(zcu)) { - .float => switch (ty.floatBits(self.target)) { - 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, - 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, - 16, 
80, 128 => null, - else => unreachable, - }, - .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (ty.childType(zcu).floatBits(self.target)) { - 32 => switch (ty.vectorLen(zcu)) { - 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, - 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round }, - 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null, - else => null, - }, - 64 => switch (ty.vectorLen(zcu)) { - 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, - 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round }, - 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => null, - }, - else => unreachable, - } else null; -} - -fn genRoundLibcall(self: *CodeGen, ty: Type, src_mcv: MCValue, mode: bits.RoundMode) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - if (self.getRoundTag(ty)) |_| return .none; - - if (ty.zigTypeTag(zcu) != .float) - return self.fail("TODO implement genRound for {f}", .{ty.fmt(pt)}); - - var sym_buf: ["__trunc?".len]u8 = undefined; - return try self.genCall(.{ .extern_func = .{ - .return_type = ty.toIntern(), - .param_types = &.{ty.toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "{s}{s}{s}", .{ - floatLibcAbiPrefix(ty), - switch (mode.direction) { - .down => "floor", - .up => "ceil", - .zero => "trunc", - else => unreachable, - }, - floatLibcAbiSuffix(ty), - }) catch unreachable, - } }, &.{ty}, &.{src_mcv}, .{}); -} - -fn genRound(self: *CodeGen, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: bits.RoundMode) !void { - const pt = self.pt; - const mir_tag = self.getRoundTag(ty) orelse { - const result = try self.genRoundLibcall(ty, src_mcv, mode); - return self.genSetReg(dst_reg, ty, result, .{}); }; - const abi_size: u32 = @intCast(ty.abiSize(pt.zcu)); - const dst_alias = registerAlias(dst_reg, abi_size); - switch 
(mir_tag[0]) { - .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - mir_tag, - dst_alias, - dst_alias, - try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), - mode.imm(), - ) else try self.asmRegisterRegisterRegisterImmediate( - mir_tag, - dst_alias, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv), abi_size), - mode.imm(), - ), - else => if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( - mir_tag, - dst_alias, - try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), - mode.imm(), - ) else try self.asmRegisterRegisterImmediate( - mir_tag, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv), abi_size), - mode.imm(), - ), - } -} - -fn airAbs(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - const ty = self.typeOf(ty_op.operand); - - const result: MCValue = result: { - const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) { - else => null, - .int => switch (ty.abiSize(zcu)) { - 0 => unreachable, - 1...8 => { - try self.spillEflagsIfOccupied(); - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - - try self.genUnOpMir(.{ ._, .neg }, ty, dst_mcv); - - const cmov_abi_size = @max(@as(u32, @intCast(ty.abiSize(zcu))), 2); - switch (src_mcv) { - .register => |val_reg| try self.asmCmovccRegisterRegister( - .l, - registerAlias(dst_mcv.register, cmov_abi_size), - registerAlias(val_reg, cmov_abi_size), - ), - .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( - .l, - registerAlias(dst_mcv.register, cmov_abi_size), - try src_mcv.mem(self, .{ .size = .fromSize(cmov_abi_size) }), - ), - else => { - const val_reg = try self.copyToTmpRegister(ty, src_mcv); - 
try self.asmCmovccRegisterRegister( - .l, - registerAlias(dst_mcv.register, cmov_abi_size), - registerAlias(val_reg, cmov_abi_size), - ); - }, - } - break :result dst_mcv; - }, - 9...16 => { - try self.spillEflagsIfOccupied(); - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (src_mcv == .register_pair and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { - const dst_regs = try self.register_manager.allocRegs( - 2, - .{ inst, inst }, - abi.RegisterClass.gp, - ); - const dst_mcv: MCValue = .{ .register_pair = dst_regs }; - const dst_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); - defer for (dst_locks) |lock| self.register_manager.unlockReg(lock); - - try self.genCopy(ty, dst_mcv, src_mcv, .{}); - break :dst dst_mcv; - }; - const dst_regs = dst_mcv.register_pair; - const dst_locks = self.register_manager.lockRegs(2, dst_regs); - defer for (dst_locks) |dst_lock| if (dst_lock) |lock| - self.register_manager.unlockReg(lock); - - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]); - try self.asmRegisterImmediate(.{ ._r, .sa }, tmp_reg, .u(63)); - try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[0], tmp_reg); - try self.asmRegisterRegister(.{ ._, .xor }, dst_regs[1], tmp_reg); - try self.asmRegisterRegister(.{ ._, .sub }, dst_regs[0], tmp_reg); - try self.asmRegisterRegister(.{ ._, .sbb }, dst_regs[1], tmp_reg); - - break :result dst_mcv; - }, - else => { - const abi_size: u31 = @intCast(ty.abiSize(zcu)); - const limb_len = std.math.divCeil(u31, abi_size, 8) catch unreachable; - - const tmp_regs = - try self.register_manager.allocRegs(3, @splat(null), abi.RegisterClass.gp); - const tmp_locks = self.register_manager.lockRegsAssumeUnused(3, tmp_regs); - defer for (tmp_locks) |lock| 
self.register_manager.unlockReg(lock); - - try self.spillEflagsIfOccupied(); - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.allocRegOrMem(inst, false); - - try self.asmMemoryImmediate( - .{ ._, .cmp }, - try dst_mcv.address().offset((limb_len - 1) * 8).deref().mem(self, .{ .size = .qword }), - .u(0), - ); - const positive = try self.asmJccReloc(.ns, undefined); - - try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[0].to32(), tmp_regs[0].to32()); - try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[1].to8(), tmp_regs[1].to8()); - - const neg_loop: Mir.Inst.Index = @intCast(self.mir_instructions.len); - try self.asmRegisterRegister(.{ ._, .xor }, tmp_regs[2].to32(), tmp_regs[2].to32()); - try self.asmRegisterImmediate(.{ ._r, .sh }, tmp_regs[1].to8(), .u(1)); - try self.asmRegisterMemory(.{ ._, .sbb }, tmp_regs[2].to64(), .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = tmp_regs[0].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off, - } }, - }); - try self.asmSetccRegister(.c, tmp_regs[1].to8()); - try self.asmMemoryRegister(.{ ._, .mov }, .{ - .base = .{ .frame = dst_mcv.load_frame.index }, - .mod = .{ .rm = .{ - .size = .qword, - .index = tmp_regs[0].to64(), - .scale = .@"8", - .disp = dst_mcv.load_frame.off, - } }, - }, tmp_regs[2].to64()); - - if (self.hasFeature(.slow_incdec)) { - try self.asmRegisterImmediate(.{ ._, .add }, tmp_regs[0].to32(), .u(1)); - } else { - try self.asmRegister(.{ ._c, .in }, tmp_regs[0].to32()); - } - try self.asmRegisterImmediate(.{ ._, .cmp }, tmp_regs[0].to32(), .u(limb_len)); - _ = try self.asmJccReloc(.b, neg_loop); - - self.performReloc(positive); - break :result dst_mcv; - }, - }, - .float => return self.floatSign(inst, .abs, ty_op.operand, ty), - .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { - else => null, - .int => switch 
(ty.childType(zcu).intInfo(zcu).bits) { - else => null, - 8 => switch (ty.vectorLen(zcu)) { - else => null, - 1...16 => if (self.hasFeature(.avx)) - .{ .vp_b, .abs } - else if (self.hasFeature(.ssse3)) - .{ .p_b, .abs } - else - null, - 17...32 => if (self.hasFeature(.avx2)) .{ .vp_b, .abs } else null, - }, - 16 => switch (ty.vectorLen(zcu)) { - else => null, - 1...8 => if (self.hasFeature(.avx)) - .{ .vp_w, .abs } - else if (self.hasFeature(.ssse3)) - .{ .p_w, .abs } - else - null, - 9...16 => if (self.hasFeature(.avx2)) .{ .vp_w, .abs } else null, - }, - 32 => switch (ty.vectorLen(zcu)) { - else => null, - 1...4 => if (self.hasFeature(.avx)) - .{ .vp_d, .abs } - else if (self.hasFeature(.ssse3)) - .{ .p_d, .abs } - else - null, - 5...8 => if (self.hasFeature(.avx2)) .{ .vp_d, .abs } else null, - }, - }, - .float => return self.floatSign(inst, .abs, ty_op.operand, ty), - }, - }) orelse return self.fail("TODO implement airAbs for {f}", .{ty.fmt(pt)}); - const abi_size: u32 = @intCast(ty.abiSize(zcu)); - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_reg = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv.getReg().? - else - try self.register_manager.allocReg(inst, self.regSetForType(ty)); - const dst_alias = registerAlias(dst_reg, abi_size); - if (src_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_alias, - try src_mcv.mem(self, .{ .size = self.memSize(ty) }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(ty, src_mcv), abi_size), - ); - break :result .{ .register = dst_reg }; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + return result; } -fn airSqrt(self: *CodeGen, inst: Air.Inst.Index) !void { +fn genUnwrapErrUnionPayloadPtrMir( + self: *CodeGen, + maybe_inst: ?Air.Inst.Index, + ptr_ty: Type, + ptr_mcv: MCValue, +) !MCValue { const pt = self.pt; const zcu = pt.zcu; - const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; - const ty = self.typeOf(un_op); - const abi_size: u32 = @intCast(ty.abiSize(zcu)); + const err_union_ty = ptr_ty.childType(zcu); + const payload_ty = err_union_ty.errorUnionPayload(zcu); const result: MCValue = result: { - switch (ty.zigTypeTag(zcu)) { - .float => { - const float_bits = ty.floatBits(self.target); - if (switch (float_bits) { - 16 => !self.hasFeature(.f16c), - 32, 64 => false, - 80, 128 => true, - else => unreachable, - }) { - var sym_buf: ["__sqrt?".len]u8 = undefined; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = ty.toIntern(), - .param_types = &.{ty.toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "{s}sqrt{s}", .{ - floatLibcAbiPrefix(ty), - floatLibcAbiSuffix(ty), - }) catch unreachable, - } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{}); - } - }, - else => {}, - } - - const src_mcv = try self.resolveInst(un_op); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) - src_mcv + const payload_off = codegen.errUnionPayloadOffset(payload_ty, zcu); + const result_mcv: MCValue = if (maybe_inst) |inst| + try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr_mcv) else - try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); - const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const mir_tag = @as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(zcu)) 
{ - .float => switch (ty.floatBits(self.target)) { - 16 => { - assert(self.hasFeature(.f16c)); - const mat_src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); - try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - break :result dst_mcv; - }, - 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, - 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, - else => unreachable, - }, - .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (ty.childType(zcu).floatBits(self.target)) { - 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(zcu)) { - 1 => { - try self.asmRegisterRegister( - .{ .v_ps, .cvtph2 }, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv)).to128(), - ); - try self.asmRegisterRegisterRegister( - .{ .v_ss, .sqrt }, - dst_reg, - dst_reg, - dst_reg, - ); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - break :result dst_mcv; - }, - 2...8 => { - const wide_reg = registerAlias(dst_reg, abi_size * 2); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_ps, .cvtph2 }, - wide_reg, - try src_mcv.mem(self, .{ .size = .fromSize( - @intCast(@divExact(wide_reg.bitSize(), 16)), - ) }), - ) else try self.asmRegisterRegister( - .{ .v_ps, .cvtph2 }, - wide_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(ty, src_mcv)).to128(), - ); - try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - wide_reg, - bits.RoundMode.imm(.{}), - ); - break :result dst_mcv; - }, - else => null, - } else null, - 32 => switch (ty.vectorLen(zcu)) { - 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, - 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt }, - 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null, - else => null, - }, - 64 => switch (ty.vectorLen(zcu)) { - 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, - 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt }, - 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null, - else => null, - }, - 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement airSqrt for {f}", .{ty.fmt(pt)}); - switch (mir_tag[0]) { - .v_ss, .v_sd => if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( - mir_tag, - dst_reg, - dst_reg, - try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_reg, - dst_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(ty, src_mcv), abi_size), - ), - else => if (src_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_reg, - try src_mcv.mem(self, .{ .size = .fromSize(abi_size) }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(ty, src_mcv), abi_size), - ), - } - break :result dst_mcv; + .{ .register = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }; + try self.genBinOpMir(.{ ._, .add }, ptr_ty, result_mcv, .{ .immediate = payload_off }); + break :result result_mcv; }; - return self.finishAir(inst, result, .{ un_op, .none, .none }); -} -fn airUnaryMath(self: *CodeGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { - const un_op = self.air.instructions.items(.data)[@intFromEnum(inst)].un_op; - const ty = self.typeOf(un_op); - var sym_buf: ["__round?".len]u8 = undefined; - const result = try self.genCall(.{ .extern_func = .{ - .return_type = ty.toIntern(), - .param_types = &.{ty.toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "{s}{s}{s}", .{ - floatLibcAbiPrefix(ty), - switch (tag) { - .sin, - .cos, - .tan, - .exp, - .exp2, - .log, - .log2, - .log10, - .round, - => @tagName(tag), - else => unreachable, - }, - floatLibcAbiSuffix(ty), - }) catch unreachable, - } }, &.{ty}, &.{.{ .air_ref = un_op }}, .{}); - return self.finishAir(inst, result, .{ un_op, .none, .none }); + return result; } fn reuseOperand( @@ -175573,95 +174524,6 @@ fn store( } } -fn genUnOp(self: *CodeGen, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - const src_ty = self.typeOf(src_air); - if (src_ty.zigTypeTag(zcu) == .vector) - return self.fail("TODO implement genUnOp for {f}", .{src_ty.fmt(pt)}); - - var src_mcv = try self.resolveInst(src_air); - switch (src_mcv) { - .eflags => |cc| switch (tag) { - .not => { - if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) - return .{ .eflags = cc.negate() }; - try self.spillEflagsIfOccupied(); - src_mcv = try self.resolveInst(src_air); - }, - else => {}, - }, - else => {}, - } - - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (src_lock) |lock| 
self.register_manager.unlockReg(lock); - - const dst_mcv: MCValue = dst: { - if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv; - - const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true); - try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); - break :dst dst_mcv; - }; - const dst_lock = switch (dst_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const abi_size: u16 = @intCast(src_ty.abiSize(zcu)); - switch (tag) { - .not => { - const limb_abi_size: u16 = @min(abi_size, 8); - const int_info: InternPool.Key.IntType = if (src_ty.ip_index == .bool_type) - .{ .signedness = .unsigned, .bits = 1 } - else - src_ty.intInfo(zcu); - var byte_off: i32 = 0; - while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) { - const limb_bits: u16 = @intCast(@min(switch (int_info.signedness) { - .signed => abi_size * 8, - .unsigned => int_info.bits, - } - byte_off * 8, limb_abi_size * 8)); - const limb_ty = try pt.intType(int_info.signedness, limb_bits); - const limb_mcv = switch (byte_off) { - 0 => dst_mcv, - else => dst_mcv.address().offset(byte_off).deref(), - }; - - if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) { - const mask = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - limb_bits); - try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask }); - } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); - } - }, - .neg => { - try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv); - const bit_size = src_ty.intInfo(zcu).bits; - if (abi_size * 8 > bit_size) { - if (dst_mcv.isRegister()) { - try self.truncateRegister(src_ty, dst_mcv.getReg().?); - } else { - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - 
const hi_mcv = dst_mcv.address().offset(@intCast(bit_size / 64 * 8)).deref(); - try self.genSetReg(tmp_reg, .usize, hi_mcv, .{}); - try self.truncateRegister(src_ty, tmp_reg); - try self.genCopy(.usize, hi_mcv, .{ .register = tmp_reg }, .{}); - } - } - }, - else => unreachable, - } - return dst_mcv; -} - fn genUnOpMir(self: *CodeGen, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { const pt = self.pt; const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); @@ -176346,1679 +175208,6 @@ fn genShiftBinOpMir( }); } -fn genBinOp( - self: *CodeGen, - maybe_inst: ?Air.Inst.Index, - air_tag: Air.Inst.Tag, - lhs_air: Air.Inst.Ref, - rhs_air: Air.Inst.Ref, -) !MCValue { - const pt = self.pt; - const zcu = pt.zcu; - const lhs_ty = self.typeOf(lhs_air); - const rhs_ty = self.typeOf(rhs_air); - const abi_size: u32 = @intCast(lhs_ty.abiSize(zcu)); - - if (lhs_ty.isRuntimeFloat()) libcall: { - const float_bits = lhs_ty.floatBits(self.target); - const type_needs_libcall = switch (float_bits) { - 16 => !self.hasFeature(.f16c), - 32, 64 => false, - 80, 128 => true, - else => unreachable, - }; - switch (air_tag) { - .rem, .mod => {}, - else => if (!type_needs_libcall) break :libcall, - } - var sym_buf: ["__mod?f3".len]u8 = undefined; - const sym = switch (air_tag) { - .add, - .sub, - .mul, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => std.fmt.bufPrint(&sym_buf, "__{s}{c}f3", .{ - @tagName(air_tag)[0..3], - floatCompilerRtAbiName(float_bits), - }), - .rem, .mod, .min, .max => std.fmt.bufPrint(&sym_buf, "{s}f{s}{s}", .{ - floatLibcAbiPrefix(lhs_ty), - switch (air_tag) { - .rem, .mod => "mod", - .min => "min", - .max => "max", - else => unreachable, - }, - floatLibcAbiSuffix(lhs_ty), - }), - else => return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }), - } catch unreachable; - const result = try self.genCall(.{ .extern_func = .{ - .return_type = lhs_ty.toIntern(), - .param_types = &.{ lhs_ty.toIntern(), 
rhs_ty.toIntern() }, - .sym = sym, - } }, &.{ lhs_ty, rhs_ty }, &.{ .{ .air_ref = lhs_air }, .{ .air_ref = rhs_air } }, .{}); - return switch (air_tag) { - .mod => result: { - const adjusted: MCValue = if (type_needs_libcall) adjusted: { - var add_sym_buf: ["__add?f3".len]u8 = undefined; - break :adjusted try self.genCall(.{ .extern_func = .{ - .return_type = lhs_ty.toIntern(), - .param_types = &.{ - lhs_ty.toIntern(), - rhs_ty.toIntern(), - }, - .sym = std.fmt.bufPrint(&add_sym_buf, "__add{c}f3", .{ - floatCompilerRtAbiName(float_bits), - }) catch unreachable, - } }, &.{ lhs_ty, rhs_ty }, &.{ result, .{ .air_ref = rhs_air } }, .{}); - } else switch (float_bits) { - 16, 32, 64 => adjusted: { - const dst_reg = switch (result) { - .register => |reg| reg, - else => if (maybe_inst) |inst| - (try self.copyToRegisterWithInstTracking(inst, lhs_ty, result)).register - else - try self.copyToTmpRegister(lhs_ty, result), - }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const rhs_mcv = try self.resolveInst(rhs_air); - const src_mcv: MCValue = if (float_bits == 16) src: { - assert(self.hasFeature(.f16c)); - const tmp_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.sse, - )).to128(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - if (rhs_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_w, .insr }, - dst_reg, - dst_reg, - try rhs_mcv.mem(self, .{ .size = .word }), - .u(1), - ) else try self.asmRegisterRegisterRegister( - .{ .vp_, .unpcklwd }, - dst_reg, - dst_reg, - (if (rhs_mcv.isRegister()) - rhs_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, rhs_mcv)).to128(), - ); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); - break :src .{ .register = tmp_reg }; - } else rhs_mcv; - - if (self.hasFeature(.avx)) { - const mir_tag: Mir.Inst.FixedTag = switch (float_bits) { - 16, 32 => .{ .v_ss, .add }, - 64 => .{ .v_sd, .add }, - else => unreachable, - }; - if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( - mir_tag, - dst_reg, - dst_reg, - try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_reg, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - } else { - const mir_tag: Mir.Inst.FixedTag = switch (float_bits) { - 32 => .{ ._ss, .add }, - 64 => .{ ._sd, .add }, - else => unreachable, - }; - if (src_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_reg, - try src_mcv.mem(self, .{ .size = .fromBitSize(float_bits) }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - } - - if (float_bits == 16) try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - break :adjusted .{ .register = dst_reg }; - }, - 80, 128 => return self.fail("TODO implement genBinOp for {s} of {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }), - else => unreachable, - }; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = lhs_ty.toIntern(), - .param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() }, - .sym = sym, - } }, &.{ lhs_ty, rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }, .{}); - }, - .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{ - .direction = switch (air_tag) { - .div_trunc => .zero, - .div_floor => .down, - else => unreachable, - }, - .precision = .inexact, - }), - else => result, - }; - } - - const sse_op = switch (lhs_ty.zigTypeTag(zcu)) { - else => false, - .float => true, - .vector => switch (lhs_ty.childType(zcu).toIntern()) { - .bool_type, .u1_type => false, - else => true, - }, - }; - if (sse_op and ((lhs_ty.scalarType(zcu).isRuntimeFloat() and - lhs_ty.scalarType(zcu).floatBits(self.target) == 80) or - lhs_ty.abiSize(zcu) > self.vectorSize(.float))) - return self.fail("TODO implement genBinOp for {s} {f}", .{ @tagName(air_tag), lhs_ty.fmt(pt) }); - - const maybe_mask_reg = switch (air_tag) { - else => null, - .rem, .mod => unreachable, - .max, .min => if (lhs_ty.scalarType(zcu).isRuntimeFloat()) registerAlias( - if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: { - try self.register_manager.getKnownReg(.xmm0, null); - break :mask .xmm0; - } else try self.register_manager.allocReg(null, abi.RegisterClass.sse), - abi_size, - ) else null, - }; - const mask_lock = - if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; - defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); - - const ordered_air: 
[2]Air.Inst.Ref = if (lhs_ty.isVector(zcu) and - switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .bool => false, - .int => switch (air_tag) { - .cmp_lt, .cmp_gte => true, - else => false, - }, - .float => switch (air_tag) { - .cmp_gte, .cmp_gt => true, - else => false, - }, - else => unreachable, - }) .{ rhs_air, lhs_air } else .{ lhs_air, rhs_air }; - - if (lhs_ty.isAbiInt(zcu)) for (ordered_air) |op_air| { - switch (try self.resolveInst(op_air)) { - .register => |op_reg| switch (op_reg.class()) { - .sse => try self.register_manager.getReg(op_reg, null), - else => {}, - }, - else => {}, - } - }; - - const lhs_mcv = try self.resolveInst(ordered_air[0]); - var rhs_mcv = try self.resolveInst(ordered_air[1]); - switch (lhs_mcv) { - .immediate => |imm| switch (imm) { - 0 => switch (air_tag) { - .sub, .sub_wrap => return self.genUnOp(maybe_inst, .neg, ordered_air[1]), - else => {}, - }, - else => {}, - }, - else => {}, - } - - const is_commutative = switch (air_tag) { - .add, - .add_wrap, - .mul, - .bool_or, - .bit_or, - .bool_and, - .bit_and, - .xor, - .min, - .max, - .cmp_eq, - .cmp_neq, - => true, - - else => false, - }; - - const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { - .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, - .register_pair => |lhs_regs| locks: { - const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); - break :locks .{ locks[0], locks[1] }; - }, - else => @splat(null), - }; - defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - - const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { - .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, - .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), - else => @splat(null), - }; - defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - var flipped = false; - var copied_to_dst = true; - const dst_mcv: MCValue = dst: { - const 
tracked_inst = switch (air_tag) { - else => maybe_inst, - .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => null, - }; - if (maybe_inst) |inst| { - if ((!sse_op or lhs_mcv.isRegister()) and - self.reuseOperandAdvanced(inst, ordered_air[0], 0, lhs_mcv, tracked_inst)) - break :dst lhs_mcv; - if (is_commutative and (!sse_op or rhs_mcv.isRegister()) and - self.reuseOperandAdvanced(inst, ordered_air[1], 1, rhs_mcv, tracked_inst)) - { - flipped = true; - break :dst rhs_mcv; - } - } - const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, tracked_inst, true); - if (sse_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) - copied_to_dst = false - else - try self.genCopy(lhs_ty, dst_mcv, lhs_mcv, .{}); - rhs_mcv = try self.resolveInst(ordered_air[1]); - break :dst dst_mcv; - }; - const dst_locks: [2]?RegisterLock = switch (dst_mcv) { - .register => |dst_reg| .{ self.register_manager.lockReg(dst_reg), null }, - .register_pair => |dst_regs| self.register_manager.lockRegs(2, dst_regs), - else => @splat(null), - }; - defer for (dst_locks) |dst_lock| if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv; - const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg| - if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and - self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: { - try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv, .{}); - break :src .{ .register = mask_reg }; - } - else - unmat_src_mcv; - const src_locks: [2]?RegisterLock = switch (src_mcv) { - .register => |src_reg| .{ self.register_manager.lockReg(src_reg), null }, - .register_pair => |src_regs| self.register_manager.lockRegs(2, src_regs), - else => @splat(null), - }; - defer for (src_locks) |src_lock| if (src_lock) |lock| self.register_manager.unlockReg(lock); - - if (!sse_op) { - switch (air_tag) { - .add, - .add_wrap, - => try self.genBinOpMir(.{ ._, .add }, lhs_ty, 
dst_mcv, src_mcv), - - .sub, - .sub_wrap, - => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv), - - .ptr_add, - .ptr_sub, - => { - const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); - const tmp_mcv = MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const elem_size = lhs_ty.elemType2(zcu).abiSize(zcu); - try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); - try self.genBinOpMir( - switch (air_tag) { - .ptr_add => .{ ._, .add }, - .ptr_sub => .{ ._, .sub }, - else => unreachable, - }, - lhs_ty, - dst_mcv, - tmp_mcv, - ); - }, - - .bool_or, - .bit_or, - => try self.genBinOpMir(.{ ._, .@"or" }, lhs_ty, dst_mcv, src_mcv), - - .bool_and, - .bit_and, - => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv), - - .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv), - - .min, - .max, - => { - const resolved_src_mcv = switch (src_mcv) { - else => src_mcv, - .air_ref => |src_ref| try self.resolveInst(src_ref), - }; - - if (abi_size > 8) { - const dst_regs = switch (dst_mcv) { - .register_pair => |dst_regs| dst_regs, - else => dst: { - const dst_regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); - const dst_regs_locks = self.register_manager.lockRegsAssumeUnused(2, dst_regs); - defer for (dst_regs_locks) |lock| self.register_manager.unlockReg(lock); - - try self.genCopy(lhs_ty, .{ .register_pair = dst_regs }, dst_mcv, .{}); - break :dst dst_regs; - }, - }; - const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs); - defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock| - self.register_manager.unlockReg(lock); - - const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - const signed = 
lhs_ty.isSignedInt(zcu); - const cc: Condition = switch (air_tag) { - .min => if (signed) .nl else .nb, - .max => if (signed) .nge else .nae, - else => unreachable, - }; - - try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, dst_regs[1]); - if (src_mcv.isBase()) { - try self.asmRegisterMemory( - .{ ._, .cmp }, - dst_regs[0], - try src_mcv.mem(self, .{ .size = .qword }), - ); - try self.asmRegisterMemory( - .{ ._, .sbb }, - tmp_reg, - try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ); - try self.asmCmovccRegisterMemory( - cc, - dst_regs[0], - try src_mcv.mem(self, .{ .size = .qword }), - ); - try self.asmCmovccRegisterMemory( - cc, - dst_regs[1], - try src_mcv.address().offset(8).deref().mem(self, .{ .size = .qword }), - ); - } else { - try self.asmRegisterRegister( - .{ ._, .cmp }, - dst_regs[0], - src_mcv.register_pair[0], - ); - try self.asmRegisterRegister( - .{ ._, .sbb }, - tmp_reg, - src_mcv.register_pair[1], - ); - try self.asmCmovccRegisterRegister(cc, dst_regs[0], src_mcv.register_pair[0]); - try self.asmCmovccRegisterRegister(cc, dst_regs[1], src_mcv.register_pair[1]); - } - try self.genCopy(lhs_ty, dst_mcv, .{ .register_pair = dst_regs }, .{}); - } else { - const mat_src_mcv: MCValue = if (switch (resolved_src_mcv) { - .immediate, - .eflags, - .register_offset, - .lea_frame, - .load_nav, - .lea_nav, - .load_uav, - .lea_uav, - .load_lazy_sym, - .lea_lazy_sym, - .load_extern_func, - .lea_extern_func, - => true, - .memory => |addr| std.math.cast(i32, @as(i64, @bitCast(addr))) == null, - else => false, - .register_pair, - .register_overflow, - => unreachable, - }) - .{ .register = try self.copyToTmpRegister(rhs_ty, resolved_src_mcv) } - else - resolved_src_mcv; - const mat_mcv_lock = switch (mat_src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock); - - try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv); - - 
const int_info = lhs_ty.intInfo(zcu); - const cc: Condition = switch (int_info.signedness) { - .unsigned => switch (air_tag) { - .min => .a, - .max => .b, - else => unreachable, - }, - .signed => switch (air_tag) { - .min => .g, - .max => .l, - else => unreachable, - }, - }; - - const cmov_abi_size = @max(@as(u32, @intCast(lhs_ty.abiSize(zcu))), 2); - const tmp_reg = switch (dst_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(lhs_ty, dst_mcv), - }; - const tmp_lock = self.register_manager.lockReg(tmp_reg); - defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock); - switch (mat_src_mcv) { - .none, - .unreach, - .dead, - .undef, - .immediate, - .eflags, - .register_pair, - .register_triple, - .register_quadruple, - .register_offset, - .register_overflow, - .register_mask, - .indirect_load_frame, - .lea_frame, - .load_nav, - .lea_nav, - .load_uav, - .lea_uav, - .load_lazy_sym, - .lea_lazy_sym, - .load_extern_func, - .lea_extern_func, - .elementwise_args, - .reserved_frame, - .air_ref, - => unreachable, - .register => |src_reg| try self.asmCmovccRegisterRegister( - cc, - registerAlias(tmp_reg, cmov_abi_size), - registerAlias(src_reg, cmov_abi_size), - ), - .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( - cc, - registerAlias(tmp_reg, cmov_abi_size), - switch (mat_src_mcv) { - .memory => |addr| .{ - .base = .{ .reg = .ds }, - .mod = .{ .rm = .{ - .size = .fromSize(cmov_abi_size), - .disp = @intCast(@as(i64, @bitCast(addr))), - } }, - }, - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .mod = .{ .rm = .{ - .size = .fromSize(cmov_abi_size), - .disp = reg_off.off, - } }, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ - .size = .fromSize(cmov_abi_size), - .disp = frame_addr.off, - } }, - }, - else => unreachable, - }, - ), - } - try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }, .{}); - } - }, - - .cmp_eq, .cmp_neq => { - 
assert(lhs_ty.isVector(zcu) and lhs_ty.childType(zcu).toIntern() == .bool_type); - try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv); - switch (air_tag) { - .cmp_eq => try self.genUnOpMir(.{ ._, .not }, lhs_ty, dst_mcv), - .cmp_neq => {}, - else => unreachable, - } - }, - - else => return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }), - } - return dst_mcv; - } - - const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); - const mir_tag = @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { - else => unreachable, - .float => switch (lhs_ty.floatBits(self.target)) { - 16 => { - assert(self.hasFeature(.f16c)); - const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - - const tmp_reg = (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_w, .insr }, - dst_reg, - lhs_reg, - try src_mcv.mem(self, .{ .size = .word }), - .u(1), - ) else try self.asmRegisterRegisterRegister( - .{ .vp_, .unpcklwd }, - dst_reg, - lhs_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); - try self.asmRegisterRegisterRegister( - switch (air_tag) { - .add => .{ .v_ss, .add }, - .sub => .{ .v_ss, .sub }, - .mul => .{ .v_ss, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, - .max => .{ .v_ss, .max }, - .min => .{ .v_ss, .min }, - else => unreachable, - }, - dst_reg, - dst_reg, - tmp_reg, - ); - switch (air_tag) { - .div_trunc, .div_floor => try self.asmRegisterRegisterRegisterImmediate( - .{ .v_ss, .round }, - dst_reg, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{ - .direction = switch (air_tag) { - .div_trunc => .zero, - .div_floor => .down, - else => unreachable, - }, - .precision = .inexact, - }), - ), - else => {}, - } - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - return dst_mcv; - }, - 32 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, - .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, - .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, - .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, - .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, - else => unreachable, - }, - 64 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, - .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, - .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, - .max => if 
(self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, - .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, - else => unreachable, - }, - 80, 128 => null, - else => unreachable, - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - else => null, - .int => switch (lhs_ty.childType(zcu).intInfo(zcu).bits) { - 8 => switch (lhs_ty.vectorLen(zcu)) { - 1...16 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add }, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, - .bit_and => if (self.hasFeature(.avx)) - .{ .vp_, .@"and" } - else - .{ .p_, .@"and" }, - .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, - .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, - .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_b, .mins } - else if (self.hasFeature(.sse4_1)) - .{ .p_b, .mins } - else - null, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_b, .minu } - else if (self.hasFeature(.sse4_1)) - .{ .p_b, .minu } - else - null, - }, - .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_b, .maxs } - else if (self.hasFeature(.sse4_1)) - .{ .p_b, .maxs } - else - null, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_b, .maxu } - else if (self.hasFeature(.sse4_1)) - .{ .p_b, .maxu } - else - null, - }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_b, .cmpgt } - else - .{ .p_b, .cmpgt }, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else .{ .p_b, .cmpeq }, - else => null, - }, - 17...32 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null, - .sub, - 
.sub_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null, - .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, - .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, - .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, - .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null, - .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null, - }, - .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null, - .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null, - }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) .{ .vp_b, .cmpgt } else null, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .vp_b, .cmpeq } else null, - else => null, - }, - else => null, - }, - 16 => switch (lhs_ty.vectorLen(zcu)) { - 1...8 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add }, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub }, - .mul, - .mul_wrap, - => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull }, - .bit_and => if (self.hasFeature(.avx)) - .{ .vp_, .@"and" } - else - .{ .p_, .@"and" }, - .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, - .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, - .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_w, .mins } - else - .{ .p_w, .mins }, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_w, .minu } - else - .{ .p_w, .minu }, - }, - .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_w, 
.maxs } - else - .{ .p_w, .maxs }, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_w, .maxu } - else - .{ .p_w, .maxu }, - }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_w, .cmpgt } - else - .{ .p_w, .cmpgt }, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else .{ .p_w, .cmpeq }, - else => null, - }, - 9...16 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null, - .mul, - .mul_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null, - .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, - .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, - .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, - .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null, - .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null, - }, - .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null, - .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null, - }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) .{ .vp_w, .cmpgt } else null, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .vp_w, .cmpeq } else null, - else => null, - }, - else => null, - }, - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1...4 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add }, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub }, - .mul, - .mul_wrap, - => if 
(self.hasFeature(.avx)) - .{ .vp_d, .mull } - else if (self.hasFeature(.sse4_1)) - .{ .p_d, .mull } - else - null, - .bit_and => if (self.hasFeature(.avx)) - .{ .vp_, .@"and" } - else - .{ .p_, .@"and" }, - .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, - .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, - .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_d, .mins } - else if (self.hasFeature(.sse4_1)) - .{ .p_d, .mins } - else - null, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_d, .minu } - else if (self.hasFeature(.sse4_1)) - .{ .p_d, .minu } - else - null, - }, - .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_d, .maxs } - else if (self.hasFeature(.sse4_1)) - .{ .p_d, .maxs } - else - null, - .unsigned => if (self.hasFeature(.avx)) - .{ .vp_d, .maxu } - else if (self.hasFeature(.sse4_1)) - .{ .p_d, .maxu } - else - null, - }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_d, .cmpgt } - else - .{ .p_d, .cmpgt }, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else .{ .p_d, .cmpeq }, - else => null, - }, - 5...8 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null, - .mul, - .mul_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null, - .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, - .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, - .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, - .min => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null, - 
.unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null, - }, - .max => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null, - .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null, - }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null, - else => null, - }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1...2 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add }, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, - .bit_and => if (self.hasFeature(.avx)) - .{ .vp_, .@"and" } - else - .{ .p_, .@"and" }, - .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, - .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, - .cmp_lt, - .cmp_lte, - .cmp_gte, - .cmp_gt, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) - .{ .vp_q, .cmpgt } - else if (self.hasFeature(.sse4_2)) - .{ .p_q, .cmpgt } - else - null, - .unsigned => null, - }, - .cmp_eq, - .cmp_neq, - => if (self.hasFeature(.avx)) - .{ .vp_q, .cmpeq } - else if (self.hasFeature(.sse4_1)) - .{ .p_q, .cmpeq } - else - null, - else => null, - }, - 3...4 => switch (air_tag) { - .add, - .add_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null, - .sub, - .sub_wrap, - => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null, - .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, - .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, - .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, - .cmp_eq, - .cmp_neq, - => if 
(self.hasFeature(.avx)) .{ .vp_d, .cmpeq } else null, - .cmp_lt, - .cmp_lte, - .cmp_gt, - .cmp_gte, - => switch (lhs_ty.childType(zcu).intInfo(zcu).signedness) { - .signed => if (self.hasFeature(.avx)) .{ .vp_d, .cmpgt } else null, - .unsigned => null, - }, - else => null, - }, - else => null, - }, - else => null, - }, - .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) { - 16 => tag: { - assert(self.hasFeature(.f16c)); - const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - switch (lhs_ty.vectorLen(zcu)) { - 1 => { - const tmp_reg = - (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_w, .insr }, - dst_reg, - lhs_reg, - try src_mcv.mem(self, .{ .size = .word }), - .u(1), - ) else try self.asmRegisterRegisterRegister( - .{ .vp_, .unpcklwd }, - dst_reg, - lhs_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); - try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); - try self.asmRegisterRegisterRegister( - switch (air_tag) { - .add => .{ .v_ss, .add }, - .sub => .{ .v_ss, .sub }, - .mul => .{ .v_ss, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, - .max => .{ .v_ss, .max }, - .min => .{ .v_ss, .max }, - else => unreachable, - }, - dst_reg, - dst_reg, - tmp_reg, - ); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - return dst_mcv; - }, - 2 => { - const tmp_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.sse, - )).to128(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_d, .insr }, - dst_reg, - lhs_reg, - try src_mcv.mem(self, .{ .size = .dword }), - .u(1), - ) else try self.asmRegisterRegisterRegister( - .{ .v_ps, .unpckl }, - dst_reg, - lhs_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); - try self.asmRegisterRegisterRegister( - .{ .v_ps, .movhl }, - tmp_reg, - dst_reg, - dst_reg, - ); - try self.asmRegisterRegisterRegister( - switch (air_tag) { - .add => .{ .v_ps, .add }, - .sub => .{ .v_ps, .sub }, - .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, - .max => .{ .v_ps, .max }, - .min => .{ .v_ps, .max }, - else => unreachable, - }, - dst_reg, - dst_reg, - tmp_reg, - ); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - return dst_mcv; - }, - 3...4 => { - const tmp_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.sse, - )).to128(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, lhs_reg); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_ps, .cvtph2 }, - tmp_reg, - try src_mcv.mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - .{ .v_ps, .cvtph2 }, - tmp_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - try self.asmRegisterRegisterRegister( - switch (air_tag) { - .add => .{ .v_ps, .add }, - .sub => .{ .v_ps, .sub }, - .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, - .max => .{ .v_ps, .max }, - .min => .{ .v_ps, .max }, - else => unreachable, - }, - dst_reg, - dst_reg, - tmp_reg, - ); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg, - bits.RoundMode.imm(.{}), - ); - return dst_mcv; - }, - 5...8 => { - const tmp_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.sse, - )).to256(); - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); - - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), lhs_reg); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_ps, .cvtph2 }, - tmp_reg, - try src_mcv.mem(self, .{ .size = .xword }), - ) else try self.asmRegisterRegister( - .{ .v_ps, .cvtph2 }, - tmp_reg, - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), - ); - try self.asmRegisterRegisterRegister( - switch (air_tag) { - .add => .{ .v_ps, .add }, - .sub => .{ .v_ps, .sub }, - .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, - .max => .{ .v_ps, .max }, - .min => .{ .v_ps, .max }, - else => unreachable, - }, - dst_reg.to256(), - dst_reg.to256(), - tmp_reg, - ); - try self.asmRegisterRegisterImmediate( - .{ .v_, .cvtps2ph }, - dst_reg, - dst_reg.to256(), - bits.RoundMode.imm(.{}), - ); - return dst_mcv; - }, - else => break :tag null, - } - }, - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, - .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, - .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, - .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, - .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, - .cmp_lt, - .cmp_lte, - .cmp_eq, - .cmp_gte, - .cmp_gt, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .v_ss, .cmp } else .{ ._ss, .cmp }, - else => unreachable, - }, - 2...4 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add }, - .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub }, - .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul }, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, - .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, - .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, - .cmp_lt, - .cmp_lte, - .cmp_eq, - .cmp_gte, - .cmp_gt, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .v_ps, 
.cmp } else .{ ._ps, .cmp }, - else => unreachable, - }, - 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .{ .v_ps, .add }, - .sub => .{ .v_ps, .sub }, - .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, - .max => .{ .v_ps, .max }, - .min => .{ .v_ps, .min }, - .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp }, - else => unreachable, - } else null, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, - .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, - .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, - .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, - .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, - .cmp_lt, - .cmp_lte, - .cmp_eq, - .cmp_gte, - .cmp_gt, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .v_sd, .cmp } else .{ ._sd, .cmp }, - else => unreachable, - }, - 2 => switch (air_tag) { - .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add }, - .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub }, - .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul }, - .div_float, - .div_trunc, - .div_floor, - .div_exact, - => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, - .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, - .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, - .cmp_lt, - .cmp_lte, - .cmp_eq, - .cmp_gte, - .cmp_gt, - .cmp_neq, - => if (self.hasFeature(.avx)) .{ .v_pd, .cmp } else .{ ._pd, .cmp }, - else => unreachable, - }, - 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { - .add => .{ .v_pd, .add }, - .sub => 
.{ .v_pd, .sub }, - .mul => .{ .v_pd, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, - .max => .{ .v_pd, .max }, - .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp }, - .min => .{ .v_pd, .min }, - else => unreachable, - } else null, - else => null, - }, - 80, 128 => null, - else => unreachable, - }, - }, - }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }); - - const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias( - if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?, - abi_size, - ) else null; - const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null; - defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock); - - switch (mir_tag[1]) { - else => if (self.hasFeature(.avx)) { - const lhs_reg = if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - if (src_mcv.isBase()) try self.asmRegisterRegisterMemory( - mir_tag, - dst_reg, - lhs_reg, - try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { - else => .fromSize(abi_size), - .vector => dst_reg.size(), - } }), - ) else try self.asmRegisterRegisterRegister( - mir_tag, - dst_reg, - lhs_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), - ); - } else { - assert(copied_to_dst); - if (src_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - dst_reg, - try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { - else => .fromSize(abi_size), - .vector => dst_reg.size(), - } }), - ) else try self.asmRegisterRegister( - mir_tag, - dst_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), - ); - }, - .cmp => { - const imm: Immediate = .u(switch (air_tag) { - .cmp_eq => 0, - .cmp_lt, .cmp_gt => 1, - .cmp_lte, .cmp_gte => 2, - .cmp_neq => 4, - else => unreachable, - }); - if (self.hasFeature(.avx)) { - const lhs_reg = - if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); - if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - mir_tag, - dst_reg, - lhs_reg, - try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { - else => .fromSize(abi_size), - .vector => dst_reg.size(), - } }), - imm, - ) else try self.asmRegisterRegisterRegisterImmediate( - mir_tag, - dst_reg, - lhs_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), - imm, - ); - } else { - assert(copied_to_dst); - if (src_mcv.isBase()) try self.asmRegisterMemoryImmediate( - mir_tag, - dst_reg, - try src_mcv.mem(self, .{ .size = switch (lhs_ty.zigTypeTag(zcu)) { - else => .fromSize(abi_size), - .vector => dst_reg.size(), - } }), - imm, - ) else try self.asmRegisterRegisterImmediate( - mir_tag, - dst_reg, - registerAlias(if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), - imm, - ); - } - }, - } - - switch (air_tag) { - .bit_and, .bit_or, .xor => {}, - .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) { - const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size); - - try self.asmRegisterRegisterRegisterImmediate( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { - .float => switch (lhs_ty.floatBits(self.target)) { - 32 => .{ .v_ss, .cmp }, - 64 => .{ .v_sd, .cmp }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) { - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1 => .{ .v_ss, .cmp }, - 2...8 => .{ .v_ps, .cmp }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1 => .{ .v_sd, .cmp }, - 2...4 => .{ .v_pd, .cmp }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }), - mask_reg, - rhs_copy_reg, - rhs_copy_reg, - bits.VexFloatPredicate.imm(.unord), - ); - try self.asmRegisterRegisterRegisterRegister( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { - .float => switch (lhs_ty.floatBits(self.target)) { - 32 => .{ .v_ps, .blendv }, - 64 => .{ .v_pd, .blendv }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) { - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1...8 => .{ .v_ps, .blendv }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1...4 => .{ .v_pd, .blendv }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{ - 
@tagName(air_tag), lhs_ty.fmt(pt), - }), - dst_reg, - dst_reg, - lhs_copy_reg.?, - mask_reg, - ); - } else { - const has_blend = self.hasFeature(.sse4_1); - try self.asmRegisterRegisterImmediate( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { - .float => switch (lhs_ty.floatBits(self.target)) { - 32 => .{ ._ss, .cmp }, - 64 => .{ ._sd, .cmp }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) { - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1 => .{ ._ss, .cmp }, - 2...4 => .{ ._ps, .cmp }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1 => .{ ._sd, .cmp }, - 2 => .{ ._pd, .cmp }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }), - mask_reg, - mask_reg, - bits.SseFloatPredicate.imm(if (has_blend) .unord else .ord), - ); - if (has_blend) try self.asmRegisterRegisterRegister( - @as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(zcu)) { - .float => switch (lhs_ty.floatBits(self.target)) { - 32 => .{ ._ps, .blendv }, - 64 => .{ ._pd, .blendv }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) { - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1...4 => .{ ._ps, .blendv }, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1...2 => .{ ._pd, .blendv }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }), - dst_reg, - lhs_copy_reg.?, - mask_reg, - ) else { - const mir_fixes = @as(?Mir.Inst.Fixes, switch 
(lhs_ty.zigTypeTag(zcu)) { - .float => switch (lhs_ty.floatBits(self.target)) { - 32 => ._ps, - 64 => ._pd, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (lhs_ty.childType(zcu).floatBits(self.target)) { - 32 => switch (lhs_ty.vectorLen(zcu)) { - 1...4 => ._ps, - else => null, - }, - 64 => switch (lhs_ty.vectorLen(zcu)) { - 1...2 => ._pd, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - }) orelse return self.fail("TODO implement genBinOp for {s} {f}", .{ - @tagName(air_tag), lhs_ty.fmt(pt), - }); - try self.asmRegisterRegister(.{ mir_fixes, .@"and" }, dst_reg, mask_reg); - try self.asmRegisterRegister(.{ mir_fixes, .andn }, mask_reg, lhs_copy_reg.?); - try self.asmRegisterRegister(.{ mir_fixes, .@"or" }, dst_reg, mask_reg); - } - }, - .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => { - switch (lhs_ty.childType(zcu).zigTypeTag(zcu)) { - .int => switch (air_tag) { - .cmp_lt, - .cmp_eq, - .cmp_gt, - => {}, - .cmp_lte, - .cmp_gte, - .cmp_neq, - => { - const unsigned_ty = try lhs_ty.toUnsigned(pt); - const not_mcv = try self.lowerValue(try unsigned_ty.maxInt(pt, unsigned_ty)); - const not_mem: Memory = if (not_mcv.isBase()) - try not_mcv.mem(self, .{ .size = .fromSize(abi_size) }) - else - .{ .base = .{ - .reg = try self.copyToTmpRegister(.usize, not_mcv.address()), - }, .mod = .{ .rm = .{ .size = .fromSize(abi_size) } } }; - switch (mir_tag[0]) { - .vp_b, .vp_d, .vp_q, .vp_w => try self.asmRegisterRegisterMemory( - .{ .vp_, .xor }, - dst_reg, - dst_reg, - not_mem, - ), - .p_b, .p_d, .p_q, .p_w => try self.asmRegisterMemory( - .{ .p_, .xor }, - dst_reg, - not_mem, - ), - else => unreachable, - } - }, - else => unreachable, - }, - .float => {}, - else => unreachable, - } - - const gp_reg = try self.register_manager.allocReg(maybe_inst, abi.RegisterClass.gp); - const gp_lock = 
self.register_manager.lockRegAssumeUnused(gp_reg); - defer self.register_manager.unlockReg(gp_lock); - - try self.asmRegisterRegister(switch (mir_tag[0]) { - ._pd, ._sd, .p_q => .{ ._pd, .movmsk }, - ._ps, ._ss, .p_d => .{ ._ps, .movmsk }, - .p_b => .{ .p_b, .movmsk }, - .p_w => movmsk: { - try self.asmRegisterRegister(.{ .p_b, .ackssw }, dst_reg, dst_reg); - break :movmsk .{ .p_b, .movmsk }; - }, - .v_pd, .v_sd, .vp_q => .{ .v_pd, .movmsk }, - .v_ps, .v_ss, .vp_d => .{ .v_ps, .movmsk }, - .vp_b => .{ .vp_b, .movmsk }, - .vp_w => movmsk: { - try self.asmRegisterRegisterRegister( - .{ .vp_b, .ackssw }, - dst_reg, - dst_reg, - dst_reg, - ); - break :movmsk .{ .vp_b, .movmsk }; - }, - else => unreachable, - }, gp_reg.to32(), dst_reg); - return .{ .register = gp_reg }; - }, - else => unreachable, - } - - return dst_mcv; -} - fn genBinOpMir( self: *CodeGen, mir_tag: Mir.Inst.FixedTag, @@ -178472,168 +175661,6 @@ fn genBinOpMir( } } -/// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. -/// Does not support byte-size operands. 
-fn genIntMulComplexOpMir(self: *CodeGen, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { - const pt = self.pt; - const abi_size: u32 = @intCast(dst_ty.abiSize(pt.zcu)); - try self.spillEflagsIfOccupied(); - switch (dst_mcv) { - .none, - .unreach, - .dead, - .undef, - .immediate, - .eflags, - .register_offset, - .register_overflow, - .register_mask, - .indirect_load_frame, - .lea_frame, - .lea_nav, - .lea_uav, - .lea_lazy_sym, - .lea_extern_func, - .elementwise_args, - .reserved_frame, - .air_ref, - => unreachable, // unmodifiable destination - .register => |dst_reg| { - const alias_size = switch (abi_size) { - 1 => 4, - else => abi_size, - }; - const dst_alias = registerAlias(dst_reg, alias_size); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - switch (abi_size) { - 1 => try self.asmRegisterRegister(.{ ._, .movzx }, dst_reg.to32(), dst_reg.to8()), - else => {}, - } - - const resolved_src_mcv = switch (src_mcv) { - else => src_mcv, - .air_ref => |src_ref| try self.resolveInst(src_ref), - }; - switch (resolved_src_mcv) { - .none, - .unreach, - .dead, - .undef, - .register_pair, - .register_triple, - .register_quadruple, - .register_overflow, - .register_mask, - .indirect_load_frame, - .elementwise_args, - .reserved_frame, - .air_ref, - => unreachable, - .register => |src_reg| { - switch (abi_size) { - 1 => try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()), - else => {}, - } - try self.asmRegisterRegister( - .{ .i_, .mul }, - dst_alias, - registerAlias(src_reg, alias_size), - ); - }, - .immediate => |imm| { - if (std.math.cast(i32, @as(i64, @bitCast(imm)))) |small| { - try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst_alias, dst_alias, .s(small)); - } else { - const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); - return self.genIntMulComplexOpMir(dst_ty, dst_mcv, MCValue{ .register = src_reg }); - } - }, - 
.register_offset, - .eflags, - .lea_frame, - .load_nav, - .lea_nav, - .load_uav, - .lea_uav, - .load_lazy_sym, - .lea_lazy_sym, - .load_extern_func, - .lea_extern_func, - => { - const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); - switch (abi_size) { - 1 => try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()), - else => {}, - } - try self.asmRegisterRegister(.{ .i_, .mul }, dst_alias, registerAlias(src_reg, alias_size)); - }, - .memory, .indirect, .load_frame => switch (abi_size) { - 1 => { - const src_reg = try self.copyToTmpRegister(dst_ty, resolved_src_mcv); - try self.asmRegisterRegister(.{ ._, .movzx }, src_reg.to32(), src_reg.to8()); - try self.asmRegisterRegister(.{ .i_, .mul }, dst_alias, registerAlias(src_reg, alias_size)); - }, - else => try self.asmRegisterMemory( - .{ .i_, .mul }, - dst_alias, - switch (resolved_src_mcv) { - .memory => |addr| .{ - .base = .{ .reg = .ds }, - .mod = .{ .rm = .{ - .size = .fromSize(abi_size), - .disp = std.math.cast(i32, @as(i64, @bitCast(addr))) orelse - return self.asmRegisterRegister( - .{ .i_, .mul }, - dst_alias, - registerAlias( - try self.copyToTmpRegister(dst_ty, resolved_src_mcv), - abi_size, - ), - ), - } }, - }, - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .mod = .{ .rm = .{ - .size = .fromSize(abi_size), - .disp = reg_off.off, - } }, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .mod = .{ .rm = .{ - .size = .fromSize(abi_size), - .disp = frame_addr.off, - } }, - }, - else => unreachable, - }, - ), - }, - } - }, - .register_pair, .register_triple, .register_quadruple => unreachable, // unimplemented - .memory, - .indirect, - .load_frame, - .load_nav, - .load_uav, - .load_lazy_sym, - .load_extern_func, - => { - const tmp_reg = try self.copyToTmpRegister(dst_ty, dst_mcv); - const tmp_mcv = MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer 
self.register_manager.unlockReg(tmp_lock); - - try self.genIntMulComplexOpMir(dst_ty, tmp_mcv, src_mcv); - try self.genCopy(dst_ty, dst_mcv, tmp_mcv, .{}); - }, - } -} - fn airArg(self: *CodeGen, inst: Air.Inst.Index) !void { const zcu = self.pt.zcu; const arg_index = for (self.args, 0..) |arg, arg_index| { @@ -179247,475 +176274,6 @@ fn airRetLoad(self: *CodeGen, inst: Air.Inst.Index) !void { try self.epilogue_relocs.append(self.gpa, jmp_reloc); } -fn airCmp(self: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) !void { - const pt = self.pt; - const zcu = pt.zcu; - const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; - var ty = self.typeOf(bin_op.lhs); - var null_compare: ?Mir.Inst.Index = null; - - const result: Condition = result: { - try self.spillEflagsIfOccupied(); - - const lhs_mcv = try self.resolveInst(bin_op.lhs); - const lhs_locks: [2]?RegisterLock = switch (lhs_mcv) { - .register => |lhs_reg| .{ self.register_manager.lockRegAssumeUnused(lhs_reg), null }, - .register_pair => |lhs_regs| locks: { - const locks = self.register_manager.lockRegsAssumeUnused(2, lhs_regs); - break :locks .{ locks[0], locks[1] }; - }, - .register_offset => |lhs_ro| .{ - self.register_manager.lockRegAssumeUnused(lhs_ro.reg), - null, - }, - else => @splat(null), - }; - defer for (lhs_locks) |lhs_lock| if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - - const rhs_mcv = try self.resolveInst(bin_op.rhs); - const rhs_locks: [2]?RegisterLock = switch (rhs_mcv) { - .register => |rhs_reg| .{ self.register_manager.lockReg(rhs_reg), null }, - .register_pair => |rhs_regs| self.register_manager.lockRegs(2, rhs_regs), - .register_offset => |rhs_ro| .{ self.register_manager.lockReg(rhs_ro.reg), null }, - else => @splat(null), - }; - defer for (rhs_locks) |rhs_lock| if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - switch (ty.zigTypeTag(zcu)) { - .float => { - const float_bits = ty.floatBits(self.target); - if (!switch (float_bits) 
{ - 16 => self.hasFeature(.f16c), - 32 => self.hasFeature(.sse), - 64 => self.hasFeature(.sse2), - 80, 128 => false, - else => unreachable, - }) { - var sym_buf: ["__???f2".len]u8 = undefined; - const ret = try self.genCall(.{ .extern_func = .{ - .return_type = .i32_type, - .param_types = &.{ ty.toIntern(), ty.toIntern() }, - .sym = std.fmt.bufPrint(&sym_buf, "__{s}{c}f2", .{ - switch (op) { - .eq => "eq", - .neq => "ne", - .lt => "lt", - .lte => "le", - .gt => "gt", - .gte => "ge", - }, - floatCompilerRtAbiName(float_bits), - }) catch unreachable, - } }, &.{ ty, ty }, &.{ .{ .air_ref = bin_op.lhs }, .{ .air_ref = bin_op.rhs } }, .{}); - try self.genBinOpMir(.{ ._, .@"test" }, .i32, ret, ret); - break :result switch (op) { - .eq => .e, - .neq => .ne, - .lt => .l, - .lte => .le, - .gt => .g, - .gte => .ge, - }; - } - }, - .optional => if (!ty.optionalReprIsPayload(zcu)) { - const opt_ty = ty; - const opt_abi_size: u31 = @intCast(opt_ty.abiSize(zcu)); - ty = opt_ty.optionalChild(zcu); - const payload_abi_size: u31 = @intCast(ty.abiSize(zcu)); - - const temp_lhs_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp); - const temp_lhs_lock = self.register_manager.lockRegAssumeUnused(temp_lhs_reg); - defer self.register_manager.unlockReg(temp_lhs_lock); - - if (lhs_mcv.isBase()) try self.asmRegisterMemory( - .{ ._, .mov }, - temp_lhs_reg.to8(), - try lhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }), - ) else { - try self.genSetReg(temp_lhs_reg, opt_ty, lhs_mcv, .{}); - try self.asmRegisterImmediate( - .{ ._r, .sh }, - registerAlias(temp_lhs_reg, opt_abi_size), - .u(payload_abi_size * 8), - ); - } - - const payload_compare = payload_compare: { - if (rhs_mcv.isBase()) { - const rhs_mem = - try rhs_mcv.address().offset(payload_abi_size).deref().mem(self, .{ .size = .byte }); - try self.asmMemoryRegister(.{ ._, .@"test" }, rhs_mem, temp_lhs_reg.to8()); - const payload_compare = try self.asmJccReloc(.nz, undefined); - try 
self.asmRegisterMemory(.{ ._, .cmp }, temp_lhs_reg.to8(), rhs_mem); - break :payload_compare payload_compare; - } - - const temp_rhs_reg = try self.copyToTmpRegister(opt_ty, rhs_mcv); - const temp_rhs_lock = self.register_manager.lockRegAssumeUnused(temp_rhs_reg); - defer self.register_manager.unlockReg(temp_rhs_lock); - - try self.asmRegisterImmediate( - .{ ._r, .sh }, - registerAlias(temp_rhs_reg, opt_abi_size), - .u(payload_abi_size * 8), - ); - try self.asmRegisterRegister( - .{ ._, .@"test" }, - temp_lhs_reg.to8(), - temp_rhs_reg.to8(), - ); - const payload_compare = try self.asmJccReloc(.nz, undefined); - try self.asmRegisterRegister( - .{ ._, .cmp }, - temp_lhs_reg.to8(), - temp_rhs_reg.to8(), - ); - break :payload_compare payload_compare; - }; - null_compare = try self.asmJmpReloc(undefined); - self.performReloc(payload_compare); - }, - else => {}, - } - - switch (ty.zigTypeTag(zcu)) { - else => { - const abi_size: u16 = @intCast(ty.abiSize(zcu)); - const may_flip: enum { - may_flip, - must_flip, - must_not_flip, - } = if (abi_size > 8) switch (op) { - .lt, .gte => .must_not_flip, - .lte, .gt => .must_flip, - .eq, .neq => .may_flip, - } else .may_flip; - - const flipped = switch (may_flip) { - .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isBase(), - .must_flip => true, - .must_not_flip => false, - }; - const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv; - const dst_mcv = if (unmat_dst_mcv.isRegister() or - (abi_size <= 8 and unmat_dst_mcv.isBase())) unmat_dst_mcv else dst: { - const dst_mcv = try self.allocTempRegOrMem(ty, true); - try self.genCopy(ty, dst_mcv, unmat_dst_mcv, .{}); - break :dst dst_mcv; - }; - const dst_lock = - if (dst_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - - const src_mcv = try self.resolveInst(if (flipped) bin_op.lhs else bin_op.rhs); - const src_lock = - if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; - 
defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - - break :result .fromCompareOperator( - if (ty.isAbiInt(zcu)) ty.intInfo(zcu).signedness else .unsigned, - result_op: { - const flipped_op = if (flipped) op.reverse() else op; - if (abi_size > 8) switch (flipped_op) { - .lt, .gte => {}, - .lte, .gt => unreachable, - .eq, .neq => { - const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock }; - - const resolved_dst_mcv = switch (dst_mcv) { - else => dst_mcv, - .air_ref => |dst_ref| try self.resolveInst(dst_ref), - }; - const dst_info: OpInfo = switch (resolved_dst_mcv) { - .none, - .unreach, - .dead, - .undef, - .immediate, - .eflags, - .register_offset, - .register_overflow, - .register_mask, - .indirect, - .lea_frame, - .lea_nav, - .lea_uav, - .lea_lazy_sym, - .lea_extern_func, - .elementwise_args, - .reserved_frame, - .air_ref, - => unreachable, - .register, - .register_pair, - .register_triple, - .register_quadruple, - .load_frame, - => null, - .memory, - .load_nav, - .load_uav, - .load_lazy_sym, - .load_extern_func, - => dst: { - switch (resolved_dst_mcv) { - .memory => |addr| if (std.math.cast( - i32, - @as(i64, @bitCast(addr)), - ) != null and std.math.cast( - i32, - @as(i64, @bitCast(addr)) + abi_size - 8, - ) != null) break :dst null, - .load_nav, .load_uav, .load_lazy_sym, .load_extern_func => {}, - else => unreachable, - } - - const dst_addr_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.gp, - )).to64(); - const dst_addr_lock = - self.register_manager.lockRegAssumeUnused(dst_addr_reg); - errdefer self.register_manager.unlockReg(dst_addr_lock); - - try self.genSetReg(dst_addr_reg, .usize, resolved_dst_mcv.address(), .{}); - break :dst .{ - .addr_reg = dst_addr_reg, - .addr_lock = dst_addr_lock, - }; - }, - }; - defer if (dst_info) |info| self.register_manager.unlockReg(info.addr_lock); - - const resolved_src_mcv = switch (src_mcv) { - else => src_mcv, - .air_ref => |src_ref| try 
self.resolveInst(src_ref), - }; - const src_info: OpInfo = switch (resolved_src_mcv) { - .none, - .unreach, - .dead, - .undef, - .immediate, - .eflags, - .register, - .register_offset, - .register_overflow, - .register_mask, - .indirect, - .lea_frame, - .lea_nav, - .lea_uav, - .lea_lazy_sym, - .lea_extern_func, - .elementwise_args, - .reserved_frame, - .air_ref, - => unreachable, - .register_pair, - .register_triple, - .register_quadruple, - .load_frame, - => null, - .memory, - .load_nav, - .load_uav, - .load_lazy_sym, - .load_extern_func, - => src: { - switch (resolved_src_mcv) { - .memory => |addr| if (std.math.cast( - i32, - @as(i64, @bitCast(addr)), - ) != null and std.math.cast( - i32, - @as(i64, @bitCast(addr)) + abi_size - 8, - ) != null) break :src null, - .load_nav, .load_uav, .load_lazy_sym, .load_extern_func => {}, - else => unreachable, - } - - const src_addr_reg = (try self.register_manager.allocReg( - null, - abi.RegisterClass.gp, - )).to64(); - const src_addr_lock = - self.register_manager.lockRegAssumeUnused(src_addr_reg); - errdefer self.register_manager.unlockReg(src_addr_lock); - - try self.genSetReg(src_addr_reg, .usize, resolved_src_mcv.address(), .{}); - break :src .{ - .addr_reg = src_addr_reg, - .addr_lock = src_addr_lock, - }; - }, - }; - defer if (src_info) |info| - self.register_manager.unlockReg(info.addr_lock); - - const regs = try self.register_manager.allocRegs(2, @splat(null), abi.RegisterClass.gp); - const acc_reg = regs[0].to64(); - const locks = self.register_manager.lockRegsAssumeUnused(2, regs); - defer for (locks) |lock| self.register_manager.unlockReg(lock); - - const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable; - var limb_i: u16 = 0; - while (limb_i < limbs_len) : (limb_i += 1) { - const off = limb_i * 8; - const tmp_reg = regs[@min(limb_i, 1)].to64(); - - try self.genSetReg(tmp_reg, .usize, if (dst_info) |info| .{ - .indirect = .{ .reg = info.addr_reg, .off = off }, - } else switch (resolved_dst_mcv) { - 
inline .register_pair, - .register_triple, - .register_quadruple, - => |dst_regs| .{ .register = dst_regs[limb_i] }, - .memory => |dst_addr| .{ - .memory = @bitCast(@as(i64, @bitCast(dst_addr)) + off), - }, - .indirect => |reg_off| .{ .indirect = .{ - .reg = reg_off.reg, - .off = reg_off.off + off, - } }, - .load_frame => |frame_addr| .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + off, - } }, - else => unreachable, - }, .{}); - - try self.genBinOpMir( - .{ ._, .xor }, - .usize, - .{ .register = tmp_reg }, - if (src_info) |info| .{ - .indirect = .{ .reg = info.addr_reg, .off = off }, - } else switch (resolved_src_mcv) { - inline .register_pair, - .register_triple, - .register_quadruple, - => |src_regs| .{ .register = src_regs[limb_i] }, - .memory => |src_addr| .{ - .memory = @bitCast(@as(i64, @bitCast(src_addr)) + off), - }, - .indirect => |reg_off| .{ .indirect = .{ - .reg = reg_off.reg, - .off = reg_off.off + off, - } }, - .load_frame => |frame_addr| .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + off, - } }, - else => unreachable, - }, - ); - - if (limb_i > 0) - try self.asmRegisterRegister(.{ ._, .@"or" }, acc_reg, tmp_reg); - } - assert(limbs_len >= 2); // use flags from or - break :result_op flipped_op; - }, - }; - try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv); - break :result_op flipped_op; - }, - ); - }, - .float => { - const flipped = switch (op) { - .lt, .lte => true, - .eq, .gte, .gt, .neq => false, - }; - - const dst_mcv = if (flipped) rhs_mcv else lhs_mcv; - const dst_reg = if (dst_mcv.isRegister()) - dst_mcv.getReg().? 
- else - try self.copyToTmpRegister(ty, dst_mcv); - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - - switch (ty.floatBits(self.target)) { - 16 => { - assert(self.hasFeature(.f16c)); - const tmp1_reg = - (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); - const tmp1_mcv = MCValue{ .register = tmp1_reg }; - const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg); - defer self.register_manager.unlockReg(tmp1_lock); - - const tmp2_reg = - (try self.register_manager.allocReg(null, abi.RegisterClass.sse)).to128(); - const tmp2_mcv = MCValue{ .register = tmp2_reg }; - const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg); - defer self.register_manager.unlockReg(tmp2_lock); - - if (src_mcv.isBase()) try self.asmRegisterRegisterMemoryImmediate( - .{ .vp_w, .insr }, - tmp1_reg, - dst_reg.to128(), - try src_mcv.mem(self, .{ .size = .word }), - .u(1), - ) else try self.asmRegisterRegisterRegister( - .{ .vp_, .unpcklwd }, - tmp1_reg, - dst_reg.to128(), - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(ty, src_mcv)).to128(), - ); - try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg); - try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); - try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); - }, - 32 => try self.genBinOpMir( - .{ ._ss, .ucomi }, - ty, - .{ .register = dst_reg }, - src_mcv, - ), - 64 => try self.genBinOpMir( - .{ ._sd, .ucomi }, - ty, - .{ .register = dst_reg }, - src_mcv, - ), - else => unreachable, - } - - break :result switch (if (flipped) op.reverse() else op) { - .lt, .lte => unreachable, // required to have been canonicalized to gt(e) - .gt => .a, - .gte => .ae, - .eq => .z_and_np, - .neq => .nz_or_p, - }; - }, - } - }; - - if (null_compare) |reloc| self.performReloc(reloc); - self.eflags_inst = inst; - return self.finishAir(inst, .{ .eflags = result }, .{ bin_op.lhs, bin_op.rhs, .none }); -} - -fn airCmpVector(self: *CodeGen, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data; - const dst_mcv = try self.genBinOp( - inst, - .fromCmpOp(extra.compareOperator(), false), - extra.lhs, - extra.rhs, - ); - return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); -} - fn airTry(self: *CodeGen, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const extra = self.air.extraData(Air.Try, pl_op.payload); @@ -181223,16 +177781,13 @@ fn airAsm(self: *CodeGen, inst: Air.Inst.Index) !void { .@".cfi_escape" => error.InvalidInstruction, else => unreachable, } else self.asmOps(mnem_fixed_tag, ops)) catch |err| switch (err) { - error.InvalidInstruction => return self.fail( - "invalid instruction: '{s} {s} {s} {s} {s}'", - .{ - mnem_str, - @tagName(ops[0]), - @tagName(ops[1]), - @tagName(ops[2]), - @tagName(ops[3]), - }, - ), + error.InvalidInstruction => return self.fail("invalid 
instruction: '{s} {s} {s} {s} {s}'", .{ + mnem_str, + @tagName(ops[0]), + @tagName(ops[1]), + @tagName(ops[2]), + @tagName(ops[3]), + }), else => |e| return e, }; } @@ -182904,183 +179459,6 @@ fn airBitCast(self: *CodeGen, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } -fn airArrayToSlice(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const slice_ty = self.typeOfIndex(inst); - const ptr_ty = self.typeOf(ty_op.operand); - const ptr = try self.resolveInst(ty_op.operand); - const array_ty = ptr_ty.childType(zcu); - const array_len = array_ty.arrayLen(zcu); - - const frame_index = try self.allocFrameIndex(.initSpill(slice_ty, zcu)); - try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr, .{}); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(ptr_ty.abiSize(zcu)), - .usize, - .{ .immediate = array_len }, - .{}, - ); - - const result = MCValue{ .load_frame = .{ .index = frame_index } }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airFloatFromInt(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const dst_ty = self.typeOfIndex(inst); - const dst_bits = dst_ty.floatBits(self.target); - - const src_ty = self.typeOf(ty_op.operand); - const src_bits: u32 = @intCast(src_ty.bitSize(zcu)); - const src_signedness = - if (src_ty.isAbiInt(zcu)) src_ty.intInfo(zcu).signedness else .unsigned; - const src_size = std.math.divCeil(u32, @max(switch (src_signedness) { - .signed => src_bits, - .unsigned => src_bits + 1, - }, 32), 8) catch unreachable; - - const result = result: { - if (switch (dst_bits) { - 16, 80, 128 => true, - 32, 64 => src_size > 8, - else => unreachable, - }) { - if (src_bits > 128) return self.fail("TODO 
implement airFloatFromInt from {f} to {f}", .{ - src_ty.fmt(pt), dst_ty.fmt(pt), - }); - - var sym_buf: ["__floatun?i?f".len]u8 = undefined; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = dst_ty.toIntern(), - .param_types = &.{src_ty.toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "__float{s}{c}i{c}f", .{ - switch (src_signedness) { - .signed => "", - .unsigned => "un", - }, - intCompilerRtAbiName(src_bits), - floatCompilerRtAbiName(dst_bits), - }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); - } - - const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(src_ty, src_mcv); - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); - - if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg); - - const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty)); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - const mir_tag = @as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(zcu)) { - .float => switch (dst_ty.floatBits(self.target)) { - 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 }, - 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 }, - 16, 80, 128 => null, - else => unreachable, - }, - else => null, - }) orelse return self.fail("TODO implement airFloatFromInt from {f} to {f}", .{ - src_ty.fmt(pt), dst_ty.fmt(pt), - }); - const dst_alias = dst_reg.to128(); - const src_alias = registerAlias(src_reg, src_size); - switch (mir_tag[0]) { - .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias), - else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias), - } - - break :result dst_mcv; - }; - 
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - -fn airIntFromFloat(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; - - const dst_ty = self.typeOfIndex(inst); - const dst_bits: u32 = @intCast(dst_ty.bitSize(zcu)); - const dst_signedness = - if (dst_ty.isAbiInt(zcu)) dst_ty.intInfo(zcu).signedness else .unsigned; - const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) { - .signed => dst_bits, - .unsigned => dst_bits + 1, - }, 32), 8) catch unreachable; - - const src_ty = self.typeOf(ty_op.operand); - const src_bits = src_ty.floatBits(self.target); - - const result = result: { - if (switch (src_bits) { - 16, 80, 128 => true, - 32, 64 => dst_size > 8, - else => unreachable, - }) { - if (dst_bits > 128) return self.fail("TODO implement airIntFromFloat from {f} to {f}", .{ - src_ty.fmt(pt), dst_ty.fmt(pt), - }); - - var sym_buf: ["__fixuns?f?i".len]u8 = undefined; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = dst_ty.toIntern(), - .param_types = &.{src_ty.toIntern()}, - .sym = std.fmt.bufPrint(&sym_buf, "__fix{s}{c}f{c}i", .{ - switch (dst_signedness) { - .signed => "", - .unsigned => "uns", - }, - floatCompilerRtAbiName(src_bits), - intCompilerRtAbiName(dst_bits), - }) catch unreachable, - } }, &.{src_ty}, &.{.{ .air_ref = ty_op.operand }}, .{}); - } - - const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(src_ty, src_mcv); - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); - - const dst_reg = try self.register_manager.allocReg(inst, self.regSetForType(dst_ty)); - const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - try self.asmRegisterRegister( - switch (src_bits) { - 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si }, - 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si }, - else => unreachable, - }, - registerAlias(dst_reg, dst_size), - src_reg.to128(), - ); - - if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg); - - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); -} - fn airCmpxchg(self: *CodeGen, inst: Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; @@ -183747,331 +180125,46 @@ fn airSplat(self: *CodeGen, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; const vector_ty = self.typeOfIndex(inst); const vector_len = vector_ty.vectorLen(zcu); - const dst_rc = self.regSetForType(vector_ty); const scalar_ty = self.typeOf(ty_op.operand); const result: MCValue = result: { - switch (scalar_ty.zigTypeTag(zcu)) { - else => {}, - .bool => { - const regs = - try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp); - const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs); - defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - - try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 }, .{}); - try self.genSetReg( - regs[1], - vector_ty, - .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) }, - .{}, - ); - const src_mcv = try self.resolveInst(ty_op.operand); - const abi_size = 
@max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4); - try self.asmCmovccRegisterRegister( - switch (src_mcv) { - .eflags => |cc| cc, - .register => |src_reg| cc: { - try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1)); - break :cc .nz; - }, - else => cc: { - try self.asmMemoryImmediate( - .{ ._, .@"test" }, - try src_mcv.mem(self, .{ .size = .byte }), - .u(1), - ); - break :cc .nz; - }, - }, - registerAlias(regs[0], abi_size), - registerAlias(regs[1], abi_size), - ); - break :result .{ .register = regs[0] }; - }, - .int => if (self.hasFeature(.avx2)) avx2: { - const mir_tag = @as(?Mir.Inst.FixedTag, switch (scalar_ty.intInfo(zcu).bits) { - else => null, - 1...8 => switch (vector_len) { - else => null, - 1...32 => .{ .vp_b, .broadcast }, - }, - 9...16 => switch (vector_len) { - else => null, - 1...16 => .{ .vp_w, .broadcast }, - }, - 17...32 => switch (vector_len) { - else => null, - 1...8 => .{ .vp_d, .broadcast }, - }, - 33...64 => switch (vector_len) { - else => null, - 1...4 => .{ .vp_q, .broadcast }, - }, - 65...128 => switch (vector_len) { - else => null, - 1...2 => .{ .v_i128, .broadcast }, - }, - }) orelse break :avx2; - - const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.sse); - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - const src_mcv = try self.resolveInst(ty_op.operand); - if (src_mcv.isBase()) try self.asmRegisterMemory( - mir_tag, - registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))), - try src_mcv.mem(self, .{ .size = self.memSize(scalar_ty) }), - ) else { - if (mir_tag[0] == .v_i128) break :avx2; - try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); - try self.asmRegisterRegister( - mir_tag, - registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))), - registerAlias(dst_reg, @intCast(scalar_ty.abiSize(zcu))), - ); - } - break :result .{ .register = dst_reg }; - } else { - const dst_reg = try 
self.register_manager.allocReg(inst, abi.RegisterClass.sse); - const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_lock); - - try self.genSetReg(dst_reg, scalar_ty, .{ .air_ref = ty_op.operand }, .{}); - if (vector_len == 1) break :result .{ .register = dst_reg }; + if (scalar_ty.toIntern() != .bool_type) return self.fail("TODO implement airSplat for {f}", .{ + vector_ty.fmt(pt), + }); + const regs = + try self.register_manager.allocRegs(2, .{ inst, null }, abi.RegisterClass.gp); + const reg_locks = self.register_manager.lockRegsAssumeUnused(2, regs); + defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); - const dst_alias = registerAlias(dst_reg, @intCast(vector_ty.abiSize(zcu))); - const scalar_bits = scalar_ty.intInfo(zcu).bits; - if (switch (scalar_bits) { - 1...8 => true, - 9...128 => false, - else => unreachable, - }) if (self.hasFeature(.avx)) try self.asmRegisterRegisterRegister( - .{ .vp_, .unpcklbw }, - dst_alias, - dst_alias, - dst_alias, - ) else try self.asmRegisterRegister( - .{ .p_, .unpcklbw }, - dst_alias, - dst_alias, - ); - if (switch (scalar_bits) { - 1...8 => vector_len > 2, - 9...16 => true, - 17...128 => false, - else => unreachable, - }) try self.asmRegisterRegisterImmediate( - .{ if (self.hasFeature(.avx)) .vp_w else .p_w, .shufl }, - dst_alias, - dst_alias, - .u(0b00_00_00_00), - ); - if (switch (scalar_bits) { - 1...8 => vector_len > 4, - 9...16 => vector_len > 2, - 17...64 => true, - 65...128 => false, - else => unreachable, - }) try self.asmRegisterRegisterImmediate( - .{ if (self.hasFeature(.avx)) .vp_d else .p_d, .shuf }, - dst_alias, - dst_alias, - .u(if (scalar_bits <= 64) 0b00_00_00_00 else 0b01_00_01_00), - ); - break :result .{ .register = dst_reg }; - }, - .float => switch (scalar_ty.floatBits(self.target)) { - 32 => switch (vector_len) { - 1 => { - const src_mcv = try self.resolveInst(ty_op.operand); - if (self.reuseOperand(inst, ty_op.operand, 0, 
src_mcv)) break :result src_mcv; - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); - break :result .{ .register = dst_reg }; - }, - 2...4 => { - const src_mcv = try self.resolveInst(ty_op.operand); - if (self.hasFeature(.avx)) { - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_ss, .broadcast }, - dst_reg.to128(), - try src_mcv.mem(self, .{ .size = .dword }), - ) else { - const src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(scalar_ty, src_mcv); - try self.asmRegisterRegisterRegisterImmediate( - .{ .v_ps, .shuf }, - dst_reg.to128(), - src_reg.to128(), - src_reg.to128(), - .u(0), - ); - } - break :result .{ .register = dst_reg }; - } else { - const dst_mcv = if (src_mcv.isRegister() and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv); - const dst_reg = dst_mcv.getReg().?; - try self.asmRegisterRegisterImmediate( - .{ ._ps, .shuf }, - dst_reg.to128(), - dst_reg.to128(), - .u(0), - ); - break :result dst_mcv; - } - }, - 5...8 => if (self.hasFeature(.avx)) { - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_ss, .broadcast }, - dst_reg.to256(), - try src_mcv.mem(self, .{ .size = .dword }), - ) else { - const src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(scalar_ty, src_mcv); - if (self.hasFeature(.avx2)) try self.asmRegisterRegister( - .{ .v_ss, .broadcast }, - dst_reg.to256(), - src_reg.to128(), - ) else { - try self.asmRegisterRegisterRegisterImmediate( - .{ .v_ps, .shuf }, - dst_reg.to128(), - src_reg.to128(), - src_reg.to128(), - .u(0), - ); - try self.asmRegisterRegisterRegisterImmediate( - .{ .v_f128, .insert }, - dst_reg.to256(), - dst_reg.to256(), - dst_reg.to128(), - .u(1), - ); - } - } - break :result .{ .register = dst_reg }; - }, - else => {}, - }, - 64 => switch (vector_len) { - 1 => { - const src_mcv = try self.resolveInst(ty_op.operand); - if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); - break :result .{ .register = dst_reg }; - }, - 2 => { - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (self.hasFeature(.sse3)) { - if (src_mcv.isBase()) try self.asmRegisterMemory( - if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, - dst_reg.to128(), - try src_mcv.mem(self, .{ .size = .qword }), - ) else try self.asmRegisterRegister( - if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, - dst_reg.to128(), - (if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(), - ); - break :result .{ .register = dst_reg }; - } else try self.asmRegisterRegister( - .{ ._ps, .movlh }, - dst_reg.to128(), - (if (src_mcv.isRegister()) - src_mcv.getReg().? 
- else - try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(), - ); - }, - 3...4 => if (self.hasFeature(.avx)) { - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_sd, .broadcast }, - dst_reg.to256(), - try src_mcv.mem(self, .{ .size = .qword }), - ) else { - const src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(scalar_ty, src_mcv); - if (self.hasFeature(.avx2)) try self.asmRegisterRegister( - .{ .v_sd, .broadcast }, - dst_reg.to256(), - src_reg.to128(), - ) else { - try self.asmRegisterRegister( - .{ .v_, .movddup }, - dst_reg.to128(), - src_reg.to128(), - ); - try self.asmRegisterRegisterRegisterImmediate( - .{ .v_f128, .insert }, - dst_reg.to256(), - dst_reg.to256(), - dst_reg.to128(), - .u(1), - ); - } - } - break :result .{ .register = dst_reg }; - }, - else => {}, + try self.genSetReg(regs[1], vector_ty, .{ .immediate = 0 }, .{}); + try self.genSetReg( + regs[1], + vector_ty, + .{ .immediate = @as(u64, std.math.maxInt(u64)) >> @intCast(64 - vector_len) }, + .{}, + ); + const src_mcv = try self.resolveInst(ty_op.operand); + const abi_size = @max(std.math.divCeil(u32, vector_len, 8) catch unreachable, 4); + try self.asmCmovccRegisterRegister( + switch (src_mcv) { + .eflags => |cc| cc, + .register => |src_reg| cc: { + try self.asmRegisterImmediate(.{ ._, .@"test" }, src_reg.to8(), .u(1)); + break :cc .nz; }, - 128 => switch (vector_len) { - 1 => { - const src_mcv = try self.resolveInst(ty_op.operand); - if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; - const dst_reg = try self.register_manager.allocReg(inst, dst_rc); - try self.genSetReg(dst_reg, scalar_ty, src_mcv, .{}); - break :result .{ .register = dst_reg }; - }, - 2 => if (self.hasFeature(.avx)) { - const src_mcv = try self.resolveInst(ty_op.operand); - const dst_reg = try 
self.register_manager.allocReg(inst, dst_rc); - if (src_mcv.isBase()) try self.asmRegisterMemory( - .{ .v_f128, .broadcast }, - dst_reg.to256(), - try src_mcv.mem(self, .{ .size = .xword }), - ) else { - const src_reg = if (src_mcv.isRegister()) - src_mcv.getReg().? - else - try self.copyToTmpRegister(scalar_ty, src_mcv); - try self.asmRegisterRegisterRegisterImmediate( - .{ .v_f128, .insert }, - dst_reg.to256(), - src_reg.to256(), - src_reg.to128(), - .u(1), - ); - } - break :result .{ .register = dst_reg }; - }, - else => {}, + else => cc: { + try self.asmMemoryImmediate( + .{ ._, .@"test" }, + try src_mcv.mem(self, .{ .size = .byte }), + .u(1), + ); + break :cc .nz; }, - 16, 80 => {}, - else => unreachable, }, - } - return self.fail("TODO implement airSplat for {f}", .{vector_ty.fmt(pt)}); + registerAlias(regs[0], abi_size), + registerAlias(regs[1], abi_size), + ); + break :result .{ .register = regs[0] }; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -185349,161 +181442,135 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { const result: MCValue = result: { switch (result_ty.zigTypeTag(zcu)) { .@"struct" => { + if (result_ty.containerLayout(zcu) == .@"packed") return self.fail( + "TODO implement airAggregateInit for {f}", + .{result_ty.fmt(pt)}, + ); const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); - if (result_ty.containerLayout(zcu) == .@"packed") { - const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern()); - try self.genInlineMemset( - .{ .lea_frame = .{ .index = frame_index } }, - .{ .immediate = 0 }, - .{ .immediate = result_ty.abiSize(zcu) }, - .{}, - ); - for (elements, 0..) 
|elem, elem_i_usize| { - const elem_i: u32 = @intCast(elem_i_usize); - if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue; - - const elem_ty = result_ty.fieldType(elem_i, zcu); - const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu)); - if (elem_bit_size > 64) { - return self.fail( - "TODO airAggregateInit implement packed structs with large fields", - .{}, - ); - } - const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); - const elem_abi_bits = elem_abi_size * 8; - const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i); - const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size); - const elem_bit_off = elem_off % elem_abi_bits; - const elem_mcv = try self.resolveInst(elem); - const elem_lock = switch (elem_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - .immediate => |imm| lock: { - if (imm == 0) continue; - break :lock null; - }, - else => null, - }; - defer if (elem_lock) |lock| self.register_manager.unlockReg(lock); - - const elem_extra_bits = self.regExtraBits(elem_ty); - { - const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); - const temp_alias = registerAlias(temp_reg, elem_abi_size); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer self.register_manager.unlockReg(temp_lock); - - if (elem_bit_off < elem_extra_bits) { - try self.truncateRegister(elem_ty, temp_alias); - } - if (elem_bit_off > 0) try self.genShiftBinOpMir( - .{ ._l, .sh }, - elem_ty, - .{ .register = temp_alias }, - .u8, - .{ .immediate = elem_bit_off }, - ); - try self.genBinOpMir( - .{ ._, .@"or" }, - elem_ty, - .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, - .{ .register = temp_alias }, - ); - } - if (elem_bit_off > elem_extra_bits) { - const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); - const temp_alias = registerAlias(temp_reg, elem_abi_size); - const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); - defer 
self.register_manager.unlockReg(temp_lock); - - if (elem_extra_bits > 0) { - try self.truncateRegister(elem_ty, temp_alias); - } - try self.genShiftBinOpMir( - .{ ._r, .sh }, - elem_ty, - .{ .register = temp_reg }, - .u8, - .{ .immediate = elem_abi_bits - elem_bit_off }, - ); - try self.genBinOpMir( - .{ ._, .@"or" }, - elem_ty, - .{ .load_frame = .{ - .index = frame_index, - .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)), - } }, - .{ .register = temp_alias }, - ); - } - } - } else for (elements, 0..) |elem, elem_i| { + const loaded_struct = zcu.intern_pool.loadStructType(result_ty.toIntern()); + try self.genInlineMemset( + .{ .lea_frame = .{ .index = frame_index } }, + .{ .immediate = 0 }, + .{ .immediate = result_ty.abiSize(zcu) }, + .{}, + ); + for (elements, 0..) |elem, elem_i_usize| { + const elem_i: u32 = @intCast(elem_i_usize); if ((try result_ty.structFieldValueComptime(pt, elem_i)) != null) continue; const elem_ty = result_ty.fieldType(elem_i, zcu); - const elem_off: i32 = @intCast(result_ty.structFieldOffset(elem_i, zcu)); + const elem_bit_size: u32 = @intCast(elem_ty.bitSize(zcu)); + if (elem_bit_size > 64) { + return self.fail( + "TODO airAggregateInit implement packed structs with large fields", + .{}, + ); + } + const elem_abi_size: u32 = @intCast(elem_ty.abiSize(zcu)); + const elem_abi_bits = elem_abi_size * 8; + const elem_off = zcu.structPackedFieldBitOffset(loaded_struct, elem_i); + const elem_byte_off: i32 = @intCast(elem_off / elem_abi_bits * elem_abi_size); + const elem_bit_off = elem_off % elem_abi_bits; const elem_mcv = try self.resolveInst(elem); - try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, elem_mcv, .{}); - } - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - .array, .vector => { - const elem_ty = result_ty.childType(zcu); - if (result_ty.isVector(zcu) and elem_ty.toIntern() == .bool_type) { - const result_size: u32 = @intCast(result_ty.abiSize(zcu)); - const dst_reg = try 
self.register_manager.allocReg(inst, abi.RegisterClass.gp); - try self.asmRegisterRegister( - .{ ._, .xor }, - registerAlias(dst_reg, @min(result_size, 4)), - registerAlias(dst_reg, @min(result_size, 4)), - ); + const elem_lock = switch (elem_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + .immediate => |imm| lock: { + if (imm == 0) continue; + break :lock null; + }, + else => null, + }; + defer if (elem_lock) |lock| self.register_manager.unlockReg(lock); - for (elements, 0..) |elem, elem_i| { - const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem }); - const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg); - defer self.register_manager.unlockReg(elem_lock); + const elem_extra_bits = self.regExtraBits(elem_ty); + { + const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); + const temp_alias = registerAlias(temp_reg, elem_abi_size); + const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); + defer self.register_manager.unlockReg(temp_lock); - try self.asmRegisterImmediate( - .{ ._, .@"and" }, - registerAlias(elem_reg, @min(result_size, 4)), - .u(1), - ); - if (elem_i > 0) try self.asmRegisterImmediate( + if (elem_bit_off < elem_extra_bits) { + try self.truncateRegister(elem_ty, temp_alias); + } + if (elem_bit_off > 0) try self.genShiftBinOpMir( .{ ._l, .sh }, - registerAlias(elem_reg, result_size), - .u(@intCast(elem_i)), + elem_ty, + .{ .register = temp_alias }, + .u8, + .{ .immediate = elem_bit_off }, ); - try self.asmRegisterRegister( + try self.genBinOpMir( .{ ._, .@"or" }, - registerAlias(dst_reg, result_size), - registerAlias(elem_reg, result_size), + elem_ty, + .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, + .{ .register = temp_alias }, ); } - break :result .{ .register = dst_reg }; - } else { - const frame_index = try self.allocFrameIndex(.initSpill(result_ty, zcu)); - const elem_size: u32 = @intCast(elem_ty.abiSize(zcu)); - - for (elements, 0..) 
|elem, elem_i| { - const elem_mcv = try self.resolveInst(elem); - const elem_off: i32 = @intCast(elem_size * elem_i); - try self.genSetMem( - .{ .frame = frame_index }, - elem_off, + if (elem_bit_off > elem_extra_bits) { + const temp_reg = try self.copyToTmpRegister(elem_ty, elem_mcv); + const temp_alias = registerAlias(temp_reg, elem_abi_size); + const temp_lock = self.register_manager.lockRegAssumeUnused(temp_reg); + defer self.register_manager.unlockReg(temp_lock); + + if (elem_extra_bits > 0) { + try self.truncateRegister(elem_ty, temp_alias); + } + try self.genShiftBinOpMir( + .{ ._r, .sh }, elem_ty, - elem_mcv, - .{}, + .{ .register = temp_reg }, + .u8, + .{ .immediate = elem_abi_bits - elem_bit_off }, + ); + try self.genBinOpMir( + .{ ._, .@"or" }, + elem_ty, + .{ .load_frame = .{ + .index = frame_index, + .off = elem_byte_off + @as(i32, @intCast(elem_abi_size)), + } }, + .{ .register = temp_alias }, ); } - if (result_ty.sentinel(zcu)) |sentinel| try self.genSetMem( - .{ .frame = frame_index }, - @intCast(elem_size * elements.len), - elem_ty, - try self.lowerValue(sentinel), - .{}, + } + break :result .{ .load_frame = .{ .index = frame_index } }; + }, + .vector => { + const elem_ty = result_ty.childType(zcu); + if (elem_ty.toIntern() != .bool_type) return self.fail( + "TODO implement airAggregateInit for {f}", + .{result_ty.fmt(pt)}, + ); + const result_size: u32 = @intCast(result_ty.abiSize(zcu)); + const dst_reg = try self.register_manager.allocReg(inst, abi.RegisterClass.gp); + try self.asmRegisterRegister( + .{ ._, .xor }, + registerAlias(dst_reg, @min(result_size, 4)), + registerAlias(dst_reg, @min(result_size, 4)), + ); + + for (elements, 0..) 
|elem, elem_i| { + const elem_reg = try self.copyToTmpRegister(elem_ty, .{ .air_ref = elem }); + const elem_lock = self.register_manager.lockRegAssumeUnused(elem_reg); + defer self.register_manager.unlockReg(elem_lock); + + try self.asmRegisterImmediate( + .{ ._, .@"and" }, + registerAlias(elem_reg, @min(result_size, 4)), + .u(1), + ); + if (elem_i > 0) try self.asmRegisterImmediate( + .{ ._l, .sh }, + registerAlias(elem_reg, result_size), + .u(@intCast(elem_i)), + ); + try self.asmRegisterRegister( + .{ ._, .@"or" }, + registerAlias(dst_reg, result_size), + registerAlias(elem_reg, result_size), ); - break :result .{ .load_frame = .{ .index = frame_index } }; } + break :result .{ .register = dst_reg }; }, else => unreachable, } @@ -185519,220 +181586,6 @@ fn airAggregateInit(self: *CodeGen, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn airUnionInit(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const ip = &zcu.intern_pool; - const ty_pl = self.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; - const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data; - const result: MCValue = result: { - const union_ty = self.typeOfIndex(inst); - const layout = union_ty.unionGetLayout(zcu); - - const src_ty = self.typeOf(extra.init); - const src_mcv = try self.resolveInst(extra.init); - if (layout.tag_size == 0) { - if (layout.abi_size <= src_ty.abiSize(zcu) and - self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv; - - const dst_mcv = try self.allocRegOrMem(inst, true); - try self.genCopy(src_ty, dst_mcv, src_mcv, .{}); - break :result dst_mcv; - } - - const dst_mcv = try self.allocRegOrMem(inst, false); - - const loaded_union = zcu.typeToUnion(union_ty).?; - const field_name = loaded_union.loadTagType(ip).names.get(ip)[extra.field_index]; - const tag_ty: Type = .fromInterned(loaded_union.enum_tag_ty); - const field_index = tag_ty.enumFieldIndex(field_name, zcu).?; - 
const tag_val = try pt.enumValueFieldIndex(tag_ty, field_index); - const tag_int_val = try tag_val.intFromEnum(tag_ty, pt); - const tag_int = tag_int_val.toUnsignedInt(zcu); - const tag_off: i32 = @intCast(layout.tagOffset()); - try self.genCopy( - tag_ty, - dst_mcv.address().offset(tag_off).deref(), - .{ .immediate = tag_int }, - .{}, - ); - - const pl_off: i32 = @intCast(layout.payloadOffset()); - try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv, .{}); - - break :result dst_mcv; - }; - return self.finishAir(inst, result, .{ extra.init, .none, .none }); -} - -fn airMulAdd(self: *CodeGen, inst: Air.Inst.Index) !void { - const pt = self.pt; - const zcu = pt.zcu; - const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; - const extra = self.air.extraData(Air.Bin, pl_op.payload).data; - const ty = self.typeOfIndex(inst); - - const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand }; - const result = result: { - if (switch (ty.scalarType(zcu).floatBits(self.target)) { - 16, 80, 128 => true, - 32, 64 => !self.hasFeature(.fma), - else => unreachable, - }) { - if (ty.zigTypeTag(zcu) != .float) return self.fail("TODO implement airMulAdd for {f}", .{ - ty.fmt(pt), - }); - - var sym_buf: ["__fma?".len]u8 = undefined; - break :result try self.genCall(.{ .extern_func = .{ - .return_type = ty.toIntern(), - .param_types = &.{ ty.toIntern(), ty.toIntern(), ty.toIntern() }, - .sym = std.fmt.bufPrint(&sym_buf, "{s}fma{s}", .{ - floatLibcAbiPrefix(ty), - floatLibcAbiSuffix(ty), - }) catch unreachable, - } }, &.{ ty, ty, ty }, &.{ - .{ .air_ref = extra.lhs }, .{ .air_ref = extra.rhs }, .{ .air_ref = pl_op.operand }, - }, .{}); - } - - var mcvs: [3]MCValue = undefined; - var locks: [3]?RegisterManager.RegisterLock = @splat(null); - defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); - var order: [3]u2 = @splat(0); - var unused: std.StaticBitSet(3) = .initFull(); - for (ops, &mcvs, &locks, 0..) 
|op, *mcv, *lock, op_i| { - const op_index: u2 = @intCast(op_i); - mcv.* = try self.resolveInst(op); - if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) { - order[op_index] = 1; - unused.unset(0); - } else if (unused.isSet(2) and mcv.isBase()) { - order[op_index] = 3; - unused.unset(2); - } - switch (mcv.*) { - .register => |reg| lock.* = self.register_manager.lockReg(reg), - else => {}, - } - } - for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| { - if (mop_index.* != 0) continue; - mop_index.* = 1 + @as(u2, @intCast(unused.toggleFirstSet().?)); - if (mop_index.* > 1 and mcv.isRegister()) continue; - const reg = try self.copyToTmpRegister(ty, mcv.*); - mcv.* = .{ .register = reg }; - if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock); - lock.* = self.register_manager.lockRegAssumeUnused(reg); - } - - const mir_tag = @as(?Mir.Inst.FixedTag, if (std.mem.eql(u2, &order, &.{ 1, 3, 2 }) or - std.mem.eql(u2, &order, &.{ 3, 1, 2 })) - switch (ty.zigTypeTag(zcu)) { - .float => switch (ty.floatBits(self.target)) { - 32 => .{ .v_ss, .fmadd132 }, - 64 => .{ .v_sd, .fmadd132 }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (ty.childType(zcu).floatBits(self.target)) { - 32 => switch (ty.vectorLen(zcu)) { - 1 => .{ .v_ss, .fmadd132 }, - 2...8 => .{ .v_ps, .fmadd132 }, - else => null, - }, - 64 => switch (ty.vectorLen(zcu)) { - 1 => .{ .v_sd, .fmadd132 }, - 2...4 => .{ .v_pd, .fmadd132 }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - } - else if (std.mem.eql(u2, &order, &.{ 2, 1, 3 }) or std.mem.eql(u2, &order, &.{ 1, 2, 3 })) - switch (ty.zigTypeTag(zcu)) { - .float => switch (ty.floatBits(self.target)) { - 32 => .{ .v_ss, .fmadd213 }, - 64 => .{ .v_sd, .fmadd213 }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch 
(ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (ty.childType(zcu).floatBits(self.target)) { - 32 => switch (ty.vectorLen(zcu)) { - 1 => .{ .v_ss, .fmadd213 }, - 2...8 => .{ .v_ps, .fmadd213 }, - else => null, - }, - 64 => switch (ty.vectorLen(zcu)) { - 1 => .{ .v_sd, .fmadd213 }, - 2...4 => .{ .v_pd, .fmadd213 }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - } - else if (std.mem.eql(u2, &order, &.{ 2, 3, 1 }) or std.mem.eql(u2, &order, &.{ 3, 2, 1 })) - switch (ty.zigTypeTag(zcu)) { - .float => switch (ty.floatBits(self.target)) { - 32 => .{ .v_ss, .fmadd231 }, - 64 => .{ .v_sd, .fmadd231 }, - 16, 80, 128 => null, - else => unreachable, - }, - .vector => switch (ty.childType(zcu).zigTypeTag(zcu)) { - .float => switch (ty.childType(zcu).floatBits(self.target)) { - 32 => switch (ty.vectorLen(zcu)) { - 1 => .{ .v_ss, .fmadd231 }, - 2...8 => .{ .v_ps, .fmadd231 }, - else => null, - }, - 64 => switch (ty.vectorLen(zcu)) { - 1 => .{ .v_sd, .fmadd231 }, - 2...4 => .{ .v_pd, .fmadd231 }, - else => null, - }, - 16, 80, 128 => null, - else => unreachable, - }, - else => unreachable, - }, - else => unreachable, - } - else - unreachable) orelse return self.fail("TODO implement airMulAdd for {f}", .{ty.fmt(pt)}); - - var mops: [3]MCValue = undefined; - for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv; - - const abi_size: u32 = @intCast(ty.abiSize(zcu)); - const mop1_reg = registerAlias(mops[0].getReg().?, abi_size); - const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); - if (mops[2].isRegister()) try self.asmRegisterRegisterRegister( - mir_tag, - mop1_reg, - mop2_reg, - registerAlias(mops[2].getReg().?, abi_size), - ) else try self.asmRegisterRegisterMemory( - mir_tag, - mop1_reg, - mop2_reg, - try mops[2].mem(self, .{ .size = .fromSize(abi_size) }), - ); - break :result mops[0]; - }; - return self.finishAir(inst, result, ops); -} - fn airVaStart(self: *CodeGen, inst: 
Air.Inst.Index) !void { const pt = self.pt; const zcu = pt.zcu; @@ -186004,27 +181857,6 @@ fn getResolvedInstValue(self: *CodeGen, inst: Air.Inst.Index) *InstTracking { }; } -/// If the MCValue is an immediate, and it does not fit within this type, -/// we put it in a register. -/// A potential opportunity for future optimization here would be keeping track -/// of the fact that the instruction is available both as an immediate -/// and as a register. -fn limitImmediateType(self: *CodeGen, operand: Air.Inst.Ref, comptime T: type) !MCValue { - const mcv = try self.resolveInst(operand); - const ti = @typeInfo(T).int; - switch (mcv) { - .immediate => |imm| { - // This immediate is unsigned. - const U = std.meta.Int(.unsigned, ti.bits - @intFromBool(ti.signedness == .signed)); - if (imm >= std.math.maxInt(U)) { - return MCValue{ .register = try self.copyToTmpRegister(.usize, mcv) }; - } - }, - else => {}, - } - return mcv; -} - fn lowerValue(cg: *CodeGen, val: Value) Allocator.Error!MCValue { return switch (try codegen.lowerValue(cg.pt, val, cg.target)) { .none => .none, @@ -186134,7 +181966,7 @@ fn resolveCallingConventionValues( const classes = switch (cc) { .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ret_ty, zcu, cg.target, .ret), .none), - .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu, cg.target)}, + .x86_64_win => &.{abi.classifyWindows(ret_ty, zcu, cg.target, .ret)}, else => unreachable, }; for (classes) |class| switch (class) { @@ -186215,7 +182047,7 @@ fn resolveCallingConventionValues( const classes = switch (cc) { .x86_64_sysv => std.mem.sliceTo(&abi.classifySystemV(ty, zcu, cg.target, .arg), .none), - .x86_64_win => &.{abi.classifyWindows(ty, zcu, cg.target)}, + .x86_64_win => &.{abi.classifyWindows(ty, zcu, cg.target, .arg)}, else => unreachable, }; classes: for (classes) |class| switch (class) { @@ -186678,53 +182510,6 @@ fn typeOfIndex(self: *CodeGen, inst: Air.Inst.Index) Type { return Temp.typeOf(.{ .index = inst }, self); } -fn 
intCompilerRtAbiName(int_bits: u32) u8 { - return switch (int_bits) { - 1...32 => 's', - 33...64 => 'd', - 65...128 => 't', - else => unreachable, - }; -} - -fn floatCompilerRtAbiName(float_bits: u32) u8 { - return switch (float_bits) { - 16 => 'h', - 32 => 's', - 64 => 'd', - 80 => 'x', - 128 => 't', - else => unreachable, - }; -} - -fn floatCompilerRtAbiType(self: *CodeGen, ty: Type, other_ty: Type) Type { - if (ty.toIntern() == .f16_type and - (other_ty.toIntern() == .f32_type or other_ty.toIntern() == .f64_type) and - self.target.os.tag.isDarwin()) return .u16; - return ty; -} - -fn floatLibcAbiPrefix(ty: Type) []const u8 { - return switch (ty.toIntern()) { - .f16_type, .f80_type => "__", - .f32_type, .f64_type, .f128_type, .c_longdouble_type => "", - else => unreachable, - }; -} - -fn floatLibcAbiSuffix(ty: Type) []const u8 { - return switch (ty.toIntern()) { - .f16_type => "h", - .f32_type => "f", - .f64_type => "", - .f80_type => "x", - .f128_type => "q", - .c_longdouble_type => "l", - else => unreachable, - }; -} - fn promoteInt(self: *CodeGen, ty: Type) Type { const pt = self.pt; const zcu = pt.zcu; diff --git a/src/codegen/x86_64/Emit.zig b/src/codegen/x86_64/Emit.zig index c2b38d8e6d45..377171683b92 100644 --- a/src/codegen/x86_64/Emit.zig +++ b/src/codegen/x86_64/Emit.zig @@ -89,6 +89,7 @@ pub fn emitMir(emit: *Emit) Error!void { } var reloc_info_buf: [2]RelocInfo = undefined; var reloc_info_index: usize = 0; + const ip = &emit.pt.zcu.intern_pool; while (lowered_relocs.len > 0 and lowered_relocs[0].lowered_inst_index == lowered_index) : ({ lowered_relocs = lowered_relocs[1..]; @@ -114,7 +115,6 @@ pub fn emitMir(emit: *Emit) Error!void { return error.EmitFail; }, }; - const ip = &emit.pt.zcu.intern_pool; break :target switch (ip.getNav(nav).status) { .unresolved => unreachable, .type_resolved => |type_resolved| .{ @@ -170,11 +170,8 @@ pub fn emitMir(emit: *Emit) Error!void { else if (emit.bin_file.cast(.macho)) |macho_file| 
macho_file.getZigObject().?.getOrCreateMetadataForLazySymbol(macho_file, emit.pt, lazy_sym) catch |err| return emit.fail("{s} creating lazy symbol", .{@errorName(err)}) - else if (emit.bin_file.cast(.coff)) |coff_file| - if (coff_file.getOrCreateAtomForLazySymbol(emit.pt, lazy_sym)) |atom| - coff_file.getAtom(atom).getSymbolIndex().? - else |err| - return emit.fail("{s} creating lazy symbol", .{@errorName(err)}) + else if (emit.bin_file.cast(.coff2)) |elf| + @intFromEnum(try elf.lazySymbol(lazy_sym)) else return emit.fail("lazy symbols unimplemented for {s}", .{@tagName(emit.bin_file.tag)}), .is_extern = false, @@ -188,10 +185,13 @@ pub fn emitMir(emit: *Emit) Error!void { .type = .FUNC, })) else if (emit.bin_file.cast(.macho)) |macho_file| try macho_file.getGlobalSymbol(extern_func.toSlice(&emit.lower.mir).?, null) - else if (emit.bin_file.cast(.coff)) |coff_file| - try coff_file.getGlobalSymbol(extern_func.toSlice(&emit.lower.mir).?, "compiler_rt") - else - return emit.fail("external symbol unimplemented for {s}", .{@tagName(emit.bin_file.tag)}), + else if (emit.bin_file.cast(.coff2)) |coff| @intFromEnum(try coff.globalSymbol( + extern_func.toSlice(&emit.lower.mir).?, + switch (comp.compiler_rt_strat) { + .none, .lib, .obj, .zcu => null, + .dyn_lib => "compiler_rt", + }, + )) else return emit.fail("external symbol unimplemented for {s}", .{@tagName(emit.bin_file.tag)}), .is_extern = true, .type = .symbol, }, @@ -204,9 +204,7 @@ pub fn emitMir(emit: *Emit) Error!void { switch (lowered_inst.encoding.mnemonic) { .call => { reloc.target.type = .branch; - if (emit.bin_file.cast(.coff)) |_| try emit.encodeInst(try .new(.none, .call, &.{ - .{ .mem = .initRip(.ptr, 0) }, - }, emit.lower.target), reloc_info) else try emit.encodeInst(lowered_inst, reloc_info); + try emit.encodeInst(lowered_inst, reloc_info); continue :lowered_inst; }, else => {}, @@ -283,27 +281,8 @@ pub fn emitMir(emit: *Emit) Error!void { }, emit.lower.target), reloc_info), else => unreachable, } - } 
else if (emit.bin_file.cast(.coff)) |_| { - if (reloc.target.is_extern) switch (lowered_inst.encoding.mnemonic) { - .lea => try emit.encodeInst(try .new(.none, .mov, &.{ - lowered_inst.ops[0], - .{ .mem = .initRip(.ptr, 0) }, - }, emit.lower.target), reloc_info), - .mov => { - const dst_reg = lowered_inst.ops[0].reg.to64(); - try emit.encodeInst(try .new(.none, .mov, &.{ - .{ .reg = dst_reg }, - .{ .mem = .initRip(.ptr, 0) }, - }, emit.lower.target), reloc_info); - try emit.encodeInst(try .new(.none, .mov, &.{ - lowered_inst.ops[0], - .{ .mem = .initSib(lowered_inst.ops[reloc.op_index].mem.sib.ptr_size, .{ .base = .{ - .reg = dst_reg, - } }) }, - }, emit.lower.target), &.{}); - }, - else => unreachable, - } else switch (lowered_inst.encoding.mnemonic) { + } else if (emit.bin_file.cast(.coff2)) |_| { + switch (lowered_inst.encoding.mnemonic) { .lea => try emit.encodeInst(try .new(.none, .lea, &.{ lowered_inst.ops[0], .{ .mem = .initRip(.none, 0) }, @@ -683,7 +662,7 @@ pub fn emitMir(emit: *Emit) Error!void { table_reloc.source_offset, @enumFromInt(emit.atom_index), @as(i64, table_offset) + table_reloc.target_offset, - .{ .x86_64 = .@"32" }, + .{ .X86_64 = .@"32" }, ); for (emit.lower.mir.table) |entry| { try elf.addReloc( @@ -691,7 +670,7 @@ pub fn emitMir(emit: *Emit) Error!void { table_offset, @enumFromInt(emit.atom_index), emit.code_offset_mapping.items[entry], - .{ .x86_64 = .@"64" }, + .{ .X86_64 = .@"64" }, ); table_offset += ptr_size; } @@ -800,23 +779,14 @@ fn encodeInst(emit: *Emit, lowered_inst: Instruction, reloc_info: []const RelocI end_offset - 4, @enumFromInt(reloc.target.index), reloc.off, - .{ .x86_64 = .@"32" }, - ) else if (emit.bin_file.cast(.coff)) |coff_file| { - const atom_index = coff_file.getAtomIndexForSymbol( - .{ .sym_index = emit.atom_index, .file = null }, - ).?; - try coff_file.addRelocation(atom_index, .{ - .type = if (reloc.target.is_extern) .got else .direct, - .target = if (reloc.target.is_extern) - 
coff_file.getGlobalByIndex(reloc.target.index) - else - .{ .sym_index = reloc.target.index, .file = null }, - .offset = end_offset - 4, - .addend = @intCast(reloc.off), - .pcrel = true, - .length = 2, - }); - } else unreachable, + .{ .X86_64 = .@"32" }, + ) else if (emit.bin_file.cast(.coff2)) |coff| try coff.addReloc( + @enumFromInt(emit.atom_index), + end_offset - 4, + @enumFromInt(reloc.target.index), + reloc.off, + .{ .AMD64 = .REL32 }, + ) else unreachable, .branch => if (emit.bin_file.cast(.elf)) |elf_file| { const zo = elf_file.zigObjectPtr().?; const atom = zo.symbol(emit.atom_index).atom(elf_file).?; @@ -831,7 +801,7 @@ fn encodeInst(emit: *Emit, lowered_inst: Instruction, reloc_info: []const RelocI end_offset - 4, @enumFromInt(reloc.target.index), reloc.off - 4, - .{ .x86_64 = .PC32 }, + .{ .X86_64 = .PC32 }, ) else if (emit.bin_file.cast(.macho)) |macho_file| { const zo = macho_file.getZigObject().?; const atom = zo.symbols.items[emit.atom_index].getAtom(macho_file).?; @@ -848,22 +818,13 @@ fn encodeInst(emit: *Emit, lowered_inst: Instruction, reloc_info: []const RelocI .symbolnum = @intCast(reloc.target.index), }, }); - } else if (emit.bin_file.cast(.coff)) |coff_file| { - const atom_index = coff_file.getAtomIndexForSymbol( - .{ .sym_index = emit.atom_index, .file = null }, - ).?; - try coff_file.addRelocation(atom_index, .{ - .type = if (reloc.target.is_extern) .import else .got, - .target = if (reloc.target.is_extern) - coff_file.getGlobalByIndex(reloc.target.index) - else - .{ .sym_index = reloc.target.index, .file = null }, - .offset = end_offset - 4, - .addend = @intCast(reloc.off), - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {s} reloc for {s}", .{ + } else if (emit.bin_file.cast(.coff2)) |coff| try coff.addReloc( + @enumFromInt(emit.atom_index), + end_offset - 4, + @enumFromInt(reloc.target.index), + reloc.off, + .{ .AMD64 = .REL32 }, + ) else return emit.fail("TODO implement {s} reloc for {s}", .{ 
@tagName(reloc.target.type), @tagName(emit.bin_file.tag), }), .tls => if (emit.bin_file.cast(.elf)) |elf_file| { @@ -892,7 +853,7 @@ fn encodeInst(emit: *Emit, lowered_inst: Instruction, reloc_info: []const RelocI end_offset - 4, @enumFromInt(reloc.target.index), reloc.off, - .{ .x86_64 = .TPOFF32 }, + .{ .X86_64 = .TPOFF32 }, ) else if (emit.bin_file.cast(.macho)) |macho_file| { const zo = macho_file.getZigObject().?; const atom = zo.symbols.items[emit.atom_index].getAtom(macho_file).?; diff --git a/src/codegen/x86_64/abi.zig b/src/codegen/x86_64/abi.zig index 041eb1632d3b..2a296dc93042 100644 --- a/src/codegen/x86_64/abi.zig +++ b/src/codegen/x86_64/abi.zig @@ -110,7 +110,9 @@ pub const Class = enum { } }; -pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target) Class { +pub const Context = enum { ret, arg, other }; + +pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) Class { // https://docs.microsoft.com/en-gb/cpp/build/x64-calling-convention?view=vs-2017 // "There's a strict one-to-one correspondence between a function call's arguments // and the registers used for those arguments. Any argument that doesn't fit in 8 @@ -148,8 +150,9 @@ pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target) Class { }, .float => switch (ty.floatBits(target)) { - 16, 32, 64, 128 => .sse, + 16, 32, 64 => .sse, 80 => .memory, + 128 => if (ctx == .arg) .memory else .sse, else => unreachable, }, .vector => .sse, @@ -166,8 +169,6 @@ pub fn classifyWindows(ty: Type, zcu: *Zcu, target: *const std.Target) Class { }; } -pub const Context = enum { ret, arg, other }; - /// There are a maximum of 8 possible return slots. Returned values are in /// the beginning of the array; unused slots are filled with .none. 
pub fn classifySystemV(ty: Type, zcu: *Zcu, target: *const std.Target, ctx: Context) [8]Class { diff --git a/src/dev.zig b/src/dev.zig index 266796a2dc2d..eb5ac52550fb 100644 --- a/src/dev.zig +++ b/src/dev.zig @@ -96,6 +96,7 @@ pub const Env = enum { .spirv_backend, .lld_linker, .coff_linker, + .coff2_linker, .elf_linker, .elf2_linker, .macho_linker, @@ -284,6 +285,7 @@ pub const Feature = enum { lld_linker, coff_linker, + coff2_linker, elf_linker, elf2_linker, macho_linker, diff --git a/src/link.zig b/src/link.zig index 277013efb309..e876e8677a9a 100644 --- a/src/link.zig +++ b/src/link.zig @@ -574,16 +574,13 @@ pub const File = struct { const gpa = comp.gpa; switch (base.tag) { .lld => assert(base.file == null), - .coff, .elf, .macho, .wasm, .goff, .xcoff => { + .elf, .macho, .wasm, .goff, .xcoff => { if (base.file != null) return; dev.checkAny(&.{ .coff_linker, .elf_linker, .macho_linker, .plan9_linker, .wasm_linker, .goff_linker, .xcoff_linker }); const emit = base.emit; if (base.child_pid) |pid| { if (builtin.os.tag == .windows) { - const coff_file = base.cast(.coff).?; - coff_file.ptraceAttach(pid) catch |err| { - log.warn("attaching failed with error: {s}", .{@errorName(err)}); - }; + return error.HotSwapUnavailableOnHostOperatingSystem; } else { // If we try to open the output file in write mode while it is running, // it will return ETXTBSY. 
So instead, we copy the file, atomically rename it @@ -610,27 +607,20 @@ pub const File = struct { } } } - const output_mode = comp.config.output_mode; - const link_mode = comp.config.link_mode; - base.file = try emit.root_dir.handle.createFile(emit.sub_path, .{ - .truncate = false, - .read = true, - .mode = determineMode(output_mode, link_mode), - }); + base.file = try emit.root_dir.handle.openFile(emit.sub_path, .{ .mode = .read_write }); }, - .elf2 => { - const elf = base.cast(.elf2).?; - if (base.file == null) { - elf.mf.file = try base.emit.root_dir.handle.createFile(base.emit.sub_path, .{ - .truncate = false, - .read = true, - .mode = determineMode(comp.config.output_mode, comp.config.link_mode), - }); - base.file = elf.mf.file; - try elf.mf.ensureTotalCapacity( - @intCast(elf.mf.nodes.items[0].location().resolve(&elf.mf)[1]), - ); - } + .elf2, .coff2 => if (base.file == null) { + const mf = if (base.cast(.elf2)) |elf| + &elf.mf + else if (base.cast(.coff2)) |coff| + &coff.mf + else + unreachable; + mf.file = try base.emit.root_dir.handle.openFile(base.emit.sub_path, .{ + .mode = .read_write, + }); + base.file = mf.file; + try mf.ensureTotalCapacity(@intCast(mf.nodes.items[0].location().resolve(mf)[1])); }, .c, .spirv => dev.checkAny(&.{ .c_linker, .spirv_linker }), .plan9 => unreachable, @@ -654,12 +644,9 @@ pub const File = struct { pub fn makeExecutable(base: *File) !void { dev.check(.make_executable); const comp = base.comp; - const output_mode = comp.config.output_mode; - const link_mode = comp.config.link_mode; - - switch (output_mode) { + switch (comp.config.output_mode) { .Obj => return, - .Lib => switch (link_mode) { + .Lib => switch (comp.config.link_mode) { .static => return, .dynamic => {}, }, @@ -681,7 +668,7 @@ pub const File = struct { } } }, - .coff, .macho, .wasm, .goff, .xcoff => if (base.file) |f| { + .macho, .wasm, .goff, .xcoff => if (base.file) |f| { dev.checkAny(&.{ .coff_linker, .macho_linker, .plan9_linker, .wasm_linker, .goff_linker, 
.xcoff_linker }); f.close(); base.file = null; @@ -694,23 +681,22 @@ pub const File = struct { log.warn("detaching failed with error: {s}", .{@errorName(err)}); }; }, - .windows => { - const coff_file = base.cast(.coff).?; - coff_file.ptraceDetach(pid); - }, else => return error.HotSwapUnavailableOnHostOperatingSystem, } } }, - .elf2 => { - const elf = base.cast(.elf2).?; - if (base.file) |f| { - elf.mf.unmap(); - assert(elf.mf.file.handle == f.handle); - elf.mf.file = undefined; - f.close(); - base.file = null; - } + .elf2, .coff2 => if (base.file) |f| { + const mf = if (base.cast(.elf2)) |elf| + &elf.mf + else if (base.cast(.coff2)) |coff| + &coff.mf + else + unreachable; + mf.unmap(); + assert(mf.file.handle == f.handle); + mf.file = undefined; + f.close(); + base.file = null; }, .c, .spirv => dev.checkAny(&.{ .c_linker, .spirv_linker }), .plan9 => unreachable, @@ -828,7 +814,7 @@ pub const File = struct { .spirv => {}, .goff, .xcoff => {}, .plan9 => unreachable, - .elf2 => {}, + .elf2, .coff2 => {}, inline else => |tag| { dev.check(tag.devFeature()); return @as(*tag.Type(), @fieldParentPtr("base", base)).updateLineNumber(pt, ti_id); @@ -864,7 +850,7 @@ pub const File = struct { pub fn idle(base: *File, tid: Zcu.PerThread.Id) !bool { switch (base.tag) { else => return false, - inline .elf2 => |tag| { + inline .elf2, .coff2 => |tag| { dev.check(tag.devFeature()); return @as(*tag.Type(), @fieldParentPtr("base", base)).idle(tid); }, @@ -874,7 +860,7 @@ pub const File = struct { pub fn updateErrorData(base: *File, pt: Zcu.PerThread) !void { switch (base.tag) { else => {}, - inline .elf2 => |tag| { + inline .elf2, .coff2 => |tag| { dev.check(tag.devFeature()); return @as(*tag.Type(), @fieldParentPtr("base", base)).updateErrorData(pt); }, @@ -1155,7 +1141,7 @@ pub const File = struct { if (base.zcu_object_basename != null) return; switch (base.tag) { - inline .elf2, .wasm => |tag| { + inline .elf2, .coff2, .wasm => |tag| { dev.check(tag.devFeature()); return 
@as(*tag.Type(), @fieldParentPtr("base", base)).prelink(base.comp.link_prog_node); }, @@ -1164,7 +1150,7 @@ pub const File = struct { } pub const Tag = enum { - coff, + coff2, elf, elf2, macho, @@ -1178,7 +1164,7 @@ pub const File = struct { pub fn Type(comptime tag: Tag) type { return switch (tag) { - .coff => Coff, + .coff2 => Coff2, .elf => Elf, .elf2 => Elf2, .macho => MachO, @@ -1194,7 +1180,7 @@ pub const File = struct { fn fromObjectFormat(ofmt: std.Target.ObjectFormat, use_new_linker: bool) Tag { return switch (ofmt) { - .coff => .coff, + .coff => .coff2, .elf => if (use_new_linker) .elf2 else .elf, .macho => .macho, .wasm => .wasm, @@ -1279,7 +1265,7 @@ pub const File = struct { pub const Lld = @import("link/Lld.zig"); pub const C = @import("link/C.zig"); - pub const Coff = @import("link/Coff.zig"); + pub const Coff2 = @import("link/Coff2.zig"); pub const Elf = @import("link/Elf.zig"); pub const Elf2 = @import("link/Elf2.zig"); pub const MachO = @import("link/MachO.zig"); diff --git a/src/link/Coff.zig b/src/link/Coff.zig deleted file mode 100644 index f3e3b3d0b530..000000000000 --- a/src/link/Coff.zig +++ /dev/null @@ -1,3169 +0,0 @@ -//! The main driver of the self-hosted COFF linker. 
-const Coff = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const builtin = @import("builtin"); -const assert = std.debug.assert; -const coff_util = std.coff; -const fmt = std.fmt; -const fs = std.fs; -const log = std.log.scoped(.link); -const math = std.math; -const mem = std.mem; - -const Allocator = std.mem.Allocator; -const Path = std.Build.Cache.Path; -const Directory = std.Build.Cache.Directory; -const Cache = std.Build.Cache; - -const aarch64_util = link.aarch64; -const allocPrint = std.fmt.allocPrint; -const codegen = @import("../codegen.zig"); -const link = @import("../link.zig"); -const target_util = @import("../target.zig"); -const trace = @import("../tracy.zig").trace; - -const Compilation = @import("../Compilation.zig"); -const Zcu = @import("../Zcu.zig"); -const InternPool = @import("../InternPool.zig"); -const TableSection = @import("table_section.zig").TableSection; -const StringTable = @import("StringTable.zig"); -const Type = @import("../Type.zig"); -const Value = @import("../Value.zig"); -const AnalUnit = InternPool.AnalUnit; -const dev = @import("../dev.zig"); - -base: link.File, -image_base: u64, -/// TODO this and minor_subsystem_version should be combined into one property and left as -/// default or populated together. They should not be separate fields. 
-major_subsystem_version: u16, -minor_subsystem_version: u16, -entry: link.File.OpenOptions.Entry, -entry_addr: ?u32, -module_definition_file: ?[]const u8, -repro: bool, - -ptr_width: PtrWidth, -page_size: u32, - -sections: std.MultiArrayList(Section) = .{}, -data_directories: [coff_util.IMAGE_NUMBEROF_DIRECTORY_ENTRIES]coff_util.ImageDataDirectory, - -text_section_index: ?u16 = null, -got_section_index: ?u16 = null, -rdata_section_index: ?u16 = null, -data_section_index: ?u16 = null, -reloc_section_index: ?u16 = null, -idata_section_index: ?u16 = null, - -locals: std.ArrayListUnmanaged(coff_util.Symbol) = .empty, -globals: std.ArrayListUnmanaged(SymbolWithLoc) = .empty, -resolver: std.StringHashMapUnmanaged(u32) = .empty, -unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .empty, -need_got_table: std.AutoHashMapUnmanaged(u32, void) = .empty, - -locals_free_list: std.ArrayListUnmanaged(u32) = .empty, -globals_free_list: std.ArrayListUnmanaged(u32) = .empty, - -strtab: StringTable = .{}, -strtab_offset: ?u32 = null, - -temp_strtab: StringTable = .{}, - -got_table: TableSection(SymbolWithLoc) = .{}, - -/// A table of ImportTables partitioned by the library name. -/// Key is an offset into the interning string table `temp_strtab`. -import_tables: std.AutoArrayHashMapUnmanaged(u32, ImportTable) = .empty, - -got_table_count_dirty: bool = true, -got_table_contents_dirty: bool = true, -imports_count_dirty: bool = true, - -/// Table of tracked LazySymbols. -lazy_syms: LazySymbolTable = .{}, - -/// Table of tracked `Nav`s. -navs: NavTable = .{}, - -/// List of atoms that are either synthetic or map directly to the Zig source program. -atoms: std.ArrayListUnmanaged(Atom) = .empty, - -/// Table of atoms indexed by the symbol index. -atom_by_index_table: std.AutoHashMapUnmanaged(u32, Atom.Index) = .empty, - -uavs: UavTable = .{}, - -/// A table of relocations indexed by the owning them `Atom`. 
-/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. -relocs: RelocTable = .{}, - -/// A table of base relocations indexed by the owning them `Atom`. -/// Note that once we refactor `Atom`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. -base_relocs: BaseRelocationTable = .{}, - -/// Hot-code swapping state. -hot_state: if (is_hot_update_compatible) HotUpdateState else struct {} = .{}, - -const is_hot_update_compatible = switch (builtin.target.os.tag) { - .windows => true, - else => false, -}; - -const HotUpdateState = struct { - /// Base address at which the process (image) got loaded. - /// We need this info to correctly slide pointers when relocating. - loaded_base_address: ?std.os.windows.HMODULE = null, -}; - -const NavTable = std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, AvMetadata); -const UavTable = std.AutoHashMapUnmanaged(InternPool.Index, AvMetadata); -const RelocTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Relocation)); -const BaseRelocationTable = std.AutoArrayHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(u32)); - -const default_file_alignment: u16 = 0x200; -const default_size_of_stack_reserve: u32 = 0x1000000; -const default_size_of_stack_commit: u32 = 0x1000; -const default_size_of_heap_reserve: u32 = 0x100000; -const default_size_of_heap_commit: u32 = 0x1000; - -const Section = struct { - header: coff_util.SectionHeader, - - last_atom_index: ?Atom.Index = null, - - /// A list of atoms that have surplus capacity. This list can have false - /// positives, as functions grow and shrink over time, only sometimes being added - /// or removed from the freelist. - /// - /// An atom has surplus capacity when its overcapacity value is greater than - /// padToIdeal(minimum_atom_size). 
That is, when it has so - /// much extra capacity, that we could fit a small new symbol in it, itself with - /// ideal_capacity or more. - /// - /// Ideal capacity is defined by size + (size / ideal_factor). - /// - /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that - /// overcapacity can be negative. A simple way to have negative overcapacity is to - /// allocate a fresh atom, which will have ideal capacity, and then grow it - /// by 1 byte. It will then have -1 overcapacity. - free_list: std.ArrayListUnmanaged(Atom.Index) = .empty, -}; - -const LazySymbolTable = std.AutoArrayHashMapUnmanaged(InternPool.Index, LazySymbolMetadata); - -const LazySymbolMetadata = struct { - const State = enum { unused, pending_flush, flushed }; - text_atom: Atom.Index = undefined, - rdata_atom: Atom.Index = undefined, - text_state: State = .unused, - rdata_state: State = .unused, -}; - -const AvMetadata = struct { - atom: Atom.Index, - section: u16, - /// A list of all exports aliases of this Decl. - exports: std.ArrayListUnmanaged(u32) = .empty, - - fn deinit(m: *AvMetadata, allocator: Allocator) void { - m.exports.deinit(allocator); - } - - fn getExport(m: AvMetadata, coff: *const Coff, name: []const u8) ?u32 { - for (m.exports.items) |exp| { - if (mem.eql(u8, name, coff.getSymbolName(.{ - .sym_index = exp, - .file = null, - }))) return exp; - } - return null; - } - - fn getExportPtr(m: *AvMetadata, coff: *Coff, name: []const u8) ?*u32 { - for (m.exports.items) |*exp| { - if (mem.eql(u8, name, coff.getSymbolName(.{ - .sym_index = exp.*, - .file = null, - }))) return exp; - } - return null; - } -}; - -pub const PtrWidth = enum { - p32, - p64, - - /// Size in bytes. - pub fn size(pw: PtrWidth) u4 { - return switch (pw) { - .p32 => 4, - .p64 => 8, - }; - } -}; - -pub const SymbolWithLoc = struct { - // Index into the respective symbol table. - sym_index: u32, - - // null means it's a synthetic global or Zig source. 
- file: ?u32 = null, - - pub fn eql(this: SymbolWithLoc, other: SymbolWithLoc) bool { - if (this.file == null and other.file == null) { - return this.sym_index == other.sym_index; - } - if (this.file != null and other.file != null) { - return this.sym_index == other.sym_index and this.file.? == other.file.?; - } - return false; - } -}; - -/// When allocating, the ideal_capacity is calculated by -/// actual_capacity + (actual_capacity / ideal_factor) -const ideal_factor = 3; - -/// In order for a slice of bytes to be considered eligible to keep metadata pointing at -/// it as a possible place to put new symbols, it must have enough room for this many bytes -/// (plus extra for reserved capacity). -const minimum_text_block_size = 64; -pub const min_text_capacity = padToIdeal(minimum_text_block_size); - -pub fn createEmpty( - arena: Allocator, - comp: *Compilation, - emit: Path, - options: link.File.OpenOptions, -) !*Coff { - const target = &comp.root_mod.resolved_target.result; - assert(target.ofmt == .coff); - const optimize_mode = comp.root_mod.optimize_mode; - const output_mode = comp.config.output_mode; - const link_mode = comp.config.link_mode; - const use_llvm = comp.config.use_llvm; - - const ptr_width: PtrWidth = switch (target.ptrBitWidth()) { - 0...32 => .p32, - 33...64 => .p64, - else => return error.UnsupportedCOFFArchitecture, - }; - const page_size: u32 = switch (target.cpu.arch) { - else => 0x1000, - }; - - const coff = try arena.create(Coff); - coff.* = .{ - .base = .{ - .tag = .coff, - .comp = comp, - .emit = emit, - .zcu_object_basename = if (use_llvm) - try std.fmt.allocPrint(arena, "{s}_zcu.obj", .{fs.path.stem(emit.sub_path)}) - else - null, - .stack_size = options.stack_size orelse 16777216, - .gc_sections = options.gc_sections orelse (optimize_mode != .Debug), - .print_gc_sections = options.print_gc_sections, - .allow_shlib_undefined = options.allow_shlib_undefined orelse false, - .file = null, - .build_id = options.build_id, - }, - .ptr_width 
= ptr_width, - .page_size = page_size, - - .data_directories = [1]coff_util.ImageDataDirectory{.{ - .virtual_address = 0, - .size = 0, - }} ** coff_util.IMAGE_NUMBEROF_DIRECTORY_ENTRIES, - - .image_base = options.image_base orelse switch (output_mode) { - .Exe => switch (target.cpu.arch) { - .aarch64, .x86_64 => 0x140000000, - .thumb, .x86 => 0x400000, - else => unreachable, - }, - .Lib => switch (target.cpu.arch) { - .aarch64, .x86_64 => 0x180000000, - .thumb, .x86 => 0x10000000, - else => unreachable, - }, - .Obj => 0, - }, - - .entry = options.entry, - - .major_subsystem_version = options.major_subsystem_version orelse 6, - .minor_subsystem_version = options.minor_subsystem_version orelse 0, - .entry_addr = math.cast(u32, options.entry_addr orelse 0) orelse - return error.EntryAddressTooBig, - .module_definition_file = options.module_definition_file, - .repro = options.repro, - }; - errdefer coff.base.destroy(); - - coff.base.file = try emit.root_dir.handle.createFile(emit.sub_path, .{ - .truncate = true, - .read = true, - .mode = link.File.determineMode(output_mode, link_mode), - }); - - const gpa = comp.gpa; - - try coff.strtab.buffer.ensureUnusedCapacity(gpa, @sizeOf(u32)); - coff.strtab.buffer.appendNTimesAssumeCapacity(0, @sizeOf(u32)); - - try coff.temp_strtab.buffer.append(gpa, 0); - - // Index 0 is always a null symbol. 
- try coff.locals.append(gpa, .{ - .name = [_]u8{0} ** 8, - .value = 0, - .section_number = .UNDEFINED, - .type = .{ .base_type = .NULL, .complex_type = .NULL }, - .storage_class = .NULL, - .number_of_aux_symbols = 0, - }); - - if (coff.text_section_index == null) { - const file_size: u32 = @intCast(options.program_code_size_hint); - coff.text_section_index = try coff.allocateSection(".text", file_size, .{ - .CNT_CODE = 1, - .MEM_EXECUTE = 1, - .MEM_READ = 1, - }); - } - - if (coff.got_section_index == null) { - const file_size = @as(u32, @intCast(options.symbol_count_hint)) * coff.ptr_width.size(); - coff.got_section_index = try coff.allocateSection(".got", file_size, .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }); - } - - if (coff.rdata_section_index == null) { - const file_size: u32 = coff.page_size; - coff.rdata_section_index = try coff.allocateSection(".rdata", file_size, .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }); - } - - if (coff.data_section_index == null) { - const file_size: u32 = coff.page_size; - coff.data_section_index = try coff.allocateSection(".data", file_size, .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - .MEM_WRITE = 1, - }); - } - - if (coff.idata_section_index == null) { - const file_size = @as(u32, @intCast(options.symbol_count_hint)) * coff.ptr_width.size(); - coff.idata_section_index = try coff.allocateSection(".idata", file_size, .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_READ = 1, - }); - } - - if (coff.reloc_section_index == null) { - const file_size = @as(u32, @intCast(options.symbol_count_hint)) * @sizeOf(coff_util.BaseRelocation); - coff.reloc_section_index = try coff.allocateSection(".reloc", file_size, .{ - .CNT_INITIALIZED_DATA = 1, - .MEM_DISCARDABLE = 1, - .MEM_READ = 1, - }); - } - - if (coff.strtab_offset == null) { - const file_size = @as(u32, @intCast(coff.strtab.buffer.items.len)); - coff.strtab_offset = coff.findFreeSpace(file_size, @alignOf(u32)); // 4bytes aligned seems like a good idea here - 
log.debug("found strtab free space 0x{x} to 0x{x}", .{ coff.strtab_offset.?, coff.strtab_offset.? + file_size }); - } - - { - // We need to find out what the max file offset is according to section headers. - // Otherwise, we may end up with an COFF binary with file size not matching the final section's - // offset + it's filesize. - // TODO I don't like this here one bit - var max_file_offset: u64 = 0; - for (coff.sections.items(.header)) |header| { - if (header.pointer_to_raw_data + header.size_of_raw_data > max_file_offset) { - max_file_offset = header.pointer_to_raw_data + header.size_of_raw_data; - } - } - try coff.pwriteAll(&[_]u8{0}, max_file_offset); - } - - return coff; -} - -pub fn open( - arena: Allocator, - comp: *Compilation, - emit: Path, - options: link.File.OpenOptions, -) !*Coff { - // TODO: restore saved linker state, don't truncate the file, and - // participate in incremental compilation. - return createEmpty(arena, comp, emit, options); -} - -pub fn deinit(coff: *Coff) void { - const gpa = coff.base.comp.gpa; - - for (coff.sections.items(.free_list)) |*free_list| { - free_list.deinit(gpa); - } - coff.sections.deinit(gpa); - - coff.atoms.deinit(gpa); - coff.locals.deinit(gpa); - coff.globals.deinit(gpa); - - { - var it = coff.resolver.keyIterator(); - while (it.next()) |key_ptr| { - gpa.free(key_ptr.*); - } - coff.resolver.deinit(gpa); - } - - coff.unresolved.deinit(gpa); - coff.need_got_table.deinit(gpa); - coff.locals_free_list.deinit(gpa); - coff.globals_free_list.deinit(gpa); - coff.strtab.deinit(gpa); - coff.temp_strtab.deinit(gpa); - coff.got_table.deinit(gpa); - - for (coff.import_tables.values()) |*itab| { - itab.deinit(gpa); - } - coff.import_tables.deinit(gpa); - - coff.lazy_syms.deinit(gpa); - - for (coff.navs.values()) |*metadata| { - metadata.deinit(gpa); - } - coff.navs.deinit(gpa); - - coff.atom_by_index_table.deinit(gpa); - - { - var it = coff.uavs.iterator(); - while (it.next()) |entry| { - entry.value_ptr.exports.deinit(gpa); - 
} - coff.uavs.deinit(gpa); - } - - for (coff.relocs.values()) |*relocs| { - relocs.deinit(gpa); - } - coff.relocs.deinit(gpa); - - for (coff.base_relocs.values()) |*relocs| { - relocs.deinit(gpa); - } - coff.base_relocs.deinit(gpa); -} - -fn allocateSection(coff: *Coff, name: []const u8, size: u32, flags: coff_util.SectionHeaderFlags) !u16 { - const index = @as(u16, @intCast(coff.sections.slice().len)); - const off = coff.findFreeSpace(size, default_file_alignment); - // Memory is always allocated in sequence - // TODO: investigate if we can allocate .text last; this way it would never need to grow in memory! - const vaddr = blk: { - if (index == 0) break :blk coff.page_size; - const prev_header = coff.sections.items(.header)[index - 1]; - break :blk mem.alignForward(u32, prev_header.virtual_address + prev_header.virtual_size, coff.page_size); - }; - // We commit more memory than needed upfront so that we don't have to reallocate too soon. - const memsz = mem.alignForward(u32, size, coff.page_size) * 100; - log.debug("found {s} free space 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ - name, - off, - off + size, - vaddr, - vaddr + size, - }); - var header = coff_util.SectionHeader{ - .name = undefined, - .virtual_size = memsz, - .virtual_address = vaddr, - .size_of_raw_data = size, - .pointer_to_raw_data = off, - .pointer_to_relocations = 0, - .pointer_to_linenumbers = 0, - .number_of_relocations = 0, - .number_of_linenumbers = 0, - .flags = flags, - }; - const gpa = coff.base.comp.gpa; - try coff.setSectionName(&header, name); - try coff.sections.append(gpa, .{ .header = header }); - return index; -} - -fn growSection(coff: *Coff, sect_id: u32, needed_size: u32) !void { - const header = &coff.sections.items(.header)[sect_id]; - const maybe_last_atom_index = coff.sections.items(.last_atom_index)[sect_id]; - const sect_capacity = coff.allocatedSize(header.pointer_to_raw_data); - - if (needed_size > sect_capacity) { - const new_offset = coff.findFreeSpace(needed_size, 
default_file_alignment); - const current_size = if (maybe_last_atom_index) |last_atom_index| blk: { - const last_atom = coff.getAtom(last_atom_index); - const sym = last_atom.getSymbol(coff); - break :blk (sym.value + last_atom.size) - header.virtual_address; - } else 0; - log.debug("moving {s} from 0x{x} to 0x{x}", .{ - coff.getSectionName(header), - header.pointer_to_raw_data, - new_offset, - }); - const amt = try coff.base.file.?.copyRangeAll( - header.pointer_to_raw_data, - coff.base.file.?, - new_offset, - current_size, - ); - if (amt != current_size) return error.InputOutput; - header.pointer_to_raw_data = new_offset; - } - - const sect_vm_capacity = coff.allocatedVirtualSize(header.virtual_address); - if (needed_size > sect_vm_capacity) { - coff.markRelocsDirtyByAddress(header.virtual_address + header.virtual_size); - try coff.growSectionVirtualMemory(sect_id, needed_size); - } - - header.virtual_size = @max(header.virtual_size, needed_size); - header.size_of_raw_data = needed_size; -} - -fn growSectionVirtualMemory(coff: *Coff, sect_id: u32, needed_size: u32) !void { - const header = &coff.sections.items(.header)[sect_id]; - const increased_size = padToIdeal(needed_size); - const old_aligned_end = header.virtual_address + mem.alignForward(u32, header.virtual_size, coff.page_size); - const new_aligned_end = header.virtual_address + mem.alignForward(u32, increased_size, coff.page_size); - const diff = new_aligned_end - old_aligned_end; - log.debug("growing {s} in virtual memory by {x}", .{ coff.getSectionName(header), diff }); - - // TODO: enforce order by increasing VM addresses in coff.sections container. - // This is required by the loader anyhow as far as I can tell. - for (coff.sections.items(.header)[sect_id + 1 ..], 0..) 
|*next_header, next_sect_id| { - const maybe_last_atom_index = coff.sections.items(.last_atom_index)[sect_id + 1 + next_sect_id]; - next_header.virtual_address += diff; - - if (maybe_last_atom_index) |last_atom_index| { - var atom_index = last_atom_index; - while (true) { - const atom = coff.getAtom(atom_index); - const sym = atom.getSymbolPtr(coff); - sym.value += diff; - - if (atom.prev_index) |prev_index| { - atom_index = prev_index; - } else break; - } - } - } - - header.virtual_size = increased_size; -} - -fn allocateAtom(coff: *Coff, atom_index: Atom.Index, new_atom_size: u32, alignment: u32) !u32 { - const tracy = trace(@src()); - defer tracy.end(); - - const atom = coff.getAtom(atom_index); - const sect_id = @intFromEnum(atom.getSymbol(coff).section_number) - 1; - const header = &coff.sections.items(.header)[sect_id]; - const free_list = &coff.sections.items(.free_list)[sect_id]; - const maybe_last_atom_index = &coff.sections.items(.last_atom_index)[sect_id]; - const new_atom_ideal_capacity = if (header.isCode()) padToIdeal(new_atom_size) else new_atom_size; - - // We use these to indicate our intention to update metadata, placing the new atom, - // and possibly removing a free list node. - // It would be simpler to do it inside the for loop below, but that would cause a - // problem if an error was returned later in the function. So this action - // is actually carried out at the end of the function, when errors are no longer possible. - var atom_placement: ?Atom.Index = null; - var free_list_removal: ?usize = null; - - // First we look for an appropriately sized free list node. - // The list is unordered. We'll just take the first thing that works. - const vaddr = blk: { - var i: usize = 0; - while (i < free_list.items.len) { - const big_atom_index = free_list.items[i]; - const big_atom = coff.getAtom(big_atom_index); - // We now have a pointer to a live atom that has too much capacity. - // Is it enough that we could fit this new atom? 
- const sym = big_atom.getSymbol(coff); - const capacity = big_atom.capacity(coff); - const ideal_capacity = if (header.isCode()) padToIdeal(capacity) else capacity; - const ideal_capacity_end_vaddr = math.add(u32, sym.value, ideal_capacity) catch ideal_capacity; - const capacity_end_vaddr = sym.value + capacity; - const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; - const new_start_vaddr = mem.alignBackward(u32, new_start_vaddr_unaligned, alignment); - if (new_start_vaddr < ideal_capacity_end_vaddr) { - // Additional bookkeeping here to notice if this free list node - // should be deleted because the atom that it points to has grown to take up - // more of the extra capacity. - if (!big_atom.freeListEligible(coff)) { - _ = free_list.swapRemove(i); - } else { - i += 1; - } - continue; - } - // At this point we know that we will place the new atom here. But the - // remaining question is whether there is still yet enough capacity left - // over for there to still be a free list node. - const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; - const keep_free_list_node = remaining_capacity >= min_text_capacity; - - // Set up the metadata to be updated, after errors are no longer possible. 
- atom_placement = big_atom_index; - if (!keep_free_list_node) { - free_list_removal = i; - } - break :blk new_start_vaddr; - } else if (maybe_last_atom_index.*) |last_index| { - const last = coff.getAtom(last_index); - const last_symbol = last.getSymbol(coff); - const ideal_capacity = if (header.isCode()) padToIdeal(last.size) else last.size; - const ideal_capacity_end_vaddr = last_symbol.value + ideal_capacity; - const new_start_vaddr = mem.alignForward(u32, ideal_capacity_end_vaddr, alignment); - atom_placement = last_index; - break :blk new_start_vaddr; - } else { - break :blk mem.alignForward(u32, header.virtual_address, alignment); - } - }; - - const expand_section = if (atom_placement) |placement_index| - coff.getAtom(placement_index).next_index == null - else - true; - if (expand_section) { - const needed_size: u32 = (vaddr + new_atom_size) - header.virtual_address; - try coff.growSection(sect_id, needed_size); - maybe_last_atom_index.* = atom_index; - } - coff.getAtomPtr(atom_index).size = new_atom_size; - - if (atom.prev_index) |prev_index| { - const prev = coff.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - } - if (atom.next_index) |next_index| { - const next = coff.getAtomPtr(next_index); - next.prev_index = atom.prev_index; - } - - if (atom_placement) |big_atom_index| { - const big_atom = coff.getAtomPtr(big_atom_index); - const atom_ptr = coff.getAtomPtr(atom_index); - atom_ptr.prev_index = big_atom_index; - atom_ptr.next_index = big_atom.next_index; - big_atom.next_index = atom_index; - } else { - const atom_ptr = coff.getAtomPtr(atom_index); - atom_ptr.prev_index = null; - atom_ptr.next_index = null; - } - if (free_list_removal) |i| { - _ = free_list.swapRemove(i); - } - - return vaddr; -} - -pub fn allocateSymbol(coff: *Coff) !u32 { - const gpa = coff.base.comp.gpa; - try coff.locals.ensureUnusedCapacity(gpa, 1); - - const index = blk: { - if (coff.locals_free_list.pop()) |index| { - log.debug(" (reusing symbol index {d})", 
.{index}); - break :blk index; - } else { - log.debug(" (allocating symbol index {d})", .{coff.locals.items.len}); - const index = @as(u32, @intCast(coff.locals.items.len)); - _ = coff.locals.addOneAssumeCapacity(); - break :blk index; - } - }; - - coff.locals.items[index] = .{ - .name = [_]u8{0} ** 8, - .value = 0, - .section_number = .UNDEFINED, - .type = .{ .base_type = .NULL, .complex_type = .NULL }, - .storage_class = .NULL, - .number_of_aux_symbols = 0, - }; - - return index; -} - -fn allocateGlobal(coff: *Coff) !u32 { - const gpa = coff.base.comp.gpa; - try coff.globals.ensureUnusedCapacity(gpa, 1); - - const index = blk: { - if (coff.globals_free_list.pop()) |index| { - log.debug(" (reusing global index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating global index {d})", .{coff.globals.items.len}); - const index = @as(u32, @intCast(coff.globals.items.len)); - _ = coff.globals.addOneAssumeCapacity(); - break :blk index; - } - }; - - coff.globals.items[index] = .{ - .sym_index = 0, - .file = null, - }; - - return index; -} - -fn addGotEntry(coff: *Coff, target: SymbolWithLoc) !void { - const gpa = coff.base.comp.gpa; - if (coff.got_table.lookup.contains(target)) return; - const got_index = try coff.got_table.allocateEntry(gpa, target); - try coff.writeOffsetTableEntry(got_index); - coff.got_table_count_dirty = true; - coff.markRelocsDirtyByTarget(target); -} - -pub fn createAtom(coff: *Coff) !Atom.Index { - const gpa = coff.base.comp.gpa; - const atom_index = @as(Atom.Index, @intCast(coff.atoms.items.len)); - const atom = try coff.atoms.addOne(gpa); - const sym_index = try coff.allocateSymbol(); - try coff.atom_by_index_table.putNoClobber(gpa, sym_index, atom_index); - atom.* = .{ - .sym_index = sym_index, - .file = null, - .size = 0, - .prev_index = null, - .next_index = null, - }; - log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, atom_index }); - return atom_index; -} - -fn growAtom(coff: *Coff, atom_index: Atom.Index, 
new_atom_size: u32, alignment: u32) !u32 { - const atom = coff.getAtom(atom_index); - const sym = atom.getSymbol(coff); - const align_ok = mem.alignBackward(u32, sym.value, alignment) == sym.value; - const need_realloc = !align_ok or new_atom_size > atom.capacity(coff); - if (!need_realloc) return sym.value; - return coff.allocateAtom(atom_index, new_atom_size, alignment); -} - -fn shrinkAtom(coff: *Coff, atom_index: Atom.Index, new_block_size: u32) void { - _ = coff; - _ = atom_index; - _ = new_block_size; - // TODO check the new capacity, and if it crosses the size threshold into a big enough - // capacity, insert a free list node for it. -} - -fn writeAtom(coff: *Coff, atom_index: Atom.Index, code: []u8, resolve_relocs: bool) !void { - const atom = coff.getAtom(atom_index); - const sym = atom.getSymbol(coff); - const section = coff.sections.get(@intFromEnum(sym.section_number) - 1); - const file_offset = section.header.pointer_to_raw_data + sym.value - section.header.virtual_address; - - log.debug("writing atom for symbol {s} at file offset 0x{x} to 0x{x}", .{ - atom.getName(coff), - file_offset, - file_offset + code.len, - }); - - const gpa = coff.base.comp.gpa; - - // Gather relocs which can be resolved. - // We need to do this as we will be applying different slide values depending - // if we are running in hot-code swapping mode or not. - // TODO: how crazy would it be to try and apply the actual image base of the loaded - // process for the in-file values rather than the Windows defaults? 
- var relocs = std.array_list.Managed(*Relocation).init(gpa); - defer relocs.deinit(); - - if (resolve_relocs) { - if (coff.relocs.getPtr(atom_index)) |rels| { - try relocs.ensureTotalCapacityPrecise(rels.items.len); - for (rels.items) |*reloc| { - if (reloc.isResolvable(coff) and reloc.dirty) { - relocs.appendAssumeCapacity(reloc); - } - } - } - } - - if (is_hot_update_compatible) { - if (coff.base.child_pid) |handle| { - const slide = @intFromPtr(coff.hot_state.loaded_base_address.?); - - const mem_code = try gpa.dupe(u8, code); - defer gpa.free(mem_code); - coff.resolveRelocs(atom_index, relocs.items, mem_code, slide); - - const vaddr = sym.value + slide; - const pvaddr = @as(*anyopaque, @ptrFromInt(vaddr)); - - log.debug("writing to memory at address {x}", .{vaddr}); - - if (build_options.enable_logging) { - try debugMem(gpa, handle, pvaddr, mem_code); - } - - if (section.header.flags.MEM_WRITE == 0) { - writeMemProtected(handle, pvaddr, mem_code) catch |err| { - log.warn("writing to protected memory failed with error: {s}", .{@errorName(err)}); - }; - } else { - writeMem(handle, pvaddr, mem_code) catch |err| { - log.warn("writing to protected memory failed with error: {s}", .{@errorName(err)}); - }; - } - } - } - - if (resolve_relocs) { - coff.resolveRelocs(atom_index, relocs.items, code, coff.image_base); - } - try coff.pwriteAll(code, file_offset); - if (resolve_relocs) { - // Now we can mark the relocs as resolved. 
- while (relocs.pop()) |reloc| { - reloc.dirty = false; - } - } -} - -fn debugMem(allocator: Allocator, handle: std.process.Child.Id, pvaddr: std.os.windows.LPVOID, code: []const u8) !void { - const buffer = try allocator.alloc(u8, code.len); - defer allocator.free(buffer); - const memread = try std.os.windows.ReadProcessMemory(handle, pvaddr, buffer); - log.debug("to write: {x}", .{code}); - log.debug("in memory: {x}", .{memread}); -} - -fn writeMemProtected(handle: std.process.Child.Id, pvaddr: std.os.windows.LPVOID, code: []const u8) !void { - const old_prot = try std.os.windows.VirtualProtectEx(handle, pvaddr, code.len, std.os.windows.PAGE_EXECUTE_WRITECOPY); - try writeMem(handle, pvaddr, code); - // TODO: We can probably just set the pages writeable and leave it at that without having to restore the attributes. - // For that though, we want to track which page has already been modified. - _ = try std.os.windows.VirtualProtectEx(handle, pvaddr, code.len, old_prot); -} - -fn writeMem(handle: std.process.Child.Id, pvaddr: std.os.windows.LPVOID, code: []const u8) !void { - const amt = try std.os.windows.WriteProcessMemory(handle, pvaddr, code); - if (amt != code.len) return error.InputOutput; -} - -fn writeOffsetTableEntry(coff: *Coff, index: usize) !void { - const sect_id = coff.got_section_index.?; - - if (coff.got_table_count_dirty) { - const needed_size: u32 = @intCast(coff.got_table.entries.items.len * coff.ptr_width.size()); - try coff.growSection(sect_id, needed_size); - coff.got_table_count_dirty = false; - } - - const header = &coff.sections.items(.header)[sect_id]; - const entry = coff.got_table.entries.items[index]; - const entry_value = coff.getSymbol(entry).value; - const entry_offset = index * coff.ptr_width.size(); - const file_offset = header.pointer_to_raw_data + entry_offset; - const vmaddr = header.virtual_address + entry_offset; - - log.debug("writing GOT entry {d}: @{x} => {x}", .{ index, vmaddr, entry_value + coff.image_base }); - - switch 
(coff.ptr_width) { - .p32 => { - var buf: [4]u8 = undefined; - mem.writeInt(u32, &buf, @intCast(entry_value + coff.image_base), .little); - try coff.base.file.?.pwriteAll(&buf, file_offset); - }, - .p64 => { - var buf: [8]u8 = undefined; - mem.writeInt(u64, &buf, entry_value + coff.image_base, .little); - try coff.base.file.?.pwriteAll(&buf, file_offset); - }, - } - - if (is_hot_update_compatible) { - if (coff.base.child_pid) |handle| { - const gpa = coff.base.comp.gpa; - const slide = @intFromPtr(coff.hot_state.loaded_base_address.?); - const actual_vmaddr = vmaddr + slide; - const pvaddr = @as(*anyopaque, @ptrFromInt(actual_vmaddr)); - log.debug("writing GOT entry to memory at address {x}", .{actual_vmaddr}); - if (build_options.enable_logging) { - switch (coff.ptr_width) { - .p32 => { - var buf: [4]u8 = undefined; - try debugMem(gpa, handle, pvaddr, &buf); - }, - .p64 => { - var buf: [8]u8 = undefined; - try debugMem(gpa, handle, pvaddr, &buf); - }, - } - } - - switch (coff.ptr_width) { - .p32 => { - var buf: [4]u8 = undefined; - mem.writeInt(u32, &buf, @as(u32, @intCast(entry_value + slide)), .little); - writeMem(handle, pvaddr, &buf) catch |err| { - log.warn("writing to protected memory failed with error: {s}", .{@errorName(err)}); - }; - }, - .p64 => { - var buf: [8]u8 = undefined; - mem.writeInt(u64, &buf, entry_value + slide, .little); - writeMem(handle, pvaddr, &buf) catch |err| { - log.warn("writing to protected memory failed with error: {s}", .{@errorName(err)}); - }; - }, - } - } - } -} - -fn markRelocsDirtyByTarget(coff: *Coff, target: SymbolWithLoc) void { - if (!coff.base.comp.config.incremental) return; - // TODO: reverse-lookup might come in handy here - for (coff.relocs.values()) |*relocs| { - for (relocs.items) |*reloc| { - if (!reloc.target.eql(target)) continue; - reloc.dirty = true; - } - } -} - -fn markRelocsDirtyByAddress(coff: *Coff, addr: u32) void { - if (!coff.base.comp.config.incremental) return; - const got_moved = blk: { - const 
sect_id = coff.got_section_index orelse break :blk false; - break :blk coff.sections.items(.header)[sect_id].virtual_address >= addr; - }; - - // TODO: dirty relocations targeting import table if that got moved in memory - - for (coff.relocs.values()) |*relocs| { - for (relocs.items) |*reloc| { - if (reloc.isGotIndirection()) { - reloc.dirty = reloc.dirty or got_moved; - } else { - const target_vaddr = reloc.getTargetAddress(coff) orelse continue; - if (target_vaddr >= addr) reloc.dirty = true; - } - } - } - - // TODO: dirty only really affected GOT cells - for (coff.got_table.entries.items) |entry| { - const target_addr = coff.getSymbol(entry).value; - if (target_addr >= addr) { - coff.got_table_contents_dirty = true; - break; - } - } -} - -fn resolveRelocs(coff: *Coff, atom_index: Atom.Index, relocs: []const *const Relocation, code: []u8, image_base: u64) void { - log.debug("relocating '{s}'", .{coff.getAtom(atom_index).getName(coff)}); - for (relocs) |reloc| { - reloc.resolve(atom_index, code, image_base, coff); - } -} - -pub fn ptraceAttach(coff: *Coff, handle: std.process.Child.Id) !void { - if (!is_hot_update_compatible) return; - - log.debug("attaching to process with handle {*}", .{handle}); - coff.hot_state.loaded_base_address = std.os.windows.ProcessBaseAddress(handle) catch |err| { - log.warn("failed to get base address for the process with error: {s}", .{@errorName(err)}); - return; - }; -} - -pub fn ptraceDetach(coff: *Coff, handle: std.process.Child.Id) void { - if (!is_hot_update_compatible) return; - - log.debug("detaching from process with handle {*}", .{handle}); - coff.hot_state.loaded_base_address = null; -} - -fn freeAtom(coff: *Coff, atom_index: Atom.Index) void { - log.debug("freeAtom {d}", .{atom_index}); - - const gpa = coff.base.comp.gpa; - - // Remove any relocs and base relocs associated with this Atom - coff.freeRelocations(atom_index); - - const atom = coff.getAtom(atom_index); - const sym = atom.getSymbol(coff); - const sect_id = 
@intFromEnum(sym.section_number) - 1; - const free_list = &coff.sections.items(.free_list)[sect_id]; - var already_have_free_list_node = false; - { - var i: usize = 0; - // TODO turn free_list into a hash map - while (i < free_list.items.len) { - if (free_list.items[i] == atom_index) { - _ = free_list.swapRemove(i); - continue; - } - if (free_list.items[i] == atom.prev_index) { - already_have_free_list_node = true; - } - i += 1; - } - } - - const maybe_last_atom_index = &coff.sections.items(.last_atom_index)[sect_id]; - if (maybe_last_atom_index.*) |last_atom_index| { - if (last_atom_index == atom_index) { - if (atom.prev_index) |prev_index| { - // TODO shrink the section size here - maybe_last_atom_index.* = prev_index; - } else { - maybe_last_atom_index.* = null; - } - } - } - - if (atom.prev_index) |prev_index| { - const prev = coff.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - - if (!already_have_free_list_node and prev.*.freeListEligible(coff)) { - // The free list is heuristics, it doesn't have to be perfect, so we can - // ignore the OOM here. - free_list.append(gpa, prev_index) catch {}; - } - } else { - coff.getAtomPtr(atom_index).prev_index = null; - } - - if (atom.next_index) |next_index| { - coff.getAtomPtr(next_index).prev_index = atom.prev_index; - } else { - coff.getAtomPtr(atom_index).next_index = null; - } - - // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
- const sym_index = atom.getSymbolIndex().?; - coff.locals_free_list.append(gpa, sym_index) catch {}; - - // Try freeing GOT atom if this decl had one - coff.got_table.freeEntry(gpa, .{ .sym_index = sym_index }); - - coff.locals.items[sym_index].section_number = .UNDEFINED; - _ = coff.atom_by_index_table.remove(sym_index); - log.debug(" adding local symbol index {d} to free list", .{sym_index}); - coff.getAtomPtr(atom_index).sym_index = 0; -} - -pub fn updateFunc( - coff: *Coff, - pt: Zcu.PerThread, - func_index: InternPool.Index, - mir: *const codegen.AnyMir, -) link.File.UpdateNavError!void { - if (build_options.skip_non_native and builtin.object_format != .coff) { - @panic("Attempted to compile for object format that was disabled by build configuration"); - } - const tracy = trace(@src()); - defer tracy.end(); - - const zcu = pt.zcu; - const gpa = zcu.gpa; - const func = zcu.funcInfo(func_index); - const nav_index = func.owner_nav; - - const atom_index = try coff.getOrCreateAtomForNav(nav_index); - coff.freeRelocations(atom_index); - - coff.navs.getPtr(func.owner_nav).?.section = coff.text_section_index.?; - - var aw: std.Io.Writer.Allocating = .init(gpa); - defer aw.deinit(); - - codegen.emitFunction( - &coff.base, - pt, - zcu.navSrcLoc(nav_index), - func_index, - coff.getAtom(atom_index).getSymbolIndex().?, - mir, - &aw.writer, - .none, - ) catch |err| switch (err) { - error.WriteFailed => return error.OutOfMemory, - else => |e| return e, - }; - - try coff.updateNavCode(pt, nav_index, aw.written(), .FUNCTION); - - // Exports will be updated by `Zcu.processExports` after the update. 
-} - -const LowerConstResult = union(enum) { - ok: Atom.Index, - fail: *Zcu.ErrorMsg, -}; - -fn lowerConst( - coff: *Coff, - pt: Zcu.PerThread, - name: []const u8, - val: Value, - required_alignment: InternPool.Alignment, - sect_id: u16, - src_loc: Zcu.LazySrcLoc, -) !LowerConstResult { - const gpa = coff.base.comp.gpa; - - var aw: std.Io.Writer.Allocating = .init(gpa); - defer aw.deinit(); - - const atom_index = try coff.createAtom(); - const sym = coff.getAtom(atom_index).getSymbolPtr(coff); - try coff.setSymbolName(sym, name); - sym.section_number = @as(coff_util.SectionNumber, @enumFromInt(sect_id + 1)); - - try codegen.generateSymbol(&coff.base, pt, src_loc, val, &aw.writer, .{ - .atom_index = coff.getAtom(atom_index).getSymbolIndex().?, - }); - const code = aw.written(); - - const atom = coff.getAtomPtr(atom_index); - atom.size = @intCast(code.len); - atom.getSymbolPtr(coff).value = try coff.allocateAtom( - atom_index, - atom.size, - @intCast(required_alignment.toByteUnits().?), - ); - errdefer coff.freeAtom(atom_index); - - log.debug("allocated atom for {s} at 0x{x}", .{ name, atom.getSymbol(coff).value }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - try coff.writeAtom(atom_index, code, coff.base.comp.config.incremental); - - return .{ .ok = atom_index }; -} - -pub fn updateNav( - coff: *Coff, - pt: Zcu.PerThread, - nav_index: InternPool.Nav.Index, -) link.File.UpdateNavError!void { - if (build_options.skip_non_native and builtin.object_format != .coff) { - @panic("Attempted to compile for object format that was disabled by build configuration"); - } - const tracy = trace(@src()); - defer tracy.end(); - - const zcu = pt.zcu; - const gpa = zcu.gpa; - const ip = &zcu.intern_pool; - const nav = ip.getNav(nav_index); - - const nav_val = zcu.navValue(nav_index); - const nav_init = switch (ip.indexToKey(nav_val.toIntern())) { - .func => return, - .variable => |variable| Value.fromInterned(variable.init), - .@"extern" => |@"extern"| { - 
if (ip.isFunctionType(@"extern".ty)) return; - // TODO make this part of getGlobalSymbol - const name = nav.name.toSlice(ip); - const lib_name = @"extern".lib_name.toSlice(ip); - const global_index = try coff.getGlobalSymbol(name, lib_name); - try coff.need_got_table.put(gpa, global_index, {}); - return; - }, - else => nav_val, - }; - - if (nav_init.typeOf(zcu).hasRuntimeBits(zcu)) { - const atom_index = try coff.getOrCreateAtomForNav(nav_index); - coff.freeRelocations(atom_index); - const atom = coff.getAtom(atom_index); - - coff.navs.getPtr(nav_index).?.section = coff.getNavOutputSection(nav_index); - - var aw: std.Io.Writer.Allocating = .init(gpa); - defer aw.deinit(); - - codegen.generateSymbol( - &coff.base, - pt, - zcu.navSrcLoc(nav_index), - nav_init, - &aw.writer, - .{ .atom_index = atom.getSymbolIndex().? }, - ) catch |err| switch (err) { - error.WriteFailed => return error.OutOfMemory, - else => |e| return e, - }; - - try coff.updateNavCode(pt, nav_index, aw.written(), .NULL); - } - - // Exports will be updated by `Zcu.processExports` after the update. 
-} - -fn updateLazySymbolAtom( - coff: *Coff, - pt: Zcu.PerThread, - sym: link.File.LazySymbol, - atom_index: Atom.Index, - section_index: u16, -) !void { - const zcu = pt.zcu; - const comp = coff.base.comp; - const gpa = comp.gpa; - - var required_alignment: InternPool.Alignment = .none; - var aw: std.Io.Writer.Allocating = .init(gpa); - defer aw.deinit(); - - const name = try allocPrint(gpa, "__lazy_{s}_{f}", .{ - @tagName(sym.kind), - Type.fromInterned(sym.ty).fmt(pt), - }); - defer gpa.free(name); - - const local_sym_index = coff.getAtomPtr(atom_index).getSymbolIndex().?; - - const src = Type.fromInterned(sym.ty).srcLocOrNull(zcu) orelse Zcu.LazySrcLoc.unneeded; - try codegen.generateLazySymbol( - &coff.base, - pt, - src, - sym, - &required_alignment, - &aw.writer, - .none, - .{ .atom_index = local_sym_index }, - ); - const code = aw.written(); - - const atom = coff.getAtomPtr(atom_index); - const symbol = atom.getSymbolPtr(coff); - try coff.setSymbolName(symbol, name); - symbol.section_number = @enumFromInt(section_index + 1); - symbol.type = .{ .complex_type = .NULL, .base_type = .NULL }; - - const code_len: u32 = @intCast(code.len); - const vaddr = try coff.allocateAtom(atom_index, code_len, @intCast(required_alignment.toByteUnits() orelse 0)); - errdefer coff.freeAtom(atom_index); - - log.debug("allocated atom for {s} at 0x{x}", .{ name, vaddr }); - log.debug(" (required alignment 0x{x})", .{required_alignment}); - - atom.size = code_len; - symbol.value = vaddr; - - try coff.addGotEntry(.{ .sym_index = local_sym_index }); - try coff.writeAtom(atom_index, code, coff.base.comp.config.incremental); -} - -pub fn getOrCreateAtomForLazySymbol( - coff: *Coff, - pt: Zcu.PerThread, - lazy_sym: link.File.LazySymbol, -) !Atom.Index { - const gop = try coff.lazy_syms.getOrPut(pt.zcu.gpa, lazy_sym.ty); - errdefer _ = if (!gop.found_existing) coff.lazy_syms.pop(); - if (!gop.found_existing) gop.value_ptr.* = .{}; - const atom_ptr, const state_ptr = switch (lazy_sym.kind) 
{ - .code => .{ &gop.value_ptr.text_atom, &gop.value_ptr.text_state }, - .const_data => .{ &gop.value_ptr.rdata_atom, &gop.value_ptr.rdata_state }, - }; - switch (state_ptr.*) { - .unused => atom_ptr.* = try coff.createAtom(), - .pending_flush => return atom_ptr.*, - .flushed => {}, - } - state_ptr.* = .pending_flush; - const atom = atom_ptr.*; - // anyerror needs to be deferred until flush - if (lazy_sym.ty != .anyerror_type) try coff.updateLazySymbolAtom(pt, lazy_sym, atom, switch (lazy_sym.kind) { - .code => coff.text_section_index.?, - .const_data => coff.rdata_section_index.?, - }); - return atom; -} - -pub fn getOrCreateAtomForNav(coff: *Coff, nav_index: InternPool.Nav.Index) !Atom.Index { - const gpa = coff.base.comp.gpa; - const gop = try coff.navs.getOrPut(gpa, nav_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{ - .atom = try coff.createAtom(), - // If necessary, this will be modified by `updateNav` or `updateFunc`. - .section = coff.rdata_section_index.?, - .exports = .{}, - }; - } - return gop.value_ptr.atom; -} - -fn getNavOutputSection(coff: *Coff, nav_index: InternPool.Nav.Index) u16 { - const zcu = coff.base.comp.zcu.?; - const ip = &zcu.intern_pool; - const nav = ip.getNav(nav_index); - const ty = Type.fromInterned(nav.typeOf(ip)); - const zig_ty = ty.zigTypeTag(zcu); - const val = Value.fromInterned(nav.status.fully_resolved.val); - const index: u16 = blk: { - if (val.isUndef(zcu)) { - // TODO in release-fast and release-small, we should put undef in .bss - break :blk coff.data_section_index.?; - } - - switch (zig_ty) { - // TODO: what if this is a function pointer? 
- .@"fn" => break :blk coff.text_section_index.?, - else => { - if (val.getVariable(zcu)) |_| { - break :blk coff.data_section_index.?; - } - break :blk coff.rdata_section_index.?; - }, - } - }; - return index; -} - -fn updateNavCode( - coff: *Coff, - pt: Zcu.PerThread, - nav_index: InternPool.Nav.Index, - code: []u8, - complex_type: coff_util.ComplexType, -) link.File.UpdateNavError!void { - const zcu = pt.zcu; - const ip = &zcu.intern_pool; - const nav = ip.getNav(nav_index); - - log.debug("updateNavCode {f} 0x{x}", .{ nav.fqn.fmt(ip), nav_index }); - - const mod = zcu.navFileScope(nav_index).mod.?; - const target = &mod.resolved_target.result; - const required_alignment = switch (nav.status.fully_resolved.alignment) { - .none => switch (mod.optimize_mode) { - .Debug, .ReleaseSafe, .ReleaseFast => target_util.defaultFunctionAlignment(target), - .ReleaseSmall => target_util.minFunctionAlignment(target), - }, - else => |a| a.maxStrict(target_util.minFunctionAlignment(target)), - }; - - const nav_metadata = coff.navs.get(nav_index).?; - const atom_index = nav_metadata.atom; - const atom = coff.getAtom(atom_index); - const sym_index = atom.getSymbolIndex().?; - const sect_index = nav_metadata.section; - const code_len: u32 = @intCast(code.len); - - if (atom.size != 0) { - const sym = atom.getSymbolPtr(coff); - try coff.setSymbolName(sym, nav.fqn.toSlice(ip)); - sym.section_number = @enumFromInt(sect_index + 1); - sym.type = .{ .complex_type = complex_type, .base_type = .NULL }; - - const capacity = atom.capacity(coff); - const need_realloc = code.len > capacity or !required_alignment.check(sym.value); - if (need_realloc) { - const vaddr = coff.growAtom(atom_index, code_len, @intCast(required_alignment.toByteUnits() orelse 0)) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return coff.base.cgFail(nav_index, "failed to grow atom: {s}", .{@errorName(e)}), - }; - log.debug("growing {f} from 0x{x} to 0x{x}", .{ nav.fqn.fmt(ip), 
sym.value, vaddr }); - log.debug(" (required alignment 0x{x}", .{required_alignment}); - - if (vaddr != sym.value) { - sym.value = vaddr; - log.debug(" (updating GOT entry)", .{}); - const got_entry_index = coff.got_table.lookup.get(.{ .sym_index = sym_index }).?; - coff.writeOffsetTableEntry(got_entry_index) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return coff.base.cgFail(nav_index, "failed to write offset table entry: {s}", .{@errorName(e)}), - }; - coff.markRelocsDirtyByTarget(.{ .sym_index = sym_index }); - } - } else if (code_len < atom.size) { - coff.shrinkAtom(atom_index, code_len); - } - coff.getAtomPtr(atom_index).size = code_len; - } else { - const sym = atom.getSymbolPtr(coff); - try coff.setSymbolName(sym, nav.fqn.toSlice(ip)); - sym.section_number = @enumFromInt(sect_index + 1); - sym.type = .{ .complex_type = complex_type, .base_type = .NULL }; - - const vaddr = coff.allocateAtom(atom_index, code_len, @intCast(required_alignment.toByteUnits() orelse 0)) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return coff.base.cgFail(nav_index, "failed to allocate atom: {s}", .{@errorName(e)}), - }; - errdefer coff.freeAtom(atom_index); - log.debug("allocated atom for {f} at 0x{x}", .{ nav.fqn.fmt(ip), vaddr }); - coff.getAtomPtr(atom_index).size = code_len; - sym.value = vaddr; - - coff.addGotEntry(.{ .sym_index = sym_index }) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return coff.base.cgFail(nav_index, "failed to add GOT entry: {s}", .{@errorName(e)}), - }; - } - - coff.writeAtom(atom_index, code, coff.base.comp.config.incremental) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return coff.base.cgFail(nav_index, "failed to write atom: {s}", .{@errorName(e)}), - }; -} - -pub fn freeNav(coff: *Coff, nav_index: InternPool.NavIndex) void { - const gpa = coff.base.comp.gpa; - - if 
(coff.decls.fetchOrderedRemove(nav_index)) |const_kv| { - var kv = const_kv; - coff.freeAtom(kv.value.atom); - kv.value.exports.deinit(gpa); - } -} - -pub fn updateExports( - coff: *Coff, - pt: Zcu.PerThread, - exported: Zcu.Exported, - export_indices: []const Zcu.Export.Index, -) link.File.UpdateExportsError!void { - if (build_options.skip_non_native and builtin.object_format != .coff) { - @panic("Attempted to compile for object format that was disabled by build configuration"); - } - - const zcu = pt.zcu; - const gpa = zcu.gpa; - - const metadata = switch (exported) { - .nav => |nav| blk: { - _ = try coff.getOrCreateAtomForNav(nav); - break :blk coff.navs.getPtr(nav).?; - }, - .uav => |uav| coff.uavs.getPtr(uav) orelse blk: { - const first_exp = export_indices[0].ptr(zcu); - const res = try coff.lowerUav(pt, uav, .none, first_exp.src); - switch (res) { - .sym_index => {}, - .fail => |em| { - // TODO maybe it's enough to return an error here and let Module.processExportsInner - // handle the error? 
- try zcu.failed_exports.ensureUnusedCapacity(zcu.gpa, 1); - zcu.failed_exports.putAssumeCapacityNoClobber(export_indices[0], em); - return; - }, - } - break :blk coff.uavs.getPtr(uav).?; - }, - }; - const atom_index = metadata.atom; - const atom = coff.getAtom(atom_index); - - for (export_indices) |export_idx| { - const exp = export_idx.ptr(zcu); - log.debug("adding new export '{f}'", .{exp.opts.name.fmt(&zcu.intern_pool)}); - - if (exp.opts.section.toSlice(&zcu.intern_pool)) |section_name| { - if (!mem.eql(u8, section_name, ".text")) { - try zcu.failed_exports.putNoClobber(gpa, export_idx, try Zcu.ErrorMsg.create( - gpa, - exp.src, - "Unimplemented: ExportOptions.section", - .{}, - )); - continue; - } - } - - if (exp.opts.linkage == .link_once) { - try zcu.failed_exports.putNoClobber(gpa, export_idx, try Zcu.ErrorMsg.create( - gpa, - exp.src, - "Unimplemented: GlobalLinkage.link_once", - .{}, - )); - continue; - } - - const exp_name = exp.opts.name.toSlice(&zcu.intern_pool); - const sym_index = metadata.getExport(coff, exp_name) orelse blk: { - const sym_index = if (coff.getGlobalIndex(exp_name)) |global_index| ind: { - const global = coff.globals.items[global_index]; - // TODO this is just plain wrong as it all should happen in a single `resolveSymbols` - // pass. This will go away once we abstact away Zig's incremental compilation into - // its own module. 
- if (global.file == null and coff.getSymbol(global).section_number == .UNDEFINED) { - _ = coff.unresolved.swapRemove(global_index); - break :ind global.sym_index; - } - break :ind try coff.allocateSymbol(); - } else try coff.allocateSymbol(); - try metadata.exports.append(gpa, sym_index); - break :blk sym_index; - }; - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - const sym = coff.getSymbolPtr(sym_loc); - try coff.setSymbolName(sym, exp_name); - sym.value = atom.getSymbol(coff).value; - sym.section_number = @as(coff_util.SectionNumber, @enumFromInt(metadata.section + 1)); - sym.type = atom.getSymbol(coff).type; - - sym.storage_class = switch (exp.opts.linkage) { - .internal => .EXTERNAL, - .strong => .EXTERNAL, - .weak => @panic("TODO WeakExternal"), - else => unreachable, - }; - - try coff.resolveGlobalSymbol(sym_loc); - } -} - -pub fn deleteExport( - coff: *Coff, - exported: Zcu.Exported, - name: InternPool.NullTerminatedString, -) void { - const metadata = switch (exported) { - .nav => |nav| coff.navs.getPtr(nav), - .uav => |uav| coff.uavs.getPtr(uav), - } orelse return; - const zcu = coff.base.comp.zcu.?; - const name_slice = name.toSlice(&zcu.intern_pool); - const sym_index = metadata.getExportPtr(coff, name_slice) orelse return; - - const gpa = coff.base.comp.gpa; - const sym_loc = SymbolWithLoc{ .sym_index = sym_index.*, .file = null }; - const sym = coff.getSymbolPtr(sym_loc); - log.debug("deleting export '{f}'", .{name.fmt(&zcu.intern_pool)}); - assert(sym.storage_class == .EXTERNAL and sym.section_number != .UNDEFINED); - sym.* = .{ - .name = [_]u8{0} ** 8, - .value = 0, - .section_number = .UNDEFINED, - .type = .{ .base_type = .NULL, .complex_type = .NULL }, - .storage_class = .NULL, - .number_of_aux_symbols = 0, - }; - coff.locals_free_list.append(gpa, sym_index.*) catch {}; - - if (coff.resolver.fetchRemove(name_slice)) |entry| { - defer gpa.free(entry.key); - coff.globals_free_list.append(gpa, entry.value) catch {}; - 
coff.globals.items[entry.value] = .{ - .sym_index = 0, - .file = null, - }; - } - - sym_index.* = 0; -} - -fn resolveGlobalSymbol(coff: *Coff, current: SymbolWithLoc) !void { - const gpa = coff.base.comp.gpa; - const sym = coff.getSymbol(current); - const sym_name = coff.getSymbolName(current); - - const gop = try coff.getOrPutGlobalPtr(sym_name); - if (!gop.found_existing) { - gop.value_ptr.* = current; - if (sym.section_number == .UNDEFINED) { - try coff.unresolved.putNoClobber(gpa, coff.getGlobalIndex(sym_name).?, false); - } - return; - } - - log.debug("TODO finish resolveGlobalSymbols implementation", .{}); - - if (sym.section_number == .UNDEFINED) return; - - _ = coff.unresolved.swapRemove(coff.getGlobalIndex(sym_name).?); - - gop.value_ptr.* = current; -} - -pub fn flush( - coff: *Coff, - arena: Allocator, - tid: Zcu.PerThread.Id, - prog_node: std.Progress.Node, -) link.File.FlushError!void { - const tracy = trace(@src()); - defer tracy.end(); - - const comp = coff.base.comp; - const diags = &comp.link_diags; - - switch (coff.base.comp.config.output_mode) { - .Exe, .Obj => {}, - .Lib => return diags.fail("writing lib files not yet implemented for COFF", .{}), - } - - const sub_prog_node = prog_node.start("COFF Flush", 0); - defer sub_prog_node.end(); - - return flushInner(coff, arena, tid) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.LinkFailure => return error.LinkFailure, - else => |e| return diags.fail("COFF flush failed: {s}", .{@errorName(e)}), - }; -} - -fn flushInner(coff: *Coff, arena: Allocator, tid: Zcu.PerThread.Id) !void { - _ = arena; - - const comp = coff.base.comp; - const gpa = comp.gpa; - const diags = &comp.link_diags; - - const pt: Zcu.PerThread = .activate( - comp.zcu orelse return diags.fail("linking without zig source is not yet implemented", .{}), - tid, - ); - defer pt.deactivate(); - - if (coff.lazy_syms.getPtr(.anyerror_type)) |metadata| { - // Most lazy symbols can be updated on first use, but 
- // anyerror needs to wait for everything to be flushed. - if (metadata.text_state != .unused) try coff.updateLazySymbolAtom( - pt, - .{ .kind = .code, .ty = .anyerror_type }, - metadata.text_atom, - coff.text_section_index.?, - ); - if (metadata.rdata_state != .unused) try coff.updateLazySymbolAtom( - pt, - .{ .kind = .const_data, .ty = .anyerror_type }, - metadata.rdata_atom, - coff.rdata_section_index.?, - ); - } - for (coff.lazy_syms.values()) |*metadata| { - if (metadata.text_state != .unused) metadata.text_state = .flushed; - if (metadata.rdata_state != .unused) metadata.rdata_state = .flushed; - } - - { - var it = coff.need_got_table.iterator(); - while (it.next()) |entry| { - const global = coff.globals.items[entry.key_ptr.*]; - try coff.addGotEntry(global); - } - } - - while (coff.unresolved.pop()) |entry| { - assert(entry.value); - const global = coff.globals.items[entry.key]; - const sym = coff.getSymbol(global); - const res = try coff.import_tables.getOrPut(gpa, sym.value); - const itable = res.value_ptr; - if (!res.found_existing) { - itable.* = .{}; - } - if (itable.lookup.contains(global)) continue; - // TODO: we could technically write the pointer placeholder for to-be-bound import here, - // but since this happens in flush, there is currently no point. 
- _ = try itable.addImport(gpa, global); - coff.imports_count_dirty = true; - } - - try coff.writeImportTables(); - - for (coff.relocs.keys(), coff.relocs.values()) |atom_index, relocs| { - const needs_update = for (relocs.items) |reloc| { - if (reloc.dirty) break true; - } else false; - - if (!needs_update) continue; - - const atom = coff.getAtom(atom_index); - const sym = atom.getSymbol(coff); - const section = coff.sections.get(@intFromEnum(sym.section_number) - 1).header; - const file_offset = section.pointer_to_raw_data + sym.value - section.virtual_address; - - var code = std.array_list.Managed(u8).init(gpa); - defer code.deinit(); - try code.resize(math.cast(usize, atom.size) orelse return error.Overflow); - assert(atom.size > 0); - - const amt = try coff.base.file.?.preadAll(code.items, file_offset); - if (amt != code.items.len) return error.InputOutput; - - try coff.writeAtom(atom_index, code.items, true); - } - - // Update GOT if it got moved in memory. - if (coff.got_table_contents_dirty) { - for (coff.got_table.entries.items, 0..) |entry, i| { - if (!coff.got_table.lookup.contains(entry)) continue; - // TODO: write all in one go rather than incrementally. - try coff.writeOffsetTableEntry(i); - } - coff.got_table_contents_dirty = false; - } - - try coff.writeBaseRelocations(); - - if (coff.getEntryPoint()) |entry_sym_loc| { - coff.entry_addr = coff.getSymbol(entry_sym_loc).value; - } - - if (build_options.enable_logging) { - coff.logSymtab(); - coff.logImportTables(); - } - - try coff.writeStrtab(); - try coff.writeDataDirectoriesHeaders(); - try coff.writeSectionHeaders(); - - if (coff.entry_addr == null and comp.config.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true\n", .{}); - diags.flags.no_entry_point_found = true; - } else { - log.debug("flushing. 
no_entry_point_found = false\n", .{}); - diags.flags.no_entry_point_found = false; - try coff.writeHeader(); - } - - assert(!coff.imports_count_dirty); - - // hack for stage2_x86_64 + coff - if (comp.compiler_rt_dyn_lib) |crt_file| { - const compiler_rt_sub_path = try std.fs.path.join(gpa, &.{ - std.fs.path.dirname(coff.base.emit.sub_path) orelse "", - std.fs.path.basename(crt_file.full_object_path.sub_path), - }); - defer gpa.free(compiler_rt_sub_path); - try crt_file.full_object_path.root_dir.handle.copyFile( - crt_file.full_object_path.sub_path, - coff.base.emit.root_dir.handle, - compiler_rt_sub_path, - .{}, - ); - } -} - -pub fn getNavVAddr( - coff: *Coff, - pt: Zcu.PerThread, - nav_index: InternPool.Nav.Index, - reloc_info: link.File.RelocInfo, -) !u64 { - const zcu = pt.zcu; - const ip = &zcu.intern_pool; - const nav = ip.getNav(nav_index); - log.debug("getNavVAddr {f}({d})", .{ nav.fqn.fmt(ip), nav_index }); - const sym_index = if (nav.getExtern(ip)) |e| - try coff.getGlobalSymbol(nav.name.toSlice(ip), e.lib_name.toSlice(ip)) - else - coff.getAtom(try coff.getOrCreateAtomForNav(nav_index)).getSymbolIndex().?; - const atom_index = coff.getAtomIndexForSymbol(.{ - .sym_index = reloc_info.parent.atom_index, - .file = null, - }).?; - const target = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - try coff.addRelocation(atom_index, .{ - .type = .direct, - .target = target, - .offset = @as(u32, @intCast(reloc_info.offset)), - .addend = reloc_info.addend, - .pcrel = false, - .length = 3, - }); - try coff.addBaseRelocation(atom_index, @as(u32, @intCast(reloc_info.offset))); - - return 0; -} - -pub fn lowerUav( - coff: *Coff, - pt: Zcu.PerThread, - uav: InternPool.Index, - explicit_alignment: InternPool.Alignment, - src_loc: Zcu.LazySrcLoc, -) !codegen.SymbolResult { - const zcu = pt.zcu; - const gpa = zcu.gpa; - const val = Value.fromInterned(uav); - const uav_alignment = switch (explicit_alignment) { - .none => val.typeOf(zcu).abiAlignment(zcu), - else => 
explicit_alignment, - }; - if (coff.uavs.get(uav)) |metadata| { - const atom = coff.getAtom(metadata.atom); - const existing_addr = atom.getSymbol(coff).value; - if (uav_alignment.check(existing_addr)) - return .{ .sym_index = atom.getSymbolIndex().? }; - } - - var name_buf: [32]u8 = undefined; - const name = std.fmt.bufPrint(&name_buf, "__anon_{d}", .{ - @intFromEnum(uav), - }) catch unreachable; - const res = coff.lowerConst( - pt, - name, - val, - uav_alignment, - coff.rdata_section_index.?, - src_loc, - ) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => |e| return .{ .fail = try Zcu.ErrorMsg.create( - gpa, - src_loc, - "lowerAnonDecl failed with error: {s}", - .{@errorName(e)}, - ) }, - }; - const atom_index = switch (res) { - .ok => |atom_index| atom_index, - .fail => |em| return .{ .fail = em }, - }; - try coff.uavs.put(gpa, uav, .{ - .atom = atom_index, - .section = coff.rdata_section_index.?, - }); - return .{ .sym_index = coff.getAtom(atom_index).getSymbolIndex().? 
}; -} - -pub fn getUavVAddr( - coff: *Coff, - uav: InternPool.Index, - reloc_info: link.File.RelocInfo, -) !u64 { - const this_atom_index = coff.uavs.get(uav).?.atom; - const sym_index = coff.getAtom(this_atom_index).getSymbolIndex().?; - const atom_index = coff.getAtomIndexForSymbol(.{ - .sym_index = reloc_info.parent.atom_index, - .file = null, - }).?; - const target = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - try coff.addRelocation(atom_index, .{ - .type = .direct, - .target = target, - .offset = @as(u32, @intCast(reloc_info.offset)), - .addend = reloc_info.addend, - .pcrel = false, - .length = 3, - }); - try coff.addBaseRelocation(atom_index, @as(u32, @intCast(reloc_info.offset))); - - return 0; -} - -pub fn getGlobalSymbol(coff: *Coff, name: []const u8, lib_name_name: ?[]const u8) !u32 { - const gop = try coff.getOrPutGlobalPtr(name); - const global_index = coff.getGlobalIndex(name).?; - - if (gop.found_existing) { - return global_index; - } - - const sym_index = try coff.allocateSymbol(); - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; - gop.value_ptr.* = sym_loc; - - const gpa = coff.base.comp.gpa; - const sym = coff.getSymbolPtr(sym_loc); - try coff.setSymbolName(sym, name); - sym.storage_class = .EXTERNAL; - - if (lib_name_name) |lib_name| { - // We repurpose the 'value' of the Symbol struct to store an offset into - // temporary string table where we will store the library name hint. 
- sym.value = try coff.temp_strtab.insert(gpa, lib_name); - } - - try coff.unresolved.putNoClobber(gpa, global_index, true); - - return global_index; -} - -pub fn updateLineNumber(coff: *Coff, pt: Zcu.PerThread, ti_id: InternPool.TrackedInst.Index) !void { - _ = coff; - _ = pt; - _ = ti_id; - log.debug("TODO implement updateLineNumber", .{}); -} - -/// TODO: note if we need to rewrite base relocations by dirtying any of the entries in the global table -/// TODO: note that .ABSOLUTE is used as padding within each block; we could use this fact to do -/// incremental updates and writes into the table instead of doing it all at once -fn writeBaseRelocations(coff: *Coff) !void { - const gpa = coff.base.comp.gpa; - - var page_table = std.AutoHashMap(u32, std.array_list.Managed(coff_util.BaseRelocation)).init(gpa); - defer { - var it = page_table.valueIterator(); - while (it.next()) |inner| { - inner.deinit(); - } - page_table.deinit(); - } - - { - var it = coff.base_relocs.iterator(); - while (it.next()) |entry| { - const atom_index = entry.key_ptr.*; - const atom = coff.getAtom(atom_index); - const sym = atom.getSymbol(coff); - const offsets = entry.value_ptr.*; - - for (offsets.items) |offset| { - const rva = sym.value + offset; - const page = mem.alignBackward(u32, rva, coff.page_size); - const gop = try page_table.getOrPut(page); - if (!gop.found_existing) { - gop.value_ptr.* = std.array_list.Managed(coff_util.BaseRelocation).init(gpa); - } - try gop.value_ptr.append(.{ - .offset = @as(u12, @intCast(rva - page)), - .type = .DIR64, - }); - } - } - - { - const header = &coff.sections.items(.header)[coff.got_section_index.?]; - for (coff.got_table.entries.items, 0..) 
|entry, index| { - if (!coff.got_table.lookup.contains(entry)) continue; - - const sym = coff.getSymbol(entry); - if (sym.section_number == .UNDEFINED) continue; - - const rva = @as(u32, @intCast(header.virtual_address + index * coff.ptr_width.size())); - const page = mem.alignBackward(u32, rva, coff.page_size); - const gop = try page_table.getOrPut(page); - if (!gop.found_existing) { - gop.value_ptr.* = std.array_list.Managed(coff_util.BaseRelocation).init(gpa); - } - try gop.value_ptr.append(.{ - .offset = @as(u12, @intCast(rva - page)), - .type = .DIR64, - }); - } - } - } - - // Sort pages by address. - var pages = try std.array_list.Managed(u32).initCapacity(gpa, page_table.count()); - defer pages.deinit(); - { - var it = page_table.keyIterator(); - while (it.next()) |page| { - pages.appendAssumeCapacity(page.*); - } - } - mem.sort(u32, pages.items, {}, std.sort.asc(u32)); - - var buffer = std.array_list.Managed(u8).init(gpa); - defer buffer.deinit(); - - for (pages.items) |page| { - const entries = page_table.getPtr(page).?; - // Pad to required 4byte alignment - if (!mem.isAlignedGeneric( - usize, - entries.items.len * @sizeOf(coff_util.BaseRelocation), - @sizeOf(u32), - )) { - try entries.append(.{ - .offset = 0, - .type = .ABSOLUTE, - }); - } - - const block_size = @as( - u32, - @intCast(entries.items.len * @sizeOf(coff_util.BaseRelocation) + @sizeOf(coff_util.BaseRelocationDirectoryEntry)), - ); - try buffer.ensureUnusedCapacity(block_size); - buffer.appendSliceAssumeCapacity(mem.asBytes(&coff_util.BaseRelocationDirectoryEntry{ - .page_rva = page, - .block_size = block_size, - })); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(entries.items)); - } - - const header = &coff.sections.items(.header)[coff.reloc_section_index.?]; - const needed_size = @as(u32, @intCast(buffer.items.len)); - try coff.growSection(coff.reloc_section_index.?, needed_size); - - try coff.pwriteAll(buffer.items, header.pointer_to_raw_data); - - 
coff.data_directories[@intFromEnum(coff_util.DirectoryEntry.BASERELOC)] = .{ - .virtual_address = header.virtual_address, - .size = needed_size, - }; -} - -fn writeImportTables(coff: *Coff) !void { - if (coff.idata_section_index == null) return; - if (!coff.imports_count_dirty) return; - - const gpa = coff.base.comp.gpa; - - const ext = ".dll"; - const header = &coff.sections.items(.header)[coff.idata_section_index.?]; - - // Calculate needed size - var iat_size: u32 = 0; - var dir_table_size: u32 = @sizeOf(coff_util.ImportDirectoryEntry); // sentinel - var lookup_table_size: u32 = 0; - var names_table_size: u32 = 0; - var dll_names_size: u32 = 0; - for (coff.import_tables.keys(), 0..) |off, i| { - const lib_name = coff.temp_strtab.getAssumeExists(off); - const itable = coff.import_tables.values()[i]; - iat_size += itable.size() + 8; - dir_table_size += @sizeOf(coff_util.ImportDirectoryEntry); - lookup_table_size += @as(u32, @intCast(itable.entries.items.len + 1)) * @sizeOf(coff_util.ImportLookupEntry64.ByName); - for (itable.entries.items) |entry| { - const sym_name = coff.getSymbolName(entry); - names_table_size += 2 + mem.alignForward(u32, @as(u32, @intCast(sym_name.len + 1)), 2); - } - dll_names_size += @as(u32, @intCast(lib_name.len + ext.len + 1)); - } - - const needed_size = iat_size + dir_table_size + lookup_table_size + names_table_size + dll_names_size; - try coff.growSection(coff.idata_section_index.?, needed_size); - - // Do the actual writes - var buffer = std.array_list.Managed(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(needed_size); - buffer.resize(needed_size) catch unreachable; - - const dir_header_size = @sizeOf(coff_util.ImportDirectoryEntry); - const lookup_entry_size = @sizeOf(coff_util.ImportLookupEntry64.ByName); - - var iat_offset: u32 = 0; - var dir_table_offset = iat_size; - var lookup_table_offset = dir_table_offset + dir_table_size; - var names_table_offset = lookup_table_offset + lookup_table_size; - 
var dll_names_offset = names_table_offset + names_table_size; - for (coff.import_tables.keys(), 0..) |off, i| { - const lib_name = coff.temp_strtab.getAssumeExists(off); - const itable = coff.import_tables.values()[i]; - - // Lookup table header - const lookup_header = coff_util.ImportDirectoryEntry{ - .import_lookup_table_rva = header.virtual_address + lookup_table_offset, - .time_date_stamp = 0, - .forwarder_chain = 0, - .name_rva = header.virtual_address + dll_names_offset, - .import_address_table_rva = header.virtual_address + iat_offset, - }; - @memcpy(buffer.items[dir_table_offset..][0..@sizeOf(coff_util.ImportDirectoryEntry)], mem.asBytes(&lookup_header)); - dir_table_offset += dir_header_size; - - for (itable.entries.items) |entry| { - const import_name = coff.getSymbolName(entry); - - // IAT and lookup table entry - const lookup = coff_util.ImportLookupEntry64.ByName{ .name_table_rva = @as(u31, @intCast(header.virtual_address + names_table_offset)) }; - @memcpy( - buffer.items[iat_offset..][0..@sizeOf(coff_util.ImportLookupEntry64.ByName)], - mem.asBytes(&lookup), - ); - iat_offset += lookup_entry_size; - @memcpy( - buffer.items[lookup_table_offset..][0..@sizeOf(coff_util.ImportLookupEntry64.ByName)], - mem.asBytes(&lookup), - ); - lookup_table_offset += lookup_entry_size; - - // Names table entry - mem.writeInt(u16, buffer.items[names_table_offset..][0..2], 0, .little); // Hint set to 0 until we learn how to parse DLLs - names_table_offset += 2; - @memcpy(buffer.items[names_table_offset..][0..import_name.len], import_name); - names_table_offset += @as(u32, @intCast(import_name.len)); - buffer.items[names_table_offset] = 0; - names_table_offset += 1; - if (!mem.isAlignedGeneric(usize, names_table_offset, @sizeOf(u16))) { - buffer.items[names_table_offset] = 0; - names_table_offset += 1; - } - } - - // IAT sentinel - mem.writeInt(u64, buffer.items[iat_offset..][0..lookup_entry_size], 0, .little); - iat_offset += 8; - - // Lookup table sentinel - @memcpy( - 
buffer.items[lookup_table_offset..][0..@sizeOf(coff_util.ImportLookupEntry64.ByName)], - mem.asBytes(&coff_util.ImportLookupEntry64.ByName{ .name_table_rva = 0 }), - ); - lookup_table_offset += lookup_entry_size; - - // DLL name - @memcpy(buffer.items[dll_names_offset..][0..lib_name.len], lib_name); - dll_names_offset += @as(u32, @intCast(lib_name.len)); - @memcpy(buffer.items[dll_names_offset..][0..ext.len], ext); - dll_names_offset += @as(u32, @intCast(ext.len)); - buffer.items[dll_names_offset] = 0; - dll_names_offset += 1; - } - - // Sentinel - const lookup_header = coff_util.ImportDirectoryEntry{ - .import_lookup_table_rva = 0, - .time_date_stamp = 0, - .forwarder_chain = 0, - .name_rva = 0, - .import_address_table_rva = 0, - }; - @memcpy( - buffer.items[dir_table_offset..][0..@sizeOf(coff_util.ImportDirectoryEntry)], - mem.asBytes(&lookup_header), - ); - dir_table_offset += dir_header_size; - - assert(dll_names_offset == needed_size); - - try coff.pwriteAll(buffer.items, header.pointer_to_raw_data); - - coff.data_directories[@intFromEnum(coff_util.DirectoryEntry.IMPORT)] = .{ - .virtual_address = header.virtual_address + iat_size, - .size = dir_table_size, - }; - coff.data_directories[@intFromEnum(coff_util.DirectoryEntry.IAT)] = .{ - .virtual_address = header.virtual_address, - .size = iat_size, - }; - - coff.imports_count_dirty = false; -} - -fn writeStrtab(coff: *Coff) !void { - if (coff.strtab_offset == null) return; - - const comp = coff.base.comp; - const gpa = comp.gpa; - const diags = &comp.link_diags; - const allocated_size = coff.allocatedSize(coff.strtab_offset.?); - const needed_size: u32 = @intCast(coff.strtab.buffer.items.len); - - if (needed_size > allocated_size) { - coff.strtab_offset = null; - coff.strtab_offset = @intCast(coff.findFreeSpace(needed_size, @alignOf(u32))); - } - - log.debug("writing strtab from 0x{x} to 0x{x}", .{ coff.strtab_offset.?, coff.strtab_offset.? 
+ needed_size }); - - var buffer = std.array_list.Managed(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(needed_size); - buffer.appendSliceAssumeCapacity(coff.strtab.buffer.items); - // Here, we do a trick in that we do not commit the size of the strtab to strtab buffer, instead - // we write the length of the strtab to a temporary buffer that goes to file. - mem.writeInt(u32, buffer.items[0..4], @as(u32, @intCast(coff.strtab.buffer.items.len)), .little); - - coff.pwriteAll(buffer.items, coff.strtab_offset.?) catch |err| { - return diags.fail("failed to write: {s}", .{@errorName(err)}); - }; -} - -fn writeSectionHeaders(coff: *Coff) !void { - const offset = coff.getSectionHeadersOffset(); - try coff.pwriteAll(@ptrCast(coff.sections.items(.header)), offset); -} - -fn writeDataDirectoriesHeaders(coff: *Coff) !void { - const offset = coff.getDataDirectoryHeadersOffset(); - try coff.pwriteAll(@ptrCast(&coff.data_directories), offset); -} - -fn writeHeader(coff: *Coff) !void { - const target = &coff.base.comp.root_mod.resolved_target.result; - const gpa = coff.base.comp.gpa; - var buffer: std.Io.Writer.Allocating = .init(gpa); - defer buffer.deinit(); - const writer = &buffer.writer; - - try buffer.ensureTotalCapacity(coff.getSizeOfHeaders()); - writer.writeAll(&msdos_stub) catch unreachable; - mem.writeInt(u32, buffer.writer.buffer[0x3c..][0..4], msdos_stub.len, .little); - - writer.writeAll("PE\x00\x00") catch unreachable; - var flags = coff_util.CoffHeaderFlags{ - .EXECUTABLE_IMAGE = 1, - .DEBUG_STRIPPED = 1, // TODO - }; - switch (coff.ptr_width) { - .p32 => flags.@"32BIT_MACHINE" = 1, - .p64 => flags.LARGE_ADDRESS_AWARE = 1, - } - if (coff.base.comp.config.output_mode == .Lib and coff.base.comp.config.link_mode == .dynamic) { - flags.DLL = 1; - } - - const timestamp = if (coff.repro) 0 else std.time.timestamp(); - const size_of_optional_header = @as(u16, @intCast(coff.getOptionalHeaderSize() + coff.getDataDirectoryHeadersSize())); - 
var coff_header = coff_util.CoffHeader{ - .machine = target.toCoffMachine(), - .number_of_sections = @as(u16, @intCast(coff.sections.slice().len)), // TODO what if we prune a section - .time_date_stamp = @as(u32, @truncate(@as(u64, @bitCast(timestamp)))), - .pointer_to_symbol_table = coff.strtab_offset orelse 0, - .number_of_symbols = 0, - .size_of_optional_header = size_of_optional_header, - .flags = flags, - }; - - writer.writeAll(mem.asBytes(&coff_header)) catch unreachable; - - const dll_flags: coff_util.DllFlags = .{ - .HIGH_ENTROPY_VA = 1, // TODO do we want to permit non-PIE builds at all? - .DYNAMIC_BASE = 1, - .TERMINAL_SERVER_AWARE = 1, // We are not a legacy app - .NX_COMPAT = 1, // We are compatible with Data Execution Prevention - }; - const subsystem: coff_util.Subsystem = .WINDOWS_CUI; - const size_of_image: u32 = coff.getSizeOfImage(); - const size_of_headers: u32 = mem.alignForward(u32, coff.getSizeOfHeaders(), default_file_alignment); - const base_of_code = coff.sections.get(coff.text_section_index.?).header.virtual_address; - const base_of_data = coff.sections.get(coff.data_section_index.?).header.virtual_address; - - var size_of_code: u32 = 0; - var size_of_initialized_data: u32 = 0; - var size_of_uninitialized_data: u32 = 0; - for (coff.sections.items(.header)) |header| { - if (header.flags.CNT_CODE == 1) { - size_of_code += header.size_of_raw_data; - } - if (header.flags.CNT_INITIALIZED_DATA == 1) { - size_of_initialized_data += header.size_of_raw_data; - } - if (header.flags.CNT_UNINITIALIZED_DATA == 1) { - size_of_uninitialized_data += header.size_of_raw_data; - } - } - - switch (coff.ptr_width) { - .p32 => { - var opt_header = coff_util.OptionalHeaderPE32{ - .magic = coff_util.IMAGE_NT_OPTIONAL_HDR32_MAGIC, - .major_linker_version = 0, - .minor_linker_version = 0, - .size_of_code = size_of_code, - .size_of_initialized_data = size_of_initialized_data, - .size_of_uninitialized_data = size_of_uninitialized_data, - .address_of_entry_point = 
coff.entry_addr orelse 0, - .base_of_code = base_of_code, - .base_of_data = base_of_data, - .image_base = @intCast(coff.image_base), - .section_alignment = coff.page_size, - .file_alignment = default_file_alignment, - .major_operating_system_version = 6, - .minor_operating_system_version = 0, - .major_image_version = 0, - .minor_image_version = 0, - .major_subsystem_version = @intCast(coff.major_subsystem_version), - .minor_subsystem_version = @intCast(coff.minor_subsystem_version), - .win32_version_value = 0, - .size_of_image = size_of_image, - .size_of_headers = size_of_headers, - .checksum = 0, - .subsystem = subsystem, - .dll_flags = dll_flags, - .size_of_stack_reserve = default_size_of_stack_reserve, - .size_of_stack_commit = default_size_of_stack_commit, - .size_of_heap_reserve = default_size_of_heap_reserve, - .size_of_heap_commit = default_size_of_heap_commit, - .loader_flags = 0, - .number_of_rva_and_sizes = @intCast(coff.data_directories.len), - }; - writer.writeAll(mem.asBytes(&opt_header)) catch unreachable; - }, - .p64 => { - var opt_header = coff_util.OptionalHeaderPE64{ - .magic = coff_util.IMAGE_NT_OPTIONAL_HDR64_MAGIC, - .major_linker_version = 0, - .minor_linker_version = 0, - .size_of_code = size_of_code, - .size_of_initialized_data = size_of_initialized_data, - .size_of_uninitialized_data = size_of_uninitialized_data, - .address_of_entry_point = coff.entry_addr orelse 0, - .base_of_code = base_of_code, - .image_base = coff.image_base, - .section_alignment = coff.page_size, - .file_alignment = default_file_alignment, - .major_operating_system_version = 6, - .minor_operating_system_version = 0, - .major_image_version = 0, - .minor_image_version = 0, - .major_subsystem_version = coff.major_subsystem_version, - .minor_subsystem_version = coff.minor_subsystem_version, - .win32_version_value = 0, - .size_of_image = size_of_image, - .size_of_headers = size_of_headers, - .checksum = 0, - .subsystem = subsystem, - .dll_flags = dll_flags, - 
.size_of_stack_reserve = default_size_of_stack_reserve, - .size_of_stack_commit = default_size_of_stack_commit, - .size_of_heap_reserve = default_size_of_heap_reserve, - .size_of_heap_commit = default_size_of_heap_commit, - .loader_flags = 0, - .number_of_rva_and_sizes = @intCast(coff.data_directories.len), - }; - writer.writeAll(mem.asBytes(&opt_header)) catch unreachable; - }, - } - - try coff.pwriteAll(buffer.written(), 0); -} - -pub fn padToIdeal(actual_size: anytype) @TypeOf(actual_size) { - return actual_size +| (actual_size / ideal_factor); -} - -fn detectAllocCollision(coff: *Coff, start: u32, size: u32) ?u32 { - const headers_size = @max(coff.getSizeOfHeaders(), coff.page_size); - if (start < headers_size) - return headers_size; - - const end = start + padToIdeal(size); - - if (coff.strtab_offset) |off| { - const tight_size = @as(u32, @intCast(coff.strtab.buffer.items.len)); - const increased_size = padToIdeal(tight_size); - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; - } - } - - for (coff.sections.items(.header)) |header| { - const tight_size = header.size_of_raw_data; - const increased_size = padToIdeal(tight_size); - const test_end = header.pointer_to_raw_data + increased_size; - if (end > header.pointer_to_raw_data and start < test_end) { - return test_end; - } - } - - return null; -} - -fn allocatedSize(coff: *Coff, start: u32) u32 { - if (start == 0) - return 0; - var min_pos: u32 = std.math.maxInt(u32); - if (coff.strtab_offset) |off| { - if (off > start and off < min_pos) min_pos = off; - } - for (coff.sections.items(.header)) |header| { - if (header.pointer_to_raw_data <= start) continue; - if (header.pointer_to_raw_data < min_pos) min_pos = header.pointer_to_raw_data; - } - return min_pos - start; -} - -fn findFreeSpace(coff: *Coff, object_size: u32, min_alignment: u32) u32 { - var start: u32 = 0; - while (coff.detectAllocCollision(start, object_size)) |item_end| { - start = 
mem.alignForward(u32, item_end, min_alignment); - } - return start; -} - -fn allocatedVirtualSize(coff: *Coff, start: u32) u32 { - if (start == 0) - return 0; - var min_pos: u32 = std.math.maxInt(u32); - for (coff.sections.items(.header)) |header| { - if (header.virtual_address <= start) continue; - if (header.virtual_address < min_pos) min_pos = header.virtual_address; - } - return min_pos - start; -} - -fn getSizeOfHeaders(coff: Coff) u32 { - const msdos_hdr_size = msdos_stub.len + 4; - return @as(u32, @intCast(msdos_hdr_size + @sizeOf(coff_util.CoffHeader) + coff.getOptionalHeaderSize() + - coff.getDataDirectoryHeadersSize() + coff.getSectionHeadersSize())); -} - -fn getOptionalHeaderSize(coff: Coff) u32 { - return switch (coff.ptr_width) { - .p32 => @as(u32, @intCast(@sizeOf(coff_util.OptionalHeaderPE32))), - .p64 => @as(u32, @intCast(@sizeOf(coff_util.OptionalHeaderPE64))), - }; -} - -fn getDataDirectoryHeadersSize(coff: Coff) u32 { - return @as(u32, @intCast(coff.data_directories.len * @sizeOf(coff_util.ImageDataDirectory))); -} - -fn getSectionHeadersSize(coff: Coff) u32 { - return @as(u32, @intCast(coff.sections.slice().len * @sizeOf(coff_util.SectionHeader))); -} - -fn getDataDirectoryHeadersOffset(coff: Coff) u32 { - const msdos_hdr_size = msdos_stub.len + 4; - return @as(u32, @intCast(msdos_hdr_size + @sizeOf(coff_util.CoffHeader) + coff.getOptionalHeaderSize())); -} - -fn getSectionHeadersOffset(coff: Coff) u32 { - return coff.getDataDirectoryHeadersOffset() + coff.getDataDirectoryHeadersSize(); -} - -fn getSizeOfImage(coff: Coff) u32 { - var image_size: u32 = mem.alignForward(u32, coff.getSizeOfHeaders(), coff.page_size); - for (coff.sections.items(.header)) |header| { - image_size += mem.alignForward(u32, header.virtual_size, coff.page_size); - } - return image_size; -} - -/// Returns symbol location corresponding to the set entrypoint (if any). 
-pub fn getEntryPoint(coff: Coff) ?SymbolWithLoc { - const comp = coff.base.comp; - - // TODO This is incomplete. - // The entry symbol name depends on the subsystem as well as the set of - // public symbol names from linked objects. - // See LinkerDriver::findDefaultEntry from the LLD project for the flow chart. - const entry_name = switch (coff.entry) { - .disabled => return null, - .default => switch (comp.config.output_mode) { - .Exe => "wWinMainCRTStartup", - .Obj, .Lib => return null, - }, - .enabled => "wWinMainCRTStartup", - .named => |name| name, - }; - const global_index = coff.resolver.get(entry_name) orelse return null; - return coff.globals.items[global_index]; -} - -/// Returns pointer-to-symbol described by `sym_loc` descriptor. -pub fn getSymbolPtr(coff: *Coff, sym_loc: SymbolWithLoc) *coff_util.Symbol { - assert(sym_loc.file == null); // TODO linking object files - return &coff.locals.items[sym_loc.sym_index]; -} - -/// Returns symbol described by `sym_loc` descriptor. -pub fn getSymbol(coff: *const Coff, sym_loc: SymbolWithLoc) *const coff_util.Symbol { - assert(sym_loc.file == null); // TODO linking object files - return &coff.locals.items[sym_loc.sym_index]; -} - -/// Returns name of the symbol described by `sym_loc` descriptor. -pub fn getSymbolName(coff: *const Coff, sym_loc: SymbolWithLoc) []const u8 { - assert(sym_loc.file == null); // TODO linking object files - const sym = coff.getSymbol(sym_loc); - const offset = sym.getNameOffset() orelse return sym.getName().?; - return coff.strtab.get(offset).?; -} - -/// Returns pointer to the global entry for `name` if one exists. -pub fn getGlobalPtr(coff: *Coff, name: []const u8) ?*SymbolWithLoc { - const global_index = coff.resolver.get(name) orelse return null; - return &coff.globals.items[global_index]; -} - -/// Returns the global entry for `name` if one exists. 
-pub fn getGlobal(coff: *const Coff, name: []const u8) ?SymbolWithLoc { - const global_index = coff.resolver.get(name) orelse return null; - return coff.globals.items[global_index]; -} - -/// Returns the index of the global entry for `name` if one exists. -pub fn getGlobalIndex(coff: *const Coff, name: []const u8) ?u32 { - return coff.resolver.get(name); -} - -/// Returns global entry at `index`. -pub fn getGlobalByIndex(coff: *const Coff, index: u32) SymbolWithLoc { - assert(index < coff.globals.items.len); - return coff.globals.items[index]; -} - -const GetOrPutGlobalPtrResult = struct { - found_existing: bool, - value_ptr: *SymbolWithLoc, -}; - -/// Return pointer to the global entry for `name` if one exists. -/// Puts a new global entry for `name` if one doesn't exist, and -/// returns a pointer to it. -pub fn getOrPutGlobalPtr(coff: *Coff, name: []const u8) !GetOrPutGlobalPtrResult { - if (coff.getGlobalPtr(name)) |ptr| { - return GetOrPutGlobalPtrResult{ .found_existing = true, .value_ptr = ptr }; - } - const gpa = coff.base.comp.gpa; - const global_index = try coff.allocateGlobal(); - const global_name = try gpa.dupe(u8, name); - _ = try coff.resolver.put(gpa, global_name, global_index); - const ptr = &coff.globals.items[global_index]; - return GetOrPutGlobalPtrResult{ .found_existing = false, .value_ptr = ptr }; -} - -pub fn getAtom(coff: *const Coff, atom_index: Atom.Index) Atom { - assert(atom_index < coff.atoms.items.len); - return coff.atoms.items[atom_index]; -} - -pub fn getAtomPtr(coff: *Coff, atom_index: Atom.Index) *Atom { - assert(atom_index < coff.atoms.items.len); - return &coff.atoms.items[atom_index]; -} - -/// Returns atom if there is an atom referenced by the symbol described by `sym_loc` descriptor. -/// Returns null on failure. 
-pub fn getAtomIndexForSymbol(coff: *const Coff, sym_loc: SymbolWithLoc) ?Atom.Index { - assert(sym_loc.file == null); // TODO linking with object files - return coff.atom_by_index_table.get(sym_loc.sym_index); -} - -fn setSectionName(coff: *Coff, header: *coff_util.SectionHeader, name: []const u8) !void { - if (name.len <= 8) { - @memcpy(header.name[0..name.len], name); - @memset(header.name[name.len..], 0); - return; - } - const gpa = coff.base.comp.gpa; - const offset = try coff.strtab.insert(gpa, name); - const name_offset = fmt.bufPrint(&header.name, "/{d}", .{offset}) catch unreachable; - @memset(header.name[name_offset.len..], 0); -} - -fn getSectionName(coff: *const Coff, header: *const coff_util.SectionHeader) []const u8 { - if (header.getName()) |name| { - return name; - } - const offset = header.getNameOffset().?; - return coff.strtab.get(offset).?; -} - -fn setSymbolName(coff: *Coff, symbol: *coff_util.Symbol, name: []const u8) !void { - if (name.len <= 8) { - @memcpy(symbol.name[0..name.len], name); - @memset(symbol.name[name.len..], 0); - return; - } - const gpa = coff.base.comp.gpa; - const offset = try coff.strtab.insert(gpa, name); - @memset(symbol.name[0..4], 0); - mem.writeInt(u32, symbol.name[4..8], offset, .little); -} - -fn logSymAttributes(sym: *const coff_util.Symbol, buf: *[4]u8) []const u8 { - @memset(buf[0..4], '_'); - switch (sym.section_number) { - .UNDEFINED => { - buf[3] = 'u'; - switch (sym.storage_class) { - .EXTERNAL => buf[1] = 'e', - .WEAK_EXTERNAL => buf[1] = 'w', - .NULL => {}, - else => unreachable, - } - }, - .ABSOLUTE => unreachable, // handle ABSOLUTE - .DEBUG => unreachable, - else => { - buf[0] = 's'; - switch (sym.storage_class) { - .EXTERNAL => buf[1] = 'e', - .WEAK_EXTERNAL => buf[1] = 'w', - .NULL => {}, - else => unreachable, - } - }, - } - return buf[0..]; -} - -fn logSymtab(coff: *Coff) void { - var buf: [4]u8 = undefined; - - log.debug("symtab:", .{}); - log.debug(" object(null)", .{}); - for (coff.locals.items, 
0..) |*sym, sym_id| { - const where = if (sym.section_number == .UNDEFINED) "ord" else "sect"; - const def_index: u16 = switch (sym.section_number) { - .UNDEFINED => 0, // TODO - .ABSOLUTE => unreachable, // TODO - .DEBUG => unreachable, // TODO - else => @intFromEnum(sym.section_number), - }; - log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ - sym_id, - coff.getSymbolName(.{ .sym_index = @as(u32, @intCast(sym_id)), .file = null }), - sym.value, - where, - def_index, - logSymAttributes(sym, &buf), - }); - } - - log.debug("globals table:", .{}); - for (coff.globals.items) |sym_loc| { - const sym_name = coff.getSymbolName(sym_loc); - log.debug(" {s} => %{d} in object({?d})", .{ sym_name, sym_loc.sym_index, sym_loc.file }); - } - - log.debug("GOT entries:", .{}); - log.debug("{f}", .{coff.got_table}); -} - -fn logSections(coff: *Coff) void { - log.debug("sections:", .{}); - for (coff.sections.items(.header)) |*header| { - log.debug(" {s}: VM({x}, {x}) FILE({x}, {x})", .{ - coff.getSectionName(header), - header.virtual_address, - header.virtual_address + header.virtual_size, - header.pointer_to_raw_data, - header.pointer_to_raw_data + header.size_of_raw_data, - }); - } -} - -fn logImportTables(coff: *const Coff) void { - log.debug("import tables:", .{}); - for (coff.import_tables.keys(), 0..) |off, i| { - const itable = coff.import_tables.values()[i]; - log.debug("{f}", .{itable.fmtDebug(.{ - .coff = coff, - .index = i, - .name_off = off, - })}); - } -} - -pub const Atom = struct { - /// Each decl always gets a local symbol with the fully qualified name. - /// The vaddr and size are found here directly. - /// The file offset is found by computing the vaddr offset from the section vaddr - /// the symbol references, and adding that to the file offset of the section. - /// If this field is 0, it means the codegen size = 0 and there is no symbol or - /// offset table entry. - sym_index: u32, - - /// null means symbol defined by Zig source. 
- file: ?u32, - - /// Size of the atom - size: u32, - - /// Points to the previous and next neighbors, based on the `text_offset`. - /// This can be used to find, for example, the capacity of this `Atom`. - prev_index: ?Index, - next_index: ?Index, - - const Index = u32; - - pub fn getSymbolIndex(atom: Atom) ?u32 { - if (atom.sym_index == 0) return null; - return atom.sym_index; - } - - /// Returns symbol referencing this atom. - fn getSymbol(atom: Atom, coff: *const Coff) *const coff_util.Symbol { - const sym_index = atom.getSymbolIndex().?; - return coff.getSymbol(.{ - .sym_index = sym_index, - .file = atom.file, - }); - } - - /// Returns pointer-to-symbol referencing this atom. - fn getSymbolPtr(atom: Atom, coff: *Coff) *coff_util.Symbol { - const sym_index = atom.getSymbolIndex().?; - return coff.getSymbolPtr(.{ - .sym_index = sym_index, - .file = atom.file, - }); - } - - fn getSymbolWithLoc(atom: Atom) SymbolWithLoc { - const sym_index = atom.getSymbolIndex().?; - return .{ .sym_index = sym_index, .file = atom.file }; - } - - /// Returns the name of this atom. - fn getName(atom: Atom, coff: *const Coff) []const u8 { - const sym_index = atom.getSymbolIndex().?; - return coff.getSymbolName(.{ - .sym_index = sym_index, - .file = atom.file, - }); - } - - /// Returns how much room there is to grow in virtual address space. - fn capacity(atom: Atom, coff: *const Coff) u32 { - const atom_sym = atom.getSymbol(coff); - if (atom.next_index) |next_index| { - const next = coff.getAtom(next_index); - const next_sym = next.getSymbol(coff); - return next_sym.value - atom_sym.value; - } else { - // We are the last atom. - // The capacity is limited only by virtual address space. - return std.math.maxInt(u32) - atom_sym.value; - } - } - - fn freeListEligible(atom: Atom, coff: *const Coff) bool { - // No need to keep a free list node for the last atom. 
- const next_index = atom.next_index orelse return false; - const next = coff.getAtom(next_index); - const atom_sym = atom.getSymbol(coff); - const next_sym = next.getSymbol(coff); - const cap = next_sym.value - atom_sym.value; - const ideal_cap = padToIdeal(atom.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= min_text_capacity; - } -}; - -pub const Relocation = struct { - type: enum { - // x86, x86_64 - /// RIP-relative displacement to a GOT pointer - got, - /// RIP-relative displacement to an import pointer - import, - - // aarch64 - /// PC-relative distance to target page in GOT section - got_page, - /// Offset to a GOT pointer relative to the start of a page in GOT section - got_pageoff, - /// PC-relative distance to target page in a section (e.g., .rdata) - page, - /// Offset to a pointer relative to the start of a page in a section (e.g., .rdata) - pageoff, - /// PC-relative distance to target page in a import section - import_page, - /// Offset to a pointer relative to the start of a page in an import section (e.g., .rdata) - import_pageoff, - - // common - /// Absolute pointer value - direct, - }, - target: SymbolWithLoc, - offset: u32, - addend: u32, - pcrel: bool, - length: u2, - dirty: bool = true, - - /// Returns true if and only if the reloc can be resolved. - fn isResolvable(reloc: Relocation, coff: *Coff) bool { - _ = reloc.getTargetAddress(coff) orelse return false; - return true; - } - - fn isGotIndirection(reloc: Relocation) bool { - return switch (reloc.type) { - .got, .got_page, .got_pageoff => true, - else => false, - }; - } - - /// Returns address of the target if any. 
- fn getTargetAddress(reloc: Relocation, coff: *const Coff) ?u32 { - switch (reloc.type) { - .got, .got_page, .got_pageoff => { - const got_index = coff.got_table.lookup.get(reloc.target) orelse return null; - const header = coff.sections.items(.header)[coff.got_section_index.?]; - return header.virtual_address + got_index * coff.ptr_width.size(); - }, - .import, .import_page, .import_pageoff => { - const sym = coff.getSymbol(reloc.target); - const index = coff.import_tables.getIndex(sym.value) orelse return null; - const itab = coff.import_tables.values()[index]; - return itab.getImportAddress(reloc.target, .{ - .coff = coff, - .index = index, - .name_off = sym.value, - }); - }, - else => { - const target_atom_index = coff.getAtomIndexForSymbol(reloc.target) orelse return null; - const target_atom = coff.getAtom(target_atom_index); - return target_atom.getSymbol(coff).value; - }, - } - } - - fn resolve(reloc: Relocation, atom_index: Atom.Index, code: []u8, image_base: u64, coff: *Coff) void { - const atom = coff.getAtom(atom_index); - const source_sym = atom.getSymbol(coff); - const source_vaddr = source_sym.value + reloc.offset; - - const target_vaddr = reloc.getTargetAddress(coff).?; // Oops, you didn't check if the relocation can be resolved with isResolvable(). 
- const target_vaddr_with_addend = target_vaddr + reloc.addend; - - log.debug(" ({x}: [() => 0x{x} ({s})) ({s}) ", .{ - source_vaddr, - target_vaddr_with_addend, - coff.getSymbolName(reloc.target), - @tagName(reloc.type), - }); - - const ctx: Context = .{ - .source_vaddr = source_vaddr, - .target_vaddr = target_vaddr_with_addend, - .image_base = image_base, - .code = code, - .ptr_width = coff.ptr_width, - }; - - const target = &coff.base.comp.root_mod.resolved_target.result; - switch (target.cpu.arch) { - .aarch64 => reloc.resolveAarch64(ctx), - .x86, .x86_64 => reloc.resolveX86(ctx), - else => unreachable, // unhandled target architecture - } - } - - const Context = struct { - source_vaddr: u32, - target_vaddr: u32, - image_base: u64, - code: []u8, - ptr_width: PtrWidth, - }; - - fn resolveAarch64(reloc: Relocation, ctx: Context) void { - const Instruction = aarch64_util.encoding.Instruction; - var buffer = ctx.code[reloc.offset..]; - switch (reloc.type) { - .got_page, .import_page, .page => { - const source_page = @as(i32, @intCast(ctx.source_vaddr >> 12)); - const target_page = @as(i32, @intCast(ctx.target_vaddr >> 12)); - const pages: i21 = @intCast(target_page - source_page); - var inst: Instruction = .read(buffer[0..Instruction.size]); - inst.data_processing_immediate.pc_relative_addressing.group.immhi = @intCast(pages >> 2); - inst.data_processing_immediate.pc_relative_addressing.group.immlo = @truncate(@as(u21, @bitCast(pages))); - inst.write(buffer[0..Instruction.size]); - }, - .got_pageoff, .import_pageoff, .pageoff => { - assert(!reloc.pcrel); - - const narrowed: u12 = @truncate(@as(u64, @intCast(ctx.target_vaddr))); - var inst: Instruction = .read(buffer[0..Instruction.size]); - switch (inst.decode()) { - else => unreachable, - .data_processing_immediate => inst.data_processing_immediate.add_subtract_immediate.group.imm12 = narrowed, - .load_store => |load_store| inst.load_store.register_unsigned_immediate.group.imm12 = - switch 
(load_store.register_unsigned_immediate.decode()) { - .integer => |integer| @shrExact(narrowed, @intFromEnum(integer.group.size)), - .vector => |vector| @shrExact(narrowed, @intFromEnum(vector.group.opc1.decode(vector.group.size))), - }, - } - inst.write(buffer[0..Instruction.size]); - }, - .direct => { - assert(!reloc.pcrel); - switch (reloc.length) { - 2 => mem.writeInt( - u32, - buffer[0..4], - @as(u32, @truncate(ctx.target_vaddr + ctx.image_base)), - .little, - ), - 3 => mem.writeInt(u64, buffer[0..8], ctx.target_vaddr + ctx.image_base, .little), - else => unreachable, - } - }, - - .got => unreachable, - .import => unreachable, - } - } - - fn resolveX86(reloc: Relocation, ctx: Context) void { - var buffer = ctx.code[reloc.offset..]; - switch (reloc.type) { - .got_page => unreachable, - .got_pageoff => unreachable, - .page => unreachable, - .pageoff => unreachable, - .import_page => unreachable, - .import_pageoff => unreachable, - - .got, .import => { - assert(reloc.pcrel); - const disp = @as(i32, @intCast(ctx.target_vaddr)) - @as(i32, @intCast(ctx.source_vaddr)) - 4; - mem.writeInt(i32, buffer[0..4], disp, .little); - }, - .direct => { - if (reloc.pcrel) { - const disp = @as(i32, @intCast(ctx.target_vaddr)) - @as(i32, @intCast(ctx.source_vaddr)) - 4; - mem.writeInt(i32, buffer[0..4], disp, .little); - } else switch (ctx.ptr_width) { - .p32 => mem.writeInt(u32, buffer[0..4], @as(u32, @intCast(ctx.target_vaddr + ctx.image_base)), .little), - .p64 => switch (reloc.length) { - 2 => mem.writeInt(u32, buffer[0..4], @as(u32, @truncate(ctx.target_vaddr + ctx.image_base)), .little), - 3 => mem.writeInt(u64, buffer[0..8], ctx.target_vaddr + ctx.image_base, .little), - else => unreachable, - }, - } - }, - } - } -}; - -pub fn addRelocation(coff: *Coff, atom_index: Atom.Index, reloc: Relocation) !void { - const comp = coff.base.comp; - const gpa = comp.gpa; - log.debug(" (adding reloc of type {s} to target %{d})", .{ @tagName(reloc.type), reloc.target.sym_index }); - const 
gop = try coff.relocs.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, reloc); -} - -fn addBaseRelocation(coff: *Coff, atom_index: Atom.Index, offset: u32) !void { - const comp = coff.base.comp; - const gpa = comp.gpa; - log.debug(" (adding base relocation at offset 0x{x} in %{d})", .{ - offset, - coff.getAtom(atom_index).getSymbolIndex().?, - }); - const gop = try coff.base_relocs.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; - } - try gop.value_ptr.append(gpa, offset); -} - -fn freeRelocations(coff: *Coff, atom_index: Atom.Index) void { - const comp = coff.base.comp; - const gpa = comp.gpa; - var removed_relocs = coff.relocs.fetchOrderedRemove(atom_index); - if (removed_relocs) |*relocs| relocs.value.deinit(gpa); - var removed_base_relocs = coff.base_relocs.fetchOrderedRemove(atom_index); - if (removed_base_relocs) |*base_relocs| base_relocs.value.deinit(gpa); -} - -/// Represents an import table in the .idata section where each contained pointer -/// is to a symbol from the same DLL. 
-/// -/// The layout of .idata section is as follows: -/// -/// --- ADDR1 : IAT (all import tables concatenated together) -/// ptr -/// ptr -/// 0 sentinel -/// ptr -/// 0 sentinel -/// --- ADDR2: headers -/// ImportDirectoryEntry header -/// ImportDirectoryEntry header -/// sentinel -/// --- ADDR2: lookup tables -/// Lookup table -/// 0 sentinel -/// Lookup table -/// 0 sentinel -/// --- ADDR3: name hint tables -/// hint-symname -/// hint-symname -/// --- ADDR4: DLL names -/// DLL#1 name -/// DLL#2 name -/// --- END -const ImportTable = struct { - entries: std.ArrayListUnmanaged(SymbolWithLoc) = .empty, - free_list: std.ArrayListUnmanaged(u32) = .empty, - lookup: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .empty, - - fn deinit(itab: *ImportTable, allocator: Allocator) void { - itab.entries.deinit(allocator); - itab.free_list.deinit(allocator); - itab.lookup.deinit(allocator); - } - - /// Size of the import table does not include the sentinel. - fn size(itab: ImportTable) u32 { - return @as(u32, @intCast(itab.entries.items.len)) * @sizeOf(u64); - } - - fn addImport(itab: *ImportTable, allocator: Allocator, target: SymbolWithLoc) !ImportIndex { - try itab.entries.ensureUnusedCapacity(allocator, 1); - const index: u32 = blk: { - if (itab.free_list.pop()) |index| { - log.debug(" (reusing import entry index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating import entry at index {d})", .{itab.entries.items.len}); - const index = @as(u32, @intCast(itab.entries.items.len)); - _ = itab.entries.addOneAssumeCapacity(); - break :blk index; - } - }; - itab.entries.items[index] = target; - try itab.lookup.putNoClobber(allocator, target, index); - return index; - } - - const Context = struct { - coff: *const Coff, - /// Index of this ImportTable in a global list of all tables. - /// This is required in order to calculate the base vaddr of this ImportTable. 
- index: usize, - /// Offset into the string interning table of the DLL this ImportTable corresponds to. - name_off: u32, - }; - - fn getBaseAddress(ctx: Context) u32 { - const header = ctx.coff.sections.items(.header)[ctx.coff.idata_section_index.?]; - var addr = header.virtual_address; - for (ctx.coff.import_tables.values(), 0..) |other_itab, i| { - if (ctx.index == i) break; - addr += @as(u32, @intCast(other_itab.entries.items.len * @sizeOf(u64))) + 8; - } - return addr; - } - - fn getImportAddress(itab: *const ImportTable, target: SymbolWithLoc, ctx: Context) ?u32 { - const index = itab.lookup.get(target) orelse return null; - const base_vaddr = getBaseAddress(ctx); - return base_vaddr + index * @sizeOf(u64); - } - - const Format = struct { - itab: ImportTable, - ctx: Context, - - fn default(f: Format, writer: *std.Io.Writer) std.Io.Writer.Error!void { - const lib_name = f.ctx.coff.temp_strtab.getAssumeExists(f.ctx.name_off); - const base_vaddr = getBaseAddress(f.ctx); - try writer.print("IAT({s}.dll) @{x}:", .{ lib_name, base_vaddr }); - for (f.itab.entries.items, 0..) |entry, i| { - try writer.print("\n {d}@{?x} => {s}", .{ - i, - f.itab.getImportAddress(entry, f.ctx), - f.ctx.coff.getSymbolName(entry), - }); - } - } - }; - - fn fmtDebug(itab: ImportTable, ctx: Context) fmt.Alt(Format, Format.default) { - return .{ .data = .{ .itab = itab, .ctx = ctx } }; - } - - const ImportIndex = u32; -}; - -fn pwriteAll(coff: *Coff, bytes: []const u8, offset: u64) error{LinkFailure}!void { - const comp = coff.base.comp; - const diags = &comp.link_diags; - coff.base.file.?.pwriteAll(bytes, offset) catch |err| { - return diags.fail("failed to write: {s}", .{@errorName(err)}); - }; -} - -/// This is the start of a Portable Executable (PE) file. -/// It starts with a MS-DOS header followed by a MS-DOS stub program. -/// This data does not change so we include it as follows in all binaries. -/// -/// In this context, -/// A "paragraph" is 16 bytes. -/// A "page" is 512 bytes. 
-/// A "long" is 4 bytes. -/// A "word" is 2 bytes. -const msdos_stub: [120]u8 = .{ - 'M', 'Z', // Magic number. Stands for Mark Zbikowski (designer of the MS-DOS executable format). - 0x78, 0x00, // Number of bytes in the last page. This matches the size of this entire MS-DOS stub. - 0x01, 0x00, // Number of pages. - 0x00, 0x00, // Number of entries in the relocation table. - 0x04, 0x00, // The number of paragraphs taken up by the header. 4 * 16 = 64, which matches the header size (all bytes before the MS-DOS stub program). - 0x00, 0x00, // The number of paragraphs required by the program. - 0x00, 0x00, // The number of paragraphs requested by the program. - 0x00, 0x00, // Initial value for SS (relocatable segment address). - 0x00, 0x00, // Initial value for SP. - 0x00, 0x00, // Checksum. - 0x00, 0x00, // Initial value for IP. - 0x00, 0x00, // Initial value for CS (relocatable segment address). - 0x40, 0x00, // Absolute offset to relocation table. 64 matches the header size (all bytes before the MS-DOS stub program). - 0x00, 0x00, // Overlay number. Zero means this is the main executable. -} - // Reserved words. - ++ .{ 0x00, 0x00 } ** 4 - // OEM-related fields. - ++ .{ - 0x00, 0x00, // OEM identifier. - 0x00, 0x00, // OEM information. - } - // Reserved words. - ++ .{ 0x00, 0x00 } ** 10 - // Address of the PE header (a long). This matches the size of this entire MS-DOS stub, so that's the address of what's after this MS-DOS stub. - ++ .{ 0x78, 0x00, 0x00, 0x00 } - // What follows is a 16-bit x86 MS-DOS program of 7 instructions that prints the bytes after these instructions and then exits. - ++ .{ - // Set the value of the data segment to the same value as the code segment. - 0x0e, // push cs - 0x1f, // pop ds - // Set the DX register to the address of the message. - // If you count all bytes of these 7 instructions you get 14, so that's the address of what's after these instructions. 
- 0xba, 14, 0x00, // mov dx, 14 - // Set AH to the system call code for printing a message. - 0xb4, 0x09, // mov ah, 0x09 - // Perform the system call to print the message. - 0xcd, 0x21, // int 0x21 - // Set AH to 0x4c which is the system call code for exiting, and set AL to 0x01 which is the exit code. - 0xb8, 0x01, 0x4c, // mov ax, 0x4c01 - // Peform the system call to exit the program with exit code 1. - 0xcd, 0x21, // int 0x21 - } - // Message to print. - ++ "This program cannot be run in DOS mode.".* - // Message terminators. - ++ .{ - '$', // We do not pass a length to the print system call; the string is terminated by this character. - 0x00, 0x00, // Terminating zero bytes. - }; diff --git a/src/link/Coff2.zig b/src/link/Coff2.zig new file mode 100644 index 000000000000..79d4b1750563 --- /dev/null +++ b/src/link/Coff2.zig @@ -0,0 +1,2193 @@ +base: link.File, +endian: std.builtin.Endian, +mf: MappedFile, +nodes: std.MultiArrayList(Node), +import_table: ImportTable, +strings: std.HashMapUnmanaged( + u32, + void, + std.hash_map.StringIndexContext, + std.hash_map.default_max_load_percentage, +), +string_bytes: std.ArrayList(u8), +section_table: std.ArrayList(Symbol.Index), +symbol_table: std.ArrayList(Symbol), +globals: std.AutoArrayHashMapUnmanaged(GlobalName, Symbol.Index), +global_pending_index: u32, +navs: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, Symbol.Index), +uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Symbol.Index), +lazy: std.EnumArray(link.File.LazySymbol.Kind, struct { + map: std.AutoArrayHashMapUnmanaged(InternPool.Index, Symbol.Index), + pending_index: u32, +}), +pending_uavs: std.AutoArrayHashMapUnmanaged(Node.UavMapIndex, struct { + alignment: InternPool.Alignment, + src_loc: Zcu.LazySrcLoc, +}), +relocs: std.ArrayList(Reloc), +/// This is hiding actual bugs with global symbols! Reconsider once they are implemented correctly. 
+entry_hack: Symbol.Index, + +pub const default_file_alignment: u16 = 0x200; +pub const default_size_of_stack_reserve: u32 = 0x1000000; +pub const default_size_of_stack_commit: u32 = 0x1000; +pub const default_size_of_heap_reserve: u32 = 0x100000; +pub const default_size_of_heap_commit: u32 = 0x1000; + +/// This is the start of a Portable Executable (PE) file. +/// It starts with a MS-DOS header followed by a MS-DOS stub program. +/// This data does not change so we include it as follows in all binaries. +/// +/// In this context, +/// A "paragraph" is 16 bytes. +/// A "page" is 512 bytes. +/// A "long" is 4 bytes. +/// A "word" is 2 bytes. +pub const msdos_stub: [120]u8 = .{ + 'M', 'Z', // Magic number. Stands for Mark Zbikowski (designer of the MS-DOS executable format). + 0x78, 0x00, // Number of bytes in the last page. This matches the size of this entire MS-DOS stub. + 0x01, 0x00, // Number of pages. + 0x00, 0x00, // Number of entries in the relocation table. + 0x04, 0x00, // The number of paragraphs taken up by the header. 4 * 16 = 64, which matches the header size (all bytes before the MS-DOS stub program). + 0x00, 0x00, // The number of paragraphs required by the program. + 0x00, 0x00, // The number of paragraphs requested by the program. + 0x00, 0x00, // Initial value for SS (relocatable segment address). + 0x00, 0x00, // Initial value for SP. + 0x00, 0x00, // Checksum. + 0x00, 0x00, // Initial value for IP. + 0x00, 0x00, // Initial value for CS (relocatable segment address). + 0x40, 0x00, // Absolute offset to relocation table. 64 matches the header size (all bytes before the MS-DOS stub program). + 0x00, 0x00, // Overlay number. Zero means this is the main executable. +} + // Reserved words. + ++ .{ 0x00, 0x00 } ** 4 + // OEM-related fields. + ++ .{ + 0x00, 0x00, // OEM identifier. + 0x00, 0x00, // OEM information. + } + // Reserved words. + ++ .{ 0x00, 0x00 } ** 10 + // Address of the PE header (a long). 
This matches the size of this entire MS-DOS stub, so that's the address of what's after this MS-DOS stub. + ++ .{ 0x78, 0x00, 0x00, 0x00 } + // What follows is a 16-bit x86 MS-DOS program of 7 instructions that prints the bytes after these instructions and then exits. + ++ .{ + // Set the value of the data segment to the same value as the code segment. + 0x0e, // push cs + 0x1f, // pop ds + // Set the DX register to the address of the message. + // If you count all bytes of these 7 instructions you get 14, so that's the address of what's after these instructions. + 0xba, 14, 0x00, // mov dx, 14 + // Set AH to the system call code for printing a message. + 0xb4, 0x09, // mov ah, 0x09 + // Perform the system call to print the message. + 0xcd, 0x21, // int 0x21 + // Set AH to 0x4c which is the system call code for exiting, and set AL to 0x01 which is the exit code. + 0xb8, 0x01, 0x4c, // mov ax, 0x4c01 + // Perform the system call to exit the program with exit code 1. + 0xcd, 0x21, // int 0x21 + } + // Message to print. + ++ "This program cannot be run in DOS mode.".* + // Message terminators. + ++ .{ + '$', // We do not pass a length to the print system call; the string is terminated by this character. + 0x00, 0x00, // Terminating zero bytes. 
+ }; + +pub const Node = union(enum) { + file, + header, + signature, + coff_header, + optional_header, + data_directories, + section_table, + section: Symbol.Index, + import_directory_table, + import_lookup_table: u32, + import_address_table: u32, + import_hint_name_table: u32, + global: GlobalMapIndex, + nav: NavMapIndex, + uav: UavMapIndex, + lazy_code: LazyMapRef.Index(.code), + lazy_const_data: LazyMapRef.Index(.const_data), + + pub const GlobalMapIndex = enum(u32) { + _, + + pub fn globalName(gmi: GlobalMapIndex, coff: *const Coff) GlobalName { + return coff.globals.keys()[@intFromEnum(gmi)]; + } + + pub fn symbol(gmi: GlobalMapIndex, coff: *const Coff) Symbol.Index { + return coff.globals.values()[@intFromEnum(gmi)]; + } + }; + + pub const NavMapIndex = enum(u32) { + _, + + pub fn navIndex(nmi: NavMapIndex, coff: *const Coff) InternPool.Nav.Index { + return coff.navs.keys()[@intFromEnum(nmi)]; + } + + pub fn symbol(nmi: NavMapIndex, coff: *const Coff) Symbol.Index { + return coff.navs.values()[@intFromEnum(nmi)]; + } + }; + + pub const UavMapIndex = enum(u32) { + _, + + pub fn uavValue(umi: UavMapIndex, coff: *const Coff) InternPool.Index { + return coff.uavs.keys()[@intFromEnum(umi)]; + } + + pub fn symbol(umi: UavMapIndex, coff: *const Coff) Symbol.Index { + return coff.uavs.values()[@intFromEnum(umi)]; + } + }; + + pub const LazyMapRef = struct { + kind: link.File.LazySymbol.Kind, + index: u32, + + pub fn Index(comptime kind: link.File.LazySymbol.Kind) type { + return enum(u32) { + _, + + pub fn ref(lmi: @This()) LazyMapRef { + return .{ .kind = kind, .index = @intFromEnum(lmi) }; + } + + pub fn lazySymbol(lmi: @This(), coff: *const Coff) link.File.LazySymbol { + return lmi.ref().lazySymbol(coff); + } + + pub fn symbol(lmi: @This(), coff: *const Coff) Symbol.Index { + return lmi.ref().symbol(coff); + } + }; + } + + pub fn lazySymbol(lmr: LazyMapRef, coff: *const Coff) link.File.LazySymbol { + return .{ .kind = lmr.kind, .ty = 
coff.lazy.getPtrConst(lmr.kind).map.keys()[lmr.index] }; + } + + pub fn symbol(lmr: LazyMapRef, coff: *const Coff) Symbol.Index { + return coff.lazy.getPtrConst(lmr.kind).map.values()[lmr.index]; + } + }; + + pub const Tag = @typeInfo(Node).@"union".tag_type.?; + + const known_count = @typeInfo(@TypeOf(known)).@"struct".fields.len; + const known = known: { + const Known = enum { + file, + header, + signature, + coff_header, + optional_header, + data_directories, + section_table, + }; + var mut_known: std.enums.EnumFieldStruct(Known, MappedFile.Node.Index, null) = undefined; + for (@typeInfo(Known).@"enum".fields) |field| + @field(mut_known, field.name) = @enumFromInt(field.value); + break :known mut_known; + }; + + comptime { + if (!std.debug.runtime_safety) std.debug.assert(@sizeOf(Node) == 8); + } +}; + +pub const DataDirectory = enum { + export_table, + import_table, + resorce_table, + exception_table, + certificate_table, + base_relocation_table, + debug, + architecture, + global_ptr, + tls_table, + load_config_table, + bound_import, + import_address_table, + delay_import_descriptor, + clr_runtime_header, + reserved, +}; + +pub const ImportTable = struct { + directory_table_ni: MappedFile.Node.Index, + dlls: std.AutoArrayHashMapUnmanaged(void, Dll), + + pub const Dll = struct { + import_lookup_table_ni: MappedFile.Node.Index, + import_address_table_si: Symbol.Index, + import_hint_name_table_ni: MappedFile.Node.Index, + len: u32, + hint_name_len: u32, + }; + + const Adapter = struct { + coff: *Coff, + + pub fn eql(adapter: Adapter, lhs_key: []const u8, _: void, rhs_index: usize) bool { + const coff = adapter.coff; + const dll_name = coff.import_table.dlls.values()[rhs_index] + .import_hint_name_table_ni.sliceConst(&coff.mf); + return std.mem.startsWith(u8, dll_name, lhs_key) and + std.mem.startsWith(u8, dll_name[lhs_key.len..], ".dll\x00"); + } + + pub fn hash(_: Adapter, key: []const u8) u32 { + assert(std.mem.indexOfScalar(u8, key, 0) == null); + return 
std.array_hash_map.hashString(key); + } + }; +}; + +pub const String = enum(u32) { + _, + + pub const Optional = enum(u32) { + none = std.math.maxInt(u32), + _, + + pub fn unwrap(os: String.Optional) ?String { + return switch (os) { + else => |s| @enumFromInt(@intFromEnum(s)), + .none => null, + }; + } + + pub fn toSlice(os: String.Optional, coff: *Coff) ?[:0]const u8 { + return (os.unwrap() orelse return null).toSlice(coff); + } + }; + + pub fn toSlice(s: String, coff: *Coff) [:0]const u8 { + const slice = coff.string_bytes.items[@intFromEnum(s)..]; + return slice[0..std.mem.indexOfScalar(u8, slice, 0).? :0]; + } + + pub fn toOptional(s: String) String.Optional { + return @enumFromInt(@intFromEnum(s)); + } +}; + +pub const GlobalName = struct { name: String, lib_name: String.Optional }; + +pub const Symbol = struct { + ni: MappedFile.Node.Index, + rva: u32, + size: u32, + /// Relocations contained within this symbol + loc_relocs: Reloc.Index, + /// Relocations targeting this symbol + target_relocs: Reloc.Index, + section_number: SectionNumber, + data_directory: ?DataDirectory, + unused0: u32 = 0, + unused1: u32 = 0, + + pub const SectionNumber = enum(i16) { + UNDEFINED = 0, + ABSOLUTE = -1, + DEBUG = -2, + _, + + fn toIndex(sn: SectionNumber) u15 { + return @intCast(@intFromEnum(sn) - 1); + } + + pub fn symbol(sn: SectionNumber, coff: *const Coff) Symbol.Index { + return coff.section_table.items[sn.toIndex()]; + } + + pub fn header(sn: SectionNumber, coff: *Coff) *std.coff.SectionHeader { + return &coff.sectionTableSlice()[sn.toIndex()]; + } + }; + + pub const Index = enum(u32) { + null, + data, + idata, + rdata, + text, + _, + + const known_count = @typeInfo(Index).@"enum".fields.len; + + pub fn get(si: Symbol.Index, coff: *Coff) *Symbol { + return &coff.symbol_table.items[@intFromEnum(si)]; + } + + pub fn node(si: Symbol.Index, coff: *Coff) MappedFile.Node.Index { + const ni = si.get(coff).ni; + assert(ni != .none); + return ni; + } + + pub fn flushMoved(si: 
Symbol.Index, coff: *Coff) void { + const sym = si.get(coff); + sym.rva = coff.computeNodeRva(sym.ni); + if (si == coff.entry_hack) + coff.targetStore(&coff.optionalHeaderStandardPtr().address_of_entry_point, sym.rva); + si.applyLocationRelocs(coff); + si.applyTargetRelocs(coff); + } + + pub fn applyLocationRelocs(si: Symbol.Index, coff: *Coff) void { + for (coff.relocs.items[@intFromEnum(si.get(coff).loc_relocs)..]) |*reloc| { + if (reloc.loc != si) break; + reloc.apply(coff); + } + } + + pub fn applyTargetRelocs(si: Symbol.Index, coff: *Coff) void { + var ri = si.get(coff).target_relocs; + while (ri != .none) { + const reloc = ri.get(coff); + assert(reloc.target == si); + reloc.apply(coff); + ri = reloc.next; + } + } + + pub fn deleteLocationRelocs(si: Symbol.Index, coff: *Coff) void { + const sym = si.get(coff); + for (coff.relocs.items[@intFromEnum(sym.loc_relocs)..]) |*reloc| { + if (reloc.loc != si) break; + reloc.delete(coff); + } + sym.loc_relocs = .none; + } + }; + + comptime { + if (!std.debug.runtime_safety) std.debug.assert(@sizeOf(Symbol) == 32); + } +}; + +pub const Reloc = extern struct { + type: Reloc.Type, + prev: Reloc.Index, + next: Reloc.Index, + loc: Symbol.Index, + target: Symbol.Index, + unused: u32, + offset: u64, + addend: i64, + + pub const Type = extern union { + AMD64: std.coff.IMAGE.REL.AMD64, + ARM: std.coff.IMAGE.REL.ARM, + ARM64: std.coff.IMAGE.REL.ARM64, + SH: std.coff.IMAGE.REL.SH, + PPC: std.coff.IMAGE.REL.PPC, + I386: std.coff.IMAGE.REL.I386, + IA64: std.coff.IMAGE.REL.IA64, + MIPS: std.coff.IMAGE.REL.MIPS, + M32R: std.coff.IMAGE.REL.M32R, + }; + + pub const Index = enum(u32) { + none = std.math.maxInt(u32), + _, + + pub fn get(si: Reloc.Index, coff: *Coff) *Reloc { + return &coff.relocs.items[@intFromEnum(si)]; + } + }; + + pub fn apply(reloc: *const Reloc, coff: *Coff) void { + const loc_sym = reloc.loc.get(coff); + switch (loc_sym.ni) { + .none => return, + else => |ni| if (ni.hasMoved(&coff.mf)) return, + } + const target_sym 
= reloc.target.get(coff); + switch (target_sym.ni) { + .none => return, + else => |ni| if (ni.hasMoved(&coff.mf)) return, + } + const loc_slice = loc_sym.ni.slice(&coff.mf)[@intCast(reloc.offset)..]; + const target_rva = target_sym.rva +% @as(u64, @bitCast(reloc.addend)); + const target_endian = coff.targetEndian(); + switch (coff.targetLoad(&coff.headerPtr().machine)) { + else => |machine| @panic(@tagName(machine)), + .AMD64 => switch (reloc.type.AMD64) { + else => |kind| @panic(@tagName(kind)), + .ABSOLUTE => {}, + .ADDR64 => std.mem.writeInt( + u64, + loc_slice[0..8], + coff.optionalHeaderField(.image_base) + target_rva, + target_endian, + ), + .ADDR32 => std.mem.writeInt( + u32, + loc_slice[0..4], + @intCast(coff.optionalHeaderField(.image_base) + target_rva), + target_endian, + ), + .ADDR32NB => std.mem.writeInt( + u32, + loc_slice[0..4], + @intCast(target_rva), + target_endian, + ), + .REL32 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 4)))), + target_endian, + ), + .REL32_1 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 5)))), + target_endian, + ), + .REL32_2 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 6)))), + target_endian, + ), + .REL32_3 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 7)))), + target_endian, + ), + .REL32_4 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 8)))), + target_endian, + ), + .REL32_5 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 9)))), + target_endian, + ), + }, + .I386 => switch (reloc.type.I386) { + else => |kind| @panic(@tagName(kind)), + .ABSOLUTE => {}, + .DIR16 => std.mem.writeInt( + u16, 
+ loc_slice[0..2], + @intCast(coff.optionalHeaderField(.image_base) + target_rva), + target_endian, + ), + .REL16 => std.mem.writeInt( + i16, + loc_slice[0..2], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 2)))), + target_endian, + ), + .DIR32 => std.mem.writeInt( + u32, + loc_slice[0..4], + @intCast(coff.optionalHeaderField(.image_base) + target_rva), + target_endian, + ), + .DIR32NB => std.mem.writeInt( + u32, + loc_slice[0..4], + @intCast(target_rva), + target_endian, + ), + .REL32 => std.mem.writeInt( + i32, + loc_slice[0..4], + @intCast(@as(i64, @bitCast(target_rva -% (loc_sym.rva + reloc.offset + 4)))), + target_endian, + ), + }, + } + } + + pub fn delete(reloc: *Reloc, coff: *Coff) void { + switch (reloc.prev) { + .none => { + const target = reloc.target.get(coff); + assert(target.target_relocs.get(coff) == reloc); + target.target_relocs = reloc.next; + }, + else => |prev| prev.get(coff).next = reloc.next, + } + switch (reloc.next) { + .none => {}, + else => |next| next.get(coff).prev = reloc.prev, + } + reloc.* = undefined; + } + + comptime { + if (!std.debug.runtime_safety) std.debug.assert(@sizeOf(Reloc) == 40); + } +}; + +pub fn open( + arena: std.mem.Allocator, + comp: *Compilation, + path: std.Build.Cache.Path, + options: link.File.OpenOptions, +) !*Coff { + return create(arena, comp, path, options); +} +pub fn createEmpty( + arena: std.mem.Allocator, + comp: *Compilation, + path: std.Build.Cache.Path, + options: link.File.OpenOptions, +) !*Coff { + return create(arena, comp, path, options); +} +fn create( + arena: std.mem.Allocator, + comp: *Compilation, + path: std.Build.Cache.Path, + options: link.File.OpenOptions, +) !*Coff { + const target = &comp.root_mod.resolved_target.result; + assert(target.ofmt == .coff); + const is_image = switch (comp.config.output_mode) { + .Exe => true, + .Lib => switch (comp.config.link_mode) { + .static => false, + .dynamic => true, + }, + .Obj => false, + }; + const machine = 
target.toCoffMachine(); + const timestamp: u32 = if (options.repro) 0 else @truncate(@as(u64, @bitCast(std.time.timestamp()))); + const major_subsystem_version = options.major_subsystem_version orelse 6; + const minor_subsystem_version = options.minor_subsystem_version orelse 0; + const magic: std.coff.OptionalHeader.Magic = switch (target.ptrBitWidth()) { + 0...32 => .PE32, + 33...64 => .@"PE32+", + else => return error.UnsupportedCOFFArchitecture, + }; + const section_align: std.mem.Alignment = switch (machine) { + .AMD64, .I386 => @enumFromInt(12), + .SH3, .SH3DSP, .SH4, .SH5 => @enumFromInt(12), + .MIPS16, .MIPSFPU, .MIPSFPU16, .WCEMIPSV2 => @enumFromInt(12), + .POWERPC, .POWERPCFP => @enumFromInt(12), + .ALPHA, .ALPHA64 => @enumFromInt(13), + .IA64 => @enumFromInt(13), + .ARM => @enumFromInt(12), + else => return error.UnsupportedCOFFArchitecture, + }; + + const coff = try arena.create(Coff); + const file = try path.root_dir.handle.createFile(path.sub_path, .{ + .read = true, + .mode = link.File.determineMode(comp.config.output_mode, comp.config.link_mode), + }); + errdefer file.close(); + coff.* = .{ + .base = .{ + .tag = .coff2, + + .comp = comp, + .emit = path, + + .file = file, + .gc_sections = false, + .print_gc_sections = false, + .build_id = .none, + .allow_shlib_undefined = false, + .stack_size = 0, + }, + .endian = target.cpu.arch.endian(), + .mf = try .init(file, comp.gpa), + .nodes = .empty, + .import_table = .{ + .directory_table_ni = .none, + .dlls = .empty, + }, + .strings = .empty, + .string_bytes = .empty, + .section_table = .empty, + .symbol_table = .empty, + .globals = .empty, + .global_pending_index = 0, + .navs = .empty, + .uavs = .empty, + .lazy = .initFill(.{ + .map = .empty, + .pending_index = 0, + }), + .pending_uavs = .empty, + .relocs = .empty, + .entry_hack = .null, + }; + errdefer coff.deinit(); + + try coff.initHeaders( + is_image, + machine, + timestamp, + major_subsystem_version, + minor_subsystem_version, + magic, + 
section_align, + ); + return coff; +} + +pub fn deinit(coff: *Coff) void { + const gpa = coff.base.comp.gpa; + coff.mf.deinit(gpa); + coff.nodes.deinit(gpa); + coff.import_table.dlls.deinit(gpa); + coff.strings.deinit(gpa); + coff.string_bytes.deinit(gpa); + coff.section_table.deinit(gpa); + coff.symbol_table.deinit(gpa); + coff.globals.deinit(gpa); + coff.navs.deinit(gpa); + coff.uavs.deinit(gpa); + for (&coff.lazy.values) |*lazy| lazy.map.deinit(gpa); + coff.pending_uavs.deinit(gpa); + coff.relocs.deinit(gpa); + coff.* = undefined; +} + +fn initHeaders( + coff: *Coff, + is_image: bool, + machine: std.coff.IMAGE.FILE.MACHINE, + timestamp: u32, + major_subsystem_version: u16, + minor_subsystem_version: u16, + magic: std.coff.OptionalHeader.Magic, + section_align: std.mem.Alignment, +) !void { + const comp = coff.base.comp; + const gpa = comp.gpa; + const file_align: std.mem.Alignment = comptime .fromByteUnits(default_file_alignment); + const target_endian = coff.targetEndian(); + + const optional_header_size: u16 = if (is_image) switch (magic) { + _ => unreachable, + inline else => |ct_magic| @sizeOf(@field(std.coff.OptionalHeader, @tagName(ct_magic))), + } else 0; + const data_directories_len = @typeInfo(DataDirectory).@"enum".fields.len; + const data_directories_size: u16 = if (is_image) + @sizeOf(std.coff.ImageDataDirectory) * data_directories_len + else + 0; + + try coff.nodes.ensureTotalCapacity(gpa, Node.known_count); + coff.nodes.appendAssumeCapacity(.file); + + const header_ni = Node.known.header; + assert(header_ni == try coff.mf.addOnlyChildNode(gpa, .root, .{ + .alignment = coff.mf.flags.block_size, + .fixed = true, + })); + coff.nodes.appendAssumeCapacity(.header); + + const signature_ni = Node.known.signature; + assert(signature_ni == try coff.mf.addOnlyChildNode(gpa, header_ni, .{ + .size = (if (is_image) msdos_stub.len else 0) + "PE\x00\x00".len, + .alignment = .@"4", + .fixed = true, + })); + coff.nodes.appendAssumeCapacity(.signature); + { + const 
signature_slice = signature_ni.slice(&coff.mf); + if (is_image) @memcpy(signature_slice[0..msdos_stub.len], &msdos_stub); + @memcpy(signature_slice[signature_slice.len - 4 ..], "PE\x00\x00"); + } + + const coff_header_ni = Node.known.coff_header; + assert(coff_header_ni == try coff.mf.addLastChildNode(gpa, header_ni, .{ + .size = @sizeOf(std.coff.Header), + .alignment = .@"4", + .fixed = true, + })); + coff.nodes.appendAssumeCapacity(.coff_header); + { + const coff_header: *std.coff.Header = @ptrCast(@alignCast(coff_header_ni.slice(&coff.mf))); + coff_header.* = .{ + .machine = machine, + .number_of_sections = 0, + .time_date_stamp = timestamp, + .pointer_to_symbol_table = 0, + .number_of_symbols = 0, + .size_of_optional_header = optional_header_size + data_directories_size, + .flags = .{ + .RELOCS_STRIPPED = is_image, + .EXECUTABLE_IMAGE = is_image, + .DEBUG_STRIPPED = true, + .@"32BIT_MACHINE" = magic == .PE32, + .LARGE_ADDRESS_AWARE = magic == .@"PE32+", + .DLL = comp.config.output_mode == .Lib and comp.config.link_mode == .dynamic, + }, + }; + if (target_endian != native_endian) std.mem.byteSwapAllFields(std.coff.Header, coff_header); + } + + const optional_header_ni = Node.known.optional_header; + assert(optional_header_ni == try coff.mf.addLastChildNode(gpa, header_ni, .{ + .size = optional_header_size, + .alignment = .@"4", + .fixed = true, + })); + coff.nodes.appendAssumeCapacity(.optional_header); + if (is_image) switch (magic) { + _ => unreachable, + .PE32 => { + const optional_header: *std.coff.OptionalHeader.PE32 = + @ptrCast(@alignCast(optional_header_ni.slice(&coff.mf))); + optional_header.* = .{ + .standard = .{ + .magic = .PE32, + .major_linker_version = 0, + .minor_linker_version = 0, + .size_of_code = 0, + .size_of_initialized_data = 0, + .size_of_uninitialized_data = 0, + .address_of_entry_point = 0, + .base_of_code = 0, + }, + .base_of_data = 0, + .image_base = switch (coff.base.comp.config.output_mode) { + .Exe => 0x400000, + .Lib => switch 
(coff.base.comp.config.link_mode) { + .static => 0, + .dynamic => 0x10000000, + }, + .Obj => 0, + }, + .section_alignment = @intCast(section_align.toByteUnits()), + .file_alignment = @intCast(file_align.toByteUnits()), + .major_operating_system_version = 6, + .minor_operating_system_version = 0, + .major_image_version = 0, + .minor_image_version = 0, + .major_subsystem_version = major_subsystem_version, + .minor_subsystem_version = minor_subsystem_version, + .win32_version_value = 0, + .size_of_image = 0, + .size_of_headers = 0, + .checksum = 0, + .subsystem = .WINDOWS_CUI, + .dll_flags = .{ + .HIGH_ENTROPY_VA = true, + .DYNAMIC_BASE = true, + .TERMINAL_SERVER_AWARE = true, + .NX_COMPAT = true, + }, + .size_of_stack_reserve = default_size_of_stack_reserve, + .size_of_stack_commit = default_size_of_stack_commit, + .size_of_heap_reserve = default_size_of_heap_reserve, + .size_of_heap_commit = default_size_of_heap_commit, + .loader_flags = 0, + .number_of_rva_and_sizes = data_directories_len, + }; + if (target_endian != native_endian) + std.mem.byteSwapAllFields(std.coff.OptionalHeader.PE32, optional_header); + }, + .@"PE32+" => { + const header: *std.coff.OptionalHeader.@"PE32+" = + @ptrCast(@alignCast(optional_header_ni.slice(&coff.mf))); + header.* = .{ + .standard = .{ + .magic = .@"PE32+", + .major_linker_version = 0, + .minor_linker_version = 0, + .size_of_code = 0, + .size_of_initialized_data = 0, + .size_of_uninitialized_data = 0, + .address_of_entry_point = 0, + .base_of_code = 0, + }, + .image_base = switch (coff.base.comp.config.output_mode) { + .Exe => 0x140000000, + .Lib => switch (coff.base.comp.config.link_mode) { + .static => 0, + .dynamic => 0x180000000, + }, + .Obj => 0, + }, + .section_alignment = @intCast(section_align.toByteUnits()), + .file_alignment = @intCast(file_align.toByteUnits()), + .major_operating_system_version = 6, + .minor_operating_system_version = 0, + .major_image_version = 0, + .minor_image_version = 0, + .major_subsystem_version 
= major_subsystem_version, + .minor_subsystem_version = minor_subsystem_version, + .win32_version_value = 0, + .size_of_image = 0, + .size_of_headers = 0, + .checksum = 0, + .subsystem = .WINDOWS_CUI, + .dll_flags = .{ + .HIGH_ENTROPY_VA = true, + .DYNAMIC_BASE = true, + .TERMINAL_SERVER_AWARE = true, + .NX_COMPAT = true, + }, + .size_of_stack_reserve = default_size_of_stack_reserve, + .size_of_stack_commit = default_size_of_stack_commit, + .size_of_heap_reserve = default_size_of_heap_reserve, + .size_of_heap_commit = default_size_of_heap_commit, + .loader_flags = 0, + .number_of_rva_and_sizes = data_directories_len, + }; + if (target_endian != native_endian) + std.mem.byteSwapAllFields(std.coff.OptionalHeader.@"PE32+", header); + }, + }; + + const data_directories_ni = Node.known.data_directories; + assert(data_directories_ni == try coff.mf.addLastChildNode(gpa, header_ni, .{ + .size = data_directories_size, + .alignment = .@"4", + .fixed = true, + })); + coff.nodes.appendAssumeCapacity(.data_directories); + { + const data_directories: *[data_directories_len]std.coff.ImageDataDirectory = + @ptrCast(@alignCast(data_directories_ni.slice(&coff.mf))); + @memset(data_directories, .{ .virtual_address = 0, .size = 0 }); + if (target_endian != native_endian) for (data_directories) |*data_directory| + std.mem.byteSwapAllFields(std.coff.ImageDataDirectory, data_directory); + } + + const section_table_ni = Node.known.section_table; + assert(section_table_ni == try coff.mf.addLastChildNode(gpa, header_ni, .{ + .alignment = .@"4", + .fixed = true, + })); + coff.nodes.appendAssumeCapacity(.section_table); + + assert(coff.nodes.len == Node.known_count); + + try coff.symbol_table.ensureTotalCapacity(gpa, Symbol.Index.known_count); + coff.symbol_table.addOneAssumeCapacity().* = .{ + .ni = .none, + .rva = 0, + .size = 0, + .loc_relocs = .none, + .target_relocs = .none, + .section_number = .UNDEFINED, + .data_directory = null, + }; + assert(try coff.addSection(".data", null, .{ + 
.CNT_INITIALIZED_DATA = true, + .MEM_READ = true, + .MEM_WRITE = true, + }) == .data); + assert(try coff.addSection(".idata", .import_table, .{ + .CNT_INITIALIZED_DATA = true, + .MEM_READ = true, + }) == .idata); + assert(try coff.addSection(".rdata", null, .{ + .CNT_INITIALIZED_DATA = true, + .MEM_READ = true, + }) == .rdata); + assert(try coff.addSection(".text", null, .{ + .CNT_CODE = true, + .MEM_EXECUTE = true, + .MEM_READ = true, + }) == .text); + coff.import_table.directory_table_ni = try coff.mf.addLastChildNode( + gpa, + Symbol.Index.idata.node(coff), + .{ + .alignment = .@"4", + .fixed = true, + }, + ); + coff.nodes.appendAssumeCapacity(.import_directory_table); + assert(coff.symbol_table.items.len == Symbol.Index.known_count); +} + +fn getNode(coff: *const Coff, ni: MappedFile.Node.Index) Node { + return coff.nodes.get(@intFromEnum(ni)); +} +fn computeNodeRva(coff: *Coff, ni: MappedFile.Node.Index) u32 { + var section_offset: u32 = 0; + var parent_ni = ni; + while (true) { + assert(parent_ni != .none); + switch (coff.getNode(parent_ni)) { + else => {}, + .section => |si| return si.get(coff).rva + section_offset, + } + const parent_offset, _ = parent_ni.location(&coff.mf).resolve(&coff.mf); + section_offset += @intCast(parent_offset); + parent_ni = parent_ni.parent(&coff.mf); + } +} + +pub inline fn targetEndian(coff: *const Coff) std.builtin.Endian { + return coff.endian; +} +fn targetLoad(coff: *const Coff, ptr: anytype) @typeInfo(@TypeOf(ptr)).pointer.child { + const Child = @typeInfo(@TypeOf(ptr)).pointer.child; + return switch (@typeInfo(Child)) { + else => @compileError(@typeName(Child)), + .int => std.mem.toNative(Child, ptr.*, coff.targetEndian()), + .@"enum" => |@"enum"| @enumFromInt(coff.targetLoad(@as(*@"enum".tag_type, @ptrCast(ptr)))), + .@"struct" => |@"struct"| @bitCast( + coff.targetLoad(@as(*@"struct".backing_integer.?, @ptrCast(ptr))), + ), + }; +} +fn targetStore(coff: *const Coff, ptr: anytype, val: 
@typeInfo(@TypeOf(ptr)).pointer.child) void { + const Child = @typeInfo(@TypeOf(ptr)).pointer.child; + return switch (@typeInfo(Child)) { + else => @compileError(@typeName(Child)), + .int => ptr.* = std.mem.nativeTo(Child, val, coff.targetEndian()), + .@"enum" => |@"enum"| coff.targetStore( + @as(*@"enum".tag_type, @ptrCast(ptr)), + @intFromEnum(val), + ), + .@"struct" => |@"struct"| coff.targetStore( + @as(*@"struct".backing_integer.?, @ptrCast(ptr)), + @bitCast(val), + ), + }; +} + +pub fn headerPtr(coff: *Coff) *std.coff.Header { + return @ptrCast(@alignCast(Node.known.coff_header.slice(&coff.mf))); +} + +pub fn optionalHeaderStandardPtr(coff: *Coff) *std.coff.OptionalHeader { + return @ptrCast(@alignCast( + Node.known.optional_header.slice(&coff.mf)[0..@sizeOf(std.coff.OptionalHeader)], + )); +} + +pub const OptionalHeaderPtr = union(std.coff.OptionalHeader.Magic) { + PE32: *std.coff.OptionalHeader.PE32, + @"PE32+": *std.coff.OptionalHeader.@"PE32+", +}; +pub fn optionalHeaderPtr(coff: *Coff) OptionalHeaderPtr { + const slice = Node.known.optional_header.slice(&coff.mf); + return switch (coff.targetLoad(&coff.optionalHeaderStandardPtr().magic)) { + _ => unreachable, + inline else => |magic| @unionInit( + OptionalHeaderPtr, + @tagName(magic), + @ptrCast(@alignCast(slice)), + ), + }; +} +pub fn optionalHeaderField( + coff: *Coff, + comptime field: std.meta.FieldEnum(std.coff.OptionalHeader.@"PE32+"), +) @FieldType(std.coff.OptionalHeader.@"PE32+", @tagName(field)) { + return switch (coff.optionalHeaderPtr()) { + inline else => |optional_header| coff.targetLoad(&@field(optional_header, @tagName(field))), + }; +} + +pub fn dataDirectoriesSlice(coff: *Coff) []std.coff.ImageDataDirectory { + return @ptrCast(@alignCast(Node.known.data_directories.slice(&coff.mf))); +} + +pub fn sectionTableSlice(coff: *Coff) []std.coff.SectionHeader { + return @ptrCast(@alignCast(Node.known.section_table.slice(&coff.mf))); +} + +fn addSymbolAssumeCapacity(coff: *Coff) Symbol.Index { + 
defer coff.symbol_table.addOneAssumeCapacity().* = .{ + .ni = .none, + .rva = 0, + .size = 0, + .loc_relocs = .none, + .target_relocs = .none, + .section_number = .UNDEFINED, + .data_directory = null, + }; + return @enumFromInt(coff.symbol_table.items.len); +} + +fn initSymbolAssumeCapacity(coff: *Coff) !Symbol.Index { + const si = coff.addSymbolAssumeCapacity(); + return si; +} + +fn getOrPutString(coff: *Coff, string: []const u8) !String { + const gpa = coff.base.comp.gpa; + try coff.string_bytes.ensureUnusedCapacity(gpa, string.len + 1); + const gop = try coff.strings.getOrPutContextAdapted( + gpa, + string, + std.hash_map.StringIndexAdapter{ .bytes = &coff.string_bytes }, + .{ .bytes = &coff.string_bytes }, + ); + if (!gop.found_existing) { + gop.key_ptr.* = @intCast(coff.string_bytes.items.len); + gop.value_ptr.* = {}; + coff.string_bytes.appendSliceAssumeCapacity(string); + coff.string_bytes.appendAssumeCapacity(0); + } + return @enumFromInt(gop.key_ptr.*); +} + +fn getOrPutOptionalString(coff: *Coff, string: ?[]const u8) !String.Optional { + return (try coff.getOrPutString(string orelse return .none)).toOptional(); +} + +pub fn globalSymbol(coff: *Coff, name: []const u8, lib_name: ?[]const u8) !Symbol.Index { + const gpa = coff.base.comp.gpa; + try coff.symbol_table.ensureUnusedCapacity(gpa, 1); + const sym_gop = try coff.globals.getOrPut(gpa, .{ + .name = try coff.getOrPutString(name), + .lib_name = try coff.getOrPutOptionalString(lib_name), + }); + if (!sym_gop.found_existing) { + sym_gop.value_ptr.* = coff.addSymbolAssumeCapacity(); + coff.base.comp.link_synth_prog_node.increaseEstimatedTotalItems(1); + } + return sym_gop.value_ptr.*; +} + +fn navMapIndex(coff: *Coff, zcu: *Zcu, nav_index: InternPool.Nav.Index) !Node.NavMapIndex { + const gpa = zcu.gpa; + try coff.symbol_table.ensureUnusedCapacity(gpa, 1); + const sym_gop = try coff.navs.getOrPut(gpa, nav_index); + if (!sym_gop.found_existing) sym_gop.value_ptr.* = coff.addSymbolAssumeCapacity(); + return 
@enumFromInt(sym_gop.index); +} +pub fn navSymbol(coff: *Coff, zcu: *Zcu, nav_index: InternPool.Nav.Index) !Symbol.Index { + const ip = &zcu.intern_pool; + const nav = ip.getNav(nav_index); + if (nav.getExtern(ip)) |@"extern"| return coff.globalSymbol( + @"extern".name.toSlice(ip), + @"extern".lib_name.toSlice(ip), + ); + const nmi = try coff.navMapIndex(zcu, nav_index); + return nmi.symbol(coff); +} + +fn uavMapIndex(coff: *Coff, uav_val: InternPool.Index) !Node.UavMapIndex { + const gpa = coff.base.comp.gpa; + try coff.symbol_table.ensureUnusedCapacity(gpa, 1); + const sym_gop = try coff.uavs.getOrPut(gpa, uav_val); + if (!sym_gop.found_existing) sym_gop.value_ptr.* = coff.addSymbolAssumeCapacity(); + return @enumFromInt(sym_gop.index); +} +pub fn uavSymbol(coff: *Coff, uav_val: InternPool.Index) !Symbol.Index { + const umi = try coff.uavMapIndex(uav_val); + return umi.symbol(coff); +} + +pub fn lazySymbol(coff: *Coff, lazy: link.File.LazySymbol) !Symbol.Index { + const gpa = coff.base.comp.gpa; + try coff.symbol_table.ensureUnusedCapacity(gpa, 1); + const sym_gop = try coff.lazy.getPtr(lazy.kind).map.getOrPut(gpa, lazy.ty); + if (!sym_gop.found_existing) { + sym_gop.value_ptr.* = try coff.initSymbolAssumeCapacity(); + coff.base.comp.link_synth_prog_node.increaseEstimatedTotalItems(1); + } + return sym_gop.value_ptr.*; +} + +pub fn getNavVAddr( + coff: *Coff, + pt: Zcu.PerThread, + nav: InternPool.Nav.Index, + reloc_info: link.File.RelocInfo, +) !u64 { + return coff.getVAddr(reloc_info, try coff.navSymbol(pt.zcu, nav)); +} + +pub fn getUavVAddr( + coff: *Coff, + uav: InternPool.Index, + reloc_info: link.File.RelocInfo, +) !u64 { + return coff.getVAddr(reloc_info, try coff.uavSymbol(uav)); +} + +pub fn getVAddr(coff: *Coff, reloc_info: link.File.RelocInfo, target_si: Symbol.Index) !u64 { + try coff.addReloc( + @enumFromInt(reloc_info.parent.atom_index), + reloc_info.offset, + target_si, + reloc_info.addend, + switch (coff.targetLoad(&coff.headerPtr().machine)) { + 
else => unreachable, + .AMD64 => .{ .AMD64 = .ADDR64 }, + .I386 => .{ .I386 = .DIR32 }, + }, + ); + return coff.optionalHeaderField(.image_base) + target_si.get(coff).rva; +} + +fn addSection( + coff: *Coff, + name: []const u8, + maybe_data_directory: ?DataDirectory, + flags: std.coff.SectionHeader.Flags, +) !Symbol.Index { + const gpa = coff.base.comp.gpa; + try coff.nodes.ensureUnusedCapacity(gpa, 1); + try coff.section_table.ensureUnusedCapacity(gpa, 1); + try coff.symbol_table.ensureUnusedCapacity(gpa, 1); + + const coff_header = coff.headerPtr(); + const section_index = coff.targetLoad(&coff_header.number_of_sections); + const section_table_len = section_index + 1; + coff.targetStore(&coff_header.number_of_sections, section_table_len); + try Node.known.section_table.resize( + &coff.mf, + gpa, + @sizeOf(std.coff.SectionHeader) * section_table_len, + ); + const ni = try coff.mf.addLastChildNode(gpa, .root, .{ + .alignment = coff.mf.flags.block_size, + .moved = true, + .bubbles_moved = false, + }); + const si = coff.addSymbolAssumeCapacity(); + coff.section_table.appendAssumeCapacity(si); + coff.nodes.appendAssumeCapacity(.{ .section = si }); + const section_table = coff.sectionTableSlice(); + const virtual_size = coff.optionalHeaderField(.section_alignment); + const rva: u32 = switch (section_index) { + 0 => @intCast(Node.known.header.location(&coff.mf).resolve(&coff.mf)[1]), + else => coff.section_table.items[section_index - 1].get(coff).rva + + coff.targetLoad(§ion_table[section_index - 1].virtual_size), + }; + { + const sym = si.get(coff); + sym.ni = ni; + sym.rva = rva; + sym.section_number = @enumFromInt(section_table_len); + sym.data_directory = maybe_data_directory; + } + const section = §ion_table[section_index]; + section.* = .{ + .name = undefined, + .virtual_size = virtual_size, + .virtual_address = rva, + .size_of_raw_data = 0, + .pointer_to_raw_data = 0, + .pointer_to_relocations = 0, + .pointer_to_linenumbers = 0, + .number_of_relocations = 0, + 
.number_of_linenumbers = 0, + .flags = flags, + }; + @memcpy(section.name[0..name.len], name); + @memset(section.name[name.len..], 0); + if (coff.targetEndian() != native_endian) + std.mem.byteSwapAllFields(std.coff.SectionHeader, section); + if (maybe_data_directory) |data_directory| + coff.dataDirectoriesSlice()[@intFromEnum(data_directory)] = .{ + .virtual_address = section.virtual_address, + .size = section.virtual_size, + }; + switch (coff.optionalHeaderPtr()) { + inline else => |optional_header| coff.targetStore( + &optional_header.size_of_image, + @intCast(rva + virtual_size), + ), + } + return si; +} + +pub fn addReloc( + coff: *Coff, + loc_si: Symbol.Index, + offset: u64, + target_si: Symbol.Index, + addend: i64, + @"type": Reloc.Type, +) !void { + const gpa = coff.base.comp.gpa; + const target = target_si.get(coff); + const ri: Reloc.Index = @enumFromInt(coff.relocs.items.len); + (try coff.relocs.addOne(gpa)).* = .{ + .type = @"type", + .prev = .none, + .next = target.target_relocs, + .loc = loc_si, + .target = target_si, + .unused = 0, + .offset = offset, + .addend = addend, + }; + switch (target.target_relocs) { + .none => {}, + else => |target_ri| target_ri.get(coff).prev = ri, + } + target.target_relocs = ri; +} + +pub fn prelink(coff: *Coff, prog_node: std.Progress.Node) void { + _ = coff; + _ = prog_node; +} + +pub fn updateNav(coff: *Coff, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) !void { + coff.updateNavInner(pt, nav_index) catch |err| switch (err) { + error.OutOfMemory, + error.Overflow, + error.RelocationNotByteAligned, + => |e| return e, + else => |e| return coff.base.cgFail(nav_index, "linker failed to update variable: {t}", .{e}), + }; +} +fn updateNavInner(coff: *Coff, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) !void { + const zcu = pt.zcu; + const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + + const nav = ip.getNav(nav_index); + const nav_val = nav.status.fully_resolved.val; + const nav_init, const is_threadlocal = 
switch (ip.indexToKey(nav_val)) { + else => .{ nav_val, false }, + .variable => |variable| .{ variable.init, variable.is_threadlocal }, + .@"extern" => return, + .func => .{ .none, false }, + }; + if (nav_init == .none or !Type.fromInterned(ip.typeOf(nav_init)).hasRuntimeBits(zcu)) return; + + const nmi = try coff.navMapIndex(zcu, nav_index); + const si = nmi.symbol(coff); + const ni = ni: { + const sym = si.get(coff); + switch (sym.ni) { + .none => { + try coff.nodes.ensureUnusedCapacity(gpa, 1); + _ = is_threadlocal; + const ni = try coff.mf.addLastChildNode(gpa, Symbol.Index.data.node(coff), .{ + .alignment = pt.navAlignment(nav_index).toStdMem(), + .moved = true, + }); + coff.nodes.appendAssumeCapacity(.{ .nav = nmi }); + sym.ni = ni; + sym.section_number = Symbol.Index.data.get(coff).section_number; + }, + else => si.deleteLocationRelocs(coff), + } + assert(sym.loc_relocs == .none); + sym.loc_relocs = @enumFromInt(coff.relocs.items.len); + break :ni sym.ni; + }; + + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&coff.mf, gpa, &nw); + defer nw.deinit(); + codegen.generateSymbol( + &coff.base, + pt, + zcu.navSrcLoc(nav_index), + .fromInterned(nav_init), + &nw.interface, + .{ .atom_index = @intFromEnum(si) }, + ) catch |err| switch (err) { + error.WriteFailed => return error.OutOfMemory, + else => |e| return e, + }; + si.get(coff).size = @intCast(nw.interface.end); + si.applyLocationRelocs(coff); +} + +pub fn lowerUav( + coff: *Coff, + pt: Zcu.PerThread, + uav_val: InternPool.Index, + uav_align: InternPool.Alignment, + src_loc: Zcu.LazySrcLoc, +) !codegen.SymbolResult { + const zcu = pt.zcu; + const gpa = zcu.gpa; + + try coff.pending_uavs.ensureUnusedCapacity(gpa, 1); + const umi = try coff.uavMapIndex(uav_val); + const si = umi.symbol(coff); + if (switch (si.get(coff).ni) { + .none => true, + else => |ni| uav_align.toStdMem().order(ni.alignment(&coff.mf)).compare(.gt), + }) { + const gop = coff.pending_uavs.getOrPutAssumeCapacity(umi); + if 
(gop.found_existing) { + gop.value_ptr.alignment = gop.value_ptr.alignment.max(uav_align); + } else { + gop.value_ptr.* = .{ + .alignment = uav_align, + .src_loc = src_loc, + }; + coff.base.comp.link_const_prog_node.increaseEstimatedTotalItems(1); + } + } + return .{ .sym_index = @intFromEnum(si) }; +} + +pub fn updateFunc( + coff: *Coff, + pt: Zcu.PerThread, + func_index: InternPool.Index, + mir: *const codegen.AnyMir, +) !void { + coff.updateFuncInner(pt, func_index, mir) catch |err| switch (err) { + error.OutOfMemory, + error.Overflow, + error.RelocationNotByteAligned, + error.CodegenFail, + => |e| return e, + else => |e| return coff.base.cgFail( + pt.zcu.funcInfo(func_index).owner_nav, + "linker failed to update function: {s}", + .{@errorName(e)}, + ), + }; +} +fn updateFuncInner( + coff: *Coff, + pt: Zcu.PerThread, + func_index: InternPool.Index, + mir: *const codegen.AnyMir, +) !void { + const zcu = pt.zcu; + const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + const func = zcu.funcInfo(func_index); + const nav = ip.getNav(func.owner_nav); + + const nmi = try coff.navMapIndex(zcu, func.owner_nav); + const si = nmi.symbol(coff); + log.debug("updateFunc({f}) = {d}", .{ nav.fqn.fmt(ip), si }); + const ni = ni: { + const sym = si.get(coff); + switch (sym.ni) { + .none => { + try coff.nodes.ensureUnusedCapacity(gpa, 1); + const mod = zcu.navFileScope(func.owner_nav).mod.?; + const target = &mod.resolved_target.result; + const ni = try coff.mf.addLastChildNode(gpa, Symbol.Index.text.node(coff), .{ + .alignment = switch (nav.status.fully_resolved.alignment) { + .none => switch (mod.optimize_mode) { + .Debug, + .ReleaseSafe, + .ReleaseFast, + => target_util.defaultFunctionAlignment(target), + .ReleaseSmall => target_util.minFunctionAlignment(target), + }, + else => |a| a.maxStrict(target_util.minFunctionAlignment(target)), + }.toStdMem(), + .moved = true, + }); + coff.nodes.appendAssumeCapacity(.{ .nav = nmi }); + sym.ni = ni; + sym.section_number = 
Symbol.Index.text.get(coff).section_number; + }, + else => si.deleteLocationRelocs(coff), + } + assert(sym.loc_relocs == .none); + sym.loc_relocs = @enumFromInt(coff.relocs.items.len); + break :ni sym.ni; + }; + + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&coff.mf, gpa, &nw); + defer nw.deinit(); + codegen.emitFunction( + &coff.base, + pt, + zcu.navSrcLoc(func.owner_nav), + func_index, + @intFromEnum(si), + mir, + &nw.interface, + .none, + ) catch |err| switch (err) { + error.WriteFailed => return nw.err.?, + else => |e| return e, + }; + si.get(coff).size = @intCast(nw.interface.end); + si.applyLocationRelocs(coff); +} + +pub fn updateErrorData(coff: *Coff, pt: Zcu.PerThread) !void { + coff.flushLazy(pt, .{ + .kind = .const_data, + .index = @intCast(coff.lazy.getPtr(.const_data).map.getIndex(.anyerror_type) orelse return), + }) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.CodegenFail => return error.LinkFailure, + else => |e| return coff.base.comp.link_diags.fail("updateErrorData failed {t}", .{e}), + }; +} + +pub fn flush( + coff: *Coff, + arena: std.mem.Allocator, + tid: Zcu.PerThread.Id, + prog_node: std.Progress.Node, +) !void { + _ = arena; + _ = prog_node; + while (try coff.idle(tid)) {} + + // hack for stage2_x86_64 + coff + const comp = coff.base.comp; + if (comp.compiler_rt_dyn_lib) |crt_file| { + const gpa = comp.gpa; + const compiler_rt_sub_path = try std.fs.path.join(gpa, &.{ + std.fs.path.dirname(coff.base.emit.sub_path) orelse "", + std.fs.path.basename(crt_file.full_object_path.sub_path), + }); + defer gpa.free(compiler_rt_sub_path); + crt_file.full_object_path.root_dir.handle.copyFile( + crt_file.full_object_path.sub_path, + coff.base.emit.root_dir.handle, + compiler_rt_sub_path, + .{}, + ) catch |err| switch (err) { + else => |e| return comp.link_diags.fail("Copy '{s}' failed: {s}", .{ + compiler_rt_sub_path, + @errorName(e), + }), + }; + } +} + +pub fn idle(coff: *Coff, tid: Zcu.PerThread.Id) 
!bool { + const comp = coff.base.comp; + task: { + while (coff.pending_uavs.pop()) |pending_uav| { + const sub_prog_node = coff.idleProgNode( + tid, + comp.link_const_prog_node, + .{ .uav = pending_uav.key }, + ); + defer sub_prog_node.end(); + coff.flushUav( + .{ .zcu = coff.base.comp.zcu.?, .tid = tid }, + pending_uav.key, + pending_uav.value.alignment, + pending_uav.value.src_loc, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => |e| return coff.base.comp.link_diags.fail( + "linker failed to lower constant: {t}", + .{e}, + ), + }; + break :task; + } + if (coff.global_pending_index < coff.globals.count()) { + const pt: Zcu.PerThread = .{ .zcu = coff.base.comp.zcu.?, .tid = tid }; + const gmi: Node.GlobalMapIndex = @enumFromInt(coff.global_pending_index); + coff.global_pending_index += 1; + const sub_prog_node = comp.link_synth_prog_node.start( + gmi.globalName(coff).name.toSlice(coff), + 0, + ); + defer sub_prog_node.end(); + coff.flushGlobal(pt, gmi) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => |e| return coff.base.comp.link_diags.fail( + "linker failed to lower constant: {t}", + .{e}, + ), + }; + break :task; + } + var lazy_it = coff.lazy.iterator(); + while (lazy_it.next()) |lazy| if (lazy.value.pending_index < lazy.value.map.count()) { + const pt: Zcu.PerThread = .{ .zcu = coff.base.comp.zcu.?, .tid = tid }; + const lmr: Node.LazyMapRef = .{ .kind = lazy.key, .index = lazy.value.pending_index }; + lazy.value.pending_index += 1; + const kind = switch (lmr.kind) { + .code => "code", + .const_data => "data", + }; + var name: [std.Progress.Node.max_name_len]u8 = undefined; + const sub_prog_node = comp.link_synth_prog_node.start( + std.fmt.bufPrint(&name, "lazy {s} for {f}", .{ + kind, + Type.fromInterned(lmr.lazySymbol(coff).ty).fmt(pt), + }) catch &name, + 0, + ); + defer sub_prog_node.end(); + coff.flushLazy(pt, lmr) catch |err| switch (err) { + error.OutOfMemory => return 
error.OutOfMemory, + else => |e| return coff.base.comp.link_diags.fail( + "linker failed to lower lazy {s}: {t}", + .{ kind, e }, + ), + }; + break :task; + }; + while (coff.mf.updates.pop()) |ni| { + const clean_moved = ni.cleanMoved(&coff.mf); + const clean_resized = ni.cleanResized(&coff.mf); + if (clean_moved or clean_resized) { + const sub_prog_node = coff.idleProgNode(tid, coff.mf.update_prog_node, coff.getNode(ni)); + defer sub_prog_node.end(); + if (clean_moved) try coff.flushMoved(ni); + if (clean_resized) try coff.flushResized(ni); + break :task; + } else coff.mf.update_prog_node.completeOne(); + } + } + if (coff.pending_uavs.count() > 0) return true; + for (&coff.lazy.values) |lazy| if (lazy.map.count() > lazy.pending_index) return true; + if (coff.mf.updates.items.len > 0) return true; + return false; +} + +fn idleProgNode( + coff: *Coff, + tid: Zcu.PerThread.Id, + prog_node: std.Progress.Node, + node: Node, +) std.Progress.Node { + var name: [std.Progress.Node.max_name_len]u8 = undefined; + return prog_node.start(name: switch (node) { + else => |tag| @tagName(tag), + .section => |si| std.mem.sliceTo(&si.get(coff).section_number.header(coff).name, 0), + .nav => |nmi| { + const ip = &coff.base.comp.zcu.?.intern_pool; + break :name ip.getNav(nmi.navIndex(coff)).fqn.toSlice(ip); + }, + .uav => |umi| std.fmt.bufPrint(&name, "{f}", .{ + Value.fromInterned(umi.uavValue(coff)).fmtValue(.{ + .zcu = coff.base.comp.zcu.?, + .tid = tid, + }), + }) catch &name, + }, 0); +} + +fn flushUav( + coff: *Coff, + pt: Zcu.PerThread, + umi: Node.UavMapIndex, + uav_align: InternPool.Alignment, + src_loc: Zcu.LazySrcLoc, +) !void { + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const uav_val = umi.uavValue(coff); + const si = umi.symbol(coff); + const ni = ni: { + const sym = si.get(coff); + switch (sym.ni) { + .none => { + try coff.nodes.ensureUnusedCapacity(gpa, 1); + const ni = try coff.mf.addLastChildNode(gpa, Symbol.Index.data.node(coff), .{ + .alignment = 
uav_align.toStdMem(), + .moved = true, + }); + coff.nodes.appendAssumeCapacity(.{ .uav = umi }); + sym.ni = ni; + sym.section_number = Symbol.Index.data.get(coff).section_number; + }, + else => { + if (sym.ni.alignment(&coff.mf).order(uav_align.toStdMem()).compare(.gte)) return; + si.deleteLocationRelocs(coff); + }, + } + assert(sym.loc_relocs == .none); + sym.loc_relocs = @enumFromInt(coff.relocs.items.len); + break :ni sym.ni; + }; + + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&coff.mf, gpa, &nw); + defer nw.deinit(); + codegen.generateSymbol( + &coff.base, + pt, + src_loc, + .fromInterned(uav_val), + &nw.interface, + .{ .atom_index = @intFromEnum(si) }, + ) catch |err| switch (err) { + error.WriteFailed => return error.OutOfMemory, + else => |e| return e, + }; + si.get(coff).size = @intCast(nw.interface.end); + si.applyLocationRelocs(coff); +} + +fn flushGlobal(coff: *Coff, pt: Zcu.PerThread, gmi: Node.GlobalMapIndex) !void { + const zcu = pt.zcu; + const comp = zcu.comp; + const gpa = zcu.gpa; + const gn = gmi.globalName(coff); + if (gn.lib_name.toSlice(coff)) |lib_name| { + const name = gn.name.toSlice(coff); + try coff.nodes.ensureUnusedCapacity(gpa, 4); + try coff.symbol_table.ensureUnusedCapacity(gpa, 1); + + const target_endian = coff.targetEndian(); + const magic = coff.targetLoad(&coff.optionalHeaderStandardPtr().magic); + const addr_size: u64, const addr_align: std.mem.Alignment = switch (magic) { + _ => unreachable, + .PE32 => .{ 4, .@"4" }, + .@"PE32+" => .{ 8, .@"8" }, + }; + + const gop = try coff.import_table.dlls.getOrPutAdapted( + gpa, + lib_name, + ImportTable.Adapter{ .coff = coff }, + ); + const import_hint_name_align: std.mem.Alignment = .@"2"; + if (!gop.found_existing) { + errdefer _ = coff.import_table.dlls.pop(); + try coff.import_table.directory_table_ni.resize( + &coff.mf, + gpa, + @sizeOf(std.coff.ImportDirectoryEntry) * (gop.index + 2), + ); + const import_hint_name_table_len = + 
import_hint_name_align.forward(lib_name.len + ".dll".len + 1); + const idata_section_ni = Symbol.Index.idata.node(coff); + const import_lookup_table_ni = try coff.mf.addLastChildNode(gpa, idata_section_ni, .{ + .size = addr_size * 2, + .alignment = addr_align, + .moved = true, + }); + const import_address_table_ni = try coff.mf.addLastChildNode(gpa, idata_section_ni, .{ + .size = addr_size * 2, + .alignment = addr_align, + .moved = true, + }); + const import_address_table_si = coff.addSymbolAssumeCapacity(); + { + const import_address_table_sym = import_address_table_si.get(coff); + import_address_table_sym.ni = import_address_table_ni; + assert(import_address_table_sym.loc_relocs == .none); + import_address_table_sym.loc_relocs = @enumFromInt(coff.relocs.items.len); + import_address_table_sym.section_number = Symbol.Index.idata.get(coff).section_number; + } + const import_hint_name_table_ni = try coff.mf.addLastChildNode(gpa, idata_section_ni, .{ + .size = import_hint_name_table_len, + .alignment = import_hint_name_align, + .moved = true, + }); + gop.value_ptr.* = .{ + .import_lookup_table_ni = import_lookup_table_ni, + .import_address_table_si = import_address_table_si, + .import_hint_name_table_ni = import_hint_name_table_ni, + .len = 0, + .hint_name_len = @intCast(import_hint_name_table_len), + }; + const import_hint_name_slice = import_hint_name_table_ni.slice(&coff.mf); + @memcpy(import_hint_name_slice[0..lib_name.len], lib_name); + @memcpy(import_hint_name_slice[lib_name.len..][0..".dll".len], ".dll"); + @memset(import_hint_name_slice[lib_name.len + ".dll".len ..], 0); + coff.nodes.appendAssumeCapacity(.{ .import_lookup_table = @intCast(gop.index) }); + coff.nodes.appendAssumeCapacity(.{ .import_address_table = @intCast(gop.index) }); + coff.nodes.appendAssumeCapacity(.{ .import_hint_name_table = @intCast(gop.index) }); + + const import_directory_table: []std.coff.ImportDirectoryEntry = + 
@ptrCast(@alignCast(coff.import_table.directory_table_ni.slice(&coff.mf))); + import_directory_table[gop.index..][0..2].* = .{ .{ + .import_lookup_table_rva = coff.computeNodeRva(import_lookup_table_ni), + .time_date_stamp = 0, + .forwarder_chain = 0, + .name_rva = coff.computeNodeRva(import_hint_name_table_ni), + .import_address_table_rva = coff.computeNodeRva(import_address_table_ni), + }, .{ + .import_lookup_table_rva = 0, + .time_date_stamp = 0, + .forwarder_chain = 0, + .name_rva = 0, + .import_address_table_rva = 0, + } }; + } + const import_symbol_index = gop.value_ptr.len; + gop.value_ptr.len = import_symbol_index + 1; + const new_symbol_table_size = addr_size * (import_symbol_index + 2); + const import_hint_name_index = gop.value_ptr.hint_name_len; + gop.value_ptr.hint_name_len = @intCast( + import_hint_name_align.forward(import_hint_name_index + 2 + name.len + 1), + ); + try gop.value_ptr.import_lookup_table_ni.resize(&coff.mf, gpa, new_symbol_table_size); + const import_address_table_ni = gop.value_ptr.import_address_table_si.node(coff); + try import_address_table_ni.resize(&coff.mf, gpa, new_symbol_table_size); + try gop.value_ptr.import_hint_name_table_ni.resize(&coff.mf, gpa, gop.value_ptr.hint_name_len); + const import_lookup_slice = gop.value_ptr.import_lookup_table_ni.slice(&coff.mf); + const import_address_slice = import_address_table_ni.slice(&coff.mf); + const import_hint_name_slice = gop.value_ptr.import_hint_name_table_ni.slice(&coff.mf); + @memset(import_hint_name_slice[import_hint_name_index..][0..2], 0); + @memcpy(import_hint_name_slice[import_hint_name_index + 2 ..][0..name.len], name); + @memset(import_hint_name_slice[import_hint_name_index + 2 + name.len ..], 0); + const import_hint_name_rva = + coff.computeNodeRva(gop.value_ptr.import_hint_name_table_ni) + import_hint_name_index; + switch (magic) { + _ => unreachable, + inline .PE32, .@"PE32+" => |ct_magic| { + const Addr = switch (ct_magic) { + _ => comptime unreachable, + .PE32 => 
u32, + .@"PE32+" => u64, + }; + const import_lookup_table: []Addr = @ptrCast(@alignCast(import_lookup_slice)); + const import_address_table: []Addr = @ptrCast(@alignCast(import_address_slice)); + const import_hint_name_rvas: [2]Addr = .{ + std.mem.nativeTo(Addr, @intCast(import_hint_name_rva), target_endian), + std.mem.nativeTo(Addr, 0, target_endian), + }; + import_lookup_table[import_symbol_index..][0..2].* = import_hint_name_rvas; + import_address_table[import_symbol_index..][0..2].* = import_hint_name_rvas; + }, + } + const si = gmi.symbol(coff); + const sym = si.get(coff); + sym.section_number = Symbol.Index.text.get(coff).section_number; + assert(sym.loc_relocs == .none); + sym.loc_relocs = @enumFromInt(coff.relocs.items.len); + switch (coff.targetLoad(&coff.headerPtr().machine)) { + else => |tag| @panic(@tagName(tag)), + .AMD64 => { + const init = [_]u8{ 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }; + const target = &comp.root_mod.resolved_target.result; + const ni = try coff.mf.addLastChildNode(gpa, Symbol.Index.text.node(coff), .{ + .alignment = switch (comp.root_mod.optimize_mode) { + .Debug, + .ReleaseSafe, + .ReleaseFast, + => target_util.defaultFunctionAlignment(target), + .ReleaseSmall => target_util.minFunctionAlignment(target), + }.toStdMem(), + .size = init.len, + }); + @memcpy(ni.slice(&coff.mf)[0..init.len], &init); + sym.ni = ni; + sym.size = init.len; + try coff.addReloc( + si, + init.len - 4, + gop.value_ptr.import_address_table_si, + @intCast(addr_size * import_symbol_index), + .{ .AMD64 = .REL32 }, + ); + }, + } + coff.nodes.appendAssumeCapacity(.{ .global = gmi }); + sym.rva = coff.computeNodeRva(sym.ni); + si.applyLocationRelocs(coff); + } +} + +fn flushLazy(coff: *Coff, pt: Zcu.PerThread, lmr: Node.LazyMapRef) !void { + const zcu = pt.zcu; + const gpa = zcu.gpa; + + const lazy = lmr.lazySymbol(coff); + const si = lmr.symbol(coff); + const ni = ni: { + const sym = si.get(coff); + switch (sym.ni) { + .none => { + try 
coff.nodes.ensureUnusedCapacity(gpa, 1); + const sec_si: Symbol.Index = switch (lazy.kind) { + .code => .text, + .const_data => .rdata, + }; + const ni = try coff.mf.addLastChildNode(gpa, sec_si.node(coff), .{ .moved = true }); + coff.nodes.appendAssumeCapacity(switch (lazy.kind) { + .code => .{ .lazy_code = @enumFromInt(lmr.index) }, + .const_data => .{ .lazy_const_data = @enumFromInt(lmr.index) }, + }); + sym.ni = ni; + sym.section_number = sec_si.get(coff).section_number; + }, + else => si.deleteLocationRelocs(coff), + } + assert(sym.loc_relocs == .none); + sym.loc_relocs = @enumFromInt(coff.relocs.items.len); + break :ni sym.ni; + }; + + var required_alignment: InternPool.Alignment = .none; + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&coff.mf, gpa, &nw); + defer nw.deinit(); + try codegen.generateLazySymbol( + &coff.base, + pt, + Type.fromInterned(lazy.ty).srcLocOrNull(pt.zcu) orelse .unneeded, + lazy, + &required_alignment, + &nw.interface, + .none, + .{ .atom_index = @intFromEnum(si) }, + ); + si.get(coff).size = @intCast(nw.interface.end); + si.applyLocationRelocs(coff); +} + +fn flushMoved(coff: *Coff, ni: MappedFile.Node.Index) !void { + const node = coff.getNode(ni); + switch (node) { + else => |tag| @panic(@tagName(tag)), + .section => |si| return coff.targetStore( + &si.get(coff).section_number.header(coff).pointer_to_raw_data, + @intCast(ni.fileLocation(&coff.mf, false).offset), + ), + .import_directory_table => {}, + .import_lookup_table => |import_directory_table_index| { + const import_directory_table: []std.coff.ImportDirectoryEntry = + @ptrCast(@alignCast(coff.import_table.directory_table_ni.slice(&coff.mf))); + const import_directory_entry = &import_directory_table[import_directory_table_index]; + coff.targetStore(&import_directory_entry.import_lookup_table_rva, coff.computeNodeRva(ni)); + }, + .import_address_table => |import_directory_table_index| { + const import_directory_table: []std.coff.ImportDirectoryEntry = + 
@ptrCast(@alignCast(coff.import_table.directory_table_ni.slice(&coff.mf))); + const import_directory_entry = &import_directory_table[import_directory_table_index]; + coff.targetStore(&import_directory_entry.import_lookup_table_rva, coff.computeNodeRva(ni)); + const import_address_table_si = + coff.import_table.dlls.values()[import_directory_table_index].import_address_table_si; + import_address_table_si.flushMoved(coff); + coff.targetStore( + &import_directory_entry.import_address_table_rva, + import_address_table_si.get(coff).rva, + ); + }, + .import_hint_name_table => |import_directory_table_index| { + const target_endian = coff.targetEndian(); + const magic = coff.targetLoad(&coff.optionalHeaderStandardPtr().magic); + const import_directory_table: []std.coff.ImportDirectoryEntry = + @ptrCast(@alignCast(coff.import_table.directory_table_ni.slice(&coff.mf))); + const import_directory_entry = &import_directory_table[import_directory_table_index]; + const import_hint_name_rva = coff.computeNodeRva(ni); + coff.targetStore(&import_directory_entry.name_rva, import_hint_name_rva); + const import_entry = &coff.import_table.dlls.values()[import_directory_table_index]; + const import_lookup_slice = import_entry.import_lookup_table_ni.slice(&coff.mf); + const import_address_slice = + import_entry.import_address_table_si.node(coff).slice(&coff.mf); + const import_hint_name_slice = ni.slice(&coff.mf); + const import_hint_name_align = ni.alignment(&coff.mf); + var import_hint_name_index: u32 = 0; + for (0..import_entry.len) |import_symbol_index| { + import_hint_name_index = @intCast(import_hint_name_align.forward( + std.mem.indexOfScalarPos( + u8, + import_hint_name_slice, + import_hint_name_index, + 0, + ).? 
+ 1, + )); + switch (magic) { + _ => unreachable, + inline .PE32, .@"PE32+" => |ct_magic| { + const Addr = switch (ct_magic) { + _ => comptime unreachable, + .PE32 => u32, + .@"PE32+" => u64, + }; + const import_lookup_table: []Addr = @ptrCast(@alignCast(import_lookup_slice)); + const import_address_table: []Addr = @ptrCast(@alignCast(import_address_slice)); + const rva = std.mem.nativeTo( + Addr, + import_hint_name_rva + import_hint_name_index, + target_endian, + ); + import_lookup_table[import_symbol_index] = rva; + import_address_table[import_symbol_index] = rva; + }, + } + import_hint_name_index += 2; + } + }, + inline .global, + .nav, + .uav, + .lazy_code, + .lazy_const_data, + => |mi| mi.symbol(coff).flushMoved(coff), + } + try ni.childrenMoved(coff.base.comp.gpa, &coff.mf); +} + +fn flushResized(coff: *Coff, ni: MappedFile.Node.Index) !void { + _, const size = ni.location(&coff.mf).resolve(&coff.mf); + const node = coff.getNode(ni); + switch (node) { + else => |tag| @panic(@tagName(tag)), + .file => {}, + .header => { + switch (coff.optionalHeaderPtr()) { + inline else => |optional_header| coff.targetStore( + &optional_header.size_of_headers, + @intCast(size), + ), + } + if (size > coff.section_table.items[0].get(coff).rva) try coff.virtualSlide( + 0, + std.mem.alignForward( + u32, + @intCast(size * 4), + coff.optionalHeaderField(.section_alignment), + ), + ); + }, + .section_table => {}, + .section => |si| { + const sym = si.get(coff); + const section_table = coff.sectionTableSlice(); + const section_index = sym.section_number.toIndex(); + const section = §ion_table[section_index]; + coff.targetStore(§ion.size_of_raw_data, @intCast(size)); + if (size > coff.targetLoad(§ion.virtual_size)) { + const virtual_size = std.mem.alignForward( + u32, + @intCast(size * 4), + coff.optionalHeaderField(.section_alignment), + ); + coff.targetStore(§ion.virtual_size, virtual_size); + if (sym.data_directory) |data_directory| + 
coff.dataDirectoriesSlice()[@intFromEnum(data_directory)].size = + section.virtual_size; + try coff.virtualSlide(section_index + 1, sym.rva + virtual_size); + } + }, + .import_directory_table, + .import_lookup_table, + .import_address_table, + .import_hint_name_table, + .global, + .nav, + .uav, + .lazy_code, + .lazy_const_data, + => {}, + } +} + +fn virtualSlide(coff: *Coff, start_section_index: usize, start_rva: u32) !void { + const section_table = coff.sectionTableSlice(); + var rva = start_rva; + for ( + coff.section_table.items[start_section_index..], + section_table[start_section_index..], + ) |section_si, *section| { + const section_sym = section_si.get(coff); + section_sym.rva = rva; + coff.targetStore(§ion.virtual_address, rva); + if (section_sym.data_directory) |data_directory| + coff.dataDirectoriesSlice()[@intFromEnum(data_directory)].virtual_address = + section.virtual_address; + try section_sym.ni.childrenMoved(coff.base.comp.gpa, &coff.mf); + rva += coff.targetLoad(§ion.virtual_size); + } + switch (coff.optionalHeaderPtr()) { + inline else => |optional_header| coff.targetStore( + &optional_header.size_of_image, + @intCast(rva), + ), + } +} + +pub fn updateExports( + coff: *Coff, + pt: Zcu.PerThread, + exported: Zcu.Exported, + export_indices: []const Zcu.Export.Index, +) !void { + return coff.updateExportsInner(pt, exported, export_indices) catch |err| switch (err) { + error.OutOfMemory => error.OutOfMemory, + error.LinkFailure => error.AnalysisFail, + }; +} +fn updateExportsInner( + coff: *Coff, + pt: Zcu.PerThread, + exported: Zcu.Exported, + export_indices: []const Zcu.Export.Index, +) !void { + const zcu = pt.zcu; + const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + + switch (exported) { + .nav => |nav| log.debug("updateExports({f})", .{ip.getNav(nav).fqn.fmt(ip)}), + .uav => |uav| log.debug("updateExports(@as({f}, {f}))", .{ + Type.fromInterned(ip.typeOf(uav)).fmt(pt), + Value.fromInterned(uav).fmtValue(pt), + }), + } + try 
coff.symbol_table.ensureUnusedCapacity(gpa, export_indices.len); + const exported_si: Symbol.Index = switch (exported) { + .nav => |nav| try coff.navSymbol(zcu, nav), + .uav => |uav| @enumFromInt(switch (try coff.lowerUav( + pt, + uav, + Type.fromInterned(ip.typeOf(uav)).abiAlignment(zcu), + export_indices[0].ptr(zcu).src, + )) { + .sym_index => |si| si, + .fail => |em| { + defer em.destroy(gpa); + return coff.base.comp.link_diags.fail("{s}", .{em.msg}); + }, + }), + }; + while (try coff.idle(pt.tid)) {} + const exported_ni = exported_si.node(coff); + const exported_sym = exported_si.get(coff); + for (export_indices) |export_index| { + const @"export" = export_index.ptr(zcu); + const export_si = try coff.globalSymbol(@"export".opts.name.toSlice(ip), null); + const export_sym = export_si.get(coff); + export_sym.ni = exported_ni; + export_sym.rva = exported_sym.rva; + export_sym.size = exported_sym.size; + export_sym.section_number = exported_sym.section_number; + export_si.applyTargetRelocs(coff); + if (@"export".opts.name.eqlSlice("wWinMainCRTStartup", ip)) { + coff.entry_hack = exported_si; + coff.optionalHeaderStandardPtr().address_of_entry_point = exported_sym.rva; + } + } +} + +pub fn deleteExport(coff: *Coff, exported: Zcu.Exported, name: InternPool.NullTerminatedString) void { + _ = coff; + _ = exported; + _ = name; +} + +pub fn dump(coff: *Coff, tid: Zcu.PerThread.Id) void { + const w = std.debug.lockStderrWriter(&.{}); + defer std.debug.unlockStderrWriter(); + coff.printNode(tid, w, .root, 0) catch {}; +} + +pub fn printNode( + coff: *Coff, + tid: Zcu.PerThread.Id, + w: *std.Io.Writer, + ni: MappedFile.Node.Index, + indent: usize, +) !void { + const node = coff.getNode(ni); + try w.splatByteAll(' ', indent); + try w.writeAll(@tagName(node)); + switch (node) { + else => {}, + .section => |si| try w.print("({s})", .{ + std.mem.sliceTo(&si.get(coff).section_number.header(coff).name, 0), + }), + .import_lookup_table, + .import_address_table, + 
.import_hint_name_table, + => |import_directory_table_index| try w.print("({s})", .{ + std.mem.sliceTo(coff.import_table.dlls.values()[import_directory_table_index] + .import_hint_name_table_ni.sliceConst(&coff.mf), 0), + }), + .global => |gmi| { + const gn = gmi.globalName(coff); + try w.writeByte('('); + if (gn.lib_name.toSlice(coff)) |lib_name| try w.print("{s}.dll, ", .{lib_name}); + try w.print("{s})", .{gn.name.toSlice(coff)}); + }, + .nav => |nmi| { + const zcu = coff.base.comp.zcu.?; + const ip = &zcu.intern_pool; + const nav = ip.getNav(nmi.navIndex(coff)); + try w.print("({f}, {f})", .{ + Type.fromInterned(nav.typeOf(ip)).fmt(.{ .zcu = zcu, .tid = tid }), + nav.fqn.fmt(ip), + }); + }, + .uav => |umi| { + const zcu = coff.base.comp.zcu.?; + const val: Value = .fromInterned(umi.uavValue(coff)); + try w.print("({f}, {f})", .{ + val.typeOf(zcu).fmt(.{ .zcu = zcu, .tid = tid }), + val.fmtValue(.{ .zcu = zcu, .tid = tid }), + }); + }, + inline .lazy_code, .lazy_const_data => |lmi| try w.print("({f})", .{ + Type.fromInterned(lmi.lazySymbol(coff).ty).fmt(.{ + .zcu = coff.base.comp.zcu.?, + .tid = tid, + }), + }), + } + { + const mf_node = &coff.mf.nodes.items[@intFromEnum(ni)]; + const off, const size = mf_node.location().resolve(&coff.mf); + try w.print(" index={d} offset=0x{x} size=0x{x} align=0x{x}{s}{s}{s}{s}\n", .{ + @intFromEnum(ni), + off, + size, + mf_node.flags.alignment.toByteUnits(), + if (mf_node.flags.fixed) " fixed" else "", + if (mf_node.flags.moved) " moved" else "", + if (mf_node.flags.resized) " resized" else "", + if (mf_node.flags.has_content) " has_content" else "", + }); + } + var leaf = true; + var child_it = ni.children(&coff.mf); + while (child_it.next()) |child_ni| { + leaf = false; + try coff.printNode(tid, w, child_ni, indent + 1); + } + if (leaf) { + const file_loc = ni.fileLocation(&coff.mf, false); + if (file_loc.size == 0) return; + var address = file_loc.offset; + const line_len = 0x10; + var line_it = std.mem.window( + u8, + 
coff.mf.contents[@intCast(file_loc.offset)..][0..@intCast(file_loc.size)], + line_len, + line_len, + ); + while (line_it.next()) |line_bytes| : (address += line_len) { + try w.splatByteAll(' ', indent + 1); + try w.print("{x:0>8} ", .{address}); + for (line_bytes) |byte| try w.print("{x:0>2} ", .{byte}); + try w.splatByteAll(' ', 3 * (line_len - line_bytes.len) + 1); + for (line_bytes) |byte| try w.writeByte(if (std.ascii.isPrint(byte)) byte else '.'); + try w.writeByte('\n'); + } + } +} + +const assert = std.debug.assert; +const builtin = @import("builtin"); +const codegen = @import("../codegen.zig"); +const Compilation = @import("../Compilation.zig"); +const Coff = @This(); +const InternPool = @import("../InternPool.zig"); +const link = @import("../link.zig"); +const log = std.log.scoped(.link); +const MappedFile = @import("MappedFile.zig"); +const native_endian = builtin.cpu.arch.endian(); +const std = @import("std"); +const target_util = @import("../target.zig"); +const Type = @import("../Type.zig"); +const Value = @import("../Value.zig"); +const Zcu = @import("../Zcu.zig"); diff --git a/src/link/Elf2.zig b/src/link/Elf2.zig index e43ebf2639ad..90e7dfbc673a 100644 --- a/src/link/Elf2.zig +++ b/src/link/Elf2.zig @@ -11,7 +11,7 @@ lazy: std.EnumArray(link.File.LazySymbol.Kind, struct { map: std.AutoArrayHashMapUnmanaged(InternPool.Index, Symbol.Index), pending_index: u32, }), -pending_uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, struct { +pending_uavs: std.AutoArrayHashMapUnmanaged(Node.UavMapIndex, struct { alignment: InternPool.Alignment, src_loc: Zcu.LazySrcLoc, }), @@ -25,10 +25,65 @@ pub const Node = union(enum) { shdr, segment: u32, section: Symbol.Index, - nav: InternPool.Nav.Index, - uav: InternPool.Index, - lazy_code: InternPool.Index, - lazy_const_data: InternPool.Index, + nav: NavMapIndex, + uav: UavMapIndex, + lazy_code: LazyMapRef.Index(.code), + lazy_const_data: LazyMapRef.Index(.const_data), + + pub const NavMapIndex = enum(u32) { + _, + + 
pub fn navIndex(nmi: NavMapIndex, elf: *const Elf) InternPool.Nav.Index { + return elf.navs.keys()[@intFromEnum(nmi)]; + } + + pub fn symbol(nmi: NavMapIndex, elf: *const Elf) Symbol.Index { + return elf.navs.values()[@intFromEnum(nmi)]; + } + }; + + pub const UavMapIndex = enum(u32) { + _, + + pub fn uavValue(umi: UavMapIndex, elf: *const Elf) InternPool.Index { + return elf.uavs.keys()[@intFromEnum(umi)]; + } + + pub fn symbol(umi: UavMapIndex, elf: *const Elf) Symbol.Index { + return elf.uavs.values()[@intFromEnum(umi)]; + } + }; + + pub const LazyMapRef = struct { + kind: link.File.LazySymbol.Kind, + index: u32, + + pub fn Index(comptime kind: link.File.LazySymbol.Kind) type { + return enum(u32) { + _, + + pub fn ref(lmi: @This()) LazyMapRef { + return .{ .kind = kind, .index = @intFromEnum(lmi) }; + } + + pub fn lazySymbol(lmi: @This(), elf: *const Elf) link.File.LazySymbol { + return lmi.ref().lazySymbol(elf); + } + + pub fn symbol(lmi: @This(), elf: *const Elf) Symbol.Index { + return lmi.ref().symbol(elf); + } + }; + } + + pub fn lazySymbol(lmr: LazyMapRef, elf: *const Elf) link.File.LazySymbol { + return .{ .kind = lmr.kind, .ty = elf.lazy.getPtrConst(lmr.kind).map.keys()[lmr.index] }; + } + + pub fn symbol(lmr: LazyMapRef, elf: *const Elf) Symbol.Index { + return elf.lazy.getPtrConst(lmr.kind).map.values()[lmr.index]; + } + }; pub const Tag = @typeInfo(Node).@"union".tag_type.?; @@ -43,11 +98,7 @@ pub const Node = union(enum) { seg_text, seg_data, }; - var mut_known: std.enums.EnumFieldStruct( - Known, - MappedFile.Node.Index, - null, - ) = undefined; + var mut_known: std.enums.EnumFieldStruct(Known, MappedFile.Node.Index, null) = undefined; for (@typeInfo(Known).@"enum".fields) |field| @field(mut_known, field.name) = @enumFromInt(field.value); break :known mut_known; @@ -223,10 +274,10 @@ pub const Reloc = extern struct { addend: i64, pub const Type = extern union { - x86_64: std.elf.R_X86_64, - aarch64: std.elf.R_AARCH64, - riscv: std.elf.R_RISCV, - 
ppc64: std.elf.R_PPC64, + X86_64: std.elf.R_X86_64, + AARCH64: std.elf.R_AARCH64, + RISCV: std.elf.R_RISCV, + PPC64: std.elf.R_PPC64, }; pub const Index = enum(u32) { @@ -239,7 +290,7 @@ pub const Reloc = extern struct { }; pub fn apply(reloc: *const Reloc, elf: *Elf) void { - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); switch (reloc.loc.get(elf).ni) { .none => return, else => |ni| if (ni.hasMoved(&elf.mf)) return, @@ -274,7 +325,7 @@ pub const Reloc = extern struct { ) +% @as(u64, @bitCast(reloc.addend)); switch (elf.ehdrField(.machine)) { else => |machine| @panic(@tagName(machine)), - .X86_64 => switch (reloc.type.x86_64) { + .X86_64 => switch (reloc.type.X86_64) { else => |kind| @panic(@tagName(kind)), .@"64" => std.mem.writeInt( u64, @@ -394,37 +445,7 @@ fn create( }, .Obj => .REL, }; - const machine: std.elf.EM = switch (target.cpu.arch) { - .spirv32, .spirv64, .wasm32, .wasm64 => .NONE, - .sparc => .SPARC, - .x86 => .@"386", - .m68k => .@"68K", - .mips, .mipsel, .mips64, .mips64el => .MIPS, - .powerpc, .powerpcle => .PPC, - .powerpc64, .powerpc64le => .PPC64, - .s390x => .S390, - .arm, .armeb, .thumb, .thumbeb => .ARM, - .hexagon => .SH, - .sparc64 => .SPARCV9, - .arc => .ARC, - .x86_64 => .X86_64, - .or1k => .OR1K, - .xtensa => .XTENSA, - .msp430 => .MSP430, - .avr => .AVR, - .nvptx, .nvptx64 => .CUDA, - .kalimba => .CSR_KALIMBA, - .aarch64, .aarch64_be => .AARCH64, - .xcore => .XCORE, - .amdgcn => .AMDGPU, - .riscv32, .riscv32be, .riscv64, .riscv64be => .RISCV, - .lanai => .LANAI, - .bpfel, .bpfeb => .BPF, - .ve => .VE, - .csky => .CSKY, - .loongarch32, .loongarch64 => .LOONGARCH, - .propeller => if (target.cpu.has(.propeller, .p2)) .PROPELLER2 else .PROPELLER, - }; + const machine = target.toElfMachine(); const maybe_interp = switch (comp.config.output_mode) { .Exe, .Lib => switch (comp.config.link_mode) { .static => null, @@ -479,7 +500,7 @@ fn create( switch (class) { .NONE, _ => unreachable, - inline .@"32", .@"64" => 
|ct_class| try elf.initHeaders( + inline else => |ct_class| try elf.initHeaders( ct_class, data, osabi, @@ -567,30 +588,31 @@ fn initHeaders( .fixed = true, })); elf.nodes.appendAssumeCapacity(.ehdr); - - const ehdr: *ElfN.Ehdr = @ptrCast(@alignCast(ehdr_ni.slice(&elf.mf))); - const EI = std.elf.EI; - @memcpy(ehdr.ident[0..std.elf.MAGIC.len], std.elf.MAGIC); - ehdr.ident[EI.CLASS] = @intFromEnum(class); - ehdr.ident[EI.DATA] = @intFromEnum(data); - ehdr.ident[EI.VERSION] = 1; - ehdr.ident[EI.OSABI] = @intFromEnum(osabi); - ehdr.ident[EI.ABIVERSION] = 0; - @memset(ehdr.ident[EI.PAD..], 0); - ehdr.type = @"type"; - ehdr.machine = machine; - ehdr.version = 1; - ehdr.entry = 0; - ehdr.phoff = 0; - ehdr.shoff = 0; - ehdr.flags = 0; - ehdr.ehsize = @sizeOf(ElfN.Ehdr); - ehdr.phentsize = @sizeOf(ElfN.Phdr); - ehdr.phnum = @min(phnum, std.elf.PN_XNUM); - ehdr.shentsize = @sizeOf(ElfN.Shdr); - ehdr.shnum = 1; - ehdr.shstrndx = 0; - if (target_endian != native_endian) std.mem.byteSwapAllFields(ElfN.Ehdr, ehdr); + { + const ehdr: *ElfN.Ehdr = @ptrCast(@alignCast(ehdr_ni.slice(&elf.mf))); + const EI = std.elf.EI; + @memcpy(ehdr.ident[0..std.elf.MAGIC.len], std.elf.MAGIC); + ehdr.ident[EI.CLASS] = @intFromEnum(class); + ehdr.ident[EI.DATA] = @intFromEnum(data); + ehdr.ident[EI.VERSION] = 1; + ehdr.ident[EI.OSABI] = @intFromEnum(osabi); + ehdr.ident[EI.ABIVERSION] = 0; + @memset(ehdr.ident[EI.PAD..], 0); + ehdr.type = @"type"; + ehdr.machine = machine; + ehdr.version = 1; + ehdr.entry = 0; + ehdr.phoff = 0; + ehdr.shoff = 0; + ehdr.flags = 0; + ehdr.ehsize = @sizeOf(ElfN.Ehdr); + ehdr.phentsize = @sizeOf(ElfN.Phdr); + ehdr.phnum = @min(phnum, std.elf.PN_XNUM); + ehdr.shentsize = @sizeOf(ElfN.Shdr); + ehdr.shnum = 1; + ehdr.shstrndx = 0; + if (target_endian != native_endian) std.mem.byteSwapAllFields(ElfN.Ehdr, ehdr); + } const phdr_ni = Node.known.phdr; assert(phdr_ni == try elf.mf.addLastChildNode(gpa, seg_rodata_ni, .{ @@ -750,7 +772,10 @@ fn initHeaders( }, .shndx = 
std.elf.SHN_UNDEF, }; - ehdr.shstrndx = ehdr.shnum; + { + const ehdr = @field(elf.ehdrPtr(), @tagName(class)); + ehdr.shstrndx = ehdr.shnum; + } assert(try elf.addSection(seg_rodata_ni, .{ .type = std.elf.SHT_STRTAB, .addralign = elf.mf.flags.block_size, @@ -821,6 +846,24 @@ fn getNode(elf: *Elf, ni: MappedFile.Node.Index) Node { return elf.nodes.get(@intFromEnum(ni)); } +pub fn identClass(elf: *Elf) std.elf.CLASS { + return @enumFromInt(elf.mf.contents[std.elf.EI.CLASS]); +} + +pub fn identData(elf: *Elf) std.elf.DATA { + return @enumFromInt(elf.mf.contents[std.elf.EI.DATA]); +} +fn endianForData(data: std.elf.DATA) std.builtin.Endian { + return switch (data) { + .NONE, _ => unreachable, + .@"2LSB" => .little, + .@"2MSB" => .big, + }; +} +pub fn targetEndian(elf: *Elf) std.builtin.Endian { + return endianForData(elf.identData()); +} + pub const EhdrPtr = union(std.elf.CLASS) { NONE: noreturn, @"32": *std.elf.Elf32.Ehdr, @@ -830,7 +873,7 @@ pub fn ehdrPtr(elf: *Elf) EhdrPtr { const slice = Node.known.ehdr.slice(&elf.mf); return switch (elf.identClass()) { .NONE, _ => unreachable, - inline .@"32", .@"64" => |class| @unionInit( + inline else => |class| @unionInit( EhdrPtr, @tagName(class), @ptrCast(@alignCast(slice)), @@ -841,35 +884,15 @@ pub fn ehdrField( elf: *Elf, comptime field: enum { type, machine }, ) @FieldType(std.elf.Elf32.Ehdr, @tagName(field)) { - const Field = @FieldType(std.elf.Elf32.Ehdr, @tagName(field)); - comptime assert(@FieldType(std.elf.Elf64.Ehdr, @tagName(field)) == Field); return @enumFromInt(std.mem.toNative( - @typeInfo(Field).@"enum".tag_type, + @typeInfo(@FieldType(std.elf.Elf32.Ehdr, @tagName(field))).@"enum".tag_type, @intFromEnum(switch (elf.ehdrPtr()) { inline else => |ehdr| @field(ehdr, @tagName(field)), }), - elf.endian(), + elf.targetEndian(), )); } -pub fn identClass(elf: *Elf) std.elf.CLASS { - return @enumFromInt(elf.mf.contents[std.elf.EI.CLASS]); -} - -pub fn identData(elf: *Elf) std.elf.DATA { - return 
@enumFromInt(elf.mf.contents[std.elf.EI.DATA]); -} -fn endianForData(data: std.elf.DATA) std.builtin.Endian { - return switch (data) { - .NONE, _ => unreachable, - .@"2LSB" => .little, - .@"2MSB" => .big, - }; -} -pub fn endian(elf: *Elf) std.builtin.Endian { - return endianForData(elf.identData()); -} - fn baseAddrForType(@"type": std.elf.ET) u64 { return switch (@"type") { else => 0, @@ -889,7 +912,7 @@ pub fn phdrSlice(elf: *Elf) PhdrSlice { const slice = Node.known.phdr.slice(&elf.mf); return switch (elf.identClass()) { .NONE, _ => unreachable, - inline .@"32", .@"64" => |class| @unionInit( + inline else => |class| @unionInit( PhdrSlice, @tagName(class), @ptrCast(@alignCast(slice)), @@ -906,7 +929,7 @@ pub fn shdrSlice(elf: *Elf) ShdrSlice { const slice = Node.known.shdr.slice(&elf.mf); return switch (elf.identClass()) { .NONE, _ => unreachable, - inline .@"32", .@"64" => |class| @unionInit( + inline else => |class| @unionInit( ShdrSlice, @tagName(class), @ptrCast(@alignCast(slice)), @@ -923,7 +946,7 @@ pub fn symSlice(elf: *Elf) SymSlice { const slice = Symbol.Index.symtab.node(elf).slice(&elf.mf); return switch (elf.identClass()) { .NONE, _ => unreachable, - inline .@"32", .@"64" => |class| @unionInit( + inline else => |class| @unionInit( SymSlice, @tagName(class), @ptrCast(@alignCast(slice)), @@ -942,7 +965,7 @@ pub fn symPtr(elf: *Elf, si: Symbol.Index) SymPtr { }; } -fn addSymbolAssumeCapacity(elf: *Elf) !Symbol.Index { +fn addSymbolAssumeCapacity(elf: *Elf) Symbol.Index { defer elf.symtab.addOneAssumeCapacity().* = .{ .ni = .none, .loc_relocs = .none, @@ -953,30 +976,27 @@ fn addSymbolAssumeCapacity(elf: *Elf) !Symbol.Index { } fn initSymbolAssumeCapacity(elf: *Elf, opts: Symbol.Index.InitOptions) !Symbol.Index { - const si = try elf.addSymbolAssumeCapacity(); + const si = elf.addSymbolAssumeCapacity(); try si.init(elf, opts); return si; } -pub fn globalSymbol( - elf: *Elf, - opts: struct { - name: []const u8, - type: std.elf.STT, - bind: std.elf.STB = 
.GLOBAL, - visibility: std.elf.STV = .DEFAULT, - }, -) !Symbol.Index { +pub fn globalSymbol(elf: *Elf, opts: struct { + name: []const u8, + type: std.elf.STT, + bind: std.elf.STB = .GLOBAL, + visibility: std.elf.STV = .DEFAULT, +}) !Symbol.Index { const gpa = elf.base.comp.gpa; try elf.symtab.ensureUnusedCapacity(gpa, 1); - const sym_gop = try elf.globals.getOrPut(gpa, try elf.string(.strtab, opts.name)); - if (!sym_gop.found_existing) sym_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ + const global_gop = try elf.globals.getOrPut(gpa, try elf.string(.strtab, opts.name)); + if (!global_gop.found_existing) global_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ .name = opts.name, .type = opts.type, .bind = opts.bind, .visibility = opts.visibility, }); - return sym_gop.value_ptr.*; + return global_gop.value_ptr.*; } fn navType( @@ -1008,8 +1028,19 @@ fn navType( }, }; } -pub fn navSymbol(elf: *Elf, zcu: *Zcu, nav_index: InternPool.Nav.Index) !Symbol.Index { +fn navMapIndex(elf: *Elf, zcu: *Zcu, nav_index: InternPool.Nav.Index) !Node.NavMapIndex { const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + const nav = ip.getNav(nav_index); + try elf.symtab.ensureUnusedCapacity(gpa, 1); + const nav_gop = try elf.navs.getOrPut(gpa, nav_index); + if (!nav_gop.found_existing) nav_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ + .name = nav.fqn.toSlice(ip), + .type = navType(ip, nav.status, elf.base.comp.config.any_non_single_threaded), + }); + return @enumFromInt(nav_gop.index); +} +pub fn navSymbol(elf: *Elf, zcu: *Zcu, nav_index: InternPool.Nav.Index) !Symbol.Index { const ip = &zcu.intern_pool; const nav = ip.getNav(nav_index); if (nav.getExtern(ip)) |@"extern"| return elf.globalSymbol(.{ @@ -1027,40 +1058,37 @@ pub fn navSymbol(elf: *Elf, zcu: *Zcu, nav_index: InternPool.Nav.Index) !Symbol. 
.protected => .PROTECTED, }, }); - try elf.symtab.ensureUnusedCapacity(gpa, 1); - const sym_gop = try elf.navs.getOrPut(gpa, nav_index); - if (!sym_gop.found_existing) { - sym_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ - .name = nav.fqn.toSlice(ip), - .type = navType(ip, nav.status, elf.base.comp.config.any_non_single_threaded), - }); - } - return sym_gop.value_ptr.*; + const nmi = try elf.navMapIndex(zcu, nav_index); + return nmi.symbol(elf); } -pub fn uavSymbol(elf: *Elf, uav_val: InternPool.Index) !Symbol.Index { +fn uavMapIndex(elf: *Elf, uav_val: InternPool.Index) !Node.UavMapIndex { const gpa = elf.base.comp.gpa; try elf.symtab.ensureUnusedCapacity(gpa, 1); - const sym_gop = try elf.uavs.getOrPut(gpa, uav_val); - if (!sym_gop.found_existing) - sym_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ .type = .OBJECT }); - return sym_gop.value_ptr.*; + const uav_gop = try elf.uavs.getOrPut(gpa, uav_val); + if (!uav_gop.found_existing) + uav_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ .type = .OBJECT }); + return @enumFromInt(uav_gop.index); +} +pub fn uavSymbol(elf: *Elf, uav_val: InternPool.Index) !Symbol.Index { + const umi = try elf.uavMapIndex(uav_val); + return umi.symbol(elf); } pub fn lazySymbol(elf: *Elf, lazy: link.File.LazySymbol) !Symbol.Index { const gpa = elf.base.comp.gpa; try elf.symtab.ensureUnusedCapacity(gpa, 1); - const sym_gop = try elf.lazy.getPtr(lazy.kind).map.getOrPut(gpa, lazy.ty); - if (!sym_gop.found_existing) { - sym_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ + const lazy_gop = try elf.lazy.getPtr(lazy.kind).map.getOrPut(gpa, lazy.ty); + if (!lazy_gop.found_existing) { + lazy_gop.value_ptr.* = try elf.initSymbolAssumeCapacity(.{ .type = switch (lazy.kind) { .code => .FUNC, .const_data => .OBJECT, }, }); - elf.base.comp.link_lazy_prog_node.increaseEstimatedTotalItems(1); + elf.base.comp.link_synth_prog_node.increaseEstimatedTotalItems(1); } - return sym_gop.value_ptr.*; + return lazy_gop.value_ptr.*; } 
pub fn getNavVAddr( @@ -1088,7 +1116,7 @@ pub fn getVAddr(elf: *Elf, reloc_info: link.File.RelocInfo, target_si: Symbol.In reloc_info.addend, switch (elf.ehdrField(.machine)) { else => unreachable, - .X86_64 => .{ .x86_64 = switch (elf.identClass()) { + .X86_64 => .{ .X86_64 = switch (elf.identClass()) { .NONE, _ => unreachable, .@"32" => .@"32", .@"64" => .@"64", @@ -1107,7 +1135,7 @@ fn addSection(elf: *Elf, segment_ni: MappedFile.Node.Index, opts: struct { entsize: std.elf.Word = 0, }) !Symbol.Index { const gpa = elf.base.comp.gpa; - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); try elf.nodes.ensureUnusedCapacity(gpa, 1); try elf.symtab.ensureUnusedCapacity(gpa, 1); @@ -1127,7 +1155,7 @@ fn addSection(elf: *Elf, segment_ni: MappedFile.Node.Index, opts: struct { .size = opts.size, .moved = true, }); - const si = try elf.addSymbolAssumeCapacity(); + const si = elf.addSymbolAssumeCapacity(); elf.nodes.appendAssumeCapacity(.{ .section = si }); si.get(elf).ni = ni; try si.init(elf, .{ @@ -1160,7 +1188,7 @@ fn addSection(elf: *Elf, segment_ni: MappedFile.Node.Index, opts: struct { fn renameSection(elf: *Elf, si: Symbol.Index, name: []const u8) !void { const strtab_entry = try elf.string(.strtab, name); const shstrtab_entry = try elf.string(.shstrtab, name); - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); switch (elf.shdrSlice()) { inline else => |shdr, class| { const sym = @field(elf.symPtr(si), @tagName(class)); @@ -1173,7 +1201,7 @@ fn renameSection(elf: *Elf, si: Symbol.Index, name: []const u8) !void { } fn linkSections(elf: *Elf, si: Symbol.Index, link_si: Symbol.Index) !void { - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); switch (elf.shdrSlice()) { inline else => |shdr, class| { const sym = @field(elf.symPtr(si), @tagName(class)); @@ -1184,7 +1212,7 @@ fn linkSections(elf: *Elf, si: Symbol.Index, link_si: Symbol.Index) !void { } fn sectionName(elf: *Elf, si: 
Symbol.Index) [:0]const u8 { - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); const name = Symbol.Index.shstrtab.node(elf).slice(&elf.mf)[name: switch (elf.shdrSlice()) { inline else => |shndx, class| { const sym = @field(elf.symPtr(si), @tagName(class)); @@ -1263,7 +1291,8 @@ fn updateNavInner(elf: *Elf, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) }; if (nav_init == .none or !Type.fromInterned(ip.typeOf(nav_init)).hasRuntimeBits(zcu)) return; - const si = try elf.navSymbol(zcu, nav_index); + const nmi = try elf.navMapIndex(zcu, nav_index); + const si = nmi.symbol(elf); const ni = ni: { const sym = si.get(elf); switch (sym.ni) { @@ -1275,7 +1304,7 @@ fn updateNavInner(elf: *Elf, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) .alignment = pt.navAlignment(nav_index).toStdMem(), .moved = true, }); - elf.nodes.appendAssumeCapacity(.{ .nav = nav_index }); + elf.nodes.appendAssumeCapacity(.{ .nav = nmi }); sym.ni = ni; switch (elf.symPtr(si)) { inline else => |sym_ptr, class| sym_ptr.shndx = @@ -1289,28 +1318,24 @@ fn updateNavInner(elf: *Elf, pt: Zcu.PerThread, nav_index: InternPool.Nav.Index) break :ni sym.ni; }; - const size = size: { - var nw: MappedFile.Node.Writer = undefined; - ni.writer(&elf.mf, gpa, &nw); - defer nw.deinit(); - codegen.generateSymbol( - &elf.base, - pt, - zcu.navSrcLoc(nav_index), - .fromInterned(nav_init), - &nw.interface, - .{ .atom_index = @intFromEnum(si) }, - ) catch |err| switch (err) { - error.WriteFailed => return error.OutOfMemory, - else => |e| return e, - }; - break :size nw.interface.end; + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&elf.mf, gpa, &nw); + defer nw.deinit(); + codegen.generateSymbol( + &elf.base, + pt, + zcu.navSrcLoc(nav_index), + .fromInterned(nav_init), + &nw.interface, + .{ .atom_index = @intFromEnum(si) }, + ) catch |err| switch (err) { + error.WriteFailed => return error.OutOfMemory, + else => |e| return e, }; - - const target_endian = elf.endian(); + 
const target_endian = elf.targetEndian(); switch (elf.symPtr(si)) { inline else => |sym| sym.size = - std.mem.nativeTo(@TypeOf(sym.size), @intCast(size), target_endian), + std.mem.nativeTo(@TypeOf(sym.size), @intCast(nw.interface.end), target_endian), } si.applyLocationRelocs(elf); } @@ -1326,7 +1351,7 @@ pub fn lowerUav( const gpa = zcu.gpa; try elf.pending_uavs.ensureUnusedCapacity(gpa, 1); - const si = elf.uavSymbol(uav_val) catch |err| switch (err) { + const umi = elf.uavMapIndex(uav_val) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, else => |e| return .{ .fail = try Zcu.ErrorMsg.create( gpa, @@ -1335,11 +1360,12 @@ pub fn lowerUav( .{@errorName(e)}, ) }, }; + const si = umi.symbol(elf); if (switch (si.get(elf).ni) { .none => true, else => |ni| uav_align.toStdMem().order(ni.alignment(&elf.mf)).compare(.gt), }) { - const gop = elf.pending_uavs.getOrPutAssumeCapacity(uav_val); + const gop = elf.pending_uavs.getOrPutAssumeCapacity(umi); if (gop.found_existing) { gop.value_ptr.alignment = gop.value_ptr.alignment.max(uav_align); } else { @@ -1347,7 +1373,7 @@ pub fn lowerUav( .alignment = uav_align, .src_loc = src_loc, }; - elf.base.comp.link_uav_prog_node.increaseEstimatedTotalItems(1); + elf.base.comp.link_const_prog_node.increaseEstimatedTotalItems(1); } } return .{ .sym_index = @intFromEnum(si) }; @@ -1384,7 +1410,8 @@ fn updateFuncInner( const func = zcu.funcInfo(func_index); const nav = ip.getNav(func.owner_nav); - const si = try elf.navSymbol(zcu, func.owner_nav); + const nmi = try elf.navMapIndex(zcu, func.owner_nav); + const si = nmi.symbol(elf); log.debug("updateFunc({f}) = {d}", .{ nav.fqn.fmt(ip), si }); const ni = ni: { const sym = si.get(elf); @@ -1406,7 +1433,7 @@ fn updateFuncInner( }.toStdMem(), .moved = true, }); - elf.nodes.appendAssumeCapacity(.{ .nav = func.owner_nav }); + elf.nodes.appendAssumeCapacity(.{ .nav = nmi }); sym.ni = ni; switch (elf.symPtr(si)) { inline else => |sym_ptr, class| sym_ptr.shndx = @@ -1420,37 
+1447,35 @@ fn updateFuncInner( break :ni sym.ni; }; - const size = size: { - var nw: MappedFile.Node.Writer = undefined; - ni.writer(&elf.mf, gpa, &nw); - defer nw.deinit(); - codegen.emitFunction( - &elf.base, - pt, - zcu.navSrcLoc(func.owner_nav), - func_index, - @intFromEnum(si), - mir, - &nw.interface, - .none, - ) catch |err| switch (err) { - error.WriteFailed => return nw.err.?, - else => |e| return e, - }; - break :size nw.interface.end; + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&elf.mf, gpa, &nw); + defer nw.deinit(); + codegen.emitFunction( + &elf.base, + pt, + zcu.navSrcLoc(func.owner_nav), + func_index, + @intFromEnum(si), + mir, + &nw.interface, + .none, + ) catch |err| switch (err) { + error.WriteFailed => return nw.err.?, + else => |e| return e, }; - - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); switch (elf.symPtr(si)) { inline else => |sym| sym.size = - std.mem.nativeTo(@TypeOf(sym.size), @intCast(size), target_endian), + std.mem.nativeTo(@TypeOf(sym.size), @intCast(nw.interface.end), target_endian), } si.applyLocationRelocs(elf); } pub fn updateErrorData(elf: *Elf, pt: Zcu.PerThread) !void { - const si = elf.lazy.getPtr(.const_data).map.get(.anyerror_type) orelse return; - elf.flushLazy(pt, .{ .kind = .const_data, .ty = .anyerror_type }, si) catch |err| switch (err) { + elf.flushLazy(pt, .{ + .kind = .const_data, + .index = @intCast(elf.lazy.getPtr(.const_data).map.getIndex(.anyerror_type) orelse return), + }) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, error.CodegenFail => return error.LinkFailure, else => |e| return elf.base.comp.link_diags.fail("updateErrorData failed {t}", .{e}), @@ -1472,14 +1497,13 @@ pub fn idle(elf: *Elf, tid: Zcu.PerThread.Id) !bool { const comp = elf.base.comp; task: { while (elf.pending_uavs.pop()) |pending_uav| { - const sub_prog_node = - elf.idleProgNode( - tid, - comp.link_uav_prog_node, - .{ .uav = pending_uav.key }, - ); + const 
sub_prog_node = elf.idleProgNode( + tid, + comp.link_const_prog_node, + .{ .uav = pending_uav.key }, + ); defer sub_prog_node.end(); - break :task elf.flushUav( + elf.flushUav( .{ .zcu = elf.base.comp.zcu.?, .tid = tid }, pending_uav.key, pending_uav.value.alignment, @@ -1491,37 +1515,34 @@ pub fn idle(elf: *Elf, tid: Zcu.PerThread.Id) !bool { .{e}, ), }; + break :task; } var lazy_it = elf.lazy.iterator(); - while (lazy_it.next()) |lazy| for ( - lazy.value.map.keys()[lazy.value.pending_index..], - lazy.value.map.values()[lazy.value.pending_index..], - ) |ty, si| { - lazy.value.pending_index += 1; + while (lazy_it.next()) |lazy| if (lazy.value.pending_index < lazy.value.map.count()) { const pt: Zcu.PerThread = .{ .zcu = elf.base.comp.zcu.?, .tid = tid }; - const kind = switch (lazy.key) { + const lmr: Node.LazyMapRef = .{ .kind = lazy.key, .index = lazy.value.pending_index }; + lazy.value.pending_index += 1; + const kind = switch (lmr.kind) { .code => "code", .const_data => "data", }; var name: [std.Progress.Node.max_name_len]u8 = undefined; - const sub_prog_node = comp.link_lazy_prog_node.start( + const sub_prog_node = comp.link_synth_prog_node.start( std.fmt.bufPrint(&name, "lazy {s} for {f}", .{ kind, - Type.fromInterned(ty).fmt(pt), + Type.fromInterned(lmr.lazySymbol(elf).ty).fmt(pt), }) catch &name, 0, ); defer sub_prog_node.end(); - break :task elf.flushLazy(pt, .{ - .kind = lazy.key, - .ty = ty, - }, si) catch |err| switch (err) { + elf.flushLazy(pt, lmr) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, else => |e| return elf.base.comp.link_diags.fail( "linker failed to lower lazy {s}: {t}", .{ kind, e }, ), }; + break :task; }; while (elf.mf.updates.pop()) |ni| { const clean_moved = ni.cleanMoved(&elf.mf); @@ -1551,12 +1572,12 @@ fn idleProgNode( return prog_node.start(name: switch (node) { else => |tag| @tagName(tag), .section => |si| elf.sectionName(si), - .nav => |nav| { + .nav => |nmi| { const ip = 
&elf.base.comp.zcu.?.intern_pool; - break :name ip.getNav(nav).fqn.toSlice(ip); + break :name ip.getNav(nmi.navIndex(elf)).fqn.toSlice(ip); }, - .uav => |uav| std.fmt.bufPrint(&name, "{f}", .{ - Value.fromInterned(uav).fmtValue(.{ .zcu = elf.base.comp.zcu.?, .tid = tid }), + .uav => |umi| std.fmt.bufPrint(&name, "{f}", .{ + Value.fromInterned(umi.uavValue(elf)).fmtValue(.{ .zcu = elf.base.comp.zcu.?, .tid = tid }), }) catch &name, }, 0); } @@ -1564,14 +1585,15 @@ fn idleProgNode( fn flushUav( elf: *Elf, pt: Zcu.PerThread, - uav_val: InternPool.Index, + umi: Node.UavMapIndex, uav_align: InternPool.Alignment, src_loc: Zcu.LazySrcLoc, ) !void { const zcu = pt.zcu; const gpa = zcu.gpa; - const si = try elf.uavSymbol(uav_val); + const uav_val = umi.uavValue(elf); + const si = umi.symbol(elf); const ni = ni: { const sym = si.get(elf); switch (sym.ni) { @@ -1581,7 +1603,7 @@ fn flushUav( .alignment = uav_align.toStdMem(), .moved = true, }); - elf.nodes.appendAssumeCapacity(.{ .uav = uav_val }); + elf.nodes.appendAssumeCapacity(.{ .uav = umi }); sym.ni = ni; switch (elf.symPtr(si)) { inline else => |sym_ptr, class| sym_ptr.shndx = @@ -1598,36 +1620,34 @@ fn flushUav( break :ni sym.ni; }; - const size = size: { - var nw: MappedFile.Node.Writer = undefined; - ni.writer(&elf.mf, gpa, &nw); - defer nw.deinit(); - codegen.generateSymbol( - &elf.base, - pt, - src_loc, - .fromInterned(uav_val), - &nw.interface, - .{ .atom_index = @intFromEnum(si) }, - ) catch |err| switch (err) { - error.WriteFailed => return error.OutOfMemory, - else => |e| return e, - }; - break :size nw.interface.end; + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&elf.mf, gpa, &nw); + defer nw.deinit(); + codegen.generateSymbol( + &elf.base, + pt, + src_loc, + .fromInterned(uav_val), + &nw.interface, + .{ .atom_index = @intFromEnum(si) }, + ) catch |err| switch (err) { + error.WriteFailed => return error.OutOfMemory, + else => |e| return e, }; - - const target_endian = elf.endian(); + const 
target_endian = elf.targetEndian(); switch (elf.symPtr(si)) { inline else => |sym| sym.size = - std.mem.nativeTo(@TypeOf(sym.size), @intCast(size), target_endian), + std.mem.nativeTo(@TypeOf(sym.size), @intCast(nw.interface.end), target_endian), } si.applyLocationRelocs(elf); } -fn flushLazy(elf: *Elf, pt: Zcu.PerThread, lazy: link.File.LazySymbol, si: Symbol.Index) !void { +fn flushLazy(elf: *Elf, pt: Zcu.PerThread, lmr: Node.LazyMapRef) !void { const zcu = pt.zcu; const gpa = zcu.gpa; + const lazy = lmr.lazySymbol(elf); + const si = lmr.symbol(elf); const ni = ni: { const sym = si.get(elf); switch (sym.ni) { @@ -1639,8 +1659,8 @@ fn flushLazy(elf: *Elf, pt: Zcu.PerThread, lazy: link.File.LazySymbol, si: Symbo }; const ni = try elf.mf.addLastChildNode(gpa, sec_si.node(elf), .{ .moved = true }); elf.nodes.appendAssumeCapacity(switch (lazy.kind) { - .code => .{ .lazy_code = lazy.ty }, - .const_data => .{ .lazy_const_data = lazy.ty }, + .code => .{ .lazy_code = @enumFromInt(lmr.index) }, + .const_data => .{ .lazy_const_data = @enumFromInt(lmr.index) }, }); sym.ni = ni; switch (elf.symPtr(si)) { @@ -1655,34 +1675,30 @@ fn flushLazy(elf: *Elf, pt: Zcu.PerThread, lazy: link.File.LazySymbol, si: Symbo break :ni sym.ni; }; - const size = size: { - var required_alignment: InternPool.Alignment = .none; - var nw: MappedFile.Node.Writer = undefined; - ni.writer(&elf.mf, gpa, &nw); - defer nw.deinit(); - try codegen.generateLazySymbol( - &elf.base, - pt, - Type.fromInterned(lazy.ty).srcLocOrNull(pt.zcu) orelse .unneeded, - lazy, - &required_alignment, - &nw.interface, - .none, - .{ .atom_index = @intFromEnum(si) }, - ); - break :size nw.interface.end; - }; - - const target_endian = elf.endian(); + var required_alignment: InternPool.Alignment = .none; + var nw: MappedFile.Node.Writer = undefined; + ni.writer(&elf.mf, gpa, &nw); + defer nw.deinit(); + try codegen.generateLazySymbol( + &elf.base, + pt, + Type.fromInterned(lazy.ty).srcLocOrNull(pt.zcu) orelse .unneeded, + lazy, + 
&required_alignment, + &nw.interface, + .none, + .{ .atom_index = @intFromEnum(si) }, + ); + const target_endian = elf.targetEndian(); switch (elf.symPtr(si)) { inline else => |sym| sym.size = - std.mem.nativeTo(@TypeOf(sym.size), @intCast(size), target_endian), + std.mem.nativeTo(@TypeOf(sym.size), @intCast(nw.interface.end), target_endian), } si.applyLocationRelocs(elf); } fn flushMoved(elf: *Elf, ni: MappedFile.Node.Index) !void { - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); const file_offset = ni.fileLocation(&elf.mf, false).offset; const node = elf.getNode(ni); switch (node) { @@ -1738,11 +1754,8 @@ fn flushMoved(elf: *Elf, ni: MappedFile.Node.Index) !void { .nav, .uav, .lazy_code, .lazy_const_data => { const si = switch (node) { else => unreachable, - .nav => |nav| elf.navs.get(nav), - .uav => |uav| elf.uavs.get(uav), - .lazy_code => |ty| elf.lazy.getPtr(.code).map.get(ty), - .lazy_const_data => |ty| elf.lazy.getPtr(.const_data).map.get(ty), - }.?; + inline .nav, .uav, .lazy_code, .lazy_const_data => |mi| mi.symbol(elf), + }; switch (elf.shdrSlice()) { inline else => |shdr, class| { const sym = @field(elf.symPtr(si), @tagName(class)); @@ -1773,7 +1786,7 @@ fn flushMoved(elf: *Elf, ni: MappedFile.Node.Index) !void { } fn flushResized(elf: *Elf, ni: MappedFile.Node.Index) !void { - const target_endian = elf.endian(); + const target_endian = elf.targetEndian(); _, const size = ni.location(&elf.mf).resolve(&elf.mf); const node = elf.getNode(ni); switch (node) { @@ -1957,65 +1970,74 @@ pub fn printNode( indent: usize, ) !void { const node = elf.getNode(ni); - const mf_node = &elf.mf.nodes.items[@intFromEnum(ni)]; - const off, const size = mf_node.location().resolve(&elf.mf); try w.splatByteAll(' ', indent); try w.writeAll(@tagName(node)); switch (node) { else => {}, .section => |si| try w.print("({s})", .{elf.sectionName(si)}), - .nav => |nav_index| { + .nav => |nmi| { const zcu = elf.base.comp.zcu.?; const ip = 
&zcu.intern_pool; - const nav = ip.getNav(nav_index); + const nav = ip.getNav(nmi.navIndex(elf)); try w.print("({f}, {f})", .{ Type.fromInterned(nav.typeOf(ip)).fmt(.{ .zcu = zcu, .tid = tid }), nav.fqn.fmt(ip), }); }, - .uav => |uav| { + .uav => |umi| { const zcu = elf.base.comp.zcu.?; - const val: Value = .fromInterned(uav); + const val: Value = .fromInterned(umi.uavValue(elf)); try w.print("({f}, {f})", .{ val.typeOf(zcu).fmt(.{ .zcu = zcu, .tid = tid }), val.fmtValue(.{ .zcu = zcu, .tid = tid }), }); }, + inline .lazy_code, .lazy_const_data => |lmi| try w.print("({f})", .{ + Type.fromInterned(lmi.lazySymbol(elf).ty).fmt(.{ + .zcu = elf.base.comp.zcu.?, + .tid = tid, + }), + }), } - try w.print(" index={d} offset=0x{x} size=0x{x} align=0x{x}{s}{s}{s}{s}\n", .{ - @intFromEnum(ni), - off, - size, - mf_node.flags.alignment.toByteUnits(), - if (mf_node.flags.fixed) " fixed" else "", - if (mf_node.flags.moved) " moved" else "", - if (mf_node.flags.resized) " resized" else "", - if (mf_node.flags.has_content) " has_content" else "", - }); - var child_ni = mf_node.first; - switch (child_ni) { - .none => { - const file_loc = ni.fileLocation(&elf.mf, false); - if (file_loc.size == 0) return; - var address = file_loc.offset; - const line_len = 0x10; - var line_it = std.mem.window( - u8, - elf.mf.contents[@intCast(file_loc.offset)..][0..@intCast(file_loc.size)], - line_len, - line_len, - ); - while (line_it.next()) |line_bytes| : (address += line_len) { - try w.splatByteAll(' ', indent + 1); - try w.print("{x:0>8}", .{address}); - for (line_bytes) |byte| try w.print(" {x:0>2}", .{byte}); - try w.writeByte('\n'); - } - }, - else => while (child_ni != .none) { - try elf.printNode(tid, w, child_ni, indent + 1); - child_ni = elf.mf.nodes.items[@intFromEnum(child_ni)].next; - }, + { + const mf_node = &elf.mf.nodes.items[@intFromEnum(ni)]; + const off, const size = mf_node.location().resolve(&elf.mf); + try w.print(" index={d} offset=0x{x} size=0x{x} align=0x{x}{s}{s}{s}{s}\n", 
.{ + @intFromEnum(ni), + off, + size, + mf_node.flags.alignment.toByteUnits(), + if (mf_node.flags.fixed) " fixed" else "", + if (mf_node.flags.moved) " moved" else "", + if (mf_node.flags.resized) " resized" else "", + if (mf_node.flags.has_content) " has_content" else "", + }); + } + var leaf = true; + var child_it = ni.children(&elf.mf); + while (child_it.next()) |child_ni| { + leaf = false; + try elf.printNode(tid, w, child_ni, indent + 1); + } + if (leaf) { + const file_loc = ni.fileLocation(&elf.mf, false); + if (file_loc.size == 0) return; + var address = file_loc.offset; + const line_len = 0x10; + var line_it = std.mem.window( + u8, + elf.mf.contents[@intCast(file_loc.offset)..][0..@intCast(file_loc.size)], + line_len, + line_len, + ); + while (line_it.next()) |line_bytes| : (address += line_len) { + try w.splatByteAll(' ', indent + 1); + try w.print("{x:0>8} ", .{address}); + for (line_bytes) |byte| try w.print("{x:0>2} ", .{byte}); + try w.splatByteAll(' ', 3 * (line_len - line_bytes.len) + 1); + for (line_bytes) |byte| try w.writeByte(if (std.ascii.isPrint(byte)) byte else '.'); + try w.writeByte('\n'); + } } } diff --git a/src/link/MappedFile.zig b/src/link/MappedFile.zig index cd100f149698..c44f1f68fbce 100644 --- a/src/link/MappedFile.zig +++ b/src/link/MappedFile.zig @@ -34,17 +34,28 @@ pub fn init(file: std.fs.File, gpa: std.mem.Allocator) !MappedFile { .writers = .{}, }; errdefer mf.deinit(gpa); - const size: u64, const blksize = if (is_windows) - .{ try windows.GetFileSizeEx(file.handle), 1 } - else stat: { + const size: u64, const block_size = stat: { + if (is_windows) { + var sbi: windows.SYSTEM_BASIC_INFORMATION = undefined; + break :stat .{ + try windows.GetFileSizeEx(file.handle), + switch (windows.ntdll.NtQuerySystemInformation( + .SystemBasicInformation, + &sbi, + @sizeOf(windows.SYSTEM_BASIC_INFORMATION), + null, + )) { + .SUCCESS => @max(sbi.PageSize, sbi.AllocationGranularity), + else => std.heap.page_size_max, + }, + }; + } const stat = 
try std.posix.fstat(mf.file.handle); if (!std.posix.S.ISREG(stat.mode)) return error.PathAlreadyExists; - break :stat .{ @bitCast(stat.size), stat.blksize }; + break :stat .{ @bitCast(stat.size), @max(std.heap.pageSize(), stat.blksize) }; }; mf.flags = .{ - .block_size = .fromByteUnits( - std.math.ceilPowerOfTwoAssert(usize, @max(std.heap.pageSize(), blksize)), - ), + .block_size = .fromByteUnits(std.math.ceilPowerOfTwoAssert(usize, block_size)), .copy_file_range_unsupported = false, .fallocate_insert_range_unsupported = false, .fallocate_punch_hole_unsupported = false, @@ -90,9 +101,11 @@ pub const Node = extern struct { resized: bool, /// Whether this node might contain non-zero bytes. has_content: bool, + /// Whether a moved event on this node bubbles down to children. + bubbles_moved: bool, unused: @Type(.{ .int = .{ .signedness = .unsigned, - .bits = 32 - @bitSizeOf(std.mem.Alignment) - 5, + .bits = 32 - @bitSizeOf(std.mem.Alignment) - 6, } }) = 0, }; @@ -136,6 +149,25 @@ pub const Node = extern struct { return &mf.nodes.items[@intFromEnum(ni)]; } + pub fn parent(ni: Node.Index, mf: *const MappedFile) Node.Index { + return ni.get(mf).parent; + } + + pub const ChildIterator = struct { + mf: *const MappedFile, + ni: Node.Index, + + pub fn next(it: *ChildIterator) ?Node.Index { + const ni = it.ni; + if (ni == .none) return null; + it.ni = ni.get(it.mf).next; + return ni; + } + }; + pub fn children(ni: Node.Index, mf: *const MappedFile) ChildIterator { + return .{ .mf = mf, .ni = ni.get(mf).first }; + } + pub fn childrenMoved(ni: Node.Index, gpa: std.mem.Allocator, mf: *MappedFile) !void { var child_ni = ni.get(mf).last; while (child_ni != .none) { @@ -147,9 +179,10 @@ pub const Node = extern struct { pub fn hasMoved(ni: Node.Index, mf: *const MappedFile) bool { var parent_ni = ni; while (parent_ni != Node.Index.root) { - const parent = parent_ni.get(mf); - if (parent.flags.moved) return true; - parent_ni = parent.parent; + const parent_node = parent_ni.get(mf); + 
if (!parent_node.flags.bubbles_moved) break; + if (parent_node.flags.moved) return true; + parent_ni = parent_node.parent; } return false; } @@ -163,12 +196,7 @@ pub const Node = extern struct { return node_moved.*; } fn movedAssumeCapacity(ni: Node.Index, mf: *MappedFile) void { - var parent_ni = ni; - while (parent_ni != Node.Index.root) { - const parent_node = parent_ni.get(mf); - if (parent_node.flags.moved) return; - parent_ni = parent_node.parent; - } + if (ni.hasMoved(mf)) return; const node = ni.get(mf); node.flags.moved = true; if (node.flags.resized) return; @@ -242,10 +270,10 @@ pub const Node = extern struct { var offset, const size = ni.location(mf).resolve(mf); var parent_ni = ni; while (true) { - const parent = parent_ni.get(mf); - if (set_has_content) parent.flags.has_content = true; + const parent_node = parent_ni.get(mf); + if (set_has_content) parent_node.flags.has_content = true; if (parent_ni == .none) break; - parent_ni = parent.parent; + parent_ni = parent_node.parent; offset += parent_ni.location(mf).resolve(mf)[0]; } return .{ .offset = offset, .size = size }; @@ -449,6 +477,7 @@ fn addNode(mf: *MappedFile, gpa: std.mem.Allocator, opts: struct { .moved = true, .resized = true, .has_content = false, + .bubbles_moved = opts.add_node.bubbles_moved, }, .location_payload = location_payload, }; @@ -471,6 +500,7 @@ pub const AddNodeOptions = struct { fixed: bool = false, moved: bool = false, resized: bool = false, + bubbles_moved: bool = true, }; pub fn addOnlyChildNode( diff --git a/src/target.zig b/src/target.zig index 6f139e785d7a..e67cf901f28b 100644 --- a/src/target.zig +++ b/src/target.zig @@ -233,7 +233,7 @@ pub fn hasLldSupport(ofmt: std.Target.ObjectFormat) bool { pub fn hasNewLinkerSupport(ofmt: std.Target.ObjectFormat, backend: std.builtin.CompilerBackend) bool { return switch (ofmt) { - .elf => switch (backend) { + .elf, .coff => switch (backend) { .stage2_x86_64 => true, else => false, }, diff --git a/test/behavior/cast.zig 
b/test/behavior/cast.zig index a04e94451e13..2155c6eb27b8 100644 --- a/test/behavior/cast.zig +++ b/test/behavior/cast.zig @@ -1650,7 +1650,6 @@ test "coerce between pointers of compatible differently-named floats" { if (builtin.zig_backend == .stage2_c and builtin.os.tag == .windows and !builtin.link_libc) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_llvm and builtin.os.tag == .windows) { @@ -2883,7 +2882,6 @@ test "@intFromFloat vector boundary cases" { if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; const S = struct { fn case(comptime I: type, unshifted_inputs: [2]f32, expected: [2]I) !void { diff --git a/test/behavior/export_keyword.zig b/test/behavior/export_keyword.zig index f6b5126eb200..fa69c74747b0 100644 --- a/test/behavior/export_keyword.zig +++ b/test/behavior/export_keyword.zig @@ -43,7 +43,6 @@ export fn testPackedStuff(a: *const PackedStruct, b: *const PackedUnion) void { } test "export function alias" { - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; _ = struct { diff --git a/test/behavior/extern.zig b/test/behavior/extern.zig index 7541a1957dd4..4de685894687 100644 --- a/test/behavior/extern.zig +++ b/test/behavior/extern.zig @@ -16,7 +16,6 @@ export var a_mystery_symbol: i32 = 1234; test "function extern 
symbol" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; const a = @extern(*const fn () callconv(.c) i32, .{ .name = "a_mystery_function" }); @@ -29,7 +28,6 @@ export fn a_mystery_function() i32 { test "function extern symbol matches extern decl" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; const S = struct { diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index a36d157a82d3..78d858774ffd 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -158,7 +158,6 @@ test "cmp f80/c_longdouble" { if (builtin.zig_backend == .stage2_llvm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testCmp(f80); try comptime testCmp(f80); @@ -283,7 +282,6 @@ test "vector cmp f80/c_longdouble" { if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testCmpVector(f80); try comptime testCmpVector(f80); @@ -396,7 +394,6 @@ test "@sqrt f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == 
.stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; if (builtin.os.tag == .freebsd) { // TODO https://github.com/ziglang/zig/issues/10875 @@ -526,7 +523,6 @@ test "@sin f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testSin(f80); comptime try testSin(f80); @@ -596,7 +592,6 @@ test "@cos f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testCos(f80); try comptime testCos(f80); @@ -666,7 +661,6 @@ test "@tan f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testTan(f80); try comptime testTan(f80); @@ -736,7 +730,6 @@ test "@exp f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testExp(f80); try comptime testExp(f80); @@ -810,7 +803,6 @@ test "@exp2 f80/f128/c_longdouble" { if (builtin.zig_backend == 
.stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testExp2(f80); try comptime testExp2(f80); @@ -879,7 +871,6 @@ test "@log f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testLog(f80); try comptime testLog(f80); @@ -946,7 +937,6 @@ test "@log2 f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testLog2(f80); try comptime testLog2(f80); @@ -1019,7 +1009,6 @@ test "@log10 f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testLog10(f80); try comptime testLog10(f80); @@ -1086,7 +1075,6 @@ test "@abs f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try 
testFabs(f80); try comptime testFabs(f80); @@ -1204,7 +1192,6 @@ test "@floor f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -1295,7 +1282,6 @@ test "@ceil f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -1388,7 +1374,6 @@ test "@trunc f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; if (builtin.zig_backend == .stage2_llvm and builtin.os.tag == .windows) { // https://github.com/ziglang/zig/issues/12602 @@ -1485,7 +1470,6 @@ test "neg f80/f128/c_longdouble" { if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == 
.stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; try testNeg(f80); try comptime testNeg(f80); @@ -1741,7 +1725,6 @@ test "comptime calls are only memoized when float arguments are bit-for-bit equa test "result location forwarded through unary float builtins" { if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; diff --git a/test/behavior/import_c_keywords.zig b/test/behavior/import_c_keywords.zig index 6bea049f85f8..3e17e30b37a7 100644 --- a/test/behavior/import_c_keywords.zig +++ b/test/behavior/import_c_keywords.zig @@ -31,7 +31,6 @@ test "import c keywords" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; try std.testing.expect(int == .c_keyword_variable); diff --git a/test/behavior/math.zig b/test/behavior/math.zig index a4b85fb62d5b..867567ea404d 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -1416,7 +1416,6 @@ test "remainder division" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == 
.stage2_c and builtin.cpu.arch.isArm()) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @@ -1425,6 +1424,8 @@ test "remainder division" { return error.SkipZigTest; } + if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff and builtin.abi != .gnu) return error.SkipZigTest; + try comptime remdiv(f16); try comptime remdiv(f32); try comptime remdiv(f64); @@ -1496,9 +1497,10 @@ test "float modulo division using @mod" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; + if (builtin.zig_backend == .stage2_x86_64 and builtin.object_format == .coff and builtin.abi != .gnu) return error.SkipZigTest; + try comptime fmod(f16); try comptime fmod(f32); try comptime fmod(f64); @@ -1686,7 +1688,6 @@ test "signed zeros are represented properly" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; const S = struct { fn doTheTest() !void { @@ -1824,7 +1825,8 @@ test "float divide by zero" { if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf and builtin.target.ofmt != .macho) return error.SkipZigTest; + + if (builtin.zig_backend == 
.stage2_x86_64 and builtin.object_format == .coff and builtin.abi != .gnu) return error.SkipZigTest; const S = struct { fn doTheTest(comptime F: type, zero: F, one: F) !void { diff --git a/test/behavior/multiple_externs_with_conflicting_types.zig b/test/behavior/multiple_externs_with_conflicting_types.zig index 787a18a78f85..7fa72506aaa4 100644 --- a/test/behavior/multiple_externs_with_conflicting_types.zig +++ b/test/behavior/multiple_externs_with_conflicting_types.zig @@ -14,7 +14,6 @@ test "call extern function defined with conflicting type" { if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO if (builtin.zig_backend == .stage2_spirv) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt == .coff) return error.SkipZigTest; if (builtin.zig_backend == .stage2_riscv64) return error.SkipZigTest; @import("conflicting_externs/a.zig").issue529(null); diff --git a/test/behavior/x86_64/binary.zig b/test/behavior/x86_64/binary.zig index 7f5adaf04edf..9a132802e44f 100644 --- a/test/behavior/x86_64/binary.zig +++ b/test/behavior/x86_64/binary.zig @@ -5172,15 +5172,6 @@ test mulSaturate { try test_mul_saturate.testIntVectors(); } -inline fn multiply(comptime Type: type, lhs: Type, rhs: Type) Type { - return lhs * rhs; -} -test multiply { - const test_multiply = binary(multiply, .{}); - try test_multiply.testFloats(); - try test_multiply.testFloatVectors(); -} - inline fn divide(comptime Type: type, lhs: Type, rhs: Type) Type { return lhs / rhs; } @@ -5264,7 +5255,8 @@ inline fn mod(comptime Type: type, lhs: Type, rhs: Type) Type { return @mod(lhs, rhs); } test mod { - if (@import("builtin").object_format == .coff) return error.SkipZigTest; + const builtin = @import("builtin"); + if (builtin.object_format == .coff and builtin.abi != .gnu) return error.SkipZigTest; const test_mod = binary(mod, .{}); try test_mod.testInts(); try 
test_mod.testIntVectors(); diff --git a/test/incremental/add_decl b/test/incremental/add_decl index 39a25e72de9c..9efd274b9eb5 100644 --- a/test/incremental/add_decl +++ b/test/incremental/add_decl @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe //#target=wasm32-wasi-selfhosted diff --git a/test/incremental/add_decl_namespaced b/test/incremental/add_decl_namespaced index 7e2fe5742ce9..1025ae24e1ce 100644 --- a/test/incremental/add_decl_namespaced +++ b/test/incremental/add_decl_namespaced @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe //#target=wasm32-wasi-selfhosted diff --git a/test/incremental/analysis_error_and_syntax_error b/test/incremental/analysis_error_and_syntax_error index 05d899a22548..43ba7480526a 100644 --- a/test/incremental/analysis_error_and_syntax_error +++ b/test/incremental/analysis_error_and_syntax_error @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/bad_import b/test/incremental/bad_import index 4e78b7074ad9..9b6be8b1767c 100644 --- a/test/incremental/bad_import +++ b/test/incremental/bad_import @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_embed_file b/test/incremental/change_embed_file index 7c23b120f118..85d861ab9319 100644 --- a/test/incremental/change_embed_file +++ b/test/incremental/change_embed_file @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_enum_tag_type b/test/incremental/change_enum_tag_type index 
1691764fbcc9..906f91027155 100644 --- a/test/incremental/change_enum_tag_type +++ b/test/incremental/change_enum_tag_type @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_exports b/test/incremental/change_exports index e492930031e7..b0850626d6ed 100644 --- a/test/incremental/change_exports +++ b/test/incremental/change_exports @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe diff --git a/test/incremental/change_fn_type b/test/incremental/change_fn_type index 24392b25f750..df788684cdc0 100644 --- a/test/incremental/change_fn_type +++ b/test/incremental/change_fn_type @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #update=initial version diff --git a/test/incremental/change_generic_line_number b/test/incremental/change_generic_line_number index c9eb2be929c0..f1920c67e6a4 100644 --- a/test/incremental/change_generic_line_number +++ b/test/incremental/change_generic_line_number @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=wasm32-wasi-selfhosted #update=initial version #file=main.zig diff --git a/test/incremental/change_line_number b/test/incremental/change_line_number index 0754d39182dc..5c809b8fa99b 100644 --- a/test/incremental/change_line_number +++ b/test/incremental/change_line_number @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=wasm32-wasi-selfhosted #update=initial version #file=main.zig diff --git a/test/incremental/change_module b/test/incremental/change_module index 7a198721eee0..6275536ad4a8 100644 --- a/test/incremental/change_module +++ b/test/incremental/change_module @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted 
+#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_panic_handler b/test/incremental/change_panic_handler index 699134100eec..070384887a21 100644 --- a/test/incremental/change_panic_handler +++ b/test/incremental/change_panic_handler @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #update=initial version diff --git a/test/incremental/change_panic_handler_explicit b/test/incremental/change_panic_handler_explicit index 2d068d593eeb..774b18bbfd2c 100644 --- a/test/incremental/change_panic_handler_explicit +++ b/test/incremental/change_panic_handler_explicit @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #update=initial version diff --git a/test/incremental/change_shift_op b/test/incremental/change_shift_op index ccb904581df9..41b5d1926644 100644 --- a/test/incremental/change_shift_op +++ b/test/incremental/change_shift_op @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_struct_same_fields b/test/incremental/change_struct_same_fields index f3bfbbdd6922..7af116132627 100644 --- a/test/incremental/change_struct_same_fields +++ b/test/incremental/change_struct_same_fields @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_zon_file b/test/incremental/change_zon_file index 247f78828e64..62f73dd3bf79 100644 --- a/test/incremental/change_zon_file +++ b/test/incremental/change_zon_file @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe 
#target=x86_64-windows-cbe //#target=wasm32-wasi-selfhosted diff --git a/test/incremental/change_zon_file_no_result_type b/test/incremental/change_zon_file_no_result_type index 231558e3e9b2..498543e4f159 100644 --- a/test/incremental/change_zon_file_no_result_type +++ b/test/incremental/change_zon_file_no_result_type @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe //#target=wasm32-wasi-selfhosted diff --git a/test/incremental/compile_error_then_log b/test/incremental/compile_error_then_log index 5a6340f9eb15..9bd306fbf75f 100644 --- a/test/incremental/compile_error_then_log +++ b/test/incremental/compile_error_then_log @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/compile_log b/test/incremental/compile_log index de4152456307..697bb26569a7 100644 --- a/test/incremental/compile_log +++ b/test/incremental/compile_log @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/delete_comptime_decls b/test/incremental/delete_comptime_decls index b5ccd438a230..c8b68e31ed10 100644 --- a/test/incremental/delete_comptime_decls +++ b/test/incremental/delete_comptime_decls @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/dependency_on_type_of_inferred_global b/test/incremental/dependency_on_type_of_inferred_global index 5869fe9bb227..9d5ab28034d3 100644 --- a/test/incremental/dependency_on_type_of_inferred_global +++ b/test/incremental/dependency_on_type_of_inferred_global @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted 
#target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/fix_astgen_failure b/test/incremental/fix_astgen_failure index 9c427c7a96d5..8b1b3adbf782 100644 --- a/test/incremental/fix_astgen_failure +++ b/test/incremental/fix_astgen_failure @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/function_becomes_inline b/test/incremental/function_becomes_inline index 8f36a31b6958..240d7a54af49 100644 --- a/test/incremental/function_becomes_inline +++ b/test/incremental/function_becomes_inline @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #update=non-inline version diff --git a/test/incremental/hello b/test/incremental/hello index c30cd50c6fdc..dc6f02177fc4 100644 --- a/test/incremental/hello +++ b/test/incremental/hello @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/make_decl_pub b/test/incremental/make_decl_pub index 139593b2b044..b25b11716092 100644 --- a/test/incremental/make_decl_pub +++ b/test/incremental/make_decl_pub @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/modify_inline_fn b/test/incremental/modify_inline_fn index ef31df2d5eb2..d485d8ffd5ea 100644 --- a/test/incremental/modify_inline_fn +++ b/test/incremental/modify_inline_fn @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/move_src b/test/incremental/move_src index 
c2ff12761fd3..4f43e8ea6ac5 100644 --- a/test/incremental/move_src +++ b/test/incremental/move_src @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/no_change_preserves_tag_names b/test/incremental/no_change_preserves_tag_names index f7386db2a482..623496119d13 100644 --- a/test/incremental/no_change_preserves_tag_names +++ b/test/incremental/no_change_preserves_tag_names @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe //#target=wasm32-wasi-selfhosted diff --git a/test/incremental/recursive_function_becomes_non_recursive b/test/incremental/recursive_function_becomes_non_recursive index 9bba6bc038ac..a5a03749b8eb 100644 --- a/test/incremental/recursive_function_becomes_non_recursive +++ b/test/incremental/recursive_function_becomes_non_recursive @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/remove_enum_field b/test/incremental/remove_enum_field index 7623922d3dff..02daf2a0fb0e 100644 --- a/test/incremental/remove_enum_field +++ b/test/incremental/remove_enum_field @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/remove_invalid_union_backing_enum b/test/incremental/remove_invalid_union_backing_enum index 1f7ee69b144e..84abedcf7b41 100644 --- a/test/incremental/remove_invalid_union_backing_enum +++ b/test/incremental/remove_invalid_union_backing_enum @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git 
a/test/incremental/temporary_parse_error b/test/incremental/temporary_parse_error index 0933546d337a..956ed6122570 100644 --- a/test/incremental/temporary_parse_error +++ b/test/incremental/temporary_parse_error @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/type_becomes_comptime_only b/test/incremental/type_becomes_comptime_only index 3bcae1cd2112..8e712042f586 100644 --- a/test/incremental/type_becomes_comptime_only +++ b/test/incremental/type_becomes_comptime_only @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted diff --git a/test/incremental/unreferenced_error b/test/incremental/unreferenced_error index 51e078a82a72..505fb3d5f414 100644 --- a/test/incremental/unreferenced_error +++ b/test/incremental/unreferenced_error @@ -1,4 +1,5 @@ #target=x86_64-linux-selfhosted +#target=x86_64-windows-selfhosted #target=x86_64-linux-cbe #target=x86_64-windows-cbe #target=wasm32-wasi-selfhosted