-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bitshifting gives inconsistent result with different release modes #3980
Comments
I'm wondering if the u6 is causing the problem, could you try to use |
@FireFox317 Already tried it! From the issue:
You are correct that it fixes the issue so something is definitely up with the |
Whoops, my bad for not seeing that |
@andrewrk Time for a bug report for the LLVM devs, the loop vectorizer is choking on the non-byte-sized elements. The most obvious problems are:
There may be other problems related to the sequence of ops generated by the vectorizer but I'm not SSE-savvy enough to tell. Zig code: const seq = [_]u6{1} ** 43;
pub export fn _start() callconv(.C) void {
const key: u64 = 0x55aa55aa55aa55aa;
var out: u64 = 0;
for (seq) |x, i| {
out ^= ((key >> x) & 1) << @intCast(u6, i);
}
if (out != 0x7ffffffffff) @breakpoint();
}
const builtin = @import("builtin");
pub fn panic(msg: []const u8, error_return_trace: ?*builtin.StackTrace) noreturn {
while (true) {}
} LLVM IR: ; ModuleID = 'foo'
source_filename = "foo"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@seq = internal unnamed_addr constant [43 x i6] [i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1], align 2
; Function Attrs: nobuiltin nounwind
define void @_start() local_unnamed_addr #0 {
Entry:
%wide.load = load <2 x i6>, <2 x i6>* bitcast ([43 x i6]* @seq to <2 x i6>*), align 2
%0 = zext <2 x i6> %wide.load to <2 x i64>
%1 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %0
%2 = and <2 x i64> %1, <i64 1, i64 1>
%3 = shl <2 x i64> %2, <i64 0, i64 1>
%wide.load.1 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 2) to <2 x i6>*), align 2
%4 = zext <2 x i6> %wide.load.1 to <2 x i64>
%5 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %4
%6 = and <2 x i64> %5, <i64 1, i64 1>
%7 = shl <2 x i64> %6, <i64 2, i64 3>
%8 = xor <2 x i64> %7, %3
%wide.load.2 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 4) to <2 x i6>*), align 2
%9 = zext <2 x i6> %wide.load.2 to <2 x i64>
%10 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %9
%11 = and <2 x i64> %10, <i64 1, i64 1>
%12 = shl <2 x i64> %11, <i64 4, i64 5>
%13 = xor <2 x i64> %12, %8
%wide.load.3 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 6) to <2 x i6>*), align 2
%14 = zext <2 x i6> %wide.load.3 to <2 x i64>
%15 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %14
%16 = and <2 x i64> %15, <i64 1, i64 1>
%17 = shl <2 x i64> %16, <i64 6, i64 7>
%18 = xor <2 x i64> %17, %13
%wide.load.4 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 8) to <2 x i6>*), align 2
%19 = zext <2 x i6> %wide.load.4 to <2 x i64>
%20 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %19
%21 = and <2 x i64> %20, <i64 1, i64 1>
%22 = shl <2 x i64> %21, <i64 8, i64 9>
%23 = xor <2 x i64> %22, %18
%wide.load.5 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 10) to <2 x i6>*), align 2
%24 = zext <2 x i6> %wide.load.5 to <2 x i64>
%25 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %24
%26 = and <2 x i64> %25, <i64 1, i64 1>
%27 = shl <2 x i64> %26, <i64 10, i64 11>
%28 = xor <2 x i64> %27, %23
%wide.load.6 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 12) to <2 x i6>*), align 2
%29 = zext <2 x i6> %wide.load.6 to <2 x i64>
%30 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %29
%31 = and <2 x i64> %30, <i64 1, i64 1>
%32 = shl <2 x i64> %31, <i64 12, i64 13>
%33 = xor <2 x i64> %32, %28
%wide.load.7 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 14) to <2 x i6>*), align 2
%34 = zext <2 x i6> %wide.load.7 to <2 x i64>
%35 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %34
%36 = and <2 x i64> %35, <i64 1, i64 1>
%37 = shl <2 x i64> %36, <i64 14, i64 15>
%38 = xor <2 x i64> %37, %33
%wide.load.8 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 16) to <2 x i6>*), align 2
%39 = zext <2 x i6> %wide.load.8 to <2 x i64>
%40 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %39
%41 = and <2 x i64> %40, <i64 1, i64 1>
%42 = shl <2 x i64> %41, <i64 16, i64 17>
%43 = xor <2 x i64> %42, %38
%wide.load.9 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 18) to <2 x i6>*), align 2
%44 = zext <2 x i6> %wide.load.9 to <2 x i64>
%45 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %44
%46 = and <2 x i64> %45, <i64 1, i64 1>
%47 = shl <2 x i64> %46, <i64 18, i64 19>
%48 = xor <2 x i64> %47, %43
%wide.load.10 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 20) to <2 x i6>*), align 2
%49 = zext <2 x i6> %wide.load.10 to <2 x i64>
%50 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %49
%51 = and <2 x i64> %50, <i64 1, i64 1>
%52 = shl <2 x i64> %51, <i64 20, i64 21>
%53 = xor <2 x i64> %52, %48
%wide.load.11 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 22) to <2 x i6>*), align 2
%54 = zext <2 x i6> %wide.load.11 to <2 x i64>
%55 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %54
%56 = and <2 x i64> %55, <i64 1, i64 1>
%57 = shl <2 x i64> %56, <i64 22, i64 23>
%58 = xor <2 x i64> %57, %53
%wide.load.12 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 24) to <2 x i6>*), align 2
%59 = zext <2 x i6> %wide.load.12 to <2 x i64>
%60 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %59
%61 = and <2 x i64> %60, <i64 1, i64 1>
%62 = shl <2 x i64> %61, <i64 24, i64 25>
%63 = xor <2 x i64> %62, %58
%wide.load.13 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 26) to <2 x i6>*), align 2
%64 = zext <2 x i6> %wide.load.13 to <2 x i64>
%65 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %64
%66 = and <2 x i64> %65, <i64 1, i64 1>
%67 = shl <2 x i64> %66, <i64 26, i64 27>
%68 = xor <2 x i64> %67, %63
%wide.load.14 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 28) to <2 x i6>*), align 2
%69 = zext <2 x i6> %wide.load.14 to <2 x i64>
%70 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %69
%71 = and <2 x i64> %70, <i64 1, i64 1>
%72 = shl <2 x i64> %71, <i64 28, i64 29>
%73 = xor <2 x i64> %72, %68
%wide.load.15 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 30) to <2 x i6>*), align 2
%74 = zext <2 x i6> %wide.load.15 to <2 x i64>
%75 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %74
%76 = and <2 x i64> %75, <i64 1, i64 1>
%77 = shl <2 x i64> %76, <i64 30, i64 31>
%78 = xor <2 x i64> %77, %73
%wide.load.16 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 32) to <2 x i6>*), align 2
%79 = zext <2 x i6> %wide.load.16 to <2 x i64>
%80 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %79
%81 = and <2 x i64> %80, <i64 1, i64 1>
%82 = shl <2 x i64> %81, <i64 32, i64 33>
%83 = xor <2 x i64> %82, %78
%wide.load.17 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 34) to <2 x i6>*), align 2
%84 = zext <2 x i6> %wide.load.17 to <2 x i64>
%85 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %84
%86 = and <2 x i64> %85, <i64 1, i64 1>
%87 = shl <2 x i64> %86, <i64 34, i64 35>
%88 = xor <2 x i64> %87, %83
%wide.load.18 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 36) to <2 x i6>*), align 2
%89 = zext <2 x i6> %wide.load.18 to <2 x i64>
%90 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %89
%91 = and <2 x i64> %90, <i64 1, i64 1>
%92 = shl <2 x i64> %91, <i64 36, i64 37>
%93 = xor <2 x i64> %92, %88
%wide.load.19 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 38) to <2 x i6>*), align 2
%94 = zext <2 x i6> %wide.load.19 to <2 x i64>
%95 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %94
%96 = and <2 x i64> %95, <i64 1, i64 1>
%97 = shl <2 x i64> %96, <i64 38, i64 39>
%98 = xor <2 x i64> %97, %93
%wide.load.20 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 40) to <2 x i6>*), align 2
%99 = zext <2 x i6> %wide.load.20 to <2 x i64>
%100 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %99
%101 = and <2 x i64> %100, <i64 1, i64 1>
%102 = shl <2 x i64> %101, <i64 40, i64 41>
%103 = xor <2 x i64> %102, %98
%rdx.shuf = shufflevector <2 x i64> %103, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
%bin.rdx = xor <2 x i64> %103, %rdx.shuf
%104 = extractelement <2 x i64> %bin.rdx, i32 0
%105 = icmp eq i64 %104, 4398046511103
br i1 %105, label %EndIf, label %Then
Then: ; preds = %Entry
tail call void @llvm.debugtrap()
br label %EndIf
EndIf: ; preds = %Entry, %Then
ret void
}
; Function Attrs: nounwind
declare void @llvm.debugtrap() #1
attributes #0 = { nobuiltin nounwind }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{i32 2, !"Dwarf Version", i32 4}
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "zig 0.5.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !4)
!3 = !DIFile(filename: "foo", directory: "/tmp")
!4 = !{!5}
!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "std.builtin.OutputMode", scope: !6, file: !6, line: 386, baseType: !7, size: 2, align: 8, elements: !8)
!6 = !DIFile(filename: "builtin.zig", directory: "/home/abc/code/zig/build/lib/zig/std")
!7 = !DIBasicType(name: "u2", size: 8, encoding: DW_ATE_unsigned)
!8 = !{!9, !10, !11}
!9 = !DIEnumerator(name: "Exe", value: 0)
!10 = !DIEnumerator(name: "Lib", value: 1)
!11 = !DIEnumerator(name: "Obj", value: 2)
ASM code: .text
.file "foo"
.section .rodata.cst16,"aM",@progbits,16
.p2align 4
.LCPI0_0:
.quad 6172840429334713770
.quad 6172840429334713770
.LCPI0_1:
.quad 1
.quad 1
.text
.globl _start
.p2align 4, 0x90
.type _start,@function
_start:
movzwl seq(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm1
andl $63, %eax
vmovq %rax, %xmm2
vmovdqa .LCPI0_0(%rip), %xmm0
vpsrlq %xmm2, %xmm0, %xmm2
vpsrlq %xmm1, %xmm0, %xmm1
vpblendw $240, %xmm1, %xmm2, %xmm2
vmovdqa .LCPI0_1(%rip), %xmm1
vpand %xmm1, %xmm2, %xmm2
vpsllq $1, %xmm2, %xmm3
movzwl seq+2(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $3, %xmm3, %xmm4
vpsllq $2, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+4(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpor %xmm2, %xmm3, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $5, %xmm3, %xmm4
vpsllq $4, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+6(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpor %xmm2, %xmm3, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $7, %xmm3, %xmm4
vpsllq $6, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+8(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $9, %xmm4, %xmm5
vpsllq $8, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+10(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $11, %xmm4, %xmm5
vpsllq $10, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+12(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpxor %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $13, %xmm3, %xmm4
vpsllq $12, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+14(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $15, %xmm4, %xmm5
vpsllq $14, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+16(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $17, %xmm4, %xmm5
vpsllq $16, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+18(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $19, %xmm4, %xmm5
vpsllq $18, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+20(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpxor %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $21, %xmm3, %xmm4
vpsllq $20, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+22(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $23, %xmm4, %xmm5
vpsllq $22, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+24(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $25, %xmm4, %xmm5
vpsllq $24, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+26(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $27, %xmm4, %xmm5
vpsllq $26, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+28(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $29, %xmm4, %xmm5
vpsllq $28, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+30(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpxor %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $31, %xmm3, %xmm4
vpsllq $30, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+32(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $33, %xmm4, %xmm5
vpsllq $32, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+34(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $35, %xmm4, %xmm5
vpsllq $34, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+36(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $37, %xmm4, %xmm5
vpsllq $36, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+38(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $39, %xmm4, %xmm5
vpsllq $38, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+40(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm0
vpblendw $240, %xmm0, %xmm5, %xmm0
vpand %xmm1, %xmm0, %xmm0
vpsllq $41, %xmm0, %xmm1
vpsllq $40, %xmm0, %xmm0
vpblendw $240, %xmm1, %xmm0, %xmm0
vpxor %xmm0, %xmm3, %xmm0
vpxor %xmm0, %xmm2, %xmm0
vpshufd $78, %xmm0, %xmm1
vpxor %xmm1, %xmm0, %xmm0
vmovq %xmm0, %rax
movabsq $4398046511103, %rcx
cmpq %rcx, %rax
je .LBB0_2
int3
.LBB0_2:
retq
.Lfunc_end0:
.size _start, .Lfunc_end0-_start
.type seq,@object
.section .rodata,"a",@progbits
.p2align 1
seq:
.zero 43,1
.size seq, 43
.section ".note.GNU-stack","",@progbits |
Who wants to be in charge of submitting this bug report upstream and following up? |
Has this been submitted upstream yet? |
Appears fixed as of |
I'm back in Zig land! I discovered that an old issue I had is still present and this time I've got a minimal repro.
The following code produces different output in debug and
--release-fast
/ --release-safe.
Output in debug mode:
Output with
--release-fast:
If you make any of the following modifications, the output is consistent between the two modes:
Change size to anything smaller than 43
Add std.debug.warn("", .{});
after out ^= ...
for
to an inline for
seq
to []u8
, and change the loop body to out ^= (key >> @intCast(u6, x) & 1) << @intCast(u6, i);
I tried to dig deeper but I'm afraid my LLVM and GDB skills aren't up to par for this one.
Versions:
macOS Mojave 10.14.6
The text was updated successfully, but these errors were encountered: