-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bitshifting gives inconsistent result with different release modes #3980
Comments
I'm wondering if the u6 is causing the problem, could you try to use |
@FireFox317 Already tried it! From the issue:
You are correct that it fixes the issue so something is definitely up with the |
Whoops, my bad for not seeing that |
@andrewrk Time for a bug report for the LLVM devs, the loop vectorizer is choking on the non-byte-sized elements. The most obvious problems are:
There may be other problems related to the sequence of ops generated by the vectorizer but I'm not SSE-savvy enough to tell. Zig code: const seq = [_]u6{1} ** 43;
pub export fn _start() callconv(.C) void {
const key: u64 = 0x55aa55aa55aa55aa;
var out: u64 = 0;
for (seq) |x, i| {
out ^= ((key >> x) & 1) << @intCast(u6, i);
}
if (out != 0x7ffffffffff) @breakpoint();
}
const builtin = @import("builtin");
pub fn panic(msg: []const u8, error_return_trace: ?*builtin.StackTrace) noreturn {
while (true) {}
} LLVM IR: ; ModuleID = 'foo'
source_filename = "foo"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
@seq = internal unnamed_addr constant [43 x i6] [i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1, i6 1], align 2
; Function Attrs: nobuiltin nounwind
define void @_start() local_unnamed_addr #0 {
Entry:
%wide.load = load <2 x i6>, <2 x i6>* bitcast ([43 x i6]* @seq to <2 x i6>*), align 2
%0 = zext <2 x i6> %wide.load to <2 x i64>
%1 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %0
%2 = and <2 x i64> %1, <i64 1, i64 1>
%3 = shl <2 x i64> %2, <i64 0, i64 1>
%wide.load.1 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 2) to <2 x i6>*), align 2
%4 = zext <2 x i6> %wide.load.1 to <2 x i64>
%5 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %4
%6 = and <2 x i64> %5, <i64 1, i64 1>
%7 = shl <2 x i64> %6, <i64 2, i64 3>
%8 = xor <2 x i64> %7, %3
%wide.load.2 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 4) to <2 x i6>*), align 2
%9 = zext <2 x i6> %wide.load.2 to <2 x i64>
%10 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %9
%11 = and <2 x i64> %10, <i64 1, i64 1>
%12 = shl <2 x i64> %11, <i64 4, i64 5>
%13 = xor <2 x i64> %12, %8
%wide.load.3 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 6) to <2 x i6>*), align 2
%14 = zext <2 x i6> %wide.load.3 to <2 x i64>
%15 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %14
%16 = and <2 x i64> %15, <i64 1, i64 1>
%17 = shl <2 x i64> %16, <i64 6, i64 7>
%18 = xor <2 x i64> %17, %13
%wide.load.4 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 8) to <2 x i6>*), align 2
%19 = zext <2 x i6> %wide.load.4 to <2 x i64>
%20 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %19
%21 = and <2 x i64> %20, <i64 1, i64 1>
%22 = shl <2 x i64> %21, <i64 8, i64 9>
%23 = xor <2 x i64> %22, %18
%wide.load.5 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 10) to <2 x i6>*), align 2
%24 = zext <2 x i6> %wide.load.5 to <2 x i64>
%25 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %24
%26 = and <2 x i64> %25, <i64 1, i64 1>
%27 = shl <2 x i64> %26, <i64 10, i64 11>
%28 = xor <2 x i64> %27, %23
%wide.load.6 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 12) to <2 x i6>*), align 2
%29 = zext <2 x i6> %wide.load.6 to <2 x i64>
%30 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %29
%31 = and <2 x i64> %30, <i64 1, i64 1>
%32 = shl <2 x i64> %31, <i64 12, i64 13>
%33 = xor <2 x i64> %32, %28
%wide.load.7 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 14) to <2 x i6>*), align 2
%34 = zext <2 x i6> %wide.load.7 to <2 x i64>
%35 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %34
%36 = and <2 x i64> %35, <i64 1, i64 1>
%37 = shl <2 x i64> %36, <i64 14, i64 15>
%38 = xor <2 x i64> %37, %33
%wide.load.8 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 16) to <2 x i6>*), align 2
%39 = zext <2 x i6> %wide.load.8 to <2 x i64>
%40 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %39
%41 = and <2 x i64> %40, <i64 1, i64 1>
%42 = shl <2 x i64> %41, <i64 16, i64 17>
%43 = xor <2 x i64> %42, %38
%wide.load.9 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 18) to <2 x i6>*), align 2
%44 = zext <2 x i6> %wide.load.9 to <2 x i64>
%45 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %44
%46 = and <2 x i64> %45, <i64 1, i64 1>
%47 = shl <2 x i64> %46, <i64 18, i64 19>
%48 = xor <2 x i64> %47, %43
%wide.load.10 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 20) to <2 x i6>*), align 2
%49 = zext <2 x i6> %wide.load.10 to <2 x i64>
%50 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %49
%51 = and <2 x i64> %50, <i64 1, i64 1>
%52 = shl <2 x i64> %51, <i64 20, i64 21>
%53 = xor <2 x i64> %52, %48
%wide.load.11 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 22) to <2 x i6>*), align 2
%54 = zext <2 x i6> %wide.load.11 to <2 x i64>
%55 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %54
%56 = and <2 x i64> %55, <i64 1, i64 1>
%57 = shl <2 x i64> %56, <i64 22, i64 23>
%58 = xor <2 x i64> %57, %53
%wide.load.12 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 24) to <2 x i6>*), align 2
%59 = zext <2 x i6> %wide.load.12 to <2 x i64>
%60 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %59
%61 = and <2 x i64> %60, <i64 1, i64 1>
%62 = shl <2 x i64> %61, <i64 24, i64 25>
%63 = xor <2 x i64> %62, %58
%wide.load.13 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 26) to <2 x i6>*), align 2
%64 = zext <2 x i6> %wide.load.13 to <2 x i64>
%65 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %64
%66 = and <2 x i64> %65, <i64 1, i64 1>
%67 = shl <2 x i64> %66, <i64 26, i64 27>
%68 = xor <2 x i64> %67, %63
%wide.load.14 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 28) to <2 x i6>*), align 2
%69 = zext <2 x i6> %wide.load.14 to <2 x i64>
%70 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %69
%71 = and <2 x i64> %70, <i64 1, i64 1>
%72 = shl <2 x i64> %71, <i64 28, i64 29>
%73 = xor <2 x i64> %72, %68
%wide.load.15 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 30) to <2 x i6>*), align 2
%74 = zext <2 x i6> %wide.load.15 to <2 x i64>
%75 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %74
%76 = and <2 x i64> %75, <i64 1, i64 1>
%77 = shl <2 x i64> %76, <i64 30, i64 31>
%78 = xor <2 x i64> %77, %73
%wide.load.16 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 32) to <2 x i6>*), align 2
%79 = zext <2 x i6> %wide.load.16 to <2 x i64>
%80 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %79
%81 = and <2 x i64> %80, <i64 1, i64 1>
%82 = shl <2 x i64> %81, <i64 32, i64 33>
%83 = xor <2 x i64> %82, %78
%wide.load.17 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 34) to <2 x i6>*), align 2
%84 = zext <2 x i6> %wide.load.17 to <2 x i64>
%85 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %84
%86 = and <2 x i64> %85, <i64 1, i64 1>
%87 = shl <2 x i64> %86, <i64 34, i64 35>
%88 = xor <2 x i64> %87, %83
%wide.load.18 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 36) to <2 x i6>*), align 2
%89 = zext <2 x i6> %wide.load.18 to <2 x i64>
%90 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %89
%91 = and <2 x i64> %90, <i64 1, i64 1>
%92 = shl <2 x i64> %91, <i64 36, i64 37>
%93 = xor <2 x i64> %92, %88
%wide.load.19 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 38) to <2 x i6>*), align 2
%94 = zext <2 x i6> %wide.load.19 to <2 x i64>
%95 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %94
%96 = and <2 x i64> %95, <i64 1, i64 1>
%97 = shl <2 x i64> %96, <i64 38, i64 39>
%98 = xor <2 x i64> %97, %93
%wide.load.20 = load <2 x i6>, <2 x i6>* bitcast (i6* getelementptr inbounds ([43 x i6], [43 x i6]* @seq, i64 0, i64 40) to <2 x i6>*), align 2
%99 = zext <2 x i6> %wide.load.20 to <2 x i64>
%100 = lshr <2 x i64> <i64 6172840429334713770, i64 6172840429334713770>, %99
%101 = and <2 x i64> %100, <i64 1, i64 1>
%102 = shl <2 x i64> %101, <i64 40, i64 41>
%103 = xor <2 x i64> %102, %98
%rdx.shuf = shufflevector <2 x i64> %103, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
%bin.rdx = xor <2 x i64> %103, %rdx.shuf
%104 = extractelement <2 x i64> %bin.rdx, i32 0
%105 = icmp eq i64 %104, 4398046511103
br i1 %105, label %EndIf, label %Then
Then: ; preds = %Entry
tail call void @llvm.debugtrap()
br label %EndIf
EndIf: ; preds = %Entry, %Then
ret void
}
; Function Attrs: nounwind
declare void @llvm.debugtrap() #1
attributes #0 = { nobuiltin nounwind }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{i32 2, !"Dwarf Version", i32 4}
!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "zig 0.5.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !4)
!3 = !DIFile(filename: "foo", directory: "/tmp")
!4 = !{!5}
!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "std.builtin.OutputMode", scope: !6, file: !6, line: 386, baseType: !7, size: 2, align: 8, elements: !8)
!6 = !DIFile(filename: "builtin.zig", directory: "/home/abc/code/zig/build/lib/zig/std")
!7 = !DIBasicType(name: "u2", size: 8, encoding: DW_ATE_unsigned)
!8 = !{!9, !10, !11}
!9 = !DIEnumerator(name: "Exe", value: 0)
!10 = !DIEnumerator(name: "Lib", value: 1)
!11 = !DIEnumerator(name: "Obj", value: 2)
ASM code: .text
.file "foo"
.section .rodata.cst16,"aM",@progbits,16
.p2align 4
.LCPI0_0:
.quad 6172840429334713770
.quad 6172840429334713770
.LCPI0_1:
.quad 1
.quad 1
.text
.globl _start
.p2align 4, 0x90
.type _start,@function
_start:
movzwl seq(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm1
andl $63, %eax
vmovq %rax, %xmm2
vmovdqa .LCPI0_0(%rip), %xmm0
vpsrlq %xmm2, %xmm0, %xmm2
vpsrlq %xmm1, %xmm0, %xmm1
vpblendw $240, %xmm1, %xmm2, %xmm2
vmovdqa .LCPI0_1(%rip), %xmm1
vpand %xmm1, %xmm2, %xmm2
vpsllq $1, %xmm2, %xmm3
movzwl seq+2(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $3, %xmm3, %xmm4
vpsllq $2, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+4(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpor %xmm2, %xmm3, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $5, %xmm3, %xmm4
vpsllq $4, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+6(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpor %xmm2, %xmm3, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $7, %xmm3, %xmm4
vpsllq $6, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+8(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $9, %xmm4, %xmm5
vpsllq $8, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+10(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $11, %xmm4, %xmm5
vpsllq $10, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+12(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpxor %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $13, %xmm3, %xmm4
vpsllq $12, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+14(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $15, %xmm4, %xmm5
vpsllq $14, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+16(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $17, %xmm4, %xmm5
vpsllq $16, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+18(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $19, %xmm4, %xmm5
vpsllq $18, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+20(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpxor %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $21, %xmm3, %xmm4
vpsllq $20, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+22(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $23, %xmm4, %xmm5
vpsllq $22, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+24(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $25, %xmm4, %xmm5
vpsllq $24, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+26(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $27, %xmm4, %xmm5
vpsllq $26, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+28(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $29, %xmm4, %xmm5
vpsllq $28, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+30(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpxor %xmm3, %xmm2, %xmm2
vpblendw $240, %xmm4, %xmm5, %xmm3
vpand %xmm1, %xmm3, %xmm3
vpsllq $31, %xmm3, %xmm4
vpsllq $30, %xmm3, %xmm3
vpblendw $240, %xmm4, %xmm3, %xmm3
movzwl seq+32(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $33, %xmm4, %xmm5
vpsllq $32, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+34(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $35, %xmm4, %xmm5
vpsllq $34, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+36(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $37, %xmm4, %xmm5
vpsllq $36, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+38(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm4
vpblendw $240, %xmm4, %xmm5, %xmm4
vpand %xmm1, %xmm4, %xmm4
vpsllq $39, %xmm4, %xmm5
vpsllq $38, %xmm4, %xmm4
vpblendw $240, %xmm5, %xmm4, %xmm4
vpxor %xmm4, %xmm3, %xmm3
movzwl seq+40(%rip), %eax
movl %eax, %ecx
shrl $6, %ecx
andl $63, %ecx
vmovq %rcx, %xmm4
andl $63, %eax
vmovq %rax, %xmm5
vpsrlq %xmm5, %xmm0, %xmm5
vpsrlq %xmm4, %xmm0, %xmm0
vpblendw $240, %xmm0, %xmm5, %xmm0
vpand %xmm1, %xmm0, %xmm0
vpsllq $41, %xmm0, %xmm1
vpsllq $40, %xmm0, %xmm0
vpblendw $240, %xmm1, %xmm0, %xmm0
vpxor %xmm0, %xmm3, %xmm0
vpxor %xmm0, %xmm2, %xmm0
vpshufd $78, %xmm0, %xmm1
vpxor %xmm1, %xmm0, %xmm0
vmovq %xmm0, %rax
movabsq $4398046511103, %rcx
cmpq %rcx, %rax
je .LBB0_2
int3
.LBB0_2:
retq
.Lfunc_end0:
.size _start, .Lfunc_end0-_start
.type seq,@object
.section .rodata,"a",@progbits
.p2align 1
seq:
.zero 43,1
.size seq, 43
.section ".note.GNU-stack","",@progbits |
Who wants to be in charge of submitting this bug report upstream and following up? |
Has this been submitted upstream yet? |
Appears fixed as of |
I'm back in Zig land! I discovered that an old issue I had is still present and this time I've got a minimal repro.
The following code produces different output in debug and
--release-fast
/ --release-safe.
Output in debug mode:
Output with
--release-fast:
If you make any of the following modifications, the output is consistent between the two modes:
Change size to anything smaller than 43
Add std.debug.warn("", .{});
after out ^= ...
for
to an inline for
seq
to []u8
, and change the loop body to out ^= (key >> @intCast(u6, x) & 1) << @intCast(u6, i);
I tried to dig deeper but I'm afraid my LLVM and GDB skills aren't up to par for this one.
Versions:
macOS Mojave 10.14.6
The text was updated successfully, but these errors were encountered: