Skip to content

GCC generates better code than Clang for memory address updates on AArch64 #62935

Closed
@vfdff

Description

@vfdff
/* Reproducer from the report: for each i in [0, N), adds four element-wise
 * products (b*c, d*e, f*g, h*j) plus p[i] into a[i].
 * N and the arrays a, b, c, d, e, f, g, h, j, p are declared outside this
 * snippet; the 2d vector lanes in the listings below suggest double
 * elements — TODO confirm against the full test case.
 * NOTE(review): foo is declared to return float but has no return
 * statement; callers must not use its result. */
float foo (void) {
   for (int i = 0; i < N; i++)
   {
      a[i] += b[i]* c[i] + d[i] * e[i] + f[i] * g[i] + 
              h[i] * j[i]  + p[i];
   }
}
  • Clang: each address is computed with a separate add into x0 or x1, which are reused as temporaries
// Clang's loop body for foo, as quoted in the report.
// Every memory access first forms its address with a separate `add` of a
// per-array register and the shared index x8 into a temporary (x0 or x1),
// then uses a fixed #8192 displacement. That is ten address `add`s for ten
// loads per iteration, plus the `adds` that advances x8.
// NOTE(review): the #8192 displacement combined with the adds/b.ne exit
// suggests x8 counts up from -8192 to 0; the loop setup is not shown here,
// so confirm against the full listing.
// NOTE(review): x18 is used as a base register; it is platform-reserved on
// some targets (e.g. Windows/Apple) — presumably this was built for Linux.
.LBB0_1:                                // =>This Inner Loop Header: Depth=1
        add     x0, x11, x8
        add     x1, x12, x8
        ldr     q0, [x0, #8192]
        add     x0, x9, x8              // x0 is reused as a temporary for the next address
        ldr     q1, [x1, #8192]
        add     x1, x10, x8             // x1 is reused likewise
        ldr     q2, [x0, #8192]
        add     x0, x13, x8
        fmul    v0.2d, v0.2d, v1.2d     // v0 = product of the first pair of loaded vectors
        ldr     q1, [x1, #8192]
        add     x1, x14, x8
        fmla    v0.2d, v1.2d, v2.2d     // v0 += second product
        ldr     q1, [x0, #8192]
        ldr     q2, [x1, #8192]
        add     x0, x15, x8
        add     x1, x16, x8
        fmla    v0.2d, v2.2d, v1.2d     // v0 += third product
        ldr     q3, [x0, #8192]
        ldr     q1, [x1, #8192]
        add     x0, x17, x8
        fmla    v0.2d, v1.2d, v3.2d     // v0 += fourth product
        ldr     q1, [x0, #8192]         // lone addend (presumably p[i] — not provable from this listing)
        add     x0, x18, x8             // address reused by both the final load and the store below
        adds    x8, x8, #16             // advance the shared index by one 16-byte vector; sets flags for b.ne
        fadd    v0.2d, v0.2d, v1.2d
        ldr     q1, [x0, #8192]         // x0 was formed before x8 advanced, so this is still the current element
        fadd    v0.2d, v1.2d, v0.2d     // add the loaded value to the accumulated sum (the `+=` in the C source)
        str     q0, [x0, #8192]         // write the result back to the address just loaded from
        b.ne    .LBB0_1                 // repeat until the adds above produces zero
  • GCC: only a single add x0, x0, 16 is needed to update all the memory addresses
// GCC's loop body for foo, as quoted in the report.
// Every access uses register-offset addressing [base, x0]: each array keeps
// its own base register (x1-x10) and x0 is the one shared byte offset.
// No per-access address arithmetic is needed, so the loop carries a single
// `add` on x0 instead of one `add` per load.
// NOTE(review): base-register setup and the initial value of x0 are outside
// this listing; presumably x0 starts at 0, giving 8192/16 = 512 iterations
// — confirm against the full output.
.L2:
        ldr     q30, [x7, x0]
        ldr     q31, [x8, x0]
        ldr     q27, [x10, x0]
        ldr     q28, [x9, x0]
        fmul    v31.2d, v31.2d, v30.2d  // v31 = product of the first pair of loaded vectors
        ldr     q29, [x6, x0]
        ldr     q30, [x5, x0]
        fmla    v31.2d, v27.2d, v28.2d  // v31 += second product
        ldr     q27, [x4, x0]
        ldr     q28, [x3, x0]
        fmla    v31.2d, v29.2d, v30.2d  // v31 += third product
        ldr     q29, [x2, x0]           // lone addend (presumably p[i] — not provable from this listing)
        ldr     q30, [x1, x0]           // current value at the address stored to below
        fmla    v31.2d, v27.2d, v28.2d  // v31 += fourth product
        fadd    v31.2d, v31.2d, v29.2d
        fadd    v31.2d, v31.2d, v30.2d  // add the current destination value (the `+=` in the C source)
        str     q31, [x1, x0]           // write the result back to the address loaded via [x1, x0]
        add     x0, x0, 16              // the only address update: step the shared offset by one 16-byte vector
        cmp     x0, 8192                // exit once the byte offset reaches 8192
        bne     .L2

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions