diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc2.s b/src/cmd/asm/internal/asm/testdata/loong64enc2.s index 3b5e3cb81ae897..00768365b698e5 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc2.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc2.s @@ -61,22 +61,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 XOR $-1, R4 // 1efcbf0284f81500 MOVH R4, R5 // 85c04000a5c04800 - // relocation instructions - MOVW R4, name(SB) // 1e00001cc4038029 - MOVWU R4, name(SB) // 1e00001cc4038029 - MOVV R4, name(SB) // 1e00001cc403c029 - MOVB R4, name(SB) // 1e00001cc4030029 - MOVBU R4, name(SB) // 1e00001cc4030029 - MOVF F4, name(SB) // 1e00001cc403402b - MOVD F4, name(SB) // 1e00001cc403c02b - MOVW name(SB), R4 // 1e00001cc4038028 - MOVWU name(SB), R4 // 1e00001cc403802a - MOVV name(SB), R4 // 1e00001cc403c028 - MOVB name(SB), R4 // 1e00001cc4030028 - MOVBU name(SB), R4 // 1e00001cc403002a - MOVF name(SB), F4 // 1e00001cc403002b - MOVD name(SB), F4 // 1e00001cc403802b - MOVH R4, name(SB) // 1e00001cc4034029 - MOVH name(SB), R4 // 1e00001cc4034028 - MOVHU R4, name(SB) // 1e00001cc4034029 - MOVHU name(SB), R4 // 1e00001cc403402a + // relocation instructions + MOVW R4, name(SB) // 1e00001ac4038029 + MOVWU R4, name(SB) // 1e00001ac4038029 + MOVV R4, name(SB) // 1e00001ac403c029 + MOVB R4, name(SB) // 1e00001ac4030029 + MOVBU R4, name(SB) // 1e00001ac4030029 + MOVF F4, name(SB) // 1e00001ac403402b + MOVD F4, name(SB) // 1e00001ac403c02b + MOVW name(SB), R4 // 1e00001ac4038028 + MOVWU name(SB), R4 // 1e00001ac403802a + MOVV name(SB), R4 // 1e00001ac403c028 + MOVB name(SB), R4 // 1e00001ac4030028 + MOVBU name(SB), R4 // 1e00001ac403002a + MOVF name(SB), F4 // 1e00001ac403002b + MOVD name(SB), F4 // 1e00001ac403802b + MOVH R4, name(SB) // 1e00001ac4034029 + MOVH name(SB), R4 // 1e00001ac4034028 + MOVHU R4, name(SB) // 1e00001ac4034029 + MOVHU name(SB), R4 // 1e00001ac403402a diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 
4e1c36e11b8102..a2645f03950f68 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -1478,8 +1478,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = c.oprrr(ABREAK) // relocation operations - case 50: // mov r,addr ==> pcaddu12i + sw - o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP)) + case 50: // mov r,addr ==> pcalau12i + sw + o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 4 @@ -1495,8 +1495,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { rel2.Add = p.To.Offset rel2.Type = objabi.R_ADDRLOONG64 - case 51: // mov addr,r ==> pcaddu12i + lw - o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP)) + case 51: // mov addr,r ==> pcalau12i + lw + o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 4 @@ -1514,7 +1514,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { case 52: // mov $lext, r // NOTE: this case does not use REGTMP. If it ever does, // remove the NOTUSETMP flag in optab. 
- o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(p.To.Reg)) + o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg)) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 4 diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go index 842570d5ef3a46..cc387da68b1bce 100644 --- a/src/cmd/link/internal/ld/elf.go +++ b/src/cmd/link/internal/ld/elf.go @@ -254,7 +254,7 @@ func Elfinit(ctxt *Link) { ehdr.Flags = 0x20000004 /* MIPS 3 CPIC */ } if ctxt.Arch.Family == sys.Loong64 { - ehdr.Flags = 0x3 /* LoongArch lp64d */ + ehdr.Flags = 0x43 /* DOUBLE_FLOAT, OBJABI_V1 */ } if ctxt.Arch.Family == sys.RISCV64 { ehdr.Flags = 0x4 /* RISCV Float ABI Double */ diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go index 0eb3a813b2140a..61b6efe92ccadc 100644 --- a/src/cmd/link/internal/loong64/asm.go +++ b/src/cmd/link/internal/loong64/asm.go @@ -46,100 +46,28 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, } case objabi.R_ADDRLOONG64TLS: out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_TLS_TPREL) | uint64(elfsym)<<32) + out.Write64(uint64(elf.R_LARCH_TLS_LE_LO12) | uint64(elfsym)<<32) out.Write64(uint64(r.Xadd)) - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) - out.Write64(uint64(0xfff)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_AND)) - out.Write64(uint64(0x0)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_POP_32_U_10_12)) - out.Write64(uint64(0x0)) - case objabi.R_ADDRLOONG64TLSU: out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_TLS_TPREL) | uint64(elfsym)<<32) + out.Write64(uint64(elf.R_LARCH_TLS_LE_HI20) | uint64(elfsym)<<32) out.Write64(uint64(r.Xadd)) - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) - out.Write64(uint64(0xc)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_SR)) - 
out.Write64(uint64(0x0)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_5_20) | uint64(0)<<32) - out.Write64(uint64(0x0)) - case objabi.R_CALLLOONG64: out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PLT_PCREL) | uint64(elfsym)<<32) + out.Write64(uint64(elf.R_LARCH_B26) | uint64(elfsym)<<32) out.Write64(uint64(r.Xadd)) - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_0_10_10_16_S2)) - out.Write64(uint64(0x0)) - // The pcaddu12i + addi.d instructions is used to obtain address of a symbol on Loong64. - // The low 12-bit of the symbol address need to be added. The addi.d instruction have - // signed 12-bit immediate operand. The 0x800 (addr+U12 <=> addr+0x800+S12) is introduced - // to do sign extending from 12 bits. The 0x804 is 0x800 + 4, 4 is instruction bit - // width on Loong64 and is used to correct the PC of the addi.d instruction. case objabi.R_ADDRLOONG64: out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd + 0x4)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd + 0x804)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) - out.Write64(uint64(0xc)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_SR)) - out.Write64(uint64(0x0)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) - out.Write64(uint64(0xc)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_SL)) - out.Write64(uint64(0x0)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_SUB)) - out.Write64(uint64(0x0)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_10_12)) - out.Write64(uint64(0x0)) + out.Write64(uint64(elf.R_LARCH_PCALA_LO12) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) case 
objabi.R_ADDRLOONG64U: out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd + 0x800)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) - out.Write64(uint64(0xc)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_SR)) - out.Write64(uint64(0x0)) - - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_5_20) | uint64(0)<<32) - out.Write64(uint64(0x0)) + out.Write64(uint64(elf.R_LARCH_PCALA_HI20) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) } return true @@ -156,7 +84,6 @@ func machoreloc1(*sys.Arch, *ld.OutBuf, *loader.Loader, loader.Sym, loader.ExtRe func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) (o int64, nExtReloc int, ok bool) { rs := r.Sym() if target.IsExternal() { - nExtReloc := 0 switch r.Type() { default: return val, 0, false @@ -168,20 +95,12 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && ldr.SymSect(rs) == nil { ldr.Errorf(s, "missing section for %s", ldr.SymName(rs)) } - nExtReloc = 8 // need 8 ELF relocations. 
see elfreloc1 - if r.Type() == objabi.R_ADDRLOONG64U { - nExtReloc = 4 - } - return val, nExtReloc, true + return val, 1, true case objabi.R_ADDRLOONG64TLS, objabi.R_ADDRLOONG64TLSU, objabi.R_CALLLOONG64, objabi.R_JMPLOONG64: - nExtReloc = 4 - if r.Type() == objabi.R_CALLLOONG64 || r.Type() == objabi.R_JMPLOONG64 { - nExtReloc = 2 - } - return val, nExtReloc, true + return val, 1, true } } @@ -196,11 +115,11 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade case objabi.R_ADDRLOONG64, objabi.R_ADDRLOONG64U: pc := ldr.SymValue(s) + int64(r.Off()) - t := ldr.SymAddr(rs) + r.Add() - pc + t := calculatePCAlignedReloc(r.Type(), ldr.SymAddr(rs)+r.Add(), pc) if r.Type() == objabi.R_ADDRLOONG64 { - return int64(val&0xffc003ff | (((t + 4 - ((t + 4 + 1<<11) >> 12 << 12)) << 10) & 0x3ffc00)), noExtReloc, isOk + return int64(val&0xffc003ff | (t << 10)), noExtReloc, isOk } - return int64(val&0xfe00001f | (((t + 1<<11) >> 12 << 5) & 0x1ffffe0)), noExtReloc, isOk + return int64(val&0xfe00001f | (t << 5)), noExtReloc, isOk case objabi.R_ADDRLOONG64TLS, objabi.R_ADDRLOONG64TLSU: t := ldr.SymAddr(rs) + r.Add() @@ -238,3 +157,33 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy } return loader.ExtReloc{}, false } + +func isRequestingLowPageBits(t objabi.RelocType) bool { + switch t { + case objabi.R_ADDRLOONG64: + return true + } + return false +} + +// Calculates the value to put into the immediate slot, according to the +// desired relocation type, target and PC. +// The value to use varies based on the reloc type. Namely, the absolute low +// bits of the target are to be used for the low part, while the page-aligned +// offset is to be used for the higher part. A "page" here is not related to +// the system's actual page size, but rather a fixed 12-bit range (designed to +// cooperate with ADDI/LD/ST's 12-bit immediates). 
+func calculatePCAlignedReloc(t objabi.RelocType, tgt int64, pc int64) int64 {
+	if isRequestingLowPageBits(t) {
+		// Low part: absolute low 12 bits of the target (12-bit immediate).
+		return tgt & 0xfff
+	}
+
+	// High part. The low 12 bits get added back as a sign-extended value,
+	// so when they are 0x800 or more the page index must be one higher;
+	// adding 0x800 before the shift carries into the page index exactly then.
+	tgtPage := (tgt + 0x800) >> 12
+	pcPage := pc >> 12
+	// Page distance from PC, truncated to the 20-bit immediate field.
+	return (tgtPage - pcPage) & 0xfffff
+}