From 71e8866f10973fc777530efefea70a4b57a16520 Mon Sep 17 00:00:00 2001
From: WANG Xuerui
Date: Tue, 21 Mar 2023 00:46:11 +0800
Subject: [PATCH] cmd/compile: wire up math/bits.Len intrinsics for loong64

For the SubFromLen64 test case to work, we need to fold c-(-(x-d))
into x+(c-d) as well.

benchmark TODO

Updates #59120

Change-Id: Icc8f7d8e79c6168aae634f5c36f044f3fd034d89
---
 src/cmd/compile/internal/loong64/ssa.go       |  2 +
 .../compile/internal/ssa/_gen/LOONG64.rules   |  3 +
 .../compile/internal/ssa/_gen/LOONG64Ops.go   |  2 +
 src/cmd/compile/internal/ssa/opGen.go         | 28 +++++++++
 .../compile/internal/ssa/rewriteLOONG64.go    | 63 +++++++++++++++++++
 src/cmd/compile/internal/ssagen/ssa.go        | 10 +--
 test/codegen/mathbits.go                      | 11 ++++
 7 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go
index 19d6d1abcfae6f..b27c7d9a9c7e6b 100644
--- a/src/cmd/compile/internal/loong64/ssa.go
+++ b/src/cmd/compile/internal/loong64/ssa.go
@@ -351,6 +351,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssa.OpLOONG64NEGD,
 		ssa.OpLOONG64CTZW,
 		ssa.OpLOONG64CTZV,
+		ssa.OpLOONG64CLZW,
+		ssa.OpLOONG64CLZV,
 		ssa.OpLOONG64SQRTD,
 		ssa.OpLOONG64SQRTF:
 		p := s.Prog(v.Op.Asm())
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
index 0a013f15dd6429..707e6c3c72fffb 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -128,6 +128,8 @@
 (Neg(32|64)F ...) => (NEG(F|D) ...)
 
 (Ctz(32|64) ...) => (CTZ(W|V) ...)
 (Ctz(8|16|32|64)NonZero ...) => (Ctz(8|16|32|64) ...)
+(BitLen64 x) => (NEGV (SUBVconst [64] (CLZV x)))
+(BitLen32 x) => (NEGV (SUBVconst [32] (CLZW x)))
 
 (Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x)
@@ -634,6 +636,7 @@
 (SUBVconst [c] (MOVVconst [d])) => (MOVVconst [d-c])
 (SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) => (ADDVconst [-c-d] x)
 (SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) => (ADDVconst [-c+d] x)
+(SUBV (MOVVconst [c]) (NEGV (SUBVconst [d] x))) => (ADDVconst [c-d] x)
 (SLLVconst [c] (MOVVconst [d])) => (MOVVconst [d<<uint64(c)])
 (SRLVconst [c] (MOVVconst [d])) => (MOVVconst [int64(uint64(d)>>uint64(c))])
 (SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)])
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
index dd93b876784421..22659160746db1 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
@@ -194,6 +194,8 @@ func init() {
 		{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
 		{name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"},   // Count trailing (low order) zeroes (returns 0-32)
 		{name: "CTZV", argLength: 1, reg: gp11, asm: "CTZV"},   // Count trailing (low order) zeroes (returns 0-64)
+		{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},   // Count leading (high order) zeroes (returns 0-32)
+		{name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"},   // Count leading (high order) zeroes (returns 0-64)
 
 		{name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
 		{name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0
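Note: the two BitLen rules read backwards at first glance. SUBVconst [64] computes its
argument minus 64, so (SUBVconst [64] (CLZV x)) is CLZV(x)-64, and the outer NEGV turns
that into 64-CLZV(x), i.e. the bit length. A standalone cross-check of that identity
against the portable math/bits routines (illustrative only, not part of this CL):

	package main

	import (
		"fmt"
		"math/bits"
	)

	func main() {
		for _, x := range []uint64{0, 1, 42, 1 << 31, 1<<64 - 1} {
			clz := bits.LeadingZeros64(x)
			// (BitLen64 x) => (NEGV (SUBVconst [64] (CLZV x))), i.e. -(clz-64) == 64-clz.
			if -(clz - 64) != bits.Len64(x) {
				panic("lowering identity does not hold")
			}
		}
		fmt.Println("BitLen64(x) == 64 - CLZ(x) on all samples")
	}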
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 63542c1140d29e..e9cf95de89b9b4 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1744,6 +1744,8 @@ const (
 	OpLOONG64SQRTF
 	OpLOONG64CTZW
 	OpLOONG64CTZV
+	OpLOONG64CLZW
+	OpLOONG64CLZV
 	OpLOONG64MASKEQZ
 	OpLOONG64MASKNEZ
 	OpLOONG64SLLV
@@ -23325,6 +23327,32 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "CLZW",
+		argLen: 1,
+		asm:    loong64.ACLZW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+			},
+			outputs: []outputInfo{
+				{0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
+			},
+		},
+	},
+	{
+		name:   "CLZV",
+		argLen: 1,
+		asm:    loong64.ACLZV,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31
+			},
+			outputs: []outputInfo{
+				{0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31
+			},
+		},
+	},
 	{
 		name:   "MASKEQZ",
 		argLen: 2,
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
index 678ef4d7f4ea15..b8f996d7ab10b7 100644
--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
@@ -87,6 +87,10 @@ func rewriteValueLOONG64(v *Value) bool {
 		return true
 	case OpAvg64u:
 		return rewriteValueLOONG64_OpAvg64u(v)
+	case OpBitLen32:
+		return rewriteValueLOONG64_OpBitLen32(v)
+	case OpBitLen64:
+		return rewriteValueLOONG64_OpBitLen64(v)
 	case OpClosureCall:
 		v.Op = OpLOONG64CALLclosure
 		return true
@@ -767,6 +771,44 @@ func rewriteValueLOONG64_OpAvg64u(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueLOONG64_OpBitLen32(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (BitLen32 x)
+	// result: (NEGV (SUBVconst [32] (CLZW x)))
+	for {
+		t := v.Type
+		x := v_0
+		v.reset(OpLOONG64NEGV)
+		v.Type = t
+		v0 := b.NewValue0(v.Pos, OpLOONG64SUBVconst, t)
+		v0.AuxInt = int64ToAuxInt(32)
+		v1 := b.NewValue0(v.Pos, OpLOONG64CLZW, t)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+}
+func rewriteValueLOONG64_OpBitLen64(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (BitLen64 x)
+	// result: (NEGV (SUBVconst [64] (CLZV x)))
+	for {
+		t := v.Type
+		x := v_0
+		v.reset(OpLOONG64NEGV)
+		v.Type = t
+		v0 := b.NewValue0(v.Pos, OpLOONG64SUBVconst, t)
+		v0.AuxInt = int64ToAuxInt(64)
+		v1 := b.NewValue0(v.Pos, OpLOONG64CLZV, t)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueLOONG64_OpCom16(v *Value) bool {
 	v_0 := v.Args[0]
 	b := v.Block
@@ -4046,6 +4088,27 @@ func rewriteValueLOONG64_OpLOONG64SUBV(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (SUBV (MOVVconst [c]) (NEGV (SUBVconst [d] x)))
+	// result: (ADDVconst [c-d] x)
+	for {
+		if v_0.Op != OpLOONG64MOVVconst {
+			break
+		}
+		c := auxIntToInt64(v_0.AuxInt)
+		if v_1.Op != OpLOONG64NEGV {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpLOONG64SUBVconst {
+			break
+		}
+		d := auxIntToInt64(v_1_0.AuxInt)
+		x := v_1_0.Args[0]
+		v.reset(OpLOONG64ADDVconst)
+		v.AuxInt = int64ToAuxInt(c - d)
+		v.AddArg(x)
+		return true
+	}
 	return false
 }
 func rewriteValueLOONG64_OpLOONG64SUBVconst(v *Value) bool {
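Note: the new SUBV rewrite depends on the integer identity from the commit message,
c-(-(x-d)) == x+(c-d), which holds under two's-complement wraparound and not just for
small constants. A randomized check (illustrative only, not part of this CL):

	package main

	import (
		"fmt"
		"math/rand"
	)

	func main() {
		for i := 0; i < 1_000_000; i++ {
			// Full-range signed values, so wraparound is exercised too.
			c, d, x := int64(rand.Uint64()), int64(rand.Uint64()), int64(rand.Uint64())
			if c-(-(x-d)) != x+(c-d) {
				panic("fold identity violated")
			}
		}
		fmt.Println("c-(-(x-d)) == x+(c-d) on all samples")
	}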
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index f83c2bc4c389a0..003566c35c8b1c 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -4557,12 +4557,12 @@ func InitTables() {
 	addF("math/bits", "Len64",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
 		},
-		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	addF("math/bits", "Len32",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
 		},
-		sys.AMD64, sys.ARM64, sys.PPC64)
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)
 	addF("math/bits", "Len32",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			if s.config.PtrSize == 4 {
@@ -4581,7 +4581,7 @@ func InitTables() {
 			x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
 		},
-		sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	addF("math/bits", "Len16",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
@@ -4596,7 +4596,7 @@ func InitTables() {
 			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
 		},
-		sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	addF("math/bits", "Len8",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
@@ -4609,7 +4609,7 @@ func InitTables() {
 			}
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
 		},
-		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	// LeadingZeros is handled because it trivially calls Len.
 	addF("math/bits", "Reverse64",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
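Note: loong64 has no 16- or 8-bit CLZ instruction, so Len16 and Len8 go through the
generic path above: the argument is zero-extended to 64 bits and fed to BitLen64. Zero
extension leaves the highest set bit where it was, so the 64-bit bit length equals the
narrow one. An exhaustive check over the 16-bit domain (illustrative only, not part of
this CL):

	package main

	import (
		"fmt"
		"math/bits"
	)

	func main() {
		for x := 0; x <= 0xffff; x++ {
			// Len16 lowers as BitLen64(ZeroExt16to64(x)) on these targets.
			if bits.Len16(uint16(x)) != bits.Len64(uint64(uint16(x))) {
				panic("zero-extension lowering disagrees")
			}
		}
		fmt.Println("Len16(x) == Len64(uint64(x)) for every 16-bit x")
	}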
addF("math/bits", "Reverse64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index f8313938dc230b..98fa108496bf1e 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -17,6 +17,7 @@ func LeadingZeros(n uint) int { // amd64/v3:"LZCNTQ", -"BSRQ" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"CNTLZD" @@ -28,6 +29,7 @@ func LeadingZeros64(n uint64) int { // amd64/v3:"LZCNTQ", -"BSRQ" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"CNTLZD" @@ -39,6 +41,7 @@ func LeadingZeros32(n uint32) int { // amd64/v3: "LZCNTL",- "BSRL" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZW" + // loong64:"CLZW" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"CNTLZW" @@ -50,6 +53,7 @@ func LeadingZeros16(n uint16) int { // amd64/v3: "LZCNTL",- "BSRL" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"CNTLZD" @@ -61,6 +65,7 @@ func LeadingZeros8(n uint8) int { // amd64/v3: "LZCNTL",- "BSRL" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"CNTLZD" @@ -76,6 +81,7 @@ func Len(n uint) int { // amd64/v3: "LZCNTQ" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"SUBC","CNTLZD" @@ -87,6 +93,7 @@ func Len64(n uint64) int { // amd64/v3: "LZCNTQ" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"SUBC","CNTLZD" @@ -94,6 +101,7 @@ func Len64(n uint64) int { } func SubFromLen64(n uint64) int { + // loong64:"CLZV",-"ADD" // ppc64x:"CNTLZD",-"SUBC" return 64 - bits.Len64(n) } @@ -103,6 +111,7 @@ func Len32(n uint32) int { // amd64/v3: "LZCNTL" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZW" // mips:"CLZ" // wasm:"I64Clz" // ppc64x: "CNTLZW" @@ -114,6 +123,7 @@ func Len16(n uint16) int { // amd64/v3: "LZCNTL" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"SUBC","CNTLZD" @@ -125,6 +135,7 @@ func Len8(n uint8) int { // amd64/v3: "LZCNTL" // s390x:"FLOGR" // arm:"CLZ" arm64:"CLZ" + // loong64:"CLZV" // mips:"CLZ" // wasm:"I64Clz" // ppc64x:"SUBC","CNTLZD"