cmd/compile: wire up math/bits.Len intrinsics for loong64

For the SubFromLen64 test case to work, we need to fold c-(-(x-d)) into x+(c-d) as well. benchmark TODO Updates golang#59120 Change-Id: Icc8f7d8e79c6168aae634f5c36f044f3fd034d89
xen0n · Mar 27, 2023 · 71e8866 · 71e8866
1 parent 5873cbd
commit 71e8866
Show file tree

Hide file tree

Showing 7 changed files with 114 additions and 5 deletions.
diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go
@@ -351,6 +351,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		ssa.OpLOONG64NEGD,
 		ssa.OpLOONG64CTZW,
 		ssa.OpLOONG64CTZV,
+		ssa.OpLOONG64CLZW,
+		ssa.OpLOONG64CLZV,
 		ssa.OpLOONG64SQRTD,
 		ssa.OpLOONG64SQRTF:
 		p := s.Prog(v.Op.Asm())

diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -128,6 +128,8 @@
 (Neg(32|64)F ...) => (NEG(F|D) ...)
 (Ctz(32|64) ...) => (CTZ(W|V) ...)
 (Ctz(8|16|32|64)NonZero ...) => (Ctz(8|16|32|64) ...)
+(BitLen64 <t> x) => (NEGV <t> (SUBVconst <t> [64] (CLZV <t> x)))
+(BitLen32 <t> x) => (NEGV <t> (SUBVconst <t> [32] (CLZW <t> x)))
 
 (Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x)
 
@@ -634,6 +636,7 @@
 (SUBVconst [c] (MOVVconst [d]))  => (MOVVconst [d-c])
 (SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) => (ADDVconst [-c-d] x)
 (SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) => (ADDVconst [-c+d] x)
+(SUBV (MOVVconst [c]) (NEGV (SUBVconst [d] x))) => (ADDVconst [c-d] x)
 (SLLVconst [c] (MOVVconst [d]))  => (MOVVconst [d<<uint64(c)])
 (SRLVconst [c] (MOVVconst [d]))  => (MOVVconst [int64(uint64(d)>>uint64(c))])
 (SRAVconst [c] (MOVVconst [d]))  => (MOVVconst [d>>uint64(c)])

diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
@@ -194,6 +194,8 @@ func init() {
 		{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
 		{name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"},   // Count trailing (low order) zeroes (returns 0-32)
 		{name: "CTZV", argLength: 1, reg: gp11, asm: "CTZV"},   // Count trailing (low order) zeroes (returns 0-64)
+		{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},   // Count leading (high order) zeroes (returns 0-32)
+		{name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"},   // Count leading (high order) zeroes (returns 0-64)
 
 		{name: "MASKEQZ", argLength: 2, reg: gp21, asm: "MASKEQZ"}, // returns 0 if arg1 == 0, otherwise returns arg0
 		{name: "MASKNEZ", argLength: 2, reg: gp21, asm: "MASKNEZ"}, // returns 0 if arg1 != 0, otherwise returns arg0

diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
@@ -4557,12 +4557,12 @@ func InitTables() {
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
 		},
-		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	addF("math/bits", "Len32",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
 		},
-		sys.AMD64, sys.ARM64, sys.PPC64)
+		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)
 	addF("math/bits", "Len32",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			if s.config.PtrSize == 4 {
@@ -4581,7 +4581,7 @@ func InitTables() {
 			x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
 		},
-		sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	addF("math/bits", "Len16",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
@@ -4596,7 +4596,7 @@ func InitTables() {
 			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
 		},
-		sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	addF("math/bits", "Len8",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
@@ -4609,7 +4609,7 @@ func InitTables() {
 			}
 			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
 		},
-		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
+		sys.AMD64, sys.ARM64, sys.ARM, sys.S390X, sys.Loong64, sys.MIPS, sys.PPC64, sys.Wasm)
 	// LeadingZeros is handled because it trivially calls Len.
 	addF("math/bits", "Reverse64",
 		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {

diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
@@ -17,6 +17,7 @@ func LeadingZeros(n uint) int {
 	// amd64/v3:"LZCNTQ", -"BSRQ"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
@@ -28,6 +29,7 @@ func LeadingZeros64(n uint64) int {
 	// amd64/v3:"LZCNTQ", -"BSRQ"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
@@ -39,6 +41,7 @@ func LeadingZeros32(n uint32) int {
 	// amd64/v3: "LZCNTL",- "BSRL"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZW"
+	// loong64:"CLZW"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"CNTLZW"
@@ -50,6 +53,7 @@ func LeadingZeros16(n uint16) int {
 	// amd64/v3: "LZCNTL",- "BSRL"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
@@ -61,6 +65,7 @@ func LeadingZeros8(n uint8) int {
 	// amd64/v3: "LZCNTL",- "BSRL"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"CNTLZD"
@@ -76,6 +81,7 @@ func Len(n uint) int {
 	// amd64/v3: "LZCNTQ"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
@@ -87,13 +93,15 @@ func Len64(n uint64) int {
 	// amd64/v3: "LZCNTQ"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
 	return bits.Len64(n)
 }
 
 func SubFromLen64(n uint64) int {
+	// loong64:"CLZV",-"ADD"
 	// ppc64x:"CNTLZD",-"SUBC"
 	return 64 - bits.Len64(n)
 }
@@ -103,6 +111,7 @@ func Len32(n uint32) int {
 	// amd64/v3: "LZCNTL"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZW"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x: "CNTLZW"
@@ -114,6 +123,7 @@ func Len16(n uint16) int {
 	// amd64/v3: "LZCNTL"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"
@@ -125,6 +135,7 @@ func Len8(n uint8) int {
 	// amd64/v3: "LZCNTL"
 	// s390x:"FLOGR"
 	// arm:"CLZ" arm64:"CLZ"
+	// loong64:"CLZV"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
 	// ppc64x:"SUBC","CNTLZD"