math32 - r2ka hardware multiply

z88dk · Aug 15, 2023 · de5746f · de5746f
1 parent dd887cf
commit de5746f
Show file tree

Hide file tree

Showing 20 changed files with 255 additions and 42 deletions.
diff --git a/libsrc/_DEVELOPMENT/math/float/math32/math32_r2ka_asm.lst b/libsrc/_DEVELOPMENT/math/float/math32/math32_r2ka_asm.lst
@@ -0,0 +1,4 @@
+
+math/float/math32/z80/f32_r2ka_mulu_32h_24x24
+math/float/math32/z80/f32_r2ka_mulu_32h_32x32
+math/float/math32/z80/f32_r2ka_sqr_32h_24x24
diff --git a/libsrc/_DEVELOPMENT/math/float/math32/readme.md b/libsrc/_DEVELOPMENT/math/float/math32/readme.md
@@ -6,16 +6,16 @@ This is the z88dk 32-bit IEEE-754 (mostly) standard math32 floating point maths
 Where not written by me, the functions were sourced from:
 
   * the Digi International Rabbit IEEE-754 32-bit library, copyright (C) 2015 Digi International Inc.
-  * the Hi-Tech C 32-bit floating point library, copyright (C) 1984-1987 HI-TECH SOFTWARE.
   * the Cephes Math Library Release 2.2, copyright (C) 1984, 1987, 1989 by Stephen L. Moshier.
+  * the Hi-Tech C 32-bit floating point library, copyright (C) 1984-1987 HI-TECH SOFTWARE.
   * the SDCC 32-bit floating point library, copyright (C) 1991 by Pipeline Associates, Inc, and others.
   * various Wikipedia references, especially for Newton-Raphson and Horner's Method.
 
-This library is designed for z180, and z80n processors. Specifically, it is optimised for the z180 and [ZX Spectrum Next](https://www.specnext.com/) z80n as these processors have a hardware `16_8x8` multiply instruction that can substantially accelerate the floating point mantissa calculation.
+This library is designed for z180 (ez80), z80n, and Rabbit 2000 / 3000 processors. Specifically, it is optimised for the z180 (ez80) and [ZX Spectrum Next](https://www.specnext.com/) z80n as these processors have a hardware `16_8x8` multiply instruction that can substantially accelerate the floating point mantissa calculation. The Rabbit `32_16x16` signed multiply instruction is also supported on r2ka/r3k machines, where it provides the fastest solution.
 
 This library is also designed to be as fast as possible on the z80 processor, using a `32_24x8` basis multiply function.
 
-*@feilipu, May 2019*
+*@feilipu, May 2019 - August 2023*
 
 ---
 
@@ -27,7 +27,7 @@ This library is also designed to be as fast as possible on the z80 processor, us
 
   *  Register use is limited to the main and alternate set (including af'). NO index registers were abused in the process.
 
-  *  Made for the Spectrum Next. The z80n `mul de` and the z180 `mlt nn` multiply instructions are used to full advantage to accelerate all floating point calculations.
+  *  Made for the Spectrum Next (z80n) and Agon Lite (ez80). The z80n `mul de`, the z180 (ez80) `mlt nn`, and the r2ka `mul` multiply instructions are used to full advantage to accelerate all floating point calculations.
 
   *  The z80 multiply (without a hardware instruction) is implemented with a `32_24x8` unrolled multiply algorithm.
 

diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/d32_fsadd.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/d32_fsadd.asm
@@ -84,12 +84,9 @@ PUBLIC m32_fsadd, m32_fsadd_callee
 
     ld hl,002h                  ; get second operand off of the stack
     add hl,sp
-    ld e,(hl)
-    inc hl
-    ld d,(hl)
-    inc hl
-    ld c,(hl)
-    inc hl
+    ld e,(hl+)
+    ld d,(hl+)
+    ld c,(hl+)
     ld h,(hl)
     ld l,c                      ; hlde = seeeeeee emmmmmmm mmmmmmmm mmmmmmmm
     jp farejoin

diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32__dtoa_base10.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32__dtoa_base10.asm
@@ -26,11 +26,9 @@ PUBLIC m32__dtoa_base10
     ; e = n * log(2) = n * 0.301.. = n * 0.01001101...(base 2) = INT((n*77 + 5)/256)
 
     exx
-    sla e                       ; move mantissa to capture exponent
-    rl d
+    rl de                       ; move mantissa to capture exponent
     ld a,d                      ; get exponent in A
-    rr d
-    rr e
+    rr de
 
     exx
     ; A = n (binary exponent)
@@ -74,8 +72,7 @@ PUBLIC m32__dtoa_base10
 
     ; DEHL = b
 
-    sla e                       ; move mantissa to capture exponent
-    rl d
+    rl de                       ; move mantissa to capture exponent
     ld a,d                      ; get exponent in A
     rr de
 
@@ -94,8 +91,8 @@ PUBLIC m32__dtoa_base10
     ; there is one decimal digit in four bits of EHL
     ; align these bits so they are the first four in register D
 
-    sla e                       ; move mantissa to capture exponent
-    rl d                        ; get exponent in D
+    rl de                       ; move mantissa to capture exponent
+                                ; get exponent in D
     scf                         ; restore mantissa bit
     rr e
 

diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_discardfraction.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_discardfraction.asm
@@ -8,13 +8,11 @@ PUBLIC m32_discardfraction
 ; Exit:  dehl = 32 bit float without fractional part
 
 .m32_discardfraction
-    sla e                       ; get the exponent
-    rl d
+    rl de                       ; get the exponent
     jr Z,zero_legal             ; return IEEE signed zero
 
     ld a,d                      ; Exponent
-    rr d                        ; Keep sign and exponent safe
-    rr e
+    rr de                       ; Keep sign and exponent safe
     sub $7f                     ; Exponent value of 127 is 1.xx
     jr C,return_zero
 
@@ -31,8 +29,7 @@ PUBLIC m32_discardfraction
 .shift_right                    ; shift mantissa mask right
     scf
     rr e
-    rr h
-    rr l
+    rr hl
     dec a
     jr NZ,shift_right
 

diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_fsfrexp.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_fsfrexp.asm
@@ -51,8 +51,7 @@ PUBLIC _m32_frexpf
     pop bc                      ; (int8_t*)pw2
     push af                     ; return on stack
 
-    sla e                       ; get the exponent
-    rl d
+    rl de                       ; get the exponent
     rr e                        ; save the sign in e[7]
 
     ld a,d

diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_fsmul.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_fsmul.asm
@@ -69,12 +69,9 @@ PUBLIC m32_fsmul, m32_fsmul_callee
 
     ld hl,002h                  ; get second operand off of the stack
     add hl,sp
-    ld e,(hl)
-    inc hl
-    ld d,(hl)
-    inc hl
-    ld c,(hl)
-    inc hl
+    ld e,(hl+)
+    ld d,(hl+)
+    ld c,(hl+)
     ld h,(hl)
     ld l,c                      ; hlde = seeeeeee emmmmmmm mmmmmmmm mmmmmmmm
     jr fmrejoin

diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_r2ka_mulu_32h_24x24.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_r2ka_mulu_32h_24x24.asm
@@ -0,0 +1,65 @@
+;
+;  feilipu, 2023 August
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
+
+IF __CPU_R2KA__ | __CPU_R3K__
+
+SECTION code_clib
+SECTION code_fp_math32
+
+EXTERN l_mulu_64_32x32
+
+PUBLIC m32_mulu_32h_24x24
+
+;------------------------------------------------------------------------------
+;
+; m32_mulu_32h_24x24
+;
+; multiplication of two 24-bit numbers into a 32-bit high product
+;
+; the result is the highest 32 bits of the 48-bit product.
+;
+; each operand is moved up by one byte (x * 256, y * 256) and handed to
+; l_mulu_64_32x32, so the 48-bit product sits in the top 48 bits of the
+; 64-bit result; the high half of that result is therefore the highest
+; 32 bits of the 48-bit product.
+;
+; the lower 8 bits of the returned value are intended to provide rounding
+; information for IEEE floating point 24-bit mantissa calculations.
+;
+; enter :  lde  = 24-bit multiplicand = x
+;          lde' = 24-bit multiplier   = y
+;
+; exit  : hlde  = 32-bit high product
+;
+; uses  : af, bc, de, hl, bc', de', hl'
+
+
+.m32_mulu_32h_24x24
+
+    ld h,e                      ; shift x to high order of 64_32x32
+    ld e,d                      ; 0lde -> deh0, i.e. dehl = x * 256
+    ld d,l                      ; order matters: every source register is
+    ld l,0                      ; read before it is overwritten
+    exx
+
+    ld h,e                      ; same one byte shift for y in the other bank
+    ld e,d                      ; 0lde -> deh0, i.e. dehl = y * 256
+    ld d,l
+    ld l,0
+
+    ; there is no second exx: the call below is made with the register
+    ; banks swapped relative to entry, so x and y simply trade roles as
+    ; multiplicand and multiplier.
+    ;
+    ; contract of l_mulu_64_32x32, as relied on here:
+    ;
+    ; multiplication of two 32-bit numbers into a 64-bit product
+    ;
+    ; enter : de hl = 32-bit multiplicand = x
+    ;         de'hl'= 32-bit multiplier   = y
+    ;
+    ; exit  : dehl dehl' = 64-bit product
+    ;         carry reset
+    ;
+    ; uses  : af, bc, de, hl, bc', de', hl'
+
+    call l_mulu_64_32x32
+
+    ex de,hl                    ; high half returned in dehl -> hlde
+    ret                         ; exit  : HLDE  = 32-bit high product
+
+ENDIF
diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_r2ka_mulu_32h_32x32.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_r2ka_mulu_32h_32x32.asm
@@ -0,0 +1,37 @@
+;
+;  feilipu, 2023 August
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
+
+IF __CPU_R2KA__ | __CPU_R3K__
+
+SECTION code_clib
+SECTION code_fp_math32
+
+EXTERN l_mulu_64_32x32
+
+PUBLIC m32_mulu_32h_32x32
+
+;------------------------------------------------------------------------------
+;
+; m32_mulu_32h_32x32
+;
+; multiplication of two 32-bit numbers into the high bytes of 64-bit product
+;
+; no code is generated here: the symbol is an alias for l_mulu_64_32x32,
+; and the caller simply takes the dehl part of what that routine returns.
+;
+; enter : dehl  = 32-bit multiplicand  = x   x1x0
+;         dehl' = 32-bit multiplier    = y   y1y0
+;
+; exit  : dehl  = 32-bit high product = z  z3z2 = y1y0 * x1x0
+;         carry reset
+;
+; uses  : af, bc, de, hl, bc', de', hl'
+;
+; NOTE(review): presumably dehl' holds the low half (z1z0) on exit, since
+; the aliased routine produces a full 64-bit product - confirm against
+; l_mulu_64_32x32 before relying on it.
+
+
+defc m32_mulu_32h_32x32 = l_mulu_64_32x32
+
+                                ; exit  : DEHL = 32-bit high product,
+                                ;         as returned by the aliased routine
+
+ENDIF
diff --git a/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_r2ka_sqr_32h_24x24.asm b/libsrc/_DEVELOPMENT/math/float/math32/z80/f32_r2ka_sqr_32h_24x24.asm
@@ -0,0 +1,63 @@
+;
+;  feilipu, 2023 August
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
+
+IF __CPU_R2KA__ | __CPU_R3K__
+
+SECTION code_clib
+SECTION code_fp_math32
+
+EXTERN l_mulu_64_32x32
+
+PUBLIC m32_sqr_32h_24x24
+
+;------------------------------------------------------------------------------
+;
+; m32_sqr_32h_24x24
+;
+; square of a 24-bit number into a 32-bit high product
+;
+; the result is the highest 32 bits of the 48-bit square.
+;
+; the operand is moved up by one byte (x * 256), copied into the alternate
+; register bank, and the pair is handed to l_mulu_64_32x32, so the 48-bit
+; square sits in the top 48 bits of the 64-bit result.
+;
+; the lower 8 bits of the returned value are intended to provide rounding
+; information for IEEE floating point mantissa calculations.
+;
+; enter : lde   = 24-bit value to be squared = x
+;                 (only l, d and e are read on entry)
+;
+; exit  : hlde  = 32-bit high product
+;
+; uses  : af, bc, de, hl, bc', de', hl'
+
+
+.m32_sqr_32h_24x24
+
+    ld h,e                      ; shift x to high order of 64_32x32
+    ld e,d                      ; 0lde -> deh0, i.e. dehl = x * 256
+    ld d,l                      ; order matters: every source register is
+    ld l,0                      ; read before it is overwritten
+
+    ld de',de                   ; replicate multiplicand: de' = de
+    ex de,hl                    ; bring hl into de, the source the copy uses
+    ld hl',de                   ; hl' = original hl
+    ex de,hl                    ; restore dehl, now dehl' = dehl
+
+    ; contract of l_mulu_64_32x32, as relied on here:
+    ;
+    ; multiplication of two 32-bit numbers into a 64-bit product
+    ;
+    ; enter : de hl = 32-bit multiplicand = x
+    ;         de'hl'= 32-bit multiplier   = y
+    ;
+    ; exit  : dehl dehl' = 64-bit product
+    ;         carry reset
+    ;
+    ; uses  : af, bc, de, hl, bc', de', hl'
+
+    call l_mulu_64_32x32
+
+    ex de,hl                    ; high half returned in dehl -> hlde
+    ret                         ; exit  : HLDE  = 32-bit high product
+
+ENDIF
diff --git a/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_muls_32_32x32.asm b/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_muls_32_32x32.asm
@@ -1,4 +1,11 @@
-; 2023 June feilipu
+;
+;  feilipu, 2023 June
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
 
 INCLUDE "config_private.inc"
 
@@ -9,8 +16,6 @@ EXTERN l_r2ka_mulu_32_32x32
 
 PUBLIC l_r2ka_muls_32_32x32
 
-defc l_r2ka_muls_32_32x32 = l_r2ka_mulu_32_32x32
-
    ; signed multiplication of two 32-bit signed numbers
    ;
    ; error reported on overflow
@@ -29,3 +34,6 @@ defc l_r2ka_muls_32_32x32 = l_r2ka_mulu_32_32x32
    ;            carry set, errno = ERANGE
    ;
    ; uses  : af, bc, de, hl, bc', de', hl'
+
+defc l_r2ka_muls_32_32x32 = l_r2ka_mulu_32_32x32
+
diff --git a/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_16_16x16.asm b/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_16_16x16.asm
@@ -1,4 +1,11 @@
-; 2023 June feilipu
+;
+;  feilipu, 2023 June
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
 
 SECTION code_clib
 SECTION code_math

diff --git a/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_16_16x8.asm b/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_16_16x8.asm
@@ -1,4 +1,11 @@
-; 2023 June feilipu
+;
+;  feilipu, 2023 June
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
 
 SECTION code_clib
 SECTION code_math

diff --git a/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_24_16x8.asm b/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_24_16x8.asm
@@ -1,4 +1,11 @@
-; 2018 June feilipu
+;
+;  feilipu, 2023 June
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
 
 SECTION code_clib
 SECTION code_math

diff --git a/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_32_16x16.asm b/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_32_16x16.asm
@@ -1,4 +1,11 @@
-; 2023 June feilipu
+;
+;  feilipu, 2023 August
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
 
 SECTION code_clib
 SECTION code_math

diff --git a/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_32_32x32.asm b/libsrc/_DEVELOPMENT/math/integer/r2ka/l_r2ka_mulu_32_32x32.asm
@@ -1,4 +1,11 @@
-; 2023 June feilipu
+;
+;  feilipu, 2023 August
+;
+;  This Source Code Form is subject to the terms of the Mozilla Public
+;  License, v. 2.0. If a copy of the MPL was not distributed with this
+;  file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;
+;------------------------------------------------------------------------------
 
 SECTION code_clib
 SECTION code_math