## HeteroCL Main Branch Typing Rules
### Add Sub
To lay down the typing rules systematically, we consider the product of five types:
(Int, UInt, Fixed, UFixed, Float) x (Int, UInt, Fixed, UFixed, Float).
So there should be in total 25 rules. Some operations are commutative, so some rules can be combined.

In [2]:
import heterocl as hcl

def add(A, B, C, D, E):
    """Declare a one-element stage named "ufixed_ufixed" that computes D[x] + E[x].

    All five placeholders are accepted, but only D and E are read by this
    body; A, B and C are unused here. Nothing is returned: the stage is
    registered through the hcl.compute call.
    """
    hcl.compute((1,), lambda x : D[x] + E[x], name="ufixed_ufixed")

A = hcl.placeholder((1,), "A", dtype=hcl.Int(32))
B = hcl.placeholder((1,), "B", dtype=hcl.UInt(31))
C = hcl.placeholder((1,), "C", dtype=hcl.Float(32))
D = hcl.placeholder((1,), "D", dtype=hcl.UFixed(32, 4))  # left operand of the add
E = hcl.placeholder((1,), "E", dtype=hcl.UFixed(31, 2))  # right operand of the add
s = hcl.create_schedule([A, B, C, D, E], add)
print(hcl.lower(s))  # print the lowered schedule so the operand casts can be read off

// attr [_top] storage_scope = "global"
allocate _top[int32 * 1]
produce _top {
  // attr [0] extern_scope = 0
  // attr [ufixed_ufixed] storage_scope = "global"
  allocate ufixed_ufixed[int32 * 1]
  produce ufixed_ufixed {
    // attr [0] extern_scope = 0
    for "stage_name"="ufixed_ufixed" (x, 0, 1) {
      ufixed_ufixed[x] = int32((ufixed34_4(D[x]) + ufixed34_4(E[x])))
    }
  }
}



Add Sub rules are commutative.

#### Int x (Int, UInt, Fixed, UFixed, Float)
```
int(b1) + int(b2) -> int(max(b1, b2) + 1)
int(b1) + uint(b2) -> int(max(b1, b2 + 1) + 1)
int(b1) + fixed(b2, f2) -> fixed(max(b1, b2-f2) + 1 + f2, f2)
int(b1) + ufixed(b2, f2) -> fixed(max(b1, b2+1-f2) + 1 + f2, f2)
int(b1) + float -> float
```
#### UInt x (Int, UInt, Fixed, UFixed, Float)
```
uint(b1) + int(b2) covered
uint(b1) + uint(b2) -> uint(max(b1, b2) + 1)
uint(b1) + fixed(b2, f2) -> fixed(max(b1+1, b2-f2) + 1 + f2, f2)
uint(b1) + ufixed(b2, f2) -> ufixed(max(b1, b2-f2) + 1 + f2, f2)
uint(b1) + float -> float
```
#### Fixed x (Int, UInt, Fixed, UFixed, Float)
```
fixed(b1, f1) + Int covered
fixed(b1, f1) + UInt covered
fixed(b1, f1) + fixed(b2, f2) -> fixed(max(b1-f1, b2-f2) + 1 + max(f1, f2), max(f1, f2))
fixed(b1, f1) + ufixed(b2, f2) -> fixed(max(b1-f1, b2+1-f2) + 1 + max(f1, f2), max(f1, f2))
fixed(b1, f1) + float -> float
```
#### UFixed x (Int, UInt, Fixed, UFixed, Float)
```
ufixed + int covered
ufixed + uint covered
ufixed + fixed covered
ufixed(b1, f1) + ufixed(b2, f2) -> ufixed(max(b1-f1, b2-f2) + 1 + max(f1, f2), max(f1, f2))
ufixed + float -> float
```
#### Float x (Int, UInt, Fixed, UFixed, Float)
```
all covered
```

### Mul

In [15]:
import heterocl as hcl

def mul(A, B, C, D,D_wider, E):
    """Declare a one-element stage that computes D[x] * D_wider[x].

    Only D and D_wider are read by the active line; the other placeholders
    are referenced only by the disabled probes kept as comments below.

    NOTE(review): the active stage is named "fixed_ufixed", but D and
    D_wider are both declared with hcl.Fixed in this cell, so this looks
    like a fixed * fixed probe -- confirm before renaming, since the
    recorded output uses the current name.
    """
    # Other operand pairs, currently disabled:
    # hcl.compute((1,), lambda x : A[x] * A[x], name="int_int")
    # hcl.compute((1,), lambda x : A[x] * B[x], name="int_uint")
    # hcl.compute((1,), lambda x : B[x] * B[x], name="uint_uint")
    # hcl.compute((1,), lambda x : A[x] * D[x], name="int_fixed")
    # hcl.compute((1,), lambda x : A[x] * E[x], name="int_ufixed")
    # hcl.compute((1,), lambda x : B[x] * D[x], name="uint_fixed")
    # hcl.compute((1,), lambda x : B[x] * E[x], name="uint_ufixed")
    hcl.compute((1,), lambda x : D[x] * D_wider[x], name="fixed_ufixed")

A = hcl.placeholder((1,), "A", dtype=hcl.Int(32))
B = hcl.placeholder((1,), "B", dtype=hcl.UInt(31))
C = hcl.placeholder((1,), "C", dtype=hcl.Float(32))
D = hcl.placeholder((1,), "D", dtype=hcl.Fixed(31, 4))  # left operand of the active multiply
D_wider = hcl.placeholder((1,), "D_wider", dtype=hcl.Fixed(20, 8))  # right operand of the active multiply
E = hcl.placeholder((1,), "E", dtype=hcl.UFixed(31, 2))
s = hcl.create_schedule([A, B, C, D, D_wider, E], mul)
print(hcl.lower(s))  # print the lowered schedule so the operand casts can be read off

// attr [_top] storage_scope = "global"
allocate _top[int32 * 1]
produce _top {
  // attr [0] extern_scope = 0
  // attr [fixed_ufixed] storage_scope = "global"
  allocate fixed_ufixed[int32 * 1]
  produce fixed_ufixed {
    // attr [0] extern_scope = 0
    for "stage_name"="fixed_ufixed" (x, 0, 1) {
      fixed_ufixed[x] = int32((fixed51_4(D[x])*fixed51_8(D_wider[x])))
    }
  }
}



Mul rules are commutative.
#### Int x (Int, UInt, Fixed, UFixed, Float)
```
int(b1) * int(b2) -> int(b1 + b2)
int(b1) * uint(b2) -> int(b1 + b2)
int(b1) * fixed(b2, f2) -> fixed(b1 + b2, f2)
int(b1) * ufixed(b2, f2) -> fixed(b1 + b2, f2)
int(b1) * float -> float
```
#### UInt x (Int, UInt, Fixed, UFixed, Float)
```
uint(b1) * int(b2) covered
uint(b1) * uint(b2) -> uint(b1 + b2)
uint(b1) * fixed(b2, f2) -> fixed(b1 + b2, f2)
uint(b1) * ufixed(b2, f2) -> ufixed(b1 + b2, f2)
uint(b1) * float -> float
```
#### Fixed x (Int, UInt, Fixed, UFixed, Float)
```
fixed(b1, f1) * int covered
fixed(b1, f1) * uint covered
fixed(b1, f1) * fixed(b2, f2) -> fixed(b1 + b2, f1 + f2)
fixed(b1, f1) * ufixed(b2, f2) -> fixed(b1 + b2, f1 + f2)
fixed(b1, f1) * float -> float
```
#### UFixed x (Int, UInt, Fixed, UFixed, Float)
```
ufixed * int covered
ufixed * uint covered
ufixed * fixed covered
ufixed(b1, f1) * ufixed(b2, f2) -> ufixed(b1 + b2, f1 + f2)
ufixed * float -> float
```
#### Float x (Int, UInt, Fixed, UFixed, Float)
```
all covered
```

### Div

In [21]:
import heterocl as hcl

def div(A, A_wider, B, C, D, D_wider, E, E_wider):
    """Declare a one-element stage named "ufixed_ufixed" that computes E[x] / E_wider[x].

    Only E and E_wider are read by this body; the remaining placeholders
    are accepted but unused here. Nothing is returned: the stage is
    registered through the hcl.compute call.
    """
    hcl.compute((1,), lambda x : E[x] / E_wider[x], name="ufixed_ufixed")

A = hcl.placeholder((1,), "A", dtype=hcl.Int(3))
A_wider = hcl.placeholder((1,), "A_wider", dtype=hcl.Int(38))
B = hcl.placeholder((1,), "B", dtype=hcl.UInt(31))
C = hcl.placeholder((1,), "C", dtype=hcl.Float(32))
D = hcl.placeholder((1,), "D", dtype=hcl.Fixed(31, 4))
D_wider = hcl.placeholder((1,), "D_wider", dtype=hcl.Fixed(63, 8))
E = hcl.placeholder((1,), "E", dtype=hcl.UFixed(31, 9))  # dividend
E_wider = hcl.placeholder((1,), "E_wider", dtype=hcl.UFixed(63, 8))  # divisor
s = hcl.create_schedule([A, A_wider, B, C, D, D_wider, E, E_wider], div)
print(hcl.lower(s))  # print the lowered schedule so the operand casts can be read off

// attr [_top] storage_scope = "global"
allocate _top[int32 * 1]
produce _top {
  // attr [0] extern_scope = 0
  // attr [ufixed_ufixed] storage_scope = "global"
  allocate ufixed_ufixed[int32 * 1]
  produce ufixed_ufixed {
    // attr [0] extern_scope = 0
    for "stage_name"="ufixed_ufixed" (x, 0, 1) {
      ufixed_ufixed[x] = int32((ufixed94_64(E[x])/ufixed94_64(E_wider[x])))
    }
  }
}



Div is not commutative.
#### Int x (Int, UInt, Fixed, UFixed, Float)
```
int(b1) / int(b2) -> int(b1) // is this the right rule?
int(b1) / uint(b2) -> int(b1)
int(b1) / fixed(b2, f2) -> fixed(b1 + b2, b2 - f2) 
int(b1) / ufixed(b2, f2) -> fixed(b1 + b2 + 1, b2 - f2)
int(b1) / float -> float
```
#### UInt x (Int, UInt, Fixed, UFixed, Float)
```
uint(b1) / int(b2) -> int(b1)
uint(b1) / uint(b2) -> uint(b1)
uint(b1) / fixed(b2, f2) -> fixed(b1 + b2, b2 - f2)
uint(b1) / ufixed(b2, f2) -> ufixed(b1 + b2, b2 - f2)
uint(b1) / float -> float
```
#### Fixed x (Int, UInt, Fixed, UFixed, Float)
```
fixed(b1, f1) / int(b2) -> fixed(b1 + b2, b2 + f1)
fixed(b1, f1) / uint(b2) -> fixed(b1 + b2 + 1, b2 + f1)
fixed(b1, f1) / fixed(b2, f2) -> fixed(b1 + b2, b2 - f2 + f1)
fixed(b1, f1) / ufixed(b2, f2) -> fixed(b1 + b2 + 1, b2 - f2 + f1)
fixed(b1, f1) / float -> float
```
#### UFixed x (Int, UInt, Fixed, UFixed, Float)
```
ufixed(b1, f1) / int(b2) -> fixed(b1 + b2 + 1, b2 + f1)
ufixed(b1, f1) / uint(b2) -> ufixed(b1 + b2, b2 + f1)
ufixed(b1, f1) / fixed(b2, f2) -> fixed(b1 + b2, b2 - f2 + f1)
ufixed(b1, f1) / ufixed(b2, f2) -> ufixed(b1 + b2, b2 - f2 + f1)
ufixed(b1, f1) / float -> float
```
#### Float x (Int, UInt, Fixed, UFixed, Float)
```
float / int(b2) -> float
float / uint(b2) -> float
float / fixed(b2, f2) -> float
float / ufixed(b2, f2) -> float
float / float -> float
```

### Mod

In [32]:
import heterocl as hcl

def mod(A, A_wider, B, C, D, D_wider, E, E_wider):
    """Declare a one-element stage that computes D[x] % D_wider[x].

    Only D and D_wider are read by this body; the remaining placeholders
    are accepted but unused here.

    NOTE(review): the stage is named "int_fixed", but D and D_wider are
    both declared with hcl.Fixed in this cell, so this looks like a
    fixed % fixed probe -- confirm before renaming, since the recorded
    output uses the current name.
    """
    hcl.compute((1,), lambda x : D[x] % D_wider[x], name="int_fixed")

A = hcl.placeholder((1,), "A", dtype=hcl.Int(3))
A_wider = hcl.placeholder((1,), "A_wider", dtype=hcl.Int(38))
B = hcl.placeholder((1,), "B", dtype=hcl.UInt(31))
C = hcl.placeholder((1,), "C", dtype=hcl.Float(32))
D = hcl.placeholder((1,), "D", dtype=hcl.Fixed(31, 12))  # left operand of the modulo
D_wider = hcl.placeholder((1,), "D_wider", dtype=hcl.Fixed(63, 8))  # right operand of the modulo
E = hcl.placeholder((1,), "E", dtype=hcl.UFixed(31, 9))
E_wider = hcl.placeholder((1,), "E_wider", dtype=hcl.UFixed(63, 8))
s = hcl.create_schedule([A, A_wider, B, C, D, D_wider, E, E_wider], mod)
print(hcl.lower(s))  # print the lowered schedule so the operand casts can be read off

// attr [_top] storage_scope = "global"
allocate _top[int32 * 1]
produce _top {
  // attr [0] extern_scope = 0
  // attr [int_fixed] storage_scope = "global"
  allocate int_fixed[int32 * 1]
  produce int_fixed {
    // attr [0] extern_scope = 0
    for "stage_name"="int_fixed" (x, 0, 1) {
      int_fixed[x] = int32((fixed67_12(D[x]) % fixed67_12(D_wider[x])))
    }
  }
}



Mod is not commutative, but its typing rules are.
#### Int x (Int, UInt, Fixed, UFixed, Float)
```
int(b1) % int(b2) -> int(max(b1, b2)) 
int(b1) % uint(b2) -> int(max(b1, b2+1))
int(b1) % fixed(b2, f2) -> fixed(max(b1, b2-f2) + f2, f2)
int(b1) % ufixed(b2, f2) -> fixed(max(b1, b2-f2+1) + f2, f2)
int(b1) % float -> float
```
#### UInt x (Int, UInt, Fixed, UFixed, Float)
```
uint(b1) % int(b2) -> int(max(b1+1, b2))
uint(b1) % uint(b2) -> uint(max(b1, b2)) 
uint(b1) % fixed(b2, f2) -> fixed(max(b1+1, b2-f2) + f2, f2)
uint(b1) % ufixed(b2, f2) -> ufixed(max(b1, b2-f2) + f2, f2)
uint(b1) % float -> float
```
#### Fixed x (Int, UInt, Fixed, UFixed, Float)
```
fixed(b1, f1) % int(b2) -> fixed(max(b1-f1, b2) + f1, f1)
fixed(b1, f1) % uint(b2) -> fixed(max(b1-f1, b2+1) + f1, f1)
fixed(b1, f1) % fixed(b2, f2) -> fixed(max(b1-f1, b2-f2) + max(f1, f2), max(f1, f2))
fixed(b1, f1) % ufixed(b2, f2) -> fixed(max(b1-f1, b2-f2+1) + max(f1, f2), max(f1, f2))
fixed(b1, f1) % float -> float
```
#### UFixed x (Int, UInt, Fixed, UFixed, Float)
```
ufixed(b1, f1) % int(b2) -> fixed(max(b1-f1+1, b2) + f1, f1)
ufixed(b1, f1) % uint(b2) -> ufixed(max(b1-f1, b2) + f1, f1)
ufixed(b1, f1) % fixed(b2, f2) -> fixed(max(b1-f1+1, b2-f2) + max(f1, f2), max(f1, f2))
ufixed(b1, f1) % ufixed(b2, f2) -> ufixed(max(b1-f1, b2-f2) + max(f1, f2), max(f1, f2)) 
ufixed(b1, f1) % float -> float
```
#### Float x (Int, UInt, Fixed, UFixed, Float)
```
float % int(b2) -> float
float % uint(b2) -> float
float % fixed(b2, f2) -> float
float % ufixed(b2, f2) -> float
float % float -> float
```

### Shift
Shift can only be applied to integers, and the shift amount can only be a positive integer.
The rules therefore cover (Int, UInt) x (Int, UInt), and they are not commutative.

In [2]:
import heterocl as hcl

def shift(a, b):
    """Print the dtype of a[0] << b[0], then declare a left-shift and a right-shift stage.

    Each stage has one element and reads a[x] and b[x].

    NOTE(review): both stages are given the name "int_int"; the recorded
    lowering shows the second buffer as "int_int_3", so the duplicate
    appears to be renamed automatically -- confirm this is intended.
    """
    print((a[0] << b[0]).dtype)  # result type of the shift expression, before any stage is built
    hcl.compute((1,), lambda x : a[x] << b[x], name="int_int")
    hcl.compute((1,), lambda x : a[x] >> b[x], name="int_int")

A = hcl.placeholder((1,), "A", dtype=hcl.Int(32))  # value being shifted
B = hcl.placeholder((1,), "B", dtype=hcl.Int(32))  # shift amount
s = hcl.create_schedule([A, B], shift)
print(hcl.lower(s))  # print the lowered module for inspection

Int(32)
module {
  func.func @top(%arg0: memref<1xi32>, %arg1: memref<1xi32>) attributes {itypes = "ss", otypes = ""} {
    %c0 = arith.constant 0 : index
    %0 = memref.alloc() {name = "int_int"} : memref<1xi32>
    %1 = affine.load %arg0[%c0] {from = "A"} : memref<1xi32>
    %2 = affine.load %arg1[%c0] {from = "B"} : memref<1xi32>
    %3 = arith.shli %1, %2 : i32
    %c32_i32 = arith.constant 32 : i32
    %4 = arith.cmpi eq, %3, %c32_i32 : i32
    %c0_i32 = arith.constant 0 : i32
    %5 = arith.select %4, %c0_i32, %3 : i32
    affine.store %5, %0[%c0] {to = "int_int"} : memref<1xi32>
    %6 = memref.alloc() {name = "int_int_3"} : memref<1xi32>
    %7 = affine.load %arg0[%c0] {from = "A"} : memref<1xi32>
    %8 = affine.load %arg1[%c0] {from = "B"} : memref<1xi32>
    %9 = arith.shrsi %7, %8 : i32
    affine.store %9, %6[%c0] {to = "int_int_3"} : memref<1xi32>
    return
  }
}



HalideIR doesn't show the cast type, so the rules below are my best guess rather than observed output.
Should left shift be lossless?

#### Int x (Int, UInt)
```
int(b1) >> int(b2) -> int(b1)
int(b1) << int(b2) -> int(b1)
```
#### UInt x (Int, UInt)
```
```

### Reduce