In [10]:
import numpy as np
import warnings
import tensorflow as tf
warnings.filterwarnings(action='ignore')
import tensorflow as tf
print("Executing eagerly: {}".format(tf.executing_eagerly()))
warnings.filterwarnings(action='default')
import os

Executing eagerly: True


In [4]:
# Integer range descriptors for the three fixed-point widths used in this notebook.
iinfo_s8, iinfo_s16, iinfo_s32 = (np.iinfo(t) for t in (np.int8, np.int16, np.int32))

# Bit width keyed by dtype *instance* (what ``array.dtype`` returns).  The scalar
# type ``np.int8`` itself is a different object and does not work as the key.
bitdepth_map = {
    np.dtype(scalar_type): bits
    for scalar_type, bits in ((np.int8, 8), (np.int16, 16), (np.int32, 32))
}

# Directory (relative to this notebook) that receives the generated test data.
unit_test_dir = "../../../xcore/operator_book/test/nn_operators/"

# Helper Functions

In [5]:
def rand_tensor_s8(*dims):
    """Return an int8 array of shape ``dims`` with values drawn uniformly from [-128, 127].

    Uses NumPy's global random state, so results depend on ``np.random.seed``.
    """
    shape = tuple(dims)
    return np.random.randint(low=-128, high=128, size=shape, dtype=np.int8)
def rand_tensor_s32(*dims):
    """Return an int32 array of shape ``dims`` drawn uniformly from the full int32 range.

    Uses NumPy's global random state, so results depend on ``np.random.seed``.
    """
    lowest = iinfo_s32.min
    # randint's upper bound is exclusive; widen to int64 so ``max + 1`` is representable.
    past_highest = np.int64(iinfo_s32.max) + 1
    return np.random.randint(lowest, past_highest, size=tuple(dims), dtype=np.int32)

def vlsat_s16(x, shift):
    """Shift, round and saturate accumulators down to int16.

    Args:
        x: integer array of accumulators.  It is NOT modified; a widened copy
            is used internally.
        shift: integer array broadcastable against the trailing axes of ``x``.
            A positive entry is a rounding arithmetic right shift (half of the
            discarded LSB weight is added first), a negative entry is a left
            shift by its magnitude, and zero leaves the value unchanged.

    Returns:
        An int16 array clipped to the symmetric range [-32767, 32767].
    """
    s16 = np.iinfo(np.int16)

    # Do all arithmetic in int64: the rounding constant for shifts >= 16 does
    # not fit the int16 dtype of ``shift``, and rounding adds / left shifts of
    # int32 accumulators must saturate rather than wrap.
    acc = np.array(x, dtype=np.int64)  # np.array copies, so the caller's data is untouched
    amount = np.asarray(shift).astype(np.int64)

    right = np.maximum(amount, 0)
    left = np.maximum(-amount, 0)

    # Rounding term 1 << (shift - 1) applies only where a right shift happens.
    rounding = np.where(right > 0, np.left_shift(np.int64(1), np.maximum(right - 1, 0)), 0)

    # For any element at most one of ``right``/``left`` is non-zero.
    acc = np.left_shift(np.right_shift(acc + rounding, right), left)

    return np.clip(acc, a_min=s16.min + 1, a_max=s16.max).astype(np.int16)
    

def vlmul_s16(x, scales):
    """Multiply ``x`` by per-channel ``scales`` and reduce the product back to int16.

    ``scales`` must match ``x`` along the last axis.  The 32-bit product is
    passed through ``vlsat_s16`` with a shift of 14 for every element.
    """
    assert x.shape[-1] == scales.shape[-1]

    # Prepend unit axes so that ``scales`` has the same rank as ``x``.
    missing_axes = max(x.ndim - scales.ndim, 0)
    scales = scales.reshape((1,) * missing_axes + scales.shape)

    product = x.astype(np.int32) * scales
    assert product.dtype == np.int32

    # vlmul for s16 has an implicit built-in vlsat on the 32-bit result,
    # where all shifts are 14.
    all_fourteen = np.full(product.shape, 14, dtype=np.int16)
    return vlsat_s16(product, all_fourteen)

def vdepth8(x):
    """Reduce a 16- or 32-bit signed integer array to int8 with rounding and saturation.

    The input is shifted right by (bit width - 8) after adding half of the
    discarded LSB weight, then clipped to the symmetric range [-127, 127].

    Args:
        x: signed integer array with a 16- or 32-bit dtype.  It is NOT
            modified; the arithmetic is done on a widened copy.

    Returns:
        An int8 array with the same shape as ``x``.
    """
    x = np.asarray(x)
    assert np.issubdtype(x.dtype, np.signedinteger)
    shr = x.dtype.itemsize * 8 - 8
    assert(shr > 0)        # 8-bit input has nothing to reduce
    assert(shr <= 24)      # only 16- and 32-bit inputs are supported

    s8 = np.iinfo(np.int8)
    # Widen first: adding the rounding term in the input dtype would wrap for
    # values near the top of the range (e.g. int16 32767 + 128).
    wide = x.astype(np.int64)
    wide = (wide + (1 << (shr - 1))) >> shr
    return np.clip(wide, a_min=s8.min + 1, a_max=s8.max).astype(np.int8)
    

def conv2d_s8(K, X):
    """Zero-padded, stride-1 2D correlation of an image with a bank of kernels.

    Args:
        K: kernels indexed as (C_out, K_h, K_w, C_in); K_h and K_w must be odd.
        X: image indexed as (height, width, C_in).

    Returns:
        An int32 array of shape (height, width, C_out).  The kernel is applied
        without flipping, centred on each output pixel.
    """
    n_out, k_rows, k_cols, n_in = K.shape
    rows, cols, x_chans = X.shape
    assert k_rows % 2 == 1
    assert k_cols % 2 == 1
    assert n_in == x_chans
    pad_r, pad_c = k_rows // 2, k_cols // 2

    # Surround the image with a zero border so every output has a full window.
    padded = np.zeros(shape=(rows + 2 * pad_r, cols + 2 * pad_c, n_in), dtype=np.int8)
    padded[pad_r:pad_r + rows, pad_c:pad_c + cols, :] = X

    out = np.zeros(shape=(rows, cols, n_out), dtype=np.int32)
    for r, c in np.ndindex(rows, cols):
        # Widen the window so the products are formed in 32 bits.
        window = padded[r:r + k_rows, c:c + k_cols, :].astype(np.int32)
        for ch in range(n_out):
            out[r, c, ch] = np.sum(K[ch] * window)

    return out
    
    

# maxpool2d_deep()

In [6]:

def maxpool2d_deep(tensor_in):
    """Reference 2x2, stride-2 max pooling of a single rank-3 tensor.

    A leading batch axis of length 1 is added for ``tf.nn.max_pool2d``, which
    is applied with 'VALID' padding, and removed again from the result.

    Args:
        tensor_in: rank-3 array.

    Returns:
        The pooled result as a NumPy array.
    """
    assert(tensor_in.ndim == 3)
    # Was ``td.expand_dims``: ``td`` is not defined anywhere, so every call
    # raised NameError.  TensorFlow is imported as ``tf``.
    batched = tf.expand_dims(tensor_in, axis=0)
    pooled = tf.nn.max_pool2d(batched, ksize=2, strides=2, padding='VALID')
    return pooled[0,:,:,:].numpy()


def test_case_maxpool2d_deep(width, height, chans, writefile=None):
    """Produce one random maxpool2d_deep() test case.

    Args:
        width, height, chans: dimensions of the random int8 input, which is
            created with shape (width, height, chans).
            NOTE(review): the other generators in this notebook order their
            tensors (height, width, channels) — confirm this order is intended.
        writefile: optional path.  When given, the raw bytes of the input
            followed by the pooled output are written to it.

    Returns:
        (A, A_out): the input tensor and its pooled result.  Previously
        nothing was returned, so a call without ``writefile`` discarded the
        generated case; returning it matches the other test_case_* helpers.
    """
    A = rand_tensor_s8(width, height, chans)
    A_out = maxpool2d_deep(A)

    if writefile:
        with open(writefile, "wb") as file:
            A.tofile(file)
            A_out.tofile(file)

    return A, A_out
        



# fc_deepin_shallowout_lin()

In [7]:


def fc_deepin_shallowout_lin(W, B, X, shifts, scales):
    """Reference model of the fully-connected layer with a linear (scale-only) output.

    Computes ``W @ X + B`` in 32 bits, reduces it to 16 bits with ``vlsat_s16``
    using ``shifts``, then applies ``scales`` with ``vlmul_s16``.  ``shifts``
    must have the same shape as the accumulator.
    """
    acc = W.astype(np.int32) @ X.astype(np.int32)
    acc = acc + B
    assert acc.shape == shifts.shape
    return vlmul_s16(vlsat_s16(acc, shifts), scales)


def test_case_fc_deepin_shallowout_lin(C_in, C_out, writefile=None):
    """Produce one random fc_deepin_shallowout_lin() test case.

    Random weights, inputs, biases, shifts and scales are drawn, then the
    biases, shifts and scales are iteratively adjusted so that at most a tenth
    of the outputs are bias-dominated, zero, or saturated.

    Args:
        C_in: number of input channels.
        C_out: number of output channels.
        writefile: optional path; when given, W, X, B, shifts, scales and Y are
            written to it as raw bytes, in that order.

    Returns:
        The tuple (W, X, B, shifts, scales, Y).
    """
    # The order of these draws determines the result under a fixed seed.
    W = rand_tensor_s8(C_out, C_in)
    X = rand_tensor_s8(C_in)
    B = rand_tensor_s32(C_out)
    shifts = np.random.randint(0, 16, size=C_out, dtype=np.int16)
    scales = np.random.randint(0x4000, 0x8000, size=C_out, dtype=np.int16)

    # W and X never change below, so the bias-free product is computed once.
    # Always hand ``weighted_sum + B`` (a fresh array) to vlsat_s16, never
    # ``weighted_sum`` itself.
    weighted_sum = np.matmul(W.astype(np.int32), X.astype(np.int32))
    budget = C_out / 10

    def is_saturated(values):
        # True where a 16-bit value sits on either saturation bound.
        return np.logical_or(values == iinfo_s16.max, values == (iinfo_s16.min+1))

    # Shouldn't let bias dominate the pre-activation value: halve any bias
    # whose magnitude exceeds that of the weighted sum.
    bias_heavy = np.abs(weighted_sum) < np.abs(B)
    while np.sum(bias_heavy) > budget:
        B[bias_heavy] = (B[bias_heavy] * 0.5).astype(np.int32)
        bias_heavy = np.abs(weighted_sum) < np.abs(B)

    # Make the shifts reasonable, part 1: only allow <10% of values to be 0.
    while True:
        shifted = vlsat_s16(weighted_sum + B, shifts)
        vanished = (shifted == 0)
        if np.sum(vanished) <= budget:
            break
        shifts[vanished] = shifts[vanished] - 2

    # Make the shifts reasonable, part 2: only allow <10% of values to saturate.
    while True:
        shifted = vlsat_s16(weighted_sum + B, shifts)
        clipped = is_saturated(shifted)
        if np.sum(clipped) <= budget:
            break
        shifts[clipped] = shifts[clipped] + 2

    # Make the scales reasonable: only allow <10% of values to saturate.
    while True:
        scaled = vlmul_s16(vlsat_s16(weighted_sum + B, shifts), scales)
        clipped = is_saturated(scaled)
        if np.sum(clipped) <= budget:
            break
        scales[clipped] = (scales[clipped] * 0.9).astype(np.int16)

    Y = fc_deepin_shallowout_lin(W, B, X, shifts, scales)

    if writefile:
        with open(writefile, "wb+") as file:
            # The file is raw bytes, so every dtype must be as expected.
            assert(W.dtype == np.int8 and X.dtype == np.int8 and B.dtype == np.int32
                    and shifts.dtype == np.int16 and scales.dtype == np.int16
                    and Y.dtype == np.int16)
            for tensor in (W, X, B, shifts, scales, Y):
                tensor.tofile(file)

    return W,X,B,shifts,scales,Y


#Generate test vectors for fc_deepin_shallowout_lin()
# The guard below is False, so this block does nothing when the cell runs;
# change it to True to (re)write the files under unit_test_dir.
if False:
    # Each entry is (caseNum, C_in, C_out, number of vectors to generate).
    thing = [
        (2, 32, 4, 100),
        (3, 96, 15, 100),
    ]
    for (caseNum, C_in, C_out, vecs) in thing:
                
        # Zero-length stacks; one generated tensor is appended along axis 0
        # per vector.
        Ws      = np.zeros(shape=(0, C_out, C_in), dtype=np.int8)
        Xs      = np.zeros(shape=(0, C_in), dtype=np.int8)
        Bs      = np.zeros(shape=(0, C_out), dtype=np.int32)
        shiftss = np.zeros(shape=(0, C_out), dtype=np.int16)
        scaless = np.zeros(shape=(0, C_out), dtype=np.int16)
        Ys      = np.zeros(shape=(0, C_out), dtype=np.int16)
        
        for i in range(vecs):
            # Each vector is written to its own "<case>.<index>.dat" file.
            W,X,B,shifts,scales,Y = test_case_fc_deepin_shallowout_lin(C_in, C_out, 
                    writefile=os.path.join(unit_test_dir, 
                        "test_data/fc_deepin_shallowout_lin_case{0}.{1}.dat".format(caseNum, i)))
            
            Ws      = np.append(Ws,      W[np.newaxis,...],      axis=0)
            Xs      = np.append(Xs,      X[np.newaxis,...],      axis=0)
            Bs      = np.append(Bs,      B[np.newaxis,...],      axis=0)
            shiftss = np.append(shiftss, shifts[np.newaxis,...], axis=0)
            scaless = np.append(scaless, scales[np.newaxis,...], axis=0)
            Ys      = np.append(Ys,      Y[np.newaxis,...],      axis=0)


        #These will be used to ensure we're not passing the test because files weren't
        # correctly loaded
        # The macro lists the first output element (Ys[:,0]) of every vector.
        ychk_str = ("#undef Y_CHECK\n#define Y_CHECK  " 
                    +  ",".join([str(x) for x in Ys[:,0]]) + "\n")

        # One header per case, written next to the .dat files.
        with open(os.path.join(unit_test_dir, 
                               "test_data/fc_deepin_shallowout_lin_case{0}.h".format(caseNum)), "w+"
                 ) as file:
                  file.write(ychk_str)

# conv2d_deepin_deepout_relu()

In [28]:
def conv2d_deepin_deepout_relu(K, B, X, shifts, scales):
    """Reference model of the deep-in/deep-out 2D convolution with ReLU.

    Pipeline: ``conv2d_s8(K, X) + B`` in 32 bits, ``vlsat_s16`` with ``shifts``,
    ReLU, subtraction of ``(1<<14)-1``, ``vlmul_s16`` with ``scales``, and a
    final reduction to 8 bits with ``vdepth8``.

    Returns:
        An int8 array.
    """
    acc16 = vlsat_s16(B + conv2d_s8(K, X), shifts)
    assert acc16.dtype == np.int16

    rectified = np.clip(acc16, a_min=0, a_max=None)  # ReLU
    recentred = rectified - ((1<<14)-1)

    out8 = vdepth8(vlmul_s16(recentred, scales))
    assert out8.dtype == np.int8
    return out8


def test_case_conv2d_deepin_deepout_relu(height, width, K_h, K_w, C_in, C_out, writefile=None):
    """Produce one random conv2d_deepin_deepout_relu() test case.

    Random int8 kernels and input are drawn.  The bias is the negated,
    integer-truncated per-channel mean of the convolution output, the shifts
    are derived from the largest per-channel magnitude of the biased output,
    and every scale is 0x4000.

    Args:
        height, width: spatial size of the input image.
        K_h, K_w: kernel size (the helpers require odd values).
        C_in, C_out: input / output channel counts.  Writing a file requires
            C_in to be a multiple of 32 and C_out a multiple of 16.
        writefile: optional path; when given, the re-ordered kernel, the split
            bias, X, shifts, scales and Y are written to it as raw bytes.

    Returns:
        The tuple (K, B, X, shifts, scales, Y) with K and B in their original
        (untransformed) layout.
    """

    def transformK(K):
        """Flatten K into the order [C_out group][row][col][C_in group][15 - a][s].

        ``a`` indexes the 16 output channels inside a group (stored reversed)
        and ``s`` the 32 input channels inside a group.
        """
        C_out,K_h,K_w,C_in = K.shape
        assert(C_out % 16 == 0)
        assert(C_in % 32 == 0)
        assert(K_h % 2 == 1)
        assert(K_w % 2 == 1)

        C_out_groups = C_out // 16
        C_in_groups  = C_in  // 32

        offsets = set()

        # K.size replaces np.product(K.shape); np.product was removed in NumPy 2.0.
        K_out = np.zeros(shape=K.size, dtype=np.int8)

        for q in range(C_out_groups):
            k_1 = C_in * 16 * K_h * K_w * q
            for r in range(K_h):
                k_2 = C_in * 16 * K_w * r
                for c in range(K_w):
                    k_3 = C_in * 16 * c
                    for w in range(C_in_groups):
                        k_4 = 32 * 16 * w
                        for a in range(16):
                            k_5 = 32 * (15-a)
                            for s in range(32):
                                k_6 = s
                                k = k_1 + k_2 + k_3 + k_4 + k_5 + k_6
                                assert(not (k in offsets)) #make sure we're not doubling up anything
                                offsets.add(k)
                                K_out[k] = K[16*q+a, r, c, 32*w+s]

        assert(len(offsets) == len(K_out)) #make sure we've set everything
        return K_out

    def transformB(B):
        """Split int32 biases into uint16 halves: row 0 = low halves, row 1 = high halves."""
        assert(B.dtype == np.int32)
        assert(B.ndim == 1)
        B_out = np.zeros(shape=(2,len(B)), dtype=np.uint16)
        B_out[1,:] = B >> 16
        B_out[0,:] = B      # the cast to uint16 keeps the low 16 bits
        return B_out

    K = rand_tensor_s8(C_out, K_h, K_w, C_in)
    X = rand_tensor_s8(height, width, C_in)

    # Bias: negated per-channel mean, so the biased output is roughly centred.
    tmp = conv2d_s8(K, X)
    B = -(np.mean(tmp, axis=(0,1))).astype(np.int32)

    tmp = tmp + B

    # Largest magnitude per output channel.
    tmp_min = np.min(tmp, axis=(0,1))
    tmp_max = np.max(tmp, axis=(0,1))
    tmp_max = np.max((tmp_max, np.abs(tmp_min)), axis=0)
    # Guard against an all-zero channel: log2(0) is -inf, whose cast to int16
    # is undefined.  Magnitudes >= 1 are unaffected.
    tmp_max = np.maximum(tmp_max, 1)
    shifts  = np.ceil(np.log2(tmp_max)).astype(np.int16) - 15

    scales = np.ones(C_out, dtype=np.int16) * 0x4000

    Y = conv2d_deepin_deepout_relu(K, B, X, shifts, scales)

    if writefile:
        # The file is raw bytes, so every dtype must be as expected.
        assert(K.dtype==np.int8);       assert(X.dtype==np.int8)
        assert(shifts.dtype==np.int16); assert(scales.dtype==np.int16); assert(Y.dtype==np.int8)

        # Transform before opening the file so a failed layout assertion does
        # not leave a truncated file behind.
        K_flat = transformK(K)
        B_split = transformB(B)

        with open(writefile, "wb+") as file:
            K_flat.tofile(file)
            B_split.tofile(file)
            X.tofile(file)
            shifts.tofile(file)
            scales.tofile(file)
            Y.tofile(file)

    return K,B,X,shifts,scales,Y

#Generate test vectors for conv2d_deepin_deepout_relu()
# FNAME is the stem of every generated file name.
FNAME = "conv2d_deepin_deepout_relu"
# This guard is True, so running the cell rewrites the files under unit_test_dir.
if True:
    thing = [ #caseNum, C_in, C_out, K_h, K_w, height, width, numVectors
#         (2, 32, 16, 1, 1, 4, 4, 10),
        (3, 32, 16, 3, 3, 4, 4, 10),
#         (4, 64, 32, 3, 3, 8, 8, 4),
    ]
    for (caseNum, C_in, C_out, K_h, K_w, height, width, vecs) in thing:
        
        # Zero-length stacks; one generated tensor is appended along axis 0
        # per vector.  NOTE: these are module-level names, and a later cell
        # reads Ks from the last case processed here.
        Ks      = np.zeros(shape=(0, C_out, K_h, K_w, C_in), dtype=np.int8)
        Xs      = np.zeros(shape=(0, height, width, C_in), dtype=np.int8)
        Bs      = np.zeros(shape=(0, C_out), dtype=np.int32)
        shiftss = np.zeros(shape=(0, C_out), dtype=np.int16)
        scaless = np.zeros(shape=(0, C_out), dtype=np.int16)
        Ys      = np.zeros(shape=(0, height, width, C_out), dtype=np.int8)
        
        for i in range(vecs):
            # Each vector is written to its own "<FNAME>_case<case>.<index>.dat" file.
            K,B,X,shifts,scales,Y = test_case_conv2d_deepin_deepout_relu(
                    height, width, K_h, K_w, C_in, C_out,
                    writefile=os.path.join(unit_test_dir, 
                        "test_data/{0}_case{1}.{2}.dat".format(FNAME, caseNum, i)))
            Ks      = np.append(Ks,      K[np.newaxis,...],      axis=0)
            Xs      = np.append(Xs,      X[np.newaxis,...],      axis=0)
            Bs      = np.append(Bs,      B[np.newaxis,...],      axis=0)
            shiftss = np.append(shiftss, shifts[np.newaxis,...], axis=0)
            scaless = np.append(scaless, scales[np.newaxis,...], axis=0)
            Ys      = np.append(Ys,      Y[np.newaxis,...],      axis=0)


        #These will be used to ensure we're not passing the test because files weren't
        # correctly loaded
        # The macro lists the first output element (Ys[:,0,0,0]) of every vector.
        ychk_str = ("#undef Y_CHECK\n#define Y_CHECK  " 
                    +  ",".join([str(x) for x in Ys[:,0,0,0]]) + "\n")

        # One header per case, written next to the .dat files.
        with open(os.path.join(unit_test_dir, 
                               "test_data/{0}_case{1}.h".format(FNAME, caseNum)), "w+"
                 ) as file:
                  file.write(ychk_str)

    print("Done.")

Done.


In [23]:


def transformK(K):
    """Flatten a kernel tensor into the [C_out group][row][col][C_in group][15 - a][s] order.

    ``K`` is indexed as (C_out, K_h, K_w, C_in).  Output channels are taken in
    groups of 16 and input channels in groups of 32; ``a`` is the position of
    an output channel inside its group (stored in reverse order) and ``s`` the
    position of an input channel inside its group.  Element
    ``K[16*q + a, r, c, 32*w + s]`` lands at flat offset

        C_in*16*K_h*K_w*q + C_in*16*K_w*r + C_in*16*c + 32*16*w + 32*(15 - a) + s

    Args:
        K: array of shape (C_out, K_h, K_w, C_in) with C_out % 16 == 0,
            C_in % 32 == 0 and odd K_h, K_w.

    Returns:
        A 1-D int8 array with ``K.size`` elements.
    """
    K = np.asarray(K)
    C_out, K_h, K_w, C_in = K.shape
    assert(C_out % 16 == 0)
    assert(C_in % 32 == 0)
    assert(K_h % 2 == 1)
    assert(K_w % 2 == 1)

    # Expose the group structure as axes (q, a, r, c, w, s).
    grouped = K.reshape(C_out // 16, 16, K_h, K_w, C_in // 32, 32)
    # Reverse the 16 channels inside each output group, then move that axis
    # between the C_in-group axis and the innermost axis: (q, r, c, w, 15-a, s).
    reordered = grouped[:, ::-1].transpose(0, 2, 3, 4, 1, 5)

    # This is a pure permutation, so every element is written exactly once.
    # It replaces the element-by-element loop whose buffer was sized with
    # np.product, which was removed in NumPy 2.0.
    K_out = np.ascontiguousarray(reordered, dtype=np.int8).reshape(-1)
    assert(K_out.size == K.size)
    return K_out

# Sanity check on the first kernel generated by the conv2d_deepin_deepout_relu
# cell: this relies on that cell having run (it defines ``Ks``) and rebinds
# the module-level name ``K`` to the flattened result.
K = transformK(Ks[0])

print(K.shape)


(4608,)


# conv2d_shallowin_deepout_relu()

In [9]:


def conv2d_shallowin_deepout_relu(K, B, X, shifts, scales):
    """Reference model of the shallow-in/deep-out 2D convolution with ReLU.

    Pipeline: ``conv2d_s8(K, X) + B`` in 32 bits, ``vlsat_s16`` with ``shifts``,
    ReLU, subtraction of ``(1<<14)-1``, ``vlmul_s16`` with ``scales``, and a
    final reduction to 8 bits with ``vdepth8``.

    Args:
        K: kernels indexed as (C_out, K_h, K_w, C_in) with C_out % 16 == 0,
            C_in == 4, odd K_h, and odd K_w < 8.
        B: per-output-channel bias added to the convolution result.
        X: input image passed to ``conv2d_s8``.
        shifts: per-channel shifts for ``vlsat_s16``.
        scales: per-channel scales for ``vlmul_s16``.

    Returns:
        An int8 array.

    Raises:
        AssertionError: if the shape of K violates the constraints above.
    """
    # This line was the bare expression ``C_out, K_h, K_w, C_in``, so the
    # checks below read whatever notebook globals had those names, or raised
    # NameError on a fresh kernel.
    C_out, K_h, K_w, C_in = K.shape
    assert(C_out % 16 == 0)
    assert(C_in == 4)
    assert(K_h % 2 == 1)
    assert((K_w < 8) and (K_w % 2 == 1))
    y = B + conv2d_s8(K, X)
    y = vlsat_s16(y, shifts)
    assert(y.dtype == np.int16)
    y = np.clip(y, a_min=0, a_max=None) # ReLU
    y = y - ((1<<14)-1)
    y = vlmul_s16(y, scales)
    y = vdepth8(y)
    assert(y.dtype == np.int8)

    return y
    



def test_case_conv2d_shallowin_deepout_relu(height, width, K_h, K_w, C_in, C_out, writefile=None):
    """Produce one random conv2d_shallowin_deepout_relu() test case.

    Random int8 kernels and input are drawn.  The bias is the negated,
    integer-truncated per-channel mean of the convolution output, the shifts
    are derived from the largest per-channel magnitude of the biased output,
    and every scale is 0x4000.

    Args:
        height, width: spatial size of the input image.
        K_h, K_w: kernel size.
        C_in: number of input channels; must be 4.
        C_out: number of output channels.
        writefile: optional path; when given, the padded and re-ordered
            kernel, the split bias, X, shifts, scales and Y are written to it
            as raw bytes.

    Returns:
        The tuple (K, B, X, shifts, scales, Y) with K and B in their original
        (untransformed) layout.
    """
    assert(C_in == 4)

    def transformK(K):
        # Zero-pad the kernel width up to 8 columns, then reverse the order of
        # the output channels inside every group of 16.
        n_out, k_rows, k_cols, n_in = K.shape
        assert(n_out % 16 == 0); assert(k_cols <= 8);
        zero_cols = np.zeros(shape=(n_out, k_rows, 8-k_cols, n_in), dtype=np.int8)
        widened = np.concatenate((K, zero_cols), axis=2)
        by_group = widened.reshape((n_out//16, 16) + widened.shape[1:])
        return by_group[:, ::-1].reshape(widened.shape)

    def transformB(B):
        # Split int32 biases into uint16 halves: row 0 = low, row 1 = high.
        assert(B.dtype == np.int32)
        assert(B.ndim == 1)
        halves = np.zeros(shape=(2,len(B)), dtype=np.uint16)
        halves[1,:] = B >> 16
        halves[0,:] = B
        return halves

    # The order of these draws determines the result under a fixed seed.
    K = rand_tensor_s8(C_out, K_h, K_w, C_in)
    X = rand_tensor_s8(height, width, C_in)

    # Bias: negated per-channel mean, truncated to an integer before negation.
    conv_out = conv2d_s8(K, X)
    truncated_mean = np.mean(conv_out, axis=(0,1)).astype(np.int32)
    B = -truncated_mean

    biased = conv_out + B

    # Largest magnitude per output channel sets the shift.
    lowest = np.min(biased, axis=(0,1))
    highest = np.max(biased, axis=(0,1))
    peak = np.max((highest, np.abs(lowest)), axis=0)
    shifts = np.ceil(np.log2(peak)).astype(np.int16) - 15

    scales = np.ones(C_out, dtype=np.int16) * 0x4000

    Y = conv2d_shallowin_deepout_relu(K, B, X, shifts, scales)

    if writefile:
        with open(writefile, "wb+") as file:
            # The file is raw bytes, so every dtype must be as expected.
            assert(K.dtype==np.int8);       assert(X.dtype==np.int8);
            assert(shifts.dtype==np.int16); assert(scales.dtype==np.int16); assert(Y.dtype==np.int8)
            transformK(K).tofile(file)
            transformB(B).tofile(file)
            for tensor in (X, shifts, scales, Y):
                tensor.tofile(file)

    return K,B,X,shifts,scales,Y


#Generate test vectors for conv2d_shallowin_deepout_relu()
# FNAME is the stem of every generated file name.  It is assigned even though
# the generation itself is disabled by the guard below.
FNAME = "conv2d_shallowin_deepout_relu"
# The guard is False, so no files are written when the cell runs; change it
# to True to (re)write the files under unit_test_dir.
if False:
    thing = [ #caseNum, C_in, C_out, K_h, K_w, height, width, numVectors
        (2, 4, 16, 1, 1, 2, 2, 10),
        (3, 4, 16, 3, 3, 4, 4, 10),
        (4, 4, 32, 3, 3, 8, 8, 4),
    ]
    for (caseNum, C_in, C_out, K_h, K_w, height, width, vecs) in thing:
        
        # Zero-length stacks; one generated tensor is appended along axis 0
        # per vector.
        Ks      = np.zeros(shape=(0, C_out, K_h, K_w, C_in), dtype=np.int8)
        Xs      = np.zeros(shape=(0, height, width, C_in), dtype=np.int8)
        Bs      = np.zeros(shape=(0, C_out), dtype=np.int32)
        shiftss = np.zeros(shape=(0, C_out), dtype=np.int16)
        scaless = np.zeros(shape=(0, C_out), dtype=np.int16)
        Ys      = np.zeros(shape=(0, height, width, C_out), dtype=np.int8)
        
        for i in range(vecs):
            # Each vector is written to its own "<FNAME>_case<case>.<index>.dat" file.
            K,B,X,shifts,scales,Y = test_case_conv2d_shallowin_deepout_relu(
                    height, width, K_h, K_w, C_in, C_out,
                    writefile=os.path.join(unit_test_dir, 
                        "test_data/{0}_case{1}.{2}.dat".format(FNAME, caseNum, i)))
            Ks      = np.append(Ks,      K[np.newaxis,...],      axis=0)
            Xs      = np.append(Xs,      X[np.newaxis,...],      axis=0)
            Bs      = np.append(Bs,      B[np.newaxis,...],      axis=0)
            shiftss = np.append(shiftss, shifts[np.newaxis,...], axis=0)
            scaless = np.append(scaless, scales[np.newaxis,...], axis=0)
            Ys      = np.append(Ys,      Y[np.newaxis,...],      axis=0)


        #These will be used to ensure we're not passing the test because files weren't
        # correctly loaded
        # The macro lists the first output element (Ys[:,0,0,0]) of every vector.
        ychk_str = ("#undef Y_CHECK\n#define Y_CHECK  " 
                    +  ",".join([str(x) for x in Ys[:,0,0,0]]) + "\n")

        # One header per case, written next to the .dat files.
        with open(os.path.join(unit_test_dir, 
                               "test_data/{0}_case{1}.h".format(FNAME, caseNum)), "w+"
                 ) as file:
                  file.write(ychk_str)

# Printed unconditionally, i.e. also when the guard above is False.
print("Done.")

Done.
