In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import math
import numpy as np
from scipy.special import softmax

In [None]:
def test_error(gt, test):
    MPE_error = np.mean(np.abs((gt - test)/gt))
    print("Mean-percentage error is:{}".format(MPE_error))
    return MPE_error


class Attention():
    """Single-head scaled dot-product self-attention written with NumPy.

    Two evaluation paths are provided over the same weights:

    * ``forward``    - reference path using ``scipy.special.softmax``.
    * ``my_forward`` - approximate path whose softmax is built on a truncated
      Taylor series of ``exp`` (see ``my_exp``).

    Parameters
    ----------
    d_model : int
        Model width. Also used as ``d_k`` for the ``1/sqrt(d_k)`` score scaling.
    weight_q, weight_k, weight_v : array_like
        Projection matrices applied as ``seq_in @ weight``.
    CORDIC_value, CORDIC_shift : sequence
        Stored on the instance; ``len(CORDIC_value)`` is kept as ``CORDIC_size``.
        No method of this class reads them otherwise.
    """

    def __init__(self, d_model, weight_q, weight_k, weight_v, CORDIC_value, CORDIC_shift):
        super().__init__()

        self.d_model = d_model
        self.d_k = d_model

        self.weight_q = weight_q
        self.weight_k = weight_k
        self.weight_v = weight_v

        self.CORDIC_value = CORDIC_value
        self.CORDIC_shift = CORDIC_shift
        self.CORDIC_size = len(self.CORDIC_value)

    def _scaled_scores(self, seq_in):
        """Project ``seq_in`` to q/k/v and return ``(q @ k^T / sqrt(d_k), v)``."""
        v = np.matmul(seq_in, self.weight_v)
        q = np.matmul(seq_in, self.weight_q)
        k = np.matmul(seq_in, self.weight_k)

        # np.transpose reverses *all* axes, so this is q @ k.T only for 2-D input.
        # NOTE(review): presumably seq_in is (N, d_model) - confirm before batching.
        scores = np.matmul(q, np.transpose(k)) / math.sqrt(self.d_k)
        return scores, v

    def forward(self, seq_in):
        """Reference attention output using ``scipy.special.softmax`` on the last axis."""
        scores, v = self._scaled_scores(seq_in)
        weights = softmax(scores, axis=-1)
        return np.matmul(weights, v)

    def my_forward(self, seq_in):
        """Approximate attention output using ``my_softmax`` instead of the exact softmax."""
        scores, v = self._scaled_scores(seq_in)
        weights = self.my_softmax(scores)
        return np.matmul(weights, v)

    def my_softmax(self, x):
        """Softmax over the last axis with ``exp`` replaced by ``my_exp``.

        No max-subtraction is applied: the Taylor approximation is only accurate
        near zero, so shifting the inputs would degrade it.
        """
        numerator = self.my_exp(x)
        denominator = np.sum(numerator, axis=-1, keepdims=True)
        return numerator / denominator

    def my_exp(self, x, order=2):
        """Approximate ``exp(x)`` element-wise by its Taylor polynomial about 0.

        Computes ``sum_{k=0..order} x**k / k!``; the default ``order=2`` gives
        ``1 + x + x**2 / 2``.

        Parameters
        ----------
        x : array_like
            Input values; the approximation is only good for small ``|x|``.
        order : int, optional
            Highest power kept in the series (default 2).

        Raises
        ------
        ValueError
            If ``order`` is negative.
        """
        if order < 0:
            raise ValueError("order must be non-negative")

        x = np.asarray(x)
        result = np.ones(x.shape)   # k = 0 term

        power = x                   # x**k for the term about to be added
        factorial = 1               # k!  for the term about to be added
        for k in range(1, order + 1):
            result = result + power / factorial

            # Advance to the (k+1)-th term: (k+1)! = k! * (k+1).
            # The earlier code multiplied by k here, which left the quadratic
            # term as x**2 / 1 instead of x**2 / 2.
            factorial = factorial * (k + 1)
            power = power * x

        return result

In [None]:
# Fix the global NumPy RNG so every run draws the same input and weights.
np.random.seed(123)

# Problem size: N rows in the input, each of width d_model.
N, d_model = 512, 16

# All random draws are standard-normal samples shrunk by this factor.
scaling = 0.2

# Draw order matters for reproducibility: input first, then V, Q, K.
input = scaling * np.random.randn(N, d_model)
weight_V, weight_Q, weight_K = (
    scaling * np.random.randn(d_model, d_model) for _ in range(3)
)

In [None]:
# No visible cell defines CORDIC_value / CORDIC_shift, so passing those bare
# names raises NameError on a fresh kernel. Attention only stores the two
# arguments and takes len() of CORDIC_value, so empty placeholders keep this
# comparison runnable without changing its results.
# NOTE(review): pass the real lookup tables here if they are defined elsewhere.
attention_test = Attention(
    d_model=d_model,
    weight_q=weight_Q,
    weight_k=weight_K,
    weight_v=weight_V,
    CORDIC_value=[],
    CORDIC_shift=[],
)

# Reference output (exact softmax) versus the approximate-softmax output.
ground_truth = attention_test.forward(input)
print(ground_truth)
test_results = attention_test.my_forward(input)

# Trailing semicolon suppresses the notebook echo of the returned error value.
test_error(ground_truth, test_results);

[511.59383696 511.99304584 511.04492792 513.58406934 512.4867975
 512.83540829 512.12017884 511.84279406 512.28426967 511.39959124
 512.31362442 510.75436138 511.92393602 513.07971458 511.81322016
 512.70003005 512.91905989 511.93328122 513.09315381 512.38963593
 512.54243933 511.72148209 511.85016611 511.88896268 511.97069403
 512.30226771 511.78670653 512.49910708 513.25336881 512.88931262
 512.14520658 512.91228598 511.51920618 512.87374018 510.72459276
 511.85271325 511.44335052 511.8663398  512.5202836  511.29727495
 512.62050218 512.43629667 512.38281897 512.33848973 512.00785514
 513.35556601 512.50344871 512.35718475 512.52804975 512.00536303
 512.54694372 512.28188911 512.4017242  512.50272981 512.11788796
 512.07044548 511.32647834 511.35549743 513.82571695 512.21543475
 512.49148429 511.98205496 512.3225741  512.24693053 512.2629252
 511.51325135 512.50460245 512.59856732 513.5469877  511.6423293
 511.7871518  512.28512553 511.86077417 511.80255993 512.84462425
 512.15268896

In [None]:
# Reference output first, approximate output second, one after the other.
print(ground_truth, test_results, sep="\n")

[[-0.00513203 -0.00574538 -0.00588638 ...  0.00099086 -0.0016811
  -0.00229402]
 [-0.00187303 -0.00549399 -0.00813977 ...  0.00099874 -0.00201036
  -0.00389557]
 [-0.00445093 -0.00489659 -0.00661451 ...  0.00067789 -0.00161097
  -0.00157971]
 ...
 [-0.00144164 -0.00476471 -0.00515074 ...  0.00082549 -0.00050885
  -0.00136442]
 [-0.0024371  -0.00368435 -0.00587407 ...  0.00158643 -0.00139262
  -0.00281512]
 [-0.00129878 -0.00559677 -0.00611969 ... -0.00015669 -0.00040985
  -0.0008655 ]]
[[-0.0051274  -0.00573758 -0.00588529 ...  0.00099184 -0.00167848
  -0.00228846]
 [-0.00187908 -0.00548844 -0.00813061 ...  0.00100399 -0.00200292
  -0.00389221]
 [-0.00444369 -0.00488737 -0.00660658 ...  0.00068167 -0.00160711
  -0.00158013]
 ...
 [-0.00144436 -0.00476672 -0.00515261 ...  0.00082421 -0.00050757
  -0.00136146]
 [-0.00244308 -0.0036869  -0.00587397 ...  0.00158726 -0.00139893
  -0.00280944]
 [-0.0013061  -0.00559544 -0.00611693 ... -0.00015428 -0.0004069
  -0.00086335]]


In [None]:
# Dump the input, the three weight matrices and the reference output as text
# files in the working directory; N is substituted into each file name.
arrays_by_file = {
    'input{}.input.dat': input,
    'input{}.weight_V.dat': weight_V,
    'input{}.weight_Q.dat': weight_Q,
    'input{}.weight_K.dat': weight_K,
    'output{}.output.dat': ground_truth,
}
for filename_template, array in arrays_by_file.items():
    np.savetxt(filename_template.format(N), array)