In [1]:
import numpy as np
import os
import sys
import math
import torch
import time
import re
import cv2
from pathlib import Path
from tqdm import tqdm
from matplotlib import pyplot as plt
from torchvision import transforms
from icecream import ic
from itertools import repeat
# import sympy as sy
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Transformer实现

## self-attention实现

In [2]:
import tensorflow as tf

In [4]:
x = [
    [1,0,1,0],
    [0,2,0,2],
    [1,1,1,1]
]
x = tf.convert_to_tensor(x, dtype=tf.float32)
x

2022-04-12 07:43:00.998950: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-12 07:43:10.720747: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 41874 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:89:00.0, compute capability: 8.6
2022-04-12 07:43:10.721664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 44769 MB memory:  -> device: 1, name: NVIDIA RTX A6000, pci bus id: 0000:b1:00.0, compute capability: 8.6


<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[1., 0., 1., 0.],
       [0., 2., 0., 2.],
       [1., 1., 1., 1.]], dtype=float32)>

In [9]:
w_query = [
    [1, 0, 1],
    [1, 0, 0],
    [0, 0, 0],
    [0, 1, 0]
]
w_key = [
    [0, 0, 1],
    [1, 1, 0],
    [0, 1, 0],
    [1, 1, 0]
]
w_value = [
    [0, 2, 1],
    [1, 2, 0],
    [0, 1, 2],
    [1, 1, 4]
]

w_query = tf.convert_to_tensor(w_query, dtype=tf.float32)
w_key = tf.convert_to_tensor(w_key, dtype=tf.float32)
w_value = tf.convert_to_tensor(w_value, dtype=tf.float32)
w_query
w_key
w_value

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[1., 0., 1.],
       [1., 0., 0.],
       [0., 0., 0.],
       [0., 1., 0.]], dtype=float32)>

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 0., 1.],
       [1., 1., 0.],
       [0., 1., 0.],
       [1., 1., 0.]], dtype=float32)>

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0., 2., 1.],
       [1., 2., 0.],
       [0., 1., 2.],
       [1., 1., 4.]], dtype=float32)>

In [12]:
query = tf.matmul(x, w_query)
key = tf.matmul(x, w_key)
value = tf.matmul(x, w_value)
query
key
value


<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[1., 0., 1.],
       [2., 2., 0.],
       [2., 1., 1.]], dtype=float32)>

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0., 1., 1.],
       [4., 4., 0.],
       [2., 3., 1.]], dtype=float32)>

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0., 3., 3.],
       [4., 6., 8.],
       [2., 6., 7.]], dtype=float32)>

In [14]:
attn_score = tf.matmul(query, key, transpose_b=True)
attn_score

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[ 1.,  4.,  3.],
       [ 2., 16., 10.],
       [ 2., 12.,  8.]], dtype=float32)>

In [17]:
attn_score_softmax=tf.nn.softmax(attn_score, axis=1)
attn_score_softmax

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[3.5119023e-02, 7.0538449e-01, 2.5949648e-01],
       [8.2947201e-07, 9.9752659e-01, 2.4726214e-03],
       [4.4581368e-05, 9.8196995e-01, 1.7985407e-02]], dtype=float32)>

In [20]:
weighted_value = value[:None] * tf.transpose(attn_score_softmax[:, :None])
weighted_value

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.0000000e+00, 2.4884162e-06, 1.3374411e-04],
       [2.8215380e+00, 5.9851594e+00, 7.8557596e+00],
       [5.1899296e-01, 1.4835728e-02, 1.2589785e-01]], dtype=float32)>

In [23]:
weighted_value = value * tf.transpose(attn_score_softmax)
weighted_value

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.0000000e+00, 2.4884162e-06, 1.3374411e-04],
       [2.8215380e+00, 5.9851594e+00, 7.8557596e+00],
       [5.1899296e-01, 1.4835728e-02, 1.2589785e-01]], dtype=float32)>

In [24]:
value
attn_score_softmax

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0., 3., 3.],
       [4., 6., 8.],
       [2., 6., 7.]], dtype=float32)>

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[3.5119023e-02, 7.0538449e-01, 2.5949648e-01],
       [8.2947201e-07, 9.9752659e-01, 2.4726214e-03],
       [4.4581368e-05, 9.8196995e-01, 1.7985407e-02]], dtype=float32)>

In [26]:
output = tf.reduce_sum(weighted_value, axis=0)
output

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([3.3405309, 5.9999976, 7.981791 ], dtype=float32)>