In [None]:
import torch

import torch.nn as nn
import numpy as np
# Create new tensors on the GPU by default, but only when one is actually
# usable: selecting the CUDA tensor type without a GPU breaks every later cell.
if torch.cuda.is_available():
    if hasattr(torch, "set_default_device"):
        # Preferred API on recent PyTorch; set_default_tensor_type is deprecated.
        torch.set_default_device("cuda")
    else:
        # Older PyTorch releases only offer the legacy tensor-type switch.
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

# Dataset

In [1]:
import torch
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets import Flowers102
from torchvision.transforms import Normalize, Compose, ToTensor

from itertools import islice

# ToTensor yields float images in [0, 1]; Normalize(0.5, 0.5) computes
# (x - 0.5) / 0.5, which rescales them to [-1, 1].
preprocess = Compose([ToTensor(), Normalize(0.5, 0.5)])
dataset = Flowers102("data/", transform=preprocess, download=True)

# batch_size stays at 1: the raw images differ in size (see the shapes printed
# below), so they cannot be stacked into a larger batch without resizing first.
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, drop_last=True)

# Peek at the first two batches only; islice stops without fetching a third one.
for X, y in islice(dataloader, 2):
    print(X.shape)

torch.Size([1, 3, 500, 781])
torch.Size([1, 3, 500, 752])


# Checking conv dimensions

In [None]:
# Round-trip shape check: a strided Conv2d shrinks the spatial size and a
# mirrored ConvTranspose2d is expected to restore the original shape.

# Parameters of the down-sampling convolution
n = 32  # Size of the square matrix
in_channels = 1  # Number of input channels
out_channels = 1  # Number of output channels
kernel_size = 3  # Size of the convolutional kernel
stride = 2  # Stride of the convolution
padding = 1  # Padding of the convolution

# Create a random input tensor of shape (batch, channels, height, width)
input_tensor = torch.randn(1, in_channels, n, n)
print(f"Shape of the input tensor: {input_tensor.size()}")

# Down-sample: output side is floor((n + 2 * padding - kernel_size) / stride) + 1
down_conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
downed_tensor = down_conv(input_tensor)
print(f"Shape after down: {downed_tensor.size()}")

# Parameters of the up-sampling (transposed) convolution.
# It consumes the down-conv output, so its channel counts are the down-conv's
# channel counts swapped.
t_in_channels = out_channels  # Channels produced by down_conv
t_out_channels = in_channels  # Channels of the original input
# Output side is (m - 1) * stride - 2 * padding + kernel; with stride 2 and
# padding 1 a kernel of kernel_size + 1 = 4 gives exactly 2 * m.
t_kernel_size = kernel_size + 1
t_stride = stride
t_padding = padding

up_conv = nn.ConvTranspose2d(t_in_channels, t_out_channels, t_kernel_size, t_stride, t_padding)
upped_tensor = up_conv(downed_tensor)
# Reuse the computed tensor rather than running the layer a second time.
print(f"Shape after up: {upped_tensor.size()}")

print(f"Shape matched: {input_tensor.size() == upped_tensor.size()}")

# Checking MHA implementation using einsum

In [None]:
# real impl
# Shape check for attention over a feature map using nn.MultiheadAttention:
# every channel plane is flattened to an (h * w)-long vector and used as a token.
n = 8  # batch size
c = 4  # channels, i.e. tokens per sample
h = 32
w = h

embed_dim = h * w
num_heads = 4

X = torch.randn(n, c, h, w)
print(X.shape)
# batch_first=True makes the layer read inputs as (batch, tokens, embed); with
# the default layout the leading batch axis would be used as the sequence axis.
MHA = nn.MultiheadAttention(embed_dim=embed_dim, num_heads=num_heads, batch_first=True)
conv = nn.Conv2d(in_channels=c, out_channels=3 * c, kernel_size=3, padding=1)
# The conv emits 3 * c channels. Flatten the spatial dims, then cut the channel
# axis into three contiguous groups of c channels, one each for Q, K and V.
# (Reshaping to a trailing axis of size 3 would interleave neighbouring pixels.)
Q, K, V = conv(X).flatten(2).chunk(3, dim=1)
# MultiheadAttention returns (output, attention weights); only the output is kept.
Y = MHA(Q, K, V)[0]
# reshape is not in-place, so the result has to be assigned back to Y.
Y = Y.reshape(X.shape)
Y.shape

## Simple single-head scaled self-attention

In [None]:
# Single-head scaled dot-product self-attention without learned parameters.
n = 30 # data points
d = 32 # dim of data
heads = 1
D_k = d # dim of query keys

X = torch.randn(n, d)
# No projections yet: queries, keys and values are all the raw data.
Q = K = V = X
# Scaled dot-product attention divides the logits by sqrt(D_k), not by D_k;
# each row of `attn` is a softmax over the keys and therefore sums to 1.
attn = torch.softmax(Q.matmul(K.T) / (D_k ** 0.5), dim=1)
# Each output row is the attention-weighted average of the value rows.
Y = attn.matmul(V)
print(Y)

## With learned params

In [None]:
n = 30 # data points
d = 32 # dim of data
heads = 1
D_k = d # dim of query keys

class SHSelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention with learned projections.

    The input is projected to queries, keys and values by three linear layers,
    and the output is ``softmax(Q K^T / sqrt(key_dim)) V``.

    Args:
        heads: Number of attention heads. Only ``1`` is supported by this
            single-head module.
        key_dim: Size of the query/key/value vectors, and of the output.
        in_dim: Size of the input feature vectors. Defaults to ``key_dim``.

    Raises:
        ValueError: If ``heads`` is not 1.
    """

    def __init__(self, heads, key_dim, in_dim=None):
        super().__init__()
        if heads != 1:
            raise ValueError(f"SHSelfAttention is single-head, got heads={heads}")
        if in_dim is None:
            in_dim = key_dim
        self.heads = heads
        self.key_dim = key_dim
        self.query = nn.Linear(in_dim, key_dim)
        self.key = nn.Linear(in_dim, key_dim)
        self.value = nn.Linear(in_dim, key_dim)

    def forward(self, x):
        """Apply self-attention to ``x`` of shape ``(..., tokens, in_dim)``.

        Returns a tensor of shape ``(..., tokens, key_dim)``.
        """
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)
        # Swap the last two axes so the product is (..., tokens, tokens).
        scores = q.matmul(k.transpose(-2, -1)) / (self.key_dim ** 0.5)
        # Normalize over the keys so every query's weights sum to 1.
        attn = torch.softmax(scores, dim=-1)
        return attn.matmul(v)

# Embedding dims

In [None]:
# Shape check: add a per-sample embedding to a feature map as a per-channel offset.
n, c, h, w = 4, 3, 4, 4  # batch, channels, height, width
emb_dim = 5

# Projects each embedding vector to one value per channel.
linear = nn.Linear(in_features=emb_dim, out_features=c)

x = torch.randn(n, c, h, w)
emb = torch.randn(n, emb_dim)

# (n, c) -> (n, c, 1, 1) so the projection broadcasts over height and width.
(x + linear(emb)[:, :, None, None]).shape

In [None]:
import json

# NOTE(review): json.dumps serializes the path string itself (it adds the JSON
# quotes); it does not read the file. If the goal is to load the config,
# json.load on the opened file is presumably what is wanted - confirm.
a_json = json.dumps("/mnt/meg/vishravi/diffusion/src/configs/attn.json")
# Display the serialized string as the cell output.
a_json