In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline
# Load the training names, one per line. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection.
with open('../../names.txt', 'r') as names_file:
  words = names_file.read().splitlines()

# Character vocabulary: every distinct character seen in the names, in sorted order,
# mapped to indices 1..N. Index 0 is reserved for the '.' marker.
chars = sorted(set(''.join(words)))
stoi = {s: i + 1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()} # inverse lookup: index -> character
vocab_size = len(itos)
block_size = 3 # context length: how many characters do we take to predict the next one?

def build_dataset(words):
  """Turn a list of words into (context, next-character) training tensors.

  Each word is terminated with '.', and a window holding the `block_size` most
  recent character indices (all 0 at the start of a word) slides over it. Every
  step contributes one row to X (the window) and one entry to Y (the index of
  the character that follows that window).

  Reads the module-level `stoi` mapping and `block_size`. Prints the shapes of
  both tensors and returns the pair (X, Y).
  """
  inputs, targets = [], []

  for word in words:
    window = [0] * block_size  # all-zero context at the start of every word
    for char in word + '.':
      target = stoi[char]
      inputs.append(window)
      targets.append(target)
      # Slide the window: the slice makes a fresh list, so the row already
      # stored in `inputs` is not mutated by the append below.
      window = window[1:]
      window.append(target)

  X, Y = torch.tensor(inputs), torch.tensor(targets)
  print(X.shape, Y.shape)
  return X, Y

import random

# Shuffle with a fixed seed so the split below is the same on every fresh run.
# The shuffle is in place: re-running this cell without reloading `words`
# shuffles the already-shuffled list again.
random.seed(42)
random.shuffle(words)

# Cut points at 80% and 90% of the word list.
n1, n2 = (int(frac * len(words)) for frac in (0.8, 0.9))

Xtr, Ytr = build_dataset(words[:n1])      # first 80% of the words
Xdev, Ydev = build_dataset(words[n1:n2])  # next 10%
Xte, Yte = build_dataset(words[n2:])      # remaining 10%

In [None]:
n_embd = 10    # width of each character embedding vector
n_hidden = 200 # number of units in the MLP hidden layer

g = torch.Generator().manual_seed(2147483647) # fixed seed for reproducibility

# Embedding table and the two weight matrices, drawn from `g` in this exact order.
# W1 is scaled by (5/3) and then divided by sqrt(fan_in), fan_in = n_embd * block_size.
# The hidden layer has no bias term of its own (b1 is intentionally not created).
C = torch.randn((vocab_size, n_embd), generator=g)
W1 = torch.randn((n_embd * block_size, n_hidden), generator=g)
W1 = W1 * (5/3) / (n_embd * block_size)**0.5
W2 = torch.randn((n_hidden, vocab_size), generator=g) * 0.01
b2 = torch.randn(vocab_size, generator=g) * 0 # output bias starts at zero

# BatchNorm: trainable gain/bias, plus running statistics that are kept outside
# the `parameters` list below.
bngain, bnbias = torch.ones((1, n_hidden)), torch.zeros((1, n_hidden))
bnmean_running, bnstd_running = torch.zeros((1, n_hidden)), torch.ones((1, n_hidden))

parameters = [C, W1, W2, b2, bngain, bnbias]
print(sum(p.numel() for p in parameters)) # total number of scalars being trained
for p in parameters:
  p.requires_grad_()

In [None]:
max_steps = 200000
batch_size = 32
lossi = []

# Train for the full schedule. The loop bound used to be a hard-coded 10, which left
# max_steps unused except in the progress message and made both the learning-rate
# decay (step 100000) and the every-10000-steps log line below unreachable.
for i in range(max_steps):
  
  # minibatch construct: draw batch_size random row indices from the training set
  ix = torch.randint(0, Xtr.shape[0], (batch_size,), generator=g)
  Xb, Yb = Xtr[ix], Ytr[ix] # batch X,Y
  
  # forward pass
  emb = C[Xb] # embed the characters into vectors
  embcat = emb.view(emb.shape[0], -1) # concatenate the vectors
  # Linear layer (no bias term)
  hpreact = embcat @ W1 # hidden layer pre-activation
  # BatchNorm layer
  # -------------------------------------------------------------
  # normalise each hidden unit with the statistics of the current batch
  bnmeani = hpreact.mean(0, keepdim=True)
  bnstdi = hpreact.std(0, keepdim=True)
  hpreact_n = bngain * (hpreact - bnmeani) / bnstdi + bnbias
  
  # exponential moving average of the batch statistics, kept out of the autograd graph
  with torch.no_grad():
    bnmean_running = 0.999 * bnmean_running + 0.001 * bnmeani
    bnstd_running = 0.999 * bnstd_running + 0.001 * bnstdi
  # -------------------------------------------------------------
  # Non-linearity
  h = torch.tanh(hpreact_n) # hidden layer
  logits = h @ W2 + b2 # output layer
  loss = F.cross_entropy(logits, Yb) # loss function
  
  # backward pass: clear stale gradients, then backpropagate
  for p in parameters:
    p.grad = None
  loss.backward()
  
  # update: plain SGD step
  lr = 0.1 if i < 100000 else 0.01 # step learning rate decay
  for p in parameters:
    p.data += -lr * p.grad

  # track stats
  if i % 10000 == 0: # print every once in a while
    print(f'{i:7d}/{max_steps:7d}: {loss.item():.4f}')
  lossi.append(loss.log10().item()) # log10 of the loss, one entry per step

In [None]:
# Display the current value of the BatchNorm bias tensor.
bnbias

In [None]:
# Largest and smallest pre-activation in row 0 of hpreact and of hpreact1.
# NOTE(review): hpreact1 is not assigned anywhere in the visible notebook, so this
# raises NameError on a fresh kernel unless it is defined elsewhere — confirm.
max(hpreact[0]),min(hpreact[0]),max(hpreact1[0]),min(hpreact1[0])

In [None]:
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# Your code to get hprev, h, logits, and probs
# ...

# Convert the tensors to NumPy and flatten them for histogram plotting
hprev_numpy = hpreact.detach().numpy().flatten()
# NOTE(review): hpreact1 is not assigned anywhere in the visible notebook — confirm where it comes from.
hprev1_numpy = hpreact1.detach().numpy().flatten()
h_mean = hpreact.mean(0, keepdim=True).detach().numpy().flatten()                  # per-column mean over the batch
h_mean_sub = (hpreact - hpreact.mean(0, keepdim=True)).detach().numpy().flatten()  # mean-centred pre-activations

# Create a figure with 2x2 grid of axes
fig, axs = plt.subplots(2, 2, figsize=(20, 16))


def _hist_panel(ax, values, title):
  """Draw a 50-bin histogram of `values` on `ax` with the shared axis labels."""
  ax.hist(values, bins=50, color='blue', alpha=0.7)
  ax.set_title(title)
  ax.set_xlabel('Value')
  ax.set_ylabel('Frequency')


# The two top panels used to share one title even though the second plots hpreact1;
# each panel now names the data it shows.
_hist_panel(axs[0, 0], hprev_numpy, 'Histogram of hprev values')
_hist_panel(axs[0, 1], hprev1_numpy, 'Histogram of hprev1 values')
_hist_panel(axs[1, 0], h_mean, 'Histogram of hmean values')
_hist_panel(axs[1, 1], h_mean_sub, 'Histogram of h_mean_sub values')



In [None]:
# Scratch check: square root of 4 written as a 0.5 power.
4**.5

In [None]:
# Scratch check: square root of 16 written as a 0.5 power.
16**.5

In [None]:
# 9 = 3*3
# 27 = 3*3*3

In [None]:
258
1
2*2


# 8*8 = 64


# 15*15 = 225

# 16*16 = 256

# 17*17 = 289

In [None]:
# Square root of 1/30. Presumably 30 is the fan-in of W1 (n_embd * block_size = 10 * 3) — confirm.
(1/30)**.5

In [None]:
# Two candidate weight scales side by side: (5/3)/sqrt(30), the same form used to scale W1,
# and plain 1/sqrt(30).
(5/3)/((30)**0.5),1/30**.5

In [None]:
import sys
sys.path.append('../../')
from helpers import graphs

In [None]:
import torch
# Compare a hand-scaled Gaussian init (divided by sqrt(30), the input width) against
# the default weight initialisation of torch.nn.Linear for the same 30 -> 200 layer.
w1 = torch.randn((30, 200)) / 30 ** 0.5
l1 = torch.nn.Linear(in_features=30, out_features=200)
(l1.weight.max(), w1.max())

In [None]:
import torch
# Same comparison as for the 30-input layer, now with 150 inputs: hand-scaled Gaussian
# (divided by sqrt(150)) against torch.nn.Linear's default weights.
w1 = torch.randn((150, 200)) / 150 ** 0.5
l1 = torch.nn.Linear(in_features=150, out_features=200)
(l1.weight.max(), w1.max())

In [None]:
# Shape walk-through of the forward pass, written as pseudo-code rather than runnable code.
# NOTE(review): `Linear` and lowercase `c` are not defined in the visible notebook, and
# running this cell would rebind `C` to the tuple (27, 10), replacing the embedding table.
l1 = Linear(30,200, False)
C = 27,10
xb = 32, 3
emb = c[xb] # NOTE(review): was noted as 32,10,3; indexing a (27,10) table with (32,3) indices should give 32,3,10 — confirm
embcat = 32,30
l1(embcat)

In [27]:
from graphviz import Digraph



# Assuming you have the Azure icons downloaded and stored at the given paths
# NOTE(review): all three variables point at the same Cosmos DB SVG file, so every node
# gets the same image — presumably placeholders; confirm the intended icon files.
cosmos_db_icon_path = '../Batch-Normalization/10121-icon-service-Azure-Cosmos-DB.svg'
azure_functions_icon_path = '../Batch-Normalization/10121-icon-service-Azure-Cosmos-DB.svg'
mongo_db_icon_path =  '../Batch-Normalization/10121-icon-service-Azure-Cosmos-DB.svg'

# Create a Digraph object named 'Architecture' with SVG as its output format
dot = Digraph('Architecture', format='svg')

# Set graph attributes if necessary (e.g., size, rankdir)
dot.attr(size='10,6')
dot.attr(rankdir='LR')  # Left to Right, instead of Top to Bottom

# Add nodes with the SVG images (empty label, image taken from the paths above)
dot.node('CosmosDB', label='', image=cosmos_db_icon_path, shape='none')
dot.node('AzureFunctions', label='', image=azure_functions_icon_path, shape='none')
dot.node('MongoDB', label='', image=mongo_db_icon_path, shape='none')

# Add edges between the nodes: CosmosDB -> AzureFunctions -> MongoDB
dot.edge('CosmosDB', 'AzureFunctions')
dot.edge('AzureFunctions', 'MongoDB')

# Save the diagram to a file; as the last expression, the call's return value is the cell output
dot.render('../Batch-Normalization/aa', format='svg', cleanup=True)



'..\\Batch-Normalization\\aa.svg'

In [3]:
from graphviz import Digraph

# Create a Digraph object (this rebinds the notebook-level name `dot`)
dot = Digraph('G')

# Add a node with an image
# NOTE(review): './output.png' is the file written by the cairosvg cell further down
# (execution count In [2], i.e. run before this In [3] cell). On a top-to-bottom run
# the image may not exist yet — confirm the intended cell order.
dot.node('A', 'Node A', image='./output.png', shape='none')

# Visualize the graph
dot.view()


'G.gv.pdf'

In [2]:
import cairosvg
# Convert the SVG at ./dummy-svgrepo-com.svg to a PNG written to output.png
# (both paths are relative to the current working directory).
cairosvg.svg2png(url='./dummy-svgrepo-com.svg', write_to='output.png')


In [None]:
# Gaussian (10, 100) sample drawn from generator `g`, divided by sqrt(fan_in).
# NOTE(review): fan_in is not defined anywhere in the visible notebook, so this raises
# NameError on a fresh kernel; presumably it should be the input dimension (10) — confirm.
torch.randn((10, 100), generator=g) / fan_in**0.5

In [None]:
# Tiny single-weight example: prediction = x * w1, squared error against the target y.
# x has to be a tensor: with a Python list of 0-d tensors, `x * w1` is not an
# element-wise product and raises TypeError.
x = torch.tensor([2.0, 5.0])
y = torch.tensor(5.0)

w1 = torch.tensor(2.1)

output = x * w1         # element-wise: one prediction per input value

loss = (y - output)**2  # per-input squared error; the scalar y broadcasts over both





In [8]:
import math
def custTanh(x):
   """Return (e^(3x) - 1) / (e^(3x) + 1), a tanh-shaped curve with a steeper slope.

   Algebraically this is tanh(1.5 * x). Evaluating it through math.tanh avoids the
   OverflowError that math.exp(3*x) raises for large positive x (roughly x > 236)
   and saturates at +/-1 instead.

   Args:
      x: a real number (int or float).

   Returns:
      A float in [-1.0, 1.0].
   """
   return math.tanh(1.5 * x)

# Evaluate custTanh at a few sample points and show the resulting list.
arr = [-17, -2, -1.1, 0, 2]
res = list(map(custTanh, arr))

res



[-1.0, -0.9950547536867306, -0.9288576214547277, 0.0, 0.9950547536867305]

In [5]:
# Two equivalent ways of dropping the last element of a five-item list.
arr = list(range(1, 6))

(arr[:4], arr[:-1])


([1, 2, 3, 4], [1, 2, 3, 4])