<a href="https://colab.research.google.com/github/whoami-Lory271/DL-project/blob/main/DL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from numpy import array
from sklearn.preprocessing import OneHotEncoder
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.normalization import LayerNorm

In [None]:
def create_vocabulary(sentences):
  """Collect every whitespace-separated token found in `sentences`.

  Args:
    sentences: iterable of strings.

  Returns:
    A dict mapping each distinct token to the constant 1, with keys in
    order of first appearance. Only the keys carry information; the dict
    is effectively used as an ordered set.
  """
  return {token: 1 for sentence in sentences for token in sentence.split()}

In [None]:
def create_one_hot_encoder(vocabulary):
  """Fit a `OneHotEncoder` on the tokens of `vocabulary`.

  Args:
    vocabulary: dict whose keys are the known tokens.

  Returns:
    The fitted encoder, configured with `handle_unknown='ignore'`.
  """
  # The encoder expects a 2-D array: one row per token, a single column.
  token_column = array(list(vocabulary.keys())).reshape(-1, 1)
  encoder = OneHotEncoder(handle_unknown='ignore')
  encoder.fit(token_column)
  return encoder

In [None]:
def one_hot_encoding(enc,sentence):
  """Encode a sentence as a float32 tensor with one row per token.

  Args:
    enc: fitted encoder whose `transform` result exposes `.todense()`.
    sentence: string that is split on whitespace into tokens.

  Returns:
    A `torch.float32` tensor built from the dense encoder output.
  """
  # One single-feature sample per token, matching the layout used for fitting.
  token_rows = [[token] for token in sentence.split()]
  dense = enc.transform(token_rows).todense()
  return torch.tensor(dense, dtype=torch.float32)

In [None]:
def position_embedding(input_length,dmodel):
  """Build the sinusoidal positional-encoding table.

  For position `pos` and dimension index `i`:
    even i: sin(pos / 10000 ** (2 * (i // 2) / dmodel))
    odd  i: cos(pos / 10000 ** (2 * (i // 2) / dmodel))
  so each (sin, cos) pair of adjacent dimensions shares one frequency.

  Args:
    input_length: number of positions (rows).
    dmodel: number of dimensions (columns).

  Returns:
    A `torch.float32` tensor of shape (input_length, dmodel).
  """
  # Work in float64 so the values match a scalar `math.sin`/`math.cos`
  # computation before the final cast to float32.
  positions = torch.arange(input_length, dtype=torch.float64).unsqueeze(1)
  dims = torch.arange(dmodel)
  # `dims - dims % 2` equals 2 * (i // 2): the exponent numerator shared by
  # each sin/cos pair. Using 2 * i here would be wrong because `i` is already
  # the dimension index, not the pair index.
  exponents = (dims - dims % 2).to(torch.float64) / dmodel
  angles = positions / torch.pow(10000.0, exponents)
  emb = torch.where(dims % 2 == 0, torch.sin(angles), torch.cos(angles))
  return emb.to(torch.float32)


In [None]:
class EmbeddingLayer(nn.Module):
  """Linear projection from vocabulary-sized vectors to `dmodel` features."""

  def __init__(self, vocabulary_size, dmodel):
    """Create the projection `We` (a linear layer with bias).

    Args:
      vocabulary_size: size of the last dimension of the input.
      dmodel: size of the last dimension of the output.
    """
    super().__init__()
    self.We = nn.Linear(in_features=vocabulary_size, out_features=dmodel)

  def forward(self, x):
    """Apply the projection to `x` and return the result."""
    embedded = self.We(x)
    return embedded

In [None]:
class SelfAttention(nn.Module):
  """Single-head scaled dot-product self-attention.

  Accepts input of shape (seq, dmodel) or with any number of leading batch
  dimensions, (..., seq, dmodel).
  """

  def __init__(self,dmodel,dk,dv):
    """Create the query/key/value projections.

    Args:
      dmodel: size of the last dimension of the input.
      dk: output size of the query and key projections.
      dv: output size of the value projection.
    """
    super().__init__()
    self.dk = dk
    self.Wq = nn.Linear(dmodel,dk)
    self.Wk = nn.Linear(dmodel,dk)
    self.Wv = nn.Linear(dmodel,dv)
    # Normalise over the key axis (the last one) so that the same module
    # works with and without leading batch dimensions.
    self.softmax = nn.Softmax(dim=-1)

  def forward(self,x):
    """Return softmax(Q K^T / sqrt(dk)) V, shaped (..., seq, dv)."""
    Q = self.Wq(x)
    K = self.Wk(x)
    V = self.Wv(x)
    # Swap only the last two axes; `.T` would reverse every axis and break
    # on batched input.
    weights = self.softmax(Q @ K.transpose(-2, -1) / math.sqrt(self.dk))
    return weights @ V

In [None]:
class MultiHeadAttention(nn.Module):
  """Run `nhead` independent `SelfAttention` heads and mix their outputs."""

  def __init__(self,dmodel,dk,dv,nhead):
    """Create the attention heads and the output projection.

    Args:
      dmodel: feature size of the input and of the output.
      dk: query/key size passed to each head.
      dv: value size passed to each head.
      nhead: number of heads.
    """
    super().__init__()
    self.nhead = nhead
    self.att_layers = nn.ModuleList([SelfAttention(dmodel,dk,dv) for i in range(nhead)])
    self.Wo = nn.Linear(dv * nhead, dmodel)

  def forward(self,x):
    """Concatenate all head outputs on the feature axis and project them."""
    heads = [head(x) for head in self.att_layers]
    # One concatenation instead of growing the tensor head by head; the
    # feature axis is addressed as -1 so it stays correct if the heads
    # return tensors with leading batch dimensions.
    return self.Wo(torch.cat(heads, dim=-1))

In [None]:
class FFN(nn.Module):
  """Two-layer feed-forward network: linear, ReLU, linear."""

  def __init__(self,dmodel,df):
    """Create the two linear layers.

    Args:
      dmodel: feature size of the input and of the output.
      df: feature size of the hidden layer.
    """
    super().__init__()
    self.W1 = nn.Linear(dmodel,df)
    self.W2 = nn.Linear(df,dmodel)

  def forward(self,x):
    """Return W2(relu(W1(x)))."""
    hidden = F.relu(self.W1(x))
    return self.W2(hidden)


In [None]:
class Encoder(nn.Module):
  """One encoder layer: multi-head attention and a feed-forward network,
  each wrapped in a residual connection followed by layer normalisation.
  """

  def __init__(self,dmodel,dk,dv,df,nhead):
    """Create the sub-layers.

    Args:
      dmodel: feature size of the input and of the output.
      dk: query/key size of each attention head.
      dv: value size of each attention head.
      df: hidden size of the feed-forward network.
      nhead: number of attention heads.
    """
    super().__init__()
    self.mha = MultiHeadAttention(dmodel,dk,dv,nhead)
    # Normalisation applied after the attention residual.
    self.norm = LayerNorm(dmodel)
    self.ffn = FFN(dmodel,df)
    # Separate normalisation for the feed-forward residual, so the two
    # sub-layers do not share one set of learnable scale/shift parameters.
    self.norm2 = LayerNorm(dmodel)

  def forward(self,x):
    """Apply attention and feed-forward sub-layers; output has x's shape."""
    z = self.norm(x + self.mha(x))
    return self.norm2(z + self.ffn(z))


In [None]:
class Transformer(nn.Module):
  """Encoder-only stack: embedding, positional encoding, `nlayers` encoders."""

  def __init__(self,vocabulary_size,dmodel,dk,dv,df,nhead,nlayers):
    """Create the embedding layer and the encoder stack.

    Args:
      vocabulary_size: size of the last dimension of the input.
      dmodel: feature size used throughout the model.
      dk: query/key size of each attention head.
      dv: value size of each attention head.
      df: hidden size of each feed-forward network.
      nhead: number of attention heads per encoder.
      nlayers: number of stacked encoders.
    """
    super().__init__()
    self.nlayers = nlayers
    # Kept on the instance so `forward` does not rely on a module-level
    # global named `dmodel` being defined (and equal to this value).
    self.dmodel = dmodel
    self.embedding = EmbeddingLayer(vocabulary_size,dmodel)
    self.encoders = nn.ModuleList([Encoder(dmodel,dk,dv,df,nhead) for i in range(nlayers)])

  def forward(self,x):
    """Encode `x`, shaped (seq, vocabulary_size), into (seq, dmodel)."""
    emb = self.embedding(x) * math.sqrt(self.dmodel)
    # The sequence axis is the second-to-last one; for 2-D input this is
    # axis 0. The table is moved to the activations' device and dtype so
    # the addition also works when the model is not on the CPU.
    t = position_embedding(x.shape[-2], self.dmodel).to(device=emb.device, dtype=emb.dtype)
    x = emb + t
    for encoder in self.encoders:
      x = encoder(x)
    return x

In [None]:
# Toy corpus used to build the vocabulary and to try out the model.
sentences = [
  "dai ragazzi per una volta che ci andiamo non scegliamo il posto che fa pagare poco",
  "altrimenti tanto vale andare a mensa",
  " importante è la compagnia",
]
# Token set of the corpus and the one-hot encoder fitted on it.
vocabulary = create_vocabulary(sentences)
enc = create_one_hot_encoder(vocabulary)

In [None]:
# One-hot encode the first example sentence into a float tensor.
# NOTE(review): `input` shadows the Python builtin of the same name; it is
# kept because a later cell reads this variable.
input = one_hot_encoding(enc,sentences[0])

In [None]:
# Model hyperparameters.
dmodel = 512   # feature size used throughout the model
dk = 64        # query/key size of each attention head
dv = 64        # value size of each attention head
nhead = 8      # attention heads per encoder layer
df = 2048      # hidden size of the feed-forward network
nlayers = 6    # number of stacked encoder layers
# Input feature size: one one-hot slot per distinct token.
vocabulary_size = len(vocabulary)

In [None]:
# Build the encoder stack from the hyperparameters defined in the notebook.
tran = Transformer(vocabulary_size,dmodel,dk,dv,df,nhead,nlayers)

In [None]:
# Forward pass of the one-hot encoded sentence through the model.
output = tran(input)

In [None]:
# Display the shape of the model output as the notebook cell result.
output.shape

torch.Size([16, 512])