In [1]:
from w2v import Word2Vec
from data import SquadDataset
from rnn import RNN
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from datasets import load_dataset
from transformers import BertTokenizerFast
import time



In [2]:
# Load the "qnli" configuration of the "nyu-mll/glue" dataset.
dataset = load_dataset("nyu-mll/glue", "qnli")


In [3]:
# Peek at the first training record to see which fields each example carries.
dataset['train'][0]

{'question': 'When did the third Digimon series begin?',
 'sentence': 'Unlike the two seasons before it and most of the seasons that followed, Digimon Tamers takes a darker and more realistic approach to its story featuring Digimon who do not reincarnate after their deaths and more complex character development in the original Japanese.',
 'label': 1,
 'idx': 0}

In [3]:
# Tokenizer for the 'bert-base-uncased' checkpoint; it is handed to SquadDataset
# and its vocab_size sizes the embedding table of the model built below.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

In [4]:
# Carve out small working subsets of the train / validation splits.
# NOTE(review): despite the `random_` prefix these are fixed, contiguous index
# ranges (rows 2269..6998 of train, rows 2269..2868 of validation), not random
# samples.
random_train = dataset['train'].select(range(2269,6999))
random_val = dataset['validation'].select(range(2269,2869))

In [5]:
# Wrap both subsets in the project-local SquadDataset, sharing one tokenizer.
# NOTE(review): the meaning of the second argument (128 for train, 24 for
# validation) is not visible from this notebook; confirm against data.py.
train_data = SquadDataset(random_train, 128, tokenizer)
validation_data = SquadDataset(random_val, 24, tokenizer)

In [None]:
class RNNe(nn.Module):
	"""Embed two token-id inputs with one shared Word2Vec layer and feed both to an RNN.

	Args:
		args: Hyper-parameters providing ``vocab_size``, ``embed_size``,
			``hidden_size`` and ``num_layers``. May be a plain ``dict`` (which
			is how this notebook builds it) or any object exposing the same
			names as attributes (e.g. ``argparse.Namespace``).

	Raises:
		KeyError / AttributeError: if a required hyper-parameter is missing.
	"""

	def __init__(self, args):
		super(RNNe, self).__init__()

		def hparam(name):
			# Attribute access on a dict (`args.vocab_size`) raises
			# AttributeError, so dicts are read by key and everything else
			# by attribute.
			return args[name] if isinstance(args, dict) else getattr(args, name)

		self.w2v = Word2Vec(hparam('vocab_size'), hparam('embed_size'))
		self.rnn = RNN(hparam('embed_size'), hparam('hidden_size'), hparam('num_layers'))

	def forward(self, c, q):
		"""Embed `c` and `q` with the shared embedding and return the RNN's output.

		Both inputs go through the same ``self.w2v`` layer before being passed,
		in that order, to ``self.rnn``.
		NOTE(review): presumably `c` is the sentence/context ids and `q` the
		question ids; confirm against the RNN module.
		"""
		c = self.w2v(c)
		q = self.w2v(q)
		return self.rnn(c, q)

In [None]:
# Hyper-parameters consumed by RNNe: the embedding table is sized to the
# tokenizer's vocabulary; the remaining values are fixed constants.
args = {
	'vocab_size': tokenizer.vocab_size,
	'embed_size': 100,
	'hidden_size': 128,
	'num_layers': 2,
}

In [None]:
# Build the model and move it to the GPU (this cell requires a CUDA device).
model = RNNe(args).to('cuda')

In [None]:
# Adam takes no `momentum` keyword (passing one raises TypeError). Its
# momentum-like first-moment decay is the first entry of `betas`, so the
# intended 0.9 is expressed there.
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))

In [None]:
# Classification loss applied to the model output and the integer labels in
# the train / evaluation loops.
critereon = nn.CrossEntropyLoss().to('cuda')

In [6]:
def train(model, optimizer, critereon, epochs=10, data=None):
	"""Train `model` for `epochs` passes over `data`, printing the mean loss per epoch.

	Args:
		model: Module called as ``model(s_ids, q_ids)``; its output is passed
			to `critereon` together with the labels.
		optimizer: Optimizer that updates the model's parameters.
		critereon: Loss callable invoked as ``critereon(output, labels)``.
		epochs: Number of passes over `data`.
		data: Iterable yielding ``(s, q, l)`` triples where ``s`` and ``q`` are
			mappings holding an ``'input_ids'`` tensor and ``l`` is a label
			tensor. Defaults to the notebook-level ``train_data``.

	Returns:
		None. Progress and the total wall-clock time are printed.
	"""
	if data is None:
		data = train_data
	# Follow whatever device the model lives on instead of relying on an
	# undefined global `device` or a hard-coded 'cuda'.
	device = next(model.parameters()).device
	t0 = time.time()
	for i in range(epochs):
		model.train()
		running_loss = 0
		for s, q, l in data:
			optimizer.zero_grad()
			s = s['input_ids'].to(device)
			q = q['input_ids'].to(device)
			l = l.long().to(device)
			# Forward pass and loss run under bfloat16 autocast; the backward
			# pass and the optimizer step stay outside the context.
			with torch.autocast(device_type=device.type, dtype=torch.bfloat16):
				output = model(s, q)
				loss = critereon(output, l)
			loss.backward()
			optimizer.step()
			running_loss += loss.item()
		# max(..., 1) keeps an empty dataset from raising ZeroDivisionError.
		print(f'Epoch {i}, Loss: {running_loss/max(len(data), 1)}')
	print(f'Training time: {time.time() - t0:.1f}s')

In [None]:
# Kick off training for ten epochs with the model, optimizer and loss built above.
train(model, optimizer, critereon, epochs=10)

In [None]:
def evaluation(model, critereon, data=None):
	"""Print the mean loss of `model` over `data` without tracking gradients.

	Args:
		model: Module called as ``model(s_ids, q_ids)``.
		critereon: Loss callable invoked as ``critereon(output, labels)``.
		data: Iterable yielding ``(s, q, l)`` triples where ``s`` and ``q`` are
			mappings holding an ``'input_ids'`` tensor and ``l`` is a label
			tensor. Defaults to the notebook-level ``validation_data``.

	Returns:
		None. The mean loss is printed.
	"""
	if data is None:
		data = validation_data
	# Use the model's own device rather than a hard-coded 'cuda'.
	device = next(model.parameters()).device
	model.eval()
	running_loss = 0
	# Inference only: disabling autograd avoids building a graph per batch.
	with torch.no_grad():
		for s, q, l in data:
			s = s['input_ids'].to(device)
			q = q['input_ids'].to(device)
			l = l.long().to(device)
			output = model(s, q)
			loss = critereon(output, l)
			running_loss += loss.item()
	# max(..., 1) keeps an empty dataset from raising ZeroDivisionError.
	print(f'Validation Loss: {running_loss/max(len(data), 1)}')

In [2]:
import torch

In [11]:
# Toy inputs for the masking experiment: `a` stacks two copies of a 3x3 grid
# holding 1..9, `b` is a 0/1 mask with one entry per row of each grid.
a = torch.arange(1.0, 10.0).reshape(3, 3).repeat(2, 1, 1)
b = torch.tensor([[1.0, 1.0, 0.0], [1.0, 0.0, 0.0]])
print(a.shape, b.shape)
print(a)
print(b)

torch.Size([2, 3, 3]) torch.Size([2, 3])
tensor([[[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]],

        [[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]])
tensor([[1., 1., 0.],
        [1., 0., 0.]])


In [12]:
# Zero out every row of `a` whose mask entry in `b` is 0. unsqueeze(-1) turns
# the (2, 3) mask into (2, 3, 1) so it broadcasts across the last axis of `a`.
a.masked_fill(b.unsqueeze(-1) == 0, 0)

tensor([[[1., 2., 3.],
         [4., 5., 6.],
         [0., 0., 0.]],

        [[1., 2., 3.],
         [0., 0., 0.],
         [0., 0., 0.]]])

In [None]:
def evaluate(model, critereon, data=None):
	"""Print the mean loss of `model` over `data` without tracking gradients.

	Args:
		model: Module called as ``model(s_ids, q_ids)``.
		critereon: Loss callable invoked as ``critereon(output, labels)``.
		data: Iterable yielding ``(s, q, l)`` triples where ``s`` and ``q`` are
			mappings holding an ``'input_ids'`` tensor and ``l`` is a label
			tensor. Defaults to the notebook-level ``validation_data``.

	Returns:
		None. The mean loss is printed.
	"""
	if data is None:
		data = validation_data
	# Use the model's own device rather than a hard-coded 'cuda'.
	device = next(model.parameters()).device
	model.eval()
	running_loss = 0
	# Inference only: disabling autograd avoids building a graph per batch.
	with torch.no_grad():
		for s, q, l in data:
			s = s['input_ids'].to(device)
			q = q['input_ids'].to(device)
			l = l.long().to(device)
			output = model(s, q)
			loss = critereon(output, l)
			running_loss += loss.item()
	# max(..., 1) keeps an empty dataset from raising ZeroDivisionError.
	print(f'Validation Loss: {running_loss/max(len(data), 1)}')

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class Word2Vec(nn.Module):
	"""Thin wrapper around an ``nn.Embedding`` lookup table.

	Args:
		vocab_size: Number of rows in the embedding table.
		embed_size: Width of each embedding vector.
	"""

	def __init__(self, vocab_size, embed_size):
		super().__init__()
		self.embeddings = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_size)

	def forward(self, x):
		"""Return the embedding vectors for the token ids in `x`."""
		return self.embeddings(x)
	


class PositionalEmbedding(nn.Module):
	"""Fixed (non-trainable) sinusoidal position encodings.

	Even columns hold ``sin(pos / 10000^(2i / embed_size))`` and odd columns
	the matching ``cos``. The table is computed once for `max_len` positions.

	Args:
		embed_size: Width of each position vector; odd sizes are supported.
		max_len: Number of positions to precompute.
	"""

	def __init__(self, embed_size, max_len=96):
		super(PositionalEmbedding, self).__init__()
		pos = torch.arange(0, max_len).float().unsqueeze(1)
		_2i = torch.arange(0, embed_size, 2).float()
		angles = pos / torch.pow(10000, _2i / embed_size)
		encoding = torch.zeros(max_len, embed_size)
		encoding[:, 0::2] = torch.sin(angles)
		# An odd embed_size has one fewer odd column than even ones, so trim
		# the cosine block to fit instead of failing on a shape mismatch.
		encoding[:, 1::2] = torch.cos(angles[:, : embed_size // 2])
		# Registered as a buffer so .to(device) / .cuda() move it with the
		# module; non-persistent so the state_dict keys stay unchanged.
		self.register_buffer('encoding', encoding, persistent=False)

	def forward(self, x):
		"""Return the encodings for the positions of `x`, expanded over the batch.

		Args:
			x: Tensor of shape ``(batch, seq_len, embed_dim)``; only its shape
				is used.

		Returns:
			A ``(batch, seq_len, embed_dim)`` view of the precomputed table.

		Raises:
			RuntimeError: if ``seq_len`` exceeds the precomputed `max_len`.
		"""
		bs, seqlen, embed_dim = x.size()
		if seqlen > self.encoding.size(0):
			raise RuntimeError(
				f'sequence length {seqlen} exceeds max_len {self.encoding.size(0)} '
				'of the positional encoding table'
			)
		return self.encoding[:seqlen, :].expand(bs, seqlen, embed_dim)

class WordEmbedding(nn.Module):
	"""Token embeddings summed with positional encodings.

	Args:
		vocab_size: Size of the token vocabulary.
		embed_size: Width of each embedding vector.
		max_len: Number of positions handed to the positional encoding.
	"""

	def __init__(self, vocab_size, embed_size, max_len=96):
		super().__init__()
		self.word2vec = Word2Vec(vocab_size, embed_size)
		self.positional_embedding = PositionalEmbedding(embed_size, max_len)

	def forward(self, x):
		"""Embed the token ids in `x` and add the positional encodings."""
		token_vectors = self.word2vec(x)
		position_vectors = self.positional_embedding(token_vectors)
		return token_vectors + position_vectors

# Smoke test: embed a random batch of 3 sequences of 9 token ids (vocabulary
# of 100, embedding width 10, max_len 9) and display the result.
# NOTE(review): this rebinds the notebook-level name `a`, which an earlier
# cell uses for a tensor.
x = torch.randint(0, 100, (	3, 9))
a = WordEmbedding(100, 10, 9)
print(x.size())
a(x)

torch.Size([3, 9])


tensor([[[ 1.1780,  2.3726,  0.1212,  0.9717, -0.5141,  1.8723,  1.8695,
           0.3158, -0.9684,  1.0252],
         [ 1.5407,  1.9876, -0.4566,  1.4335, -1.0039,  1.9742, -1.0171,
           1.0240,  0.0887,  2.3723],
         [ 0.0669, -0.0659,  0.4259,  0.8481, -0.1229,  1.9464, -2.0172,
           2.7150, -0.4906,  0.4149],
         [-0.4627, -3.2527,  2.4779,  1.0126,  0.3104,  1.4989,  1.1695,
           2.0811,  0.6647,  1.2333],
         [ 0.0197, -3.7601,  0.4670,  0.6169, -0.1290,  1.3264,  1.0500,
           0.6898,  0.4134, -0.0589],
         [-1.3704, -0.2010,  1.4713,  0.5881, -0.1401,  1.3658, -1.3642,
           1.1760, -0.8576, -0.5149],
         [-0.7665,  0.5607,  0.7573,  0.3028,  0.3197,  1.3995, -1.3331,
          -0.0302,  0.2542,  0.1805],
         [ 0.6938,  0.0803,  2.7115,  0.0466,  0.3521,  1.0088,  0.5934,
           0.5734, -0.2874,  0.4161],
         [ 1.6889,  1.0880, -0.3118,  1.1236,  0.8956,  0.7725,  1.3381,
           1.5693,  1.3475,  0.9954]],


In [12]:
from transformers import BertModel, BertTokenizer

# Load pre-trained model tokenizer (vocabulary)
# NOTE(review): this rebinds the notebook-level `tokenizer`, which an earlier
# cell set to a BertTokenizerFast instance.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load pre-trained model (weights)
# NOTE(review): this rebinds the notebook-level `model`, which an earlier cell
# set to the RNNe instance; train/evaluate cells run after this one would
# receive the BERT model instead.
model = BertModel.from_pretrained('bert-base-uncased')



In [13]:
# Access the embeddings: keep a handle on the token-embedding layer of the
# loaded BERT model so it can be called directly on input ids.
word_embeddings = model.embeddings.word_embeddings

In [14]:
# Encode the sentence into vocabulary ids, special tokens included
input_ids = tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)

# Add a leading batch axis so the ids form a (1, seq_len) tensor
input_ids_tensor = torch.tensor(input_ids).unsqueeze(0)

# Look the ids up in BERT's embedding layer without recording gradients
with torch.no_grad():
	embeddings = word_embeddings(input_ids_tensor)

In [24]:
# Size the layer to match BERT's embedding table rather than a hard-coded 100:
# the weights assigned to it afterwards are 768 wide (see the output shapes
# below), and a mismatched width leaves the layer's metadata inconsistent.
embed = nn.Embedding(tokenizer.vocab_size, model.embeddings.word_embeddings.embedding_dim)

In [25]:
# Copy BERT's embedding table into `embed`. Cloning (instead of wrapping
# `.data`) keeps the two layers from sharing storage, so an in-place update of
# one cannot silently change the other.
pretrained_weight = model.embeddings.word_embeddings.weight.detach().clone()
embed.weight = nn.Parameter(pretrained_weight)
# Keep the layer's size metadata in sync with the weight it now holds.
embed.num_embeddings, embed.embedding_dim = pretrained_weight.shape


In [26]:
# Sanity check: the copied layer should reproduce BERT's embeddings exactly,
# so the difference is expected to be all zeros.
embed(input_ids_tensor) - embeddings

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]], grad_fn=<SubBackward0>)

In [27]:
# Shape of the copied layer's output for the encoded sentence.
embed(input_ids_tensor).size()

torch.Size([1, 8, 768])

In [17]:
# Show the raw vocabulary ids produced by tokenizer.encode for the sentence.
input_ids

[101, 7592, 1010, 2026, 3899, 2003, 10140, 102]

In [16]:
# Shape of the embeddings returned by BERT's own embedding layer.
embeddings.shape

torch.Size([1, 8, 768])

In [28]:
import collections

In [29]:
# NOTE(review): this rebinds the builtin name `dict` to a dict instance, so
# re-running this cell (or any later `dict(...)` call in the same kernel) fails
# because the instance is not callable. Prefer a different variable name.
dict = dict(a = '36')

In [30]:
# Demonstrates that dict keys are not reachable as attributes: this raises
# AttributeError. Use subscript access (`dict['a']`) to read the value.
dict.a

AttributeError: 'dict' object has no attribute 'a'