In [2]:
!pip install transformers
!pip install sentencepiece

Collecting transformers
  Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m73.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m119.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m73.1 MB/s[0m eta [36m0:00:

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import random
import pandas as pd
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
import sentencepiece
import tensorflow as tf
import keras


In [9]:

tokenizer = T5Tokenizer.from_pretrained("t5-small")
input_text = "On a sweltering summer day, a weary and dehydrated crow embarked on a relentless quest for water. With the blazing sun beating down on it, the bird scoured the parched landscape for signs of relief. After what felt like an eternity, it stumbled upon a quaint earthenware pot in the courtyard of a humble village house. Hope turned to disappointment as the crow found only a meager trickle of water at the pot's bottom, far from sufficient to quench its thirst. Yet, the crow refused to yield to despair. Instead, it hatched a clever plan, using its beak to drop small pebbles into the pot, one by one. As each pebble fell, the water level in the pot slowly crept higher. The crow's tenacity knew no bounds, and after relentless effort, the water reached a level where it could drink. With a grateful heart, the crow finally quenched its long-standing thirst. Taking to the sky once more, it left behind a powerful lesson for all who witnessed its remarkable feat—a reminder of the indomitable spirit of determination, problem-solving, and the boundless resourcefulness of nature."
output_text = "On a scorching summer day, a thirsty crow searched for water. It spotted a village, found a nearly empty pot, and dropped pebbles to raise the water level until it could drink. The story teaches us about determination and clever problem-solving."
input_encoding = tokenizer("summarize: " + input_text, padding=True, truncation=True, return_tensors="pt", max_length=512, add_special_tokens=True)
output_encoding = tokenizer(output_text, padding=True, truncation=True, return_tensors="pt", max_length=512, add_special_tokens=True)
input_ids = input_encoding['input_ids']
input_attention_mask = input_encoding['attention_mask']
output_ids = output_encoding['input_ids']
output_attention_mask = output_encoding['attention_mask']

batch_size = 32
vocab_size = tokenizer.vocab_size
lr = 0.001
epochs = 10
num_epochs = 50
hidden_states = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class PointerGenerator(nn.Module):

  def __init__(self,model_name,vocab_size):
    super(PointerGenerator, self).__init__()
    self.model_name = T5ForConditionalGeneration.from_pretrained(model_name)
    self.output_projection = nn.Linear(self.model_name.config.d_model, vocab_size)
    self.softmax = nn.Softmax(dim = -1)

  def forwardpass(self, input_ids, output_ids, hidden_states):
    t5_output = self.model_name(input_ids = input_ids, decoder_input_ids = output_ids)
    point_gen = torch.tensor(0.5)  # Replace with your desired value
    vocab_dist = torch.ones(vocab_size) / vocab_size  #
    return point_gen , vocab_dist

from typing import Any
input_ids = torch.cat((input_ids,), dim = 0)
output_ids = torch.cat((output_ids,), dim = 0)

dataset = TensorDataset(input_ids, output_ids)
dataloader = DataLoader( dataset, batch_size=32, shuffle=False)

vocab_size: Any
modelf = PointerGenerator("t5-small",vocab_size)
optimizer = optim.Adam(modelf.parameters(), lr=0.001)
criterion = nn.L1Loss()

for epoch in range(num_epochs):
    total_loss = 0.0
    modelf.train()

    for batch in dataloader:
        input_batch, output_batch = batch
        input_batch = input_batch.to(device).reshape(1,-1)
        output_batch = output_batch.to(device).reshape(1,-1)
        optimizer = optim.SGD(modelf.parameters(), lr=0.01)
        optimizer.zero_grad()
        print(f"Input Batch Shape: {input_batch.shape}")
        print(f"Output Batch Shape: {output_batch.shape}")
        desired_size = max(input_batch.size(1), output_batch.size(-2))
        padding_size = desired_size - input_batch.size(-2)
        from torch.nn.utils.rnn import pad_sequence
        padded_input_batch = pad_sequence(input_batch, batch_first=True)
        padded_output_batch = pad_sequence(output_batch, batch_first=True)
        padded_ip_batch = padded_input_batch.reshape(-1,1)
        logits, hidden_states = modelf.forwardpass(input_batch, output_batch, hidden_states)  # Ensure the model returns hidden_states
        print(f"Hidden States Shape: {hidden_states.shape}")
        logits = logits.view(-1, 1)
        output_batch = output_batch.view(1, -1)
        output_batch = output_batch.to(torch.float32)
        logits.requires_grad = True
        output_batch.requires_grad = True
        loss = criterion(logits, output_batch)
        for param in modelf.parameters():
          if param not in modelf.parameters(0):
            param.requires_grad = True
        optimizer = optim.SGD(modelf.parameters(), lr=0.01)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(dataloader)}")
    torch.save(modelf.state_dict(), "pointer_generator_model.pth")

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 1/50, Loss: 2764.97607421875


  return F.l1_loss(input, target, reduction=self.reduction)


Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 2/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 3/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 4/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 5/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 6/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 7/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torc