In [None]:
import torch
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
import sentencepiece

# Zero-shot summarization with a pretrained T5 checkpoint.
# `model` holds the checkpoint name; `model_1` is the loaded network.
model = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model)
model_1 = T5ForConditionalGeneration.from_pretrained(model)
input_text = """ Once upon a sweltering day, a very thirsty crow was on a mission to find water. The blazing sun had made it incredibly thirsty, and its wings felt heavy from flying for so long. After a long search, it saw a small village. The crow flew towards the village and saw a pot in a courtyard. It was so excited, but when it looked inside, it saw only a tiny bit of water at the bottom. The crow didn't give up. It had a smart idea. It started picking up small stones and dropping them into the pot, one by one. As it dropped more stones, the water level slowly rose. Finally, after a lot of hard work, the crow could drink the water. It was so happy and flew away, leaving behind a lesson for everyone. The lesson was about not giving up and finding clever ways to solve problems, just like the crow did. The people in the village were amazed by the clever crow and never forgot the story of the thirsty crow. """

# T5 picks its task from a literal text prefix, so the spelling matters:
# the previous "sumarize : " was not the summarization prefix, and the model
# echoed part of it back (the generated text started with ": ").
# max_length=512 is the same input limit used for the encodings later in
# this notebook.
input_ids = tokenizer.encode(
    "summarize: " + input_text,
    return_tensors='pt',
    max_length=512,
    truncation=True,
)

# Beam search over 7 beams; length_penalty > 1 favours longer summaries.
output = model_1.generate(
    input_ids,
    max_length=150,
    num_beams=7,
    length_penalty=2.0,
    early_stopping=True,
)
summary = tokenizer.decode(output[0], skip_special_tokens=True)
print(" ")
print("Generated Summary ", summary)

 
Generated Summary  : Once upon a sweltering day, a thirsty crow was on a mission to find water. the crow flew towards the village and saw a pot in a courtyard. after a long search, it saw only a tiny bit of water at the bottom. after a lot of hard work, the crow could drink the water.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import nltk
from nltk.tokenize import word_tokenize
import numpy as np
import random
import pandas as pd
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
import sentencepiece
import tensorflow as tf
import keras


tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Source passage that the model is asked to summarise.
input_text = "On a sweltering summer day, a weary and dehydrated crow embarked on a relentless quest for water. With the blazing sun beating down on it, the bird scoured the parched landscape for signs of relief. After what felt like an eternity, it stumbled upon a quaint earthenware pot in the courtyard of a humble village house. Hope turned to disappointment as the crow found only a meager trickle of water at the pot's bottom, far from sufficient to quench its thirst. Yet, the crow refused to yield to despair. Instead, it hatched a clever plan, using its beak to drop small pebbles into the pot, one by one. As each pebble fell, the water level in the pot slowly crept higher. The crow's tenacity knew no bounds, and after relentless effort, the water reached a level where it could drink. With a grateful heart, the crow finally quenched its long-standing thirst. Taking to the sky once more, it left behind a powerful lesson for all who witnessed its remarkable feat—a reminder of the indomitable spirit of determination, problem-solving, and the boundless resourcefulness of nature."

# Target summary paired with the passage above.
output_text = "On a scorching summer day, a thirsty crow searched for water. It spotted a village, found a nearly empty pot, and dropped pebbles to raise the water level until it could drink. The story teaches us about determination and clever problem-solving."

# Encode the prefixed passage and the target with the same tokenizer settings
# (PyTorch tensors, truncated to at most 512 tokens).
input_encoding = tokenizer(
    "summarize: " + input_text,
    padding=True,
    truncation=True,
    return_tensors="pt",
    max_length=512,
    add_special_tokens=True,
)
output_encoding = tokenizer(
    output_text,
    padding=True,
    truncation=True,
    return_tensors="pt",
    max_length=512,
    add_special_tokens=True,
)

# Pull the token ids and attention masks out of both encodings.
input_ids, input_attention_mask = (
    input_encoding['input_ids'],
    input_encoding['attention_mask'],
)
output_ids, output_attention_mask = (
    output_encoding['input_ids'],
    output_encoding['attention_mask'],
)

# Hyperparameters and runtime configuration shared by the cells below.
batch_size, vocab_size = 32, tokenizer.vocab_size
lr = 0.001
epochs, num_epochs = 10, 50
hidden_states = 10

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

class PointerGenerator(nn.Module):
    """Pretrained T5 with a trainable linear projection onto the vocabulary.

    The wrapped T5 model produces decoder hidden states; `output_projection`
    maps each of them to one score per vocabulary entry.

    NOTE: despite the class name, no copy/pointer mechanism is implemented
    here; only the vocabulary ("generator") branch exists.

    Args:
        model_name: Hugging Face checkpoint name passed to
            `T5ForConditionalGeneration.from_pretrained`.
        vocab_size: Number of output classes of the projection layer.
    """

    def __init__(self, model_name, vocab_size):
        super(PointerGenerator, self).__init__()
        # Keep the size on the instance so the module never depends on a
        # notebook-level global of the same name.
        self.vocab_size = vocab_size
        # The attribute keeps its historical name `model_name`, but it holds
        # the loaded T5 network, not a string.
        self.model_name = T5ForConditionalGeneration.from_pretrained(model_name)
        self.output_projection = nn.Linear(self.model_name.config.d_model, vocab_size)
        self.softmax = nn.Softmax(dim=-1)

    def forwardpass(self, input_ids, output_ids, hidden_states=None, attention_mask=None):
        """Run T5 with teacher forcing and project the decoder states.

        Args:
            input_ids: Encoder token ids, shape (batch, source_len).
            output_ids: Target token ids, shape (batch, target_len).
            hidden_states: Ignored; accepted only so existing three-argument
                calls keep working.
            attention_mask: Optional encoder padding mask with the same shape
                as `input_ids`.

        Returns:
            Tuple `(logits, decoder_states)`: `logits` has shape
            (batch, target_len, vocab_size) and is connected to the autograd
            graph; `decoder_states` is the last decoder hidden state.
        """
        # Passing the targets as `labels` lets T5 build the right-shifted
        # decoder inputs itself. Feeding them directly as `decoder_input_ids`
        # would show the decoder the very token it is supposed to predict.
        t5_output = self.model_name(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=output_ids,
            output_hidden_states=True,
        )
        decoder_states = t5_output.decoder_hidden_states[-1]
        logits = self.output_projection(decoder_states)
        return logits, decoder_states


from typing import Any

# One (input, target) pair per dataset row; batches come out as (batch, seq_len).
dataset = TensorDataset(input_ids, output_ids)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

# The model must live on the same device as the batches fed to it.
modelf = PointerGenerator("t5-small", vocab_size).to(device)

# Build the optimizer once. Re-creating it for every batch throws away any
# optimizer state and hides which optimizer is actually in use.
optimizer = optim.Adam(modelf.parameters(), lr=lr)

# Token prediction is a classification problem, so use cross-entropy over the
# vocabulary. Padded target positions are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

for epoch in range(num_epochs):
    total_loss = 0.0
    modelf.train()

    for input_batch, output_batch in dataloader:
        input_batch = input_batch.to(device)
        output_batch = output_batch.to(device)
        # Mask out encoder padding so padded positions are not attended to.
        attention_mask = (input_batch != tokenizer.pad_token_id).long()

        optimizer.zero_grad()

        logits, _ = modelf.forwardpass(
            input_batch, output_batch, attention_mask=attention_mask
        )

        # Flatten to (batch * target_len, vocab) vs. (batch * target_len,)
        # as CrossEntropyLoss expects class scores and integer class ids.
        loss = criterion(
            logits.reshape(-1, logits.size(-1)),
            output_batch.reshape(-1),
        )

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(dataloader)}")

    # Checkpoint after every epoch, overwriting the previous file.
    torch.save(modelf.state_dict(), "pointer_generator_model.pth")

Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 1/50, Loss: 2764.97607421875


  return F.l1_loss(input, target, reduction=self.reduction)


Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 2/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 3/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 4/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 5/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 6/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torch.Size([1, 63])
Hidden States Shape: torch.Size([32100])
Epoch 7/50, Loss: 2764.97607421875
Input Batch Shape: torch.Size([1, 283])
Output Batch Shape: torc

In [None]:

!pip install rouge-score



Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24932 sha256=b9b07a1ec1e94d60bc328daf19d2c5db388946bb3009b23a2554e5c827cf4c08
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
from rouge_score import rouge_scorer

# Reference (human) summary and the hypothesis to be scored against it.
reference_summary = "On a scorching summer day, a thirsty crow searched for water. It spotted a village, found a nearly empty pot, and dropped pebbles to raise the water level until it could drink. The story teaches us about determination and clever problem-solving."
hypothesis_summary = "Once upon a sweltering day, a thirsty crow was on a mission to find water. the crow flew towards the village and saw a pot in a courtyard. after a long search, it saw only a tiny bit of water at the bottom. after a lot of hard work, the crow could drink the water."

# Scorer for unigram, bigram and longest-common-subsequence overlap, with stemming.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# score() takes the reference first and the hypothesis second.
scores = scorer.score(reference_summary, hypothesis_summary)

# Keep only the F-measure of each metric.
rouge1_score, rouge2_score, rougeL_score = (
    scores[metric].fmeasure for metric in ('rouge1', 'rouge2', 'rougeL')
)

# Report the three F1 values, one per line.
for label, value in (
    ('ROUGE-1', rouge1_score),
    ('ROUGE-2', rouge2_score),
    ('ROUGE-L', rougeL_score),
):
    print(f'{label} F1 Score: {value}')




ROUGE-1 F1 Score: 0.41237113402061853
ROUGE-2 F1 Score: 0.14736842105263157
ROUGE-L F1 Score: 0.26804123711340205


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
# Tokenizer data required by word_tokenize.
nltk.download('punkt')

# Reference summary and the generated summary to be scored against it.
reference_summary = "On a scorching summer day, a thirsty crow searched for water. It spotted a village, found a nearly empty pot, and dropped pebbles to raise the water level until it could drink. The story teaches us about determination and clever problem-solving."
generated_summary = "Once upon a sweltering day, a thirsty crow was on a mission to find water. the crow flew towards the village and saw a pot in a courtyard. after a long search, it saw only a tiny bit of water at the bottom. after a lot of hard work, the crow could drink the water."

# Lower-case both texts and split them into word tokens.
reference_tokens, generated_tokens = (
    word_tokenize(text.lower())
    for text in (reference_summary, generated_summary)
)

# All weight on the first n-gram order, i.e. unigram precision only.
bleu_score = sentence_bleu(
    [reference_tokens],
    generated_tokens,
    weights=(1, 0, 0, 0),
)

print("BLEU Score:", bleu_score)


BLEU Score: 0.4032258064516129


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
