# T5


In [None]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
from rouge import Rouge
from datasets import load_dataset

# Location of the saved T5 files; the tokenizer is read from the same
# directory as the model weights.
model_path = token_path = "../Model"

# All tensors and the model are placed on the CPU.
device = "cpu"

# Load the tokenizer, then the model, and move the model onto `device`.
tokenizer = T5Tokenizer.from_pretrained(token_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)
model = model.to(device)

def summarize(conversation):
    """Return a generated summary of `conversation`.

    The text is prefixed with "summarize: ", encoded with the module-level
    `tokenizer` (truncated to `max_length=512`), and passed to the
    module-level `model` for generation with 4 beams. Only the first
    returned sequence is decoded, with special tokens skipped.
    """
    prompt = "summarize: " + conversation
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    generation_options = dict(
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    generated = model.generate(input_ids.to(device), **generation_options)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def calculate_rouge(hypotheses, references):
    """Score `hypotheses` against `references` with the `rouge` package.

    Both arguments are handed to `Rouge.get_scores` with `avg=True`, and
    whatever that call returns is passed back to the caller unchanged.
    """
    return Rouge().get_scores(hypotheses, references, avg=True)

# Validation split of the DialogSum dataset.
dataset = load_dataset('knkarthick/dialogsum', split='validation')

# Collect each record's dialogue text and reference summary into two
# lists that share the dataset's ordering.
conversation_list = []
reference_summaries = []
for record in dataset:
    conversation_list.append(record['dialogue'])
    reference_summaries.append(record['summary'])




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# The "_small" names are plain aliases of the complete lists here (no
# slicing), so the evaluation below covers every loaded example.
conversation_list_small, reference_summaries_small = conversation_list, reference_summaries

In [None]:
generated_summaries = []

# Summarize the conversations one by one, printing a progress line
# before each item is processed.
total_items = len(conversation_list_small)
for position, dialogue in enumerate(conversation_list_small, start=1):
    print(f"Processing item {position}/{total_items}")
    generated_summaries.append(summarize(dialogue))

# Score the generated summaries against the references and show the raw
# result.
rouge_scores = calculate_rouge(generated_summaries, reference_summaries_small)
print(rouge_scores)

Processing item 1/500
Processing item 2/500
Processing item 3/500
Processing item 4/500
Processing item 5/500
Processing item 6/500
Processing item 7/500
Processing item 8/500
Processing item 9/500
Processing item 10/500
Processing item 11/500
Processing item 12/500
Processing item 13/500
Processing item 14/500
Processing item 15/500
Processing item 16/500
Processing item 17/500
Processing item 18/500
Processing item 19/500
Processing item 20/500
Processing item 21/500
Processing item 22/500
Processing item 23/500
Processing item 24/500
Processing item 25/500
Processing item 26/500
Processing item 27/500
Processing item 28/500
Processing item 29/500
Processing item 30/500
Processing item 31/500
Processing item 32/500
Processing item 33/500
Processing item 34/500
Processing item 35/500
Processing item 36/500
Processing item 37/500
Processing item 38/500
Processing item 39/500
Processing item 40/500
Processing item 41/500
Processing item 42/500
Processing item 43/500
Processing item 44/500

In [None]:
# The scores are computed again here from the summaries generated above.
rouge_scores = calculate_rouge(generated_summaries, reference_summaries_small)

# One labelled section per entry in the score mapping, with precision,
# recall and F1 shown to four decimal places.
for rouge_type in rouge_scores:
    scores = rouge_scores[rouge_type]
    print(f"{rouge_type.upper()}:")
    print(f"  Precision: {scores['p']:.4f}")
    print(f"  Recall:    {scores['r']:.4f}")
    print(f"  F1-Score:  {scores['f']:.4f}\n")

ROUGE-1:
  Precision: 0.3572
  Recall:    0.4714
  F1-Score:  0.3929

ROUGE-2:
  Precision: 0.1250
  Recall:    0.1859
  F1-Score:  0.1436

ROUGE-L:
  Precision: 0.3329
  Recall:    0.4413
  F1-Score:  0.3671



# BART


In [None]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from rouge import Rouge
from datasets import load_dataset

# Location of the saved BART files; the tokenizer is read from the same
# directory as the model weights.
model_path = token_path = "../model-bart"

# All tensors and the model are placed on the CPU.
device = "cpu"

# Load the tokenizer, then the model, and move the model onto `device`.
tokenizer = BartTokenizer.from_pretrained(token_path)
model = BartForConditionalGeneration.from_pretrained(model_path)
model = model.to(device)

def summarize(conversation):
    """Return a generated summary of `conversation`.

    The text is prefixed with "summarize: ", encoded with the module-level
    `tokenizer` (truncated to `max_length=512`), and passed to the
    module-level `model` for generation with 4 beams. Only the first
    returned sequence is decoded, with special tokens skipped.
    """
    # NOTE(review): the "summarize: " prefix looks carried over from the T5
    # code path -- confirm this BART checkpoint was trained with it.
    encoded = tokenizer.encode(
        "summarize: " + conversation,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    encoded = encoded.to(device)
    candidates = model.generate(
        encoded,
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(candidates[0], skip_special_tokens=True)

def calculate_rouge(hypotheses, references):
    """Compute ROUGE for `hypotheses` versus `references`.

    A new `Rouge` scorer is created on every call; its `get_scores` result
    (requested with `avg=True`) is returned as-is.
    """
    scorer = Rouge()
    return scorer.get_scores(hypotheses, references, avg=True)

# Validation split of the DialogSum dataset.
dataset = load_dataset('knkarthick/dialogsum', split='validation')

# Collect each record's dialogue text and reference summary into two
# lists that share the dataset's ordering.
conversation_list = []
reference_summaries = []
for record in dataset:
    conversation_list.append(record['dialogue'])
    reference_summaries.append(record['summary'])

# Only the first 50 examples are evaluated in this section.
# NOTE(review): the T5 section of this file scores the full split, so the
# two sets of ROUGE numbers are computed on different inputs.
first_fifty = slice(50)
conversation_list_small = conversation_list[first_fifty]
reference_summaries_small = reference_summaries[first_fifty]

# Generate a summary for every selected conversation, printing a progress
# line before each item is processed.
generated_summaries = []
total_items = len(conversation_list_small)
for position, dialogue in enumerate(conversation_list_small, start=1):
    print(f"Processing item {position}/{total_items}")
    generated_summaries.append(summarize(dialogue))

# ROUGE score calculation, followed by the raw result.
rouge_scores = calculate_rouge(generated_summaries, reference_summaries_small)
print(rouge_scores)

Processing item 1/50
Processing item 2/50
Processing item 3/50
Processing item 4/50
Processing item 5/50
Processing item 6/50
Processing item 7/50
Processing item 8/50
Processing item 9/50
Processing item 10/50
Processing item 11/50
Processing item 12/50
Processing item 13/50
Processing item 14/50
Processing item 15/50
Processing item 16/50
Processing item 17/50
Processing item 18/50
Processing item 19/50
Processing item 20/50
Processing item 21/50
Processing item 22/50
Processing item 23/50
Processing item 24/50
Processing item 25/50
Processing item 26/50
Processing item 27/50
Processing item 28/50
Processing item 29/50
Processing item 30/50
Processing item 31/50
Processing item 32/50
Processing item 33/50
Processing item 34/50
Processing item 35/50
Processing item 36/50
Processing item 37/50
Processing item 38/50
Processing item 39/50
Processing item 40/50
Processing item 41/50
Processing item 42/50
Processing item 43/50
Processing item 44/50
Processing item 45/50
Processing item 46/50

In [None]:
#print formatting
# One labelled section per entry in the score mapping, with precision,
# recall and F1 shown to four decimal places.
for rouge_type in rouge_scores:
    scores = rouge_scores[rouge_type]
    print(f"{rouge_type.upper()}:")
    print(f"  Precision: {scores['p']:.4f}")
    print(f"  Recall:    {scores['r']:.4f}")
    print(f"  F1-Score:  {scores['f']:.4f}\n")

ROUGE-1:
  Precision: 0.3436
  Recall:    0.5120
  F1-Score:  0.4048

ROUGE-2:
  Precision: 0.1319
  Recall:    0.1983
  F1-Score:  0.1544

ROUGE-L:
  Precision: 0.3146
  Recall:    0.4694
  F1-Score:  0.3707

