# Installing Packages needed and Importing Libraries

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import string

import os
import time




# Dataset

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

###Loading the dataset

In [5]:
news = pd.read_csv("news.csv",engine="python",on_bad_lines='skip')
news.head()

Unnamed: 0,Headline,Content,News Categories,Date
0,Congress leader Baljinder Singh shot dead at h...,Congress leader Baljinder Singh was shot dead ...,['national'],19-09-2023
1,17-year-old girl preparing for NEET dies by su...,Another NEET aspirant died by suicide in Rajas...,['national'],19-09-2023
2,Hampers to welcome MPs in new Parliament tomor...,In order to mark the first-ever working day of...,['national'],19-09-2023
3,"Only 10% women lawmakers in RS, while only 14%...","Congress President Mallikarjun Kharge, while s...",['national'],19-09-2023
4,"Ganesh temple decorated with notes, coins wort...",The Sri Sathya Ganapathi Temple in Bengaluru a...,['national'],19-09-2023


In [6]:
print ("The total no. of samples in the dataset =",news.shape[0])

The total no. of samples in the dataset = 270474


In [7]:
text = news['Content']
summary = news['Headline']

In [8]:
# using the train test split function
text, text_test, summary, summary_test = train_test_split(text, summary, random_state=104, test_size=0.1, shuffle=True)

In [9]:
print ("The no. of training samples =",text.shape[0])

The no. of training samples = 243426


In [10]:
print ("The no. of test samples =",text_test.shape[0])

The no. of test samples = 27048


###Preprocessing


In [11]:
import contractions
import re
def preprocess(x):
  # expand contractions
  x= x.apply(lambda y: contractions.fix(y))
  # remove html tags
  x= x.apply(lambda y: re.compile('<.*?>').sub(r'',y))
  # remove url
  x= x.apply(lambda y: re.compile(r'https?://\S+|www\.\S+').sub(r'',y))
  # remove 's
  x=  x.apply(lambda y: re.sub(r"'s\b","",y))
  # remove '
  x= x.apply(lambda y: re.sub("'",'', y))
  # add end and start token
  x= x.apply(lambda y: 'sos ' + y + ' eos')
  return x


In [12]:
text=preprocess(text)

In [13]:
summary=preprocess(summary)

###Tokenization

In [14]:
import tensorflow as tf

# convert to lowercase,remove punctuations, tokenize

# create tokenizer object
tokenizer=tf.keras.preprocessing.text.Tokenizer(oov_token="<unk>")

# fit tokenizer on data
tokenizer.fit_on_texts(text)
tokenizer.fit_on_texts(summary)

# convert text to numbers
text=tokenizer.texts_to_sequences(text)
summary=tokenizer.texts_to_sequences(summary)

# vocabulary
VOCAB=tokenizer.word_index
VOCAB_SIZE=len(VOCAB)+1


In [15]:
import pickle
# saving
with open('/content/drive/My Drive/tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle)


In [16]:
ENCODER_LEN = 100
DECODER_LEN = 20

In [17]:
# post padding the sequences so that they are of same size
text=tf.keras.utils.pad_sequences(text, maxlen=ENCODER_LEN, truncating='post', padding='post')
summary=tf.keras.utils.pad_sequences(summary, maxlen=DECODER_LEN, truncating='post', padding='post')


###Preparing the dataset for training (Making batches)

In [18]:
BATCH_SIZE = 64
BUFFER_SIZE = BATCH_SIZE*8

In [19]:
text = tf.cast(text, dtype=tf.int64)
summary = tf.cast(summary, dtype=tf.int64)

In [20]:
dataset = tf.data.Dataset.from_tensor_slices((text,summary)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# DESIGNING TRANSFORMER

### Positional Encoding function

In [21]:
# The code implements the formula, but instead of interleaving the sines and cosines,
# the vectors of sines and cosines are simply concatenated.
# Permuting the channels like this is functionally equivalent, and just a little easier to implement.

def positional_encoding(length, depth):
  # length: length of the sequence
  # depth: dimension of the output embedding space

  # positions: positions of a token in input sequence
  positions = np.arange(length)[:, np.newaxis]

  # i is used for mapping to column indices [0<=i<=depth/2]
  i = depth/2
  i = np.arange(i)[np.newaxis, :]

  # Angle in radians as per the formula
  angle_rates = 1 / (10000**(2*i/depth))
  angle_rads = positions * angle_rates

  # Concatenating the sines and cos
  pos_encoding = np.concatenate([np.sin(angle_rads), np.cos(angle_rads)], axis=-1)

  # return the result after casting in to compatible datatype
  return tf.cast(pos_encoding, dtype=tf.float32)

###ATTENTION layer

In [22]:
class MultiHead_Attention(tf.keras.layers.Layer):
# extending the keras Layer class to create a new class

    def __init__(self, d_model, num_heads):
        # calling the super class(Layer) initializer
        super(MultiHead_Attention, self).__init__()

        # Attributes of MultiHead_Attention

        # num_heads: no. of heads in the multihead attention
        self.num_heads = num_heads

        # d_model: dimension of the model
        self.d_model = d_model

        # if d_model % num_heads not equal to 0 raise AssertionError
        assert d_model % self.num_heads == 0

        # depth: dimension of each head
        self.depth = d_model // self.num_heads

        # Linear dense layers of d_model units,
        # to produce query
        self.wq = tf.keras.layers.Dense(d_model)
        # to produce key
        self.wk = tf.keras.layers.Dense(d_model)
        # to produce value
        self.wv = tf.keras.layers.Dense(d_model)

        # the last linear layer of the multihead-attention
        self.dense = tf.keras.layers.Dense(d_model)


    def split_heads(self, x, batch_size):
        # splits the sequence of d_model dimension to num_heads no. of depth dimension tensors
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    # Scaled Dot-Product Attention
    def scaled_dot_product_attention(self,q, k, v, mask):

      # matrix multiplication of the query and transpose(key)
      matmul_qk = tf.matmul(q, k, transpose_b=True)

      # dk: dimension of key
      dk = tf.cast(tf.shape(k)[-1], tf.float32)

      # Scaling
      scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

      # if token is to be masked its attention logits is set to -infinity
      if mask is not None:
        scaled_attention_logits += (mask * -1e9)

      # softmax on the attention logits to get the attention weights
      attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)

      # matrix multiplication of the resulting attention weights and value
      output = tf.matmul(attention_weights, v)

      return output, attention_weights


    def call(self, v, k, q, mask):

        batch_size = tf.shape(q)[0]

        # the linear transformations to get the query,key,value from the inputs
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        # splitting each of query,key,value into multiple heads
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        # calculating the Scaled Dot-Product Attention
        scaled_attention, attention_weights = self.scaled_dot_product_attention(q, k, v, mask)

        # changing the output to compatible type
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])

        # concatinating the attention outputs
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))

        # passing the output through  the final linear layer
        output = self.dense(concat_attention)

        return output, attention_weights


###Feed-forward Neural Network

In [23]:
def feed_forward_network(d_model, dff):
    # creates a  2 layer neural network with ReLU activation function
    return tf.keras.Sequential([
        tf.keras.layers.Dense(dff, activation='relu'),
        tf.keras.layers.Dense(d_model)  ])

### Encoder layer

In [24]:
# A SINGLE ENCODER LAYER
class EncoderLayer(tf.keras.layers.Layer):
# extending the keras Layer class to create a new class

    def __init__(self, d_model, num_heads, dff, rate=0.1):
         # calling the super class(Layer) initializer
        super(EncoderLayer, self).__init__()

        # Multihead attention layer
        self.mha = MultiHead_Attention(d_model, num_heads)

        # Point-wise feed forward neural network(FFN)
        self.ffn = feed_forward_network(d_model, dff)

        # layer normalization for attention layer
        self.layernorm_mha = tf.keras.layers.LayerNormalization()
        # layer normalization for FFN layer
        self.layernorm_ffn = tf.keras.layers.LayerNormalization()

        # dropout layer for attention layer
        self.dropout_mha = tf.keras.layers.Dropout(rate)
        # dropout layer for FFN layer
        self.dropout_ffn = tf.keras.layers.Dropout(rate)


    def call(self, x, training, mask):

        # input x is passed to Self-Attention layer
        #(all of the keys, values and queries come from the same place, in this case,
        # the output of the previous layer in the encoder)
        attn_output, _ = self.mha(x, x, x, mask)
        # applying dropout to this output
        attn_output = self.dropout_mha(attn_output, training=training)
        # add the residual connection and layer normalize
        out1 = self.layernorm_mha(x + attn_output)

        # the final attention output is feed to FFN
        ffn_output = self.ffn(out1)
        # applying dropout to this output
        ffn_output = self.dropout_ffn(ffn_output, training=training)
        # add the residual connection and layer normalize
        out2 = self.layernorm_ffn(out1 + ffn_output)

        return out2

###Decoder Layer

In [25]:
# A SINGLE DECODER LAYER
class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(DecoderLayer, self).__init__()

        # 2 Multihead attention layers (one for decoder and another is encoder-decoder layer )
        self.masked_mha = MultiHead_Attention(d_model, num_heads)
        self.cross_mha = MultiHead_Attention(d_model, num_heads)

        # Point-wise feed forward neural network(FFN)
        self.ffn = feed_forward_network(d_model, dff)

        # layer normalization for both attention layer
        self.layernorm_masked_mha = tf.keras.layers.LayerNormalization()
        self.layernorm_cross_mha = tf.keras.layers.LayerNormalization()

        # layer normalization for FFN layer
        self.layernorm_ffn = tf.keras.layers.LayerNormalization()

        # dropout layer for attention layer
        self.dropout_masked_mha = tf.keras.layers.Dropout(rate)
        self.dropout_cross_mha = tf.keras.layers.Dropout(rate)

        # dropout layer for FFN layer
        self.dropout_ffn = tf.keras.layers.Dropout(rate)


    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):

        # The masked self-attention layers in the decoder allow each position in the decoder
        # to attend to all positions in the decoder up to and including that position
        masked_attn, masked_attn_weights = self.masked_mha(x, x, x, look_ahead_mask)
        # applying the dropout
        masked_attn = self.dropout_masked_mha(masked_attn, training=training)

        # add the residual connection and layer normalize
        out1 = self.layernorm_masked_mha(masked_attn + x)

        # the queries come from the previous decoder layer,
        # and the memory keys and values come from the output of the encoder
        cross_attn, cross_attn_weights = self.cross_mha(enc_output, enc_output, out1, padding_mask)
        # applying the dropout
        cross_attn = self.dropout_cross_mha(cross_attn, training=training)

        # add the residual connection and layer normalize
        out2 = self.layernorm_cross_mha(cross_attn + out1)

        # the final attention output is feed to FFN
        ffn_output = self.ffn(out2)
        # applying dropout to this output
        ffn_output = self.dropout_ffn(ffn_output, training=training)
        # add the residual connection and layer normalize
        out3 = self.layernorm_ffn(ffn_output + out2)

        return out3, masked_attn_weights, cross_attn_weights

### ENCODER

In [26]:
# The ENCODER with N encoder layers
class Encoder(tf.keras.layers.Layer):

    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, maximum_position_encoding, rate=0.1):
        super(Encoder, self).__init__()

        # d_model: dimensions of the model
        self.d_model = d_model
        # num_layers: No. of encoder layers(N)
        self.num_layers = num_layers

        # Embedding layer
        self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)

        # Positional-encoding layer
        self.pos_encoding = positional_encoding(maximum_position_encoding, self.d_model)

        # a stack of N layers
        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]

        # Dropout layer
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, training, mask):

        # seq_len: length of the token sequences in x
        seq_len = tf.shape(x)[1]

        # passing the input x to the embedding layer
        x = self.embedding(x)
        # This factor sets the relative scale of the embedding and positonal_encoding.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        # adding the embedded vector and the positional encoding
        x = x + self.pos_encoding[tf.newaxis, :seq_len, :]

        # applying dropout on x
        x = self.dropout(x, training=training)

        # feeding x to the stack of encoder layers
        for i in range(self.num_layers):
            x = self.enc_layers[i](x, training, mask)

        return x

###DECODER

In [27]:
# The DECODER with N decoder layers
class Decoder(tf.keras.layers.Layer):

    def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size, maximum_position_encoding, rate=0.1):
        super(Decoder, self).__init__()

        # d_model: dimensions of the model
        self.d_model = d_model
        # num_layers: No. of encoder layers(N)
        self.num_layers = num_layers

        # Embedding layer
        self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)

        # Positional-encoding layer
        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)

        # a stack of N decoder layers
        self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate) for _ in range(num_layers)]

        # Dropout layer
        self.dropout = tf.keras.layers.Dropout(rate)

    def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
        # seq_len: length of the token sequences in x
        seq_len = tf.shape(x)[1]
        attention_weights = {}

        # passing the input x to the embedding layer
        x = self.embedding(x)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        # adding the embedded vector and the positional encoding
        x = x + self.pos_encoding[tf.newaxis, :seq_len, :]

        # applying dropout on x
        x = self.dropout(x, training=training)

        # feeding x to the stack of decoder layers
        for i in range(self.num_layers):
            x, block1, block2 = self.dec_layers[i](x, enc_output, training, look_ahead_mask, padding_mask)
            # This constructs a dynamic key for accessing attention weights within a specific decoder layer.
            # The {} is a placeholder that will be replaced with the actual value of i+1.
            attention_weights['decoder_layer{}_block1'.format(i+1)] = block1
            attention_weights['decoder_layer{}_block2'.format(i+1)] = block2

        return x, attention_weights


## TRANSFORMER MODEL

In [28]:
class Transformer(tf.keras.Model):
# creating a model
    def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size,rate=0.1):
        # calling the super class(Model) initializer
        super(Transformer, self).__init__()

        # Encoder
        self.encoder = Encoder(num_layers, d_model, num_heads, dff, input_vocab_size, rate)

        # Decoder
        self.decoder = Decoder(num_layers, d_model, num_heads, dff, target_vocab_size, rate)

        # Final Linear layer
        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inp, tar, training, enc_padding_mask, look_ahead_mask, dec_padding_mask):

        # pass the input and the padding mask to encoder
        enc_output = self.encoder(inp, training, enc_padding_mask)

        # pass the encoder output, look ahead mask for decoder masked self attention,
        # padding mask for the encoder-decoder attention to the decoder
        dec_output, attention_weights = self.decoder(tar, enc_output, training, look_ahead_mask, dec_padding_mask)

        # pass the decoder output to the linear layer
        final_output = self.final_layer(dec_output)

        return final_output, attention_weights

# TRAINING THE MODEL

In [29]:
num_layers = 3  # original=6
d_model = 128 # original=512
dff = 512 # original=2048
num_heads = 4 # original=8
dropout_rate = 0.1 # original= 0.1
EPOCHS = 20

###Custom Learning Rate

In [30]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  def __init__(self, d_model, warmup_steps=4000):
    super().__init__()

    self.d_model = d_model
    self.d_model = tf.cast(self.d_model, tf.float32)

    self.warmup_steps = warmup_steps

  def __call__(self, step):
    step = tf.cast(step, dtype=tf.float32)
    arg1 = tf.math.rsqrt(step)
    arg2 = step * (self.warmup_steps ** -1.5)

    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

###Adam Optimiser

In [31]:
learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

### Masked Loss

In [32]:
def loss_function(label, pred):

  # creates a boolean mask where the label is not zero
  mask = label != 0

  # initializes the loss function without reduction, meaning it will return the loss for each element.
  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

  #computes the loss for each element
  loss = loss_object(label, pred)

  # mask is cast to the same type as the loss and applied to zero out the loss where the mask is false.
  mask = tf.cast(mask, dtype=loss.dtype)
  loss *= mask

  # loss is summed and then normalized by the sum of the mask to get the average loss over the non-masked elements
  loss = tf.reduce_sum(loss)/tf.reduce_sum(mask)

  return loss

### Masked Accuracy

Your accuracy_function computes the accuracy between predicted sequences (pred) and ground truth sequences (label) in a sequence-to-sequence task.

The accuracy measure you’ve implemented is commonly known as Token Accuracy or Sequence Token Accuracy. It evaluates how well a sequence-to-sequence model predicts individual tokens in the output sequence compared to the ground truth tokens


First, you find the most likely token index for each time step in the predicted sequence using tf.argmax(pred, axis=2). This assumes that pred is a tensor with shape (batch_size, sequence_length, num_classes).

Next, you cast the label tensor to the same data type as pred.
You create a boolean mask where each element is True if the label is non-zero (i.e., not a padding token).

The match tensor contains True at positions where the predicted token matches the ground truth token and the label is non-zero. It’s computed as match = label == pred & mask.

Finally, you calculate the accuracy by dividing the sum of match (correct predictions) by the sum of mask (total non-padding tokens).

Overall, your function provides a way to evaluate the accuracy of sequence predictions while handling padding tokens appropriately.


In [33]:
def accuracy_function(label, pred):

  label = tf.cast(label, pred.dtype)

  # creates a boolean tensor where each element is True
  # if the corresponding elements in label and pred are equal
  match = label == pred

  # creates a boolean mask where the label is not zero.
  mask = label != 0
  # applies the mask to the match tensor, setting elements to False where the mask is False
  match = match & mask

  match = tf.cast(match, dtype=tf.float32)
  mask = tf.cast(mask, dtype=tf.float32)

  # accuracy is calculated as the sum of the matches divided by the sum of the mask,
  # giving the proportion of correct predictions among the non-zero labels.
  return tf.reduce_sum(match)/tf.reduce_sum(mask)

### Instance of transformer model

In [34]:
transformer = Transformer(
    num_layers= num_layers,
    d_model= d_model,
    num_heads= num_heads,
    dff= dff,
    input_vocab_size= VOCAB_SIZE,
    target_vocab_size= VOCAB_SIZE,
    rate= dropout_rate)

### Creating Masks

In [35]:
def create_padding_mask(seq):
    # creates a padding mask
    seq = tf.cast(tf.math.equal(seq, 0), tf.float32)
    return seq[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    # creates the look ahead mask
    mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return mask


In [36]:
def create_masks(inp, tar):
    # creates all the masks needed
    enc_padding_mask = create_padding_mask(inp)
    dec_padding_mask = create_padding_mask(inp)

    look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])
    dec_target_padding_mask = create_padding_mask(tar)
    combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)

    return enc_padding_mask, combined_mask, dec_padding_mask

### Train function

In [37]:
# finds the mean loss for each batch
train_loss = tf.keras.metrics.Mean(name='train_loss')
# finds the mean training accuracy
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

In [38]:
@tf.function
def train_step(inp, tar):
# function to train the model using teacher forcing

    # the target sequence given to the decoder (the last token is removed)
    tar_inp = tar[:, :-1]
    # the target sequence used for finding the loss (the first token is removed)
    tar_real = tar[:, 1:]

    # generating the masks needed
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

    # minimize the loss
    with tf.GradientTape() as tape:
        # output from the model
        predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask, combined_mask, dec_padding_mask)
        # loss between the output and real target sequence
        loss = loss_function(tar_real, predictions)

    # updating the weights
    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    # finding mean loss
    train_loss(loss)
    # finding the mean accuracy
    train_accuracy(accuracy_function(tar_real, tf.argmax(predictions, axis=2)))

### Training the Model

In [39]:
for epoch in range(EPOCHS):
    start = time.time()

    # resetting the mean loss before each training step
    train_loss.reset_states()

    # for each batch in the training dataset
    for (batch, (inp, tar)) in enumerate(dataset):
        # train using the function
        train_step(inp, tar)

        if batch % 100 == 0:
            print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')

    print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')
    print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))


Epoch 1 Batch 0 Loss 11.2957 Accuracy 0.0000


KeyboardInterrupt: 

### Saving the model

In [None]:
transformer.save_weights('/content/drive/My Drive/Project/checkpoints')


In [40]:
transformer.load_weights('/content/drive/My Drive/checkpoints')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x791a62074970>

In [41]:
with open('/content/drive/My Drive/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)


In [None]:
transformer.summary()

#EVALUATION

In [42]:
def evaluate(input, output):
# input is feed to the encoder
# initially only the "sos" token is feed to decoder i.e. the output
# generate tokens until the sequence is of DECODER_LEN
    for i in range(1,DECODER_LEN):

        # generate the mask required
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(input, output)

        # the output probabilities are obtained from the model
        # training is set to False
        predictions, _ = transformer(input, output, False, enc_padding_mask, combined_mask, dec_padding_mask)

        # predicted output probabilities for the last token (till now)
        predictions = predictions[: ,-1:, :]

        # finding the token
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # if this function is used for summarization and not testing
        # return output as soon as "eos" token is recieved
        if predicted_id.shape[0]==1 and predicted_id == tokenizer.word_index['eos']:
           return output

        # add the last token to the output and feed this sequence to the decoder in the next iteration
        output = tf.concat([output, predicted_id], axis=-1)

    return output

#TESTING

In [67]:
text_test[86875]

KeyError: 86875

In [None]:
# preprocess
text_test=preprocess(text_test)
summary_test=preprocess(summary_test)

In [None]:
# convert to lowercase,remove punctuations, tokenize
text_test=tokenizer.texts_to_sequences(text_test)
summary_test=tokenizer.texts_to_sequences(summary_test)

In [None]:
# pad the sequences
text_test= tf.keras.utils.pad_sequences(text_test, maxlen=ENCODER_LEN, truncating='post', padding='post')
summary_test= tf.keras.utils.pad_sequences(summary_test, maxlen=DECODER_LEN, truncating='post', padding='post')

In [None]:
text_test = tf.cast(text_test, dtype=tf.int32)
summary_test = tf.cast(summary_test, dtype=tf.int32)

In [None]:
# create batches of the testing set
dataset_test = tf.data.Dataset.from_tensor_slices((text_test,summary_test)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [None]:
dataset_test

In [None]:
test_accuracy = tf.keras.metrics.Mean(name='test_accuracy')

In [None]:
for (batch, (inp, tar)) in enumerate(dataset_test):

  # passing the "sos" tokens
  out= tar[:,0:1]
  # generating the output sequences
  pred= evaluate(inp,out)

  # mask the eos tokens
  pred= tf.where(tf.equal(pred, tokenizer.word_index['eos']), 0, pred)
  label= tf.where(tf.equal(tar, tokenizer.word_index['eos']), 0, tar)

  # find the mean accuracy
  test_accuracy(accuracy_function(label,pred))

  if batch % 10 == 0:
            print(f'Batch {batch}  Accuracy {test_accuracy.result():.4f}')


In [None]:
print(test_accuracy.result())

#Summarization

In [43]:
def summarize(txt):

  # convert the txt string to sequence of tokens after preprocessing
  txt= preprocess(pd.Series(txt))
  txt= tokenizer.texts_to_sequences(txt)
  txt= tf.keras.utils.pad_sequences(txt, maxlen= ENCODER_LEN, truncating='post', padding='post')
  txt= tf.cast(txt, dtype=tf.int32)

  # the initial output or decoder input - 'sos' token
  out= tf.cast([[tokenizer.word_index['sos']]],dtype=tf.int32)

  # generate the output sequence
  p= evaluate(txt,out)
  p=p.numpy()

  # convert output sequence to text
  p= tokenizer.sequences_to_texts(p)

  # return the output text excluding the 'sos'
  return p[0][4:]

In [51]:
print("Article: ", news["Content"][86875])
print("Original Headline: ", news["Headline"][86875])
print("Generated Headline: ",summarize(news["Content"][86875]))

Article:  Radhamani Textiles, the owner of menswear range Rare Rabbit and other apparel brands, is working with investors to finalise funding at a valuation of $300-320 million, sources tell online publication The Arc. A91 Partners, a venture-capital fund, may lead the round. A91 already backs consumer brands like Atomberg and Sugar Cosmetics. Rare Rabbit operates at least 100 outlets.
Original Headline:  Fashion brand Rare Rabbit eyes funding at $300-mn valuation
Generated Headline:  fashion brand rare rabbit eyes funding at 300 mn valuation


In [52]:
print("Article: ", news["Content"][111325])
print("Original Headline: ", news["Headline"][111325])
print("Generated Headline: ",summarize(news["Content"][111325]))

Article:  Society of Manufacturers of Electric Vehicles has written a letter to the Minister of Heavy Industries (MHI) after seven electric two-wheeler makers were asked to return ₹469 crore for violating the FAME-II scheme's norms. "Since...the subsidies passed on to customers...now stand cancelled...the customers who've taken such subsidies can be asked to return these...in all fairness," the letter said.
Original Headline:  Customers could be asked to return cancelled FAME subsidy: EV body
Generated Headline:  customers could be asked to return cancelled fame subsidy ev body


In [65]:
print("Article: ", news["Content"][24664])
print("Original Headline: ", news["Headline"][24664])
print("Generated Headline: ",summarize(news["Content"][24664]))

Article:  The National Testing Agency (NTA) released the examination calendar for major exams like JEE, NEET, CUET and UGC NET for the academic year 2024-25. JEE Main will be held in two sessions in January-February and in April. NEET and CUET UG will take place in May. Examination-specific details will be provided to candidates through the information bulletin of respective exams. 
Original Headline:  Calendar for major exams like JEE, NEET for 2024-25 released
Generated Headline:  calendar for major exams like jee neet for 2024 25 released


In [74]:
article="""A new malaria vaccine, developed by researchers at the University of Oxford, has shown an efficacy rate of 77% in recent trials. This breakthrough could significantly reduce the global burden of malaria, which affects millions annually. The vaccine, named R21/Matrix-M, is expected to be a game-changer in regions heavily impacted by the disease. Researchers are hopeful that with further testing and approval, the vaccine could be widely distributed within the next two years."""

print("Article: ", article)
print("\nOriginal Headline:  New Vaccine Shows Promise in Combating Malaria")
print("Generated Headline: ",summarize(article))

Article:  A new malaria vaccine, developed by researchers at the University of Oxford, has shown an efficacy rate of 77% in recent trials. This breakthrough could significantly reduce the global burden of malaria, which affects millions annually. The vaccine, named R21/Matrix-M, is expected to be a game-changer in regions heavily impacted by the disease. Researchers are hopeful that with further testing and approval, the vaccine could be widely distributed within the next two years.

Original Headline:  New Vaccine Shows Promise in Combating Malaria
Generated Headline:  malaria vaccine could reduce the catches fire by serum institute


In [75]:
article="""A groundbreaking AI technology developed by MedTech Innovations has significantly improved diagnostic accuracy in healthcare. The AI system, which analyzes medical images, has achieved a 95% accuracy rate in detecting early-stage cancers. This advancement promises to enhance patient outcomes and reduce diagnostic errors. The technology is currently being tested in several hospitals and is expected to be widely adopted within the next year."""

print("Article: ", article)
print("\nOriginal Headline:  AI Technology Revolutionizes Healthcare Diagnostics")
print("Generated Headline: ",summarize(article))

Article:  A groundbreaking AI technology developed by MedTech Innovations has significantly improved diagnostic accuracy in healthcare. The AI system, which analyzes medical images, has achieved a 95% accuracy rate in detecting early-stage cancers. This advancement promises to enhance patient outcomes and reduce diagnostic errors. The technology is currently being tested in several hospitals and is expected to be widely adopted within the next year.

Original Headline:  AI Technology Revolutionizes Healthcare Diagnostics
Generated Headline:  ai tool to reduce your infection of patient
