In [1]:
import re
import spacy as sp
import nltk

In [2]:
text = ''' Mahesh Babu, born on August 9, 1975, in Chennai, India, is one of the most prominent actors in the Telugu film industry. He is the son of veteran actor Krishna Ghattamaneni and Indira Devi. Mahesh Babu made his acting debut as a child artist in 1979 with the film "Needa," and his first leading role came in the 1999 movie "Raja Kumarudu," which earned him the Nandi Award for Best Male Debut.

Over the years, Mahesh Babu has established himself as a leading actor, known for his versatility and charm. Some of his most notable films include "Murari" (2001), "Okkadu" (2003), "Pokiri" (2006), "Dookudu" (2011), "Srimanthudu" (2015), and "Bharat Ane Nenu" (2018). His performance in these films has earned him numerous accolades, including several Nandi Awards, Filmfare Awards, and a SIIMA Award.

Mahesh Babu's film "Pokiri" was a major breakthrough, becoming one of the highest-grossing Telugu films at the time and solidifying his status as a superstar. He is known for his action-packed roles as well as his ability to portray emotional characters, making him a favorite among a wide range of audiences.

In addition to his acting career, Mahesh Babu is also a successful producer and philanthropist. He owns the production house G. Mahesh Babu Entertainment Pvt. Ltd. and has been actively involved in various charitable activities. Through his philanthropic foundation, Mahesh Babu has supported initiatives in education, health, and rural development.

Mahesh Babu is married to former actress Namrata Shirodkar, and the couple has two children, Gautham Krishna and Sitara. Despite his busy schedule, he is known to maintain a balance between his professional and personal life.

With a career spanning over two decades, Mahesh Babu continues to be a dominant force in the Telugu film industry, consistently delivering box-office hits and captivating performances. His dedication to his craft and his contributions to society make him a revered figure both on and off the screen.

'''

In [3]:
print("The Length of text is:",len(text))

The Length of text is: 1990


In [4]:
text = re.sub(r'\([^)]*\)', ' ', text)   # Removing the (inside matter)
text = re.sub(r'\[[^\]]*\]', ' ', text)  # Removing the [inside matter]
text = re.sub(r' +', ' ', text)          # Removing Extra Spaces
text = re.sub(r'"', '', text)
print(text)

 Mahesh Babu, born on August 9, 1975, in Chennai, India, is one of the most prominent actors in the Telugu film industry. He is the son of veteran actor Krishna Ghattamaneni and Indira Devi. Mahesh Babu made his acting debut as a child artist in 1979 with the film Needa, and his first leading role came in the 1999 movie Raja Kumarudu, which earned him the Nandi Award for Best Male Debut.

Over the years, Mahesh Babu has established himself as a leading actor, known for his versatility and charm. Some of his most notable films include Murari , Okkadu , Pokiri , Dookudu , Srimanthudu , and Bharat Ane Nenu . His performance in these films has earned him numerous accolades, including several Nandi Awards, Filmfare Awards, and a SIIMA Award.

Mahesh Babu's film Pokiri was a major breakthrough, becoming one of the highest-grossing Telugu films at the time and solidifying his status as a superstar. He is known for his action-packed roles as well as his ability to portray emotional characters,

In [5]:
print("The Length of text is:",len(text))

The Length of text is: 1936


In [6]:
nlp = sp.load("en_core_web_sm")

nltk.download('punkt')
original_sentences = nltk.sent_tokenize(text)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [7]:
original_sentences

[' Mahesh Babu, born on August 9, 1975, in Chennai, India, is one of the most prominent actors in the Telugu film industry.',
 'He is the son of veteran actor Krishna Ghattamaneni and Indira Devi.',
 'Mahesh Babu made his acting debut as a child artist in 1979 with the film Needa, and his first leading role came in the 1999 movie Raja Kumarudu, which earned him the Nandi Award for Best Male Debut.',
 'Over the years, Mahesh Babu has established himself as a leading actor, known for his versatility and charm.',
 'Some of his most notable films include Murari , Okkadu , Pokiri , Dookudu , Srimanthudu , and Bharat Ane Nenu .',
 'His performance in these films has earned him numerous accolades, including several Nandi Awards, Filmfare Awards, and a SIIMA Award.',
 "Mahesh Babu's film Pokiri was a major breakthrough, becoming one of the highest-grossing Telugu films at the time and solidifying his status as a superstar.",
 'He is known for his action-packed roles as well as his ability to p

#Pre-Processing

In [8]:
def preprocess(sentences):

  preprocessed_sentences = []

  for sentence in sentences:
    doc = nlp(sentence)
    extracted_words = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
    preprocessed_sentences.append(" ".join(extracted_words))
  return preprocessed_sentences

In [9]:
preprocessed_sentences = preprocess(original_sentences)
preprocessed_sentences

['Mahesh Babu bear August Chennai India prominent actor Telugu film industry',
 'son veteran actor Krishna Ghattamaneni Indira Devi',
 'Mahesh Babu acting debut child artist film Needa leading role come movie Raja Kumarudu earn Nandi Award Best Male Debut',
 'year Mahesh Babu establish lead actor know versatility charm',
 'notable film include Murari Okkadu Pokiri Dookudu Srimanthudu Bharat Ane Nenu',
 'performance film earn numerous accolade include Nandi Awards Filmfare Awards SIIMA Award',
 'Mahesh Babu film Pokiri major breakthrough highest gross telugu film time solidify status superstar',
 'know action pack role ability portray emotional character make favorite wide range audience',
 'addition act career Mahesh Babu successful producer philanthropist',
 'own production house Mahesh Babu Entertainment Pvt',
 'actively involve charitable activity',
 'philanthropic foundation Mahesh Babu support initiative education health rural development',
 'Mahesh Babu married actress Namrata Sh

#TF-IDF (Term Frequency-Inverse Document Frequency)






In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [11]:
vectorizer = TfidfVectorizer()
matrix  = vectorizer.fit_transform(preprocessed_sentences)

In [12]:
matrix   # we can see that 16x130: 16 rows and 130 columns in the matrix

<16x130 sparse matrix of type '<class 'numpy.float64'>'
	with 168 stored elements in Compressed Sparse Row format>

In [13]:
print(matrix[0].toarray())

[[0.         0.         0.         0.         0.         0.
  0.         0.27885138 0.         0.         0.         0.
  0.         0.35784258 0.         0.         0.17443071 0.
  0.35784258 0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.35784258
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.21507746
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.35784258
  0.         0.31163569 0.         0.         0.         0.
  0.         0.         0.         0.         0.17443071 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0. 

In [14]:
len(matrix.toarray()[0])

130

In [15]:
len(vectorizer.vocabulary_)

130

In [16]:
print(matrix[15])

  (0, 111)	0.37796447300922725
  (0, 52)	0.37796447300922725
  (0, 107)	0.37796447300922725
  (0, 115)	0.37796447300922725
  (0, 34)	0.37796447300922725
  (0, 36)	0.37796447300922725
  (0, 39)	0.37796447300922725


In [17]:
sum_scores = matrix.toarray().sum(axis = 1)
sum_scores

array([3.22249022, 2.63577527, 4.15430168, 2.91265222, 3.29079686,
       3.14253764, 3.52055485, 3.59738003, 2.74190166, 2.55346622,
       2.        , 3.08286353, 3.24031296, 2.9924422 , 4.16204629,
       2.64575131])

In [18]:
len(sum_scores)

16

In [19]:
ranked_scores = (-sum_scores).argsort()
ranked_scores

array([14,  2,  7,  6,  4, 12,  0,  5, 11, 13,  3,  8, 15,  1,  9, 10])

In [20]:
no_of_sentences = 4
top_score_indices = sorted(ranked_scores[: no_of_sentences])
top_score_indices

[2, 6, 7, 14]

In [21]:
final_sentences = [original_sentences[i] for i in top_score_indices]
summary = nlp(" ".join(final_sentences))
summary

Mahesh Babu made his acting debut as a child artist in 1979 with the film Needa, and his first leading role came in the 1999 movie Raja Kumarudu, which earned him the Nandi Award for Best Male Debut. Mahesh Babu's film Pokiri was a major breakthrough, becoming one of the highest-grossing Telugu films at the time and solidifying his status as a superstar. He is known for his action-packed roles as well as his ability to portray emotional characters, making him a favorite among a wide range of audiences. With a career spanning over two decades, Mahesh Babu continues to be a dominant force in the Telugu film industry, consistently delivering box-office hits and captivating performances.

In [22]:
len(summary)

133

In [25]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=008a3563c609191282b0a99acadf9426d138936734a100e6f0f8eb948591df9d
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [38]:
from rouge_score import rouge_scorer, scoring


reference_summaries = [
    "Mahesh Babu, born on August 9, 1975, in Chennai, India, is one of the most prominent actors in the Telugu film industry. He made his acting debut as a child artist in 1979 with the film \"Needa,\" and his first leading role came in the 1999 movie \"Raja Kumarudu,\" which earned him the Nandi Award for Best Male Debut. Some of his most notable films include \"Murari\" (2001), \"Okkadu\" (2003), \"Pokiri\" (2006), \"Dookudu\" (2011), \"Srimanthudu\" (2015), and \"Bharat Ane Nenu\" (2018). In addition to his acting career, Mahesh Babu is also a successful producer and philanthropist, owning the production house G. Mahesh Babu Entertainment Pvt. Ltd. and being actively involved in various charitable activities.", ]

generated_summaries = [
    "Mahesh Babu made his acting debut as a child artist in 1979 with the film Needa, and his first leading role came in the 1999 movie Raja Kumarudu, which earned him the Nandi Award for Best Male Debut. Mahesh Babu's film Pokiri was a major breakthrough, becoming one of the highest-grossing Telugu films at the time and solidifying his status as a superstar. He is known for his action-packed roles as well as his ability to portray emotional characters, making him a favorite among a wide range of audiences. With a career spanning over two decades, Mahesh Babu continues to be a dominant force in the Telugu film industry, consistently delivering box-office hits and captivating performances.",
]

# Initialize ROUGE scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
aggregator = scoring.BootstrapAggregator()

for ref, gen in zip(reference_summaries, generated_summaries):
    scores = scorer.score(ref, gen)
    aggregator.add_scores(scores)

result = aggregator.aggregate()

# Display results
for key, value in result.items():
    print(f"{key}: Recall: {value.mid.recall:.4f}, Precision: {value.mid.precision:.4f}, F1-Score: {value.mid.fmeasure:.4f}")


rouge1: Recall: 0.5614, Precision: 0.5333, F1-Score: 0.5470
rouge2: Recall: 0.3894, Precision: 0.3697, F1-Score: 0.3793
rougeL: Recall: 0.4211, Precision: 0.4000, F1-Score: 0.4103
