In [5]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np

# Function to read the text file
def read_text_file(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        text = file.read()
    return text

# Function to preprocess the text
def preprocess_text(text):
    # Tokenize the text into sentences and words
    sentences = sent_tokenize(text)
    words = [word_tokenize(sentence) for sentence in sentences]

    # Remove stopwords and punctuation
    stop_words = set(stopwords.words("english"))
    words = [[word.lower() for word in sentence if word.lower() not in stop_words and word.isalnum()] for sentence in words]

    return sentences, words

# Function to calculate sentence similarity using cosine distance
def sentence_similarity(sent1, sent2):
    sent1 = [word.lower() for word in sent1]
    sent2 = [word.lower() for word in sent2]

    all_words = list(set(sent1 + sent2))

    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)

    for word in sent1:
        vector1[all_words.index(word)] += 1

    for word in sent2:
        vector2[all_words.index(word)] += 1

    return 1 - cosine_distance(vector1, vector2)

# Function to create a similarity matrix
def create_similarity_matrix(sentences):
    similarity_matrix = np.zeros((len(sentences), len(sentences)))

    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j])

    return similarity_matrix

# Function to generate the summary
def generate_summary(text, num_sentences=3):
    # Preprocess the text
    sentences, words = preprocess_text(text)

    # Create similarity matrix
    similarity_matrix = create_similarity_matrix(words)

    # PageRank algorithm
    scores = np.array([1.0] * len(sentences))
    damping_factor = 0.85
    epsilon = 0.001
    prev_scores = np.ones(len(sentences))

    while np.sum(np.abs(scores - prev_scores)) > epsilon:
        prev_scores = scores.copy()
        for i in range(len(sentences)):
            summation = np.sum(similarity_matrix[:, i] * prev_scores)
            scores[i] = (1 - damping_factor) + damping_factor * summation

    # Sort the sentences by score
    ranked_sentences = [sentence for _, sentence in sorted(zip(scores, sentences), reverse=True)]

    # Select top sentences as summary
    summary = " ".join(ranked_sentences[:num_sentences])

    return summary

# File path of the input text file
file_path = "/content/sample.txt"

# Read the text file
text = read_text_file(file_path)

# Generate the summary
summary = generate_summary(text)

# Print the summary
print("Summary:")
print(summary)


Summary:
[citation needed]

The Cholas came to prominence once more through the rise of the Medieval Chola monarch Vijayalaya (841–878 CE) in about 850 CE. [citation needed] [9][10] During the first decade of the eleventh century, the Chola king Raja Raja Chola I (985–1014) constructed the Brihadeeswarar Temple at Thanjavur.
