In [28]:
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize


In [29]:
text_string = "Mahatma Gandhi was a great freedom fighter who spent his whole life in struggle for the independence of India. He was born in the Indian Hindu family on 2nd of October in 1869 in the Porbander, Gujarat. He lived his whole as a leader of the Indian people. His whole life story is a great inspiration for us. He is called as the Bapu or Rashtrapita as he spent his life in fighting against British rule for the freedom of us. While fighting with Britishers he took help of his great weapons like non-violence and Satyagraha movements to achieve freedom. Many times he got arrested and sent to the jail but he never discourages himself and continued fighting for national freedom.He is the real father of our nation who really used his all power to make us free from the British rule. He truly understood the power of unity in people (from different castes, religions, community, race, age or gender) which he used all through his independence movement. Finally he forced Britishers to quit India forever through his mass movements on 15th of August in 1947. Since 1947, the 15th of August is celebrated every year as the Independence Day in India.He could not continue his life after the independence of India in 1947 as he was assassinated by one of the Hindu activists, Nathuram Godse in 1948 on 30th of January. He was the great personality who served his whole life till death for the motherland. He enlightened our life with the true light of freedom from British rule. He proved that everything is possible with the non-violence and unity of people. Even after getting died many years ago, he is still alive in the heart of every Indian as a “Father of the Nation and Bapu”."



In [30]:
def _create_frequency_table(text_string) -> dict:

    stopWords = set(stopwords.words("english"))
    words = word_tokenize(text_string)
    ps = PorterStemmer()

    freqTable = dict()
    for word in words:
        word = ps.stem(word)
        if word in stopWords:
            continue
        if word in freqTable:
            freqTable[word] += 1
        else:
            freqTable[word] = 1

    return freqTable


In [38]:
sent_tokenize(text_string)

['Mahatma Gandhi was a great freedom fighter who spent his whole life in struggle for the independence of India.',
 'He was born in the Indian Hindu family on 2nd of October in 1869 in the Porbander, Gujarat.',
 'He lived his whole as a leader of the Indian people.',
 'His whole life story is a great inspiration for us.',
 'He is called as the Bapu or Rashtrapita as he spent his life in fighting against British rule for the freedom of us.',
 'While fighting with Britishers he took help of his great weapons like non-violence and Satyagraha movements to achieve freedom.',
 'Many times he got arrested and sent to the jail but he never discourages himself and continued fighting for national freedom.He is the real father of our nation who really used his all power to make us free from the British rule.',
 'He truly understood the power of unity in people (from different castes, religions, community, race, age or gender) which he used all through his independence movement.',
 'Finally he for

In [43]:
def _score_sentences(sentences, freqTable) -> dict:
    sentenceValue = dict()

    for sentence in sentences:
        word_count_in_sentence = (len(word_tokenize(sentence)))
        for wordValue in freqTable:
            if wordValue in sentence.lower():
                if sentence[:10] in sentenceValue:
                    sentenceValue[sentence[:10]] += freqTable[wordValue]
                else:
                    sentenceValue[sentence[:10]] = freqTable[wordValue]

        sentenceValue[sentence[:10]] = sentenceValue[sentence[:10]] // word_count_in_sentence

    return sentenceValue


In [44]:
def _find_average_score(sentenceValue) -> int:
    sumValues = 0
    for entry in sentenceValue:
        sumValues += sentenceValue[entry]

    # Average value of a sentence from original text
    average = int(sumValues / len(sentenceValue))

    return average


In [45]:
def _generate_summary(sentences, sentenceValue, threshold):
    sentence_count = 0
    summary = ''

    for sentence in sentences:
        if sentence[:10] in sentenceValue and sentenceValue[sentence[:10]] > (threshold):
            summary += " " + sentence
            sentence_count += 1

    return summary


In [46]:
# 1 Create the word frequency table
freq_table = _create_frequency_table(text_string)

'''
We already have a sentence tokenizer, so we just need 
to run the sent_tokenize() method to create the array of sentences.
'''

# 2 Tokenize the sentences
sentences = sent_tokenize(text_string)

# 3 Important Algorithm: score the sentences
sentence_scores = _score_sentences(sentences, freq_table)

# 4 Find the threshold
threshold = _find_average_score(sentence_scores)

# 5 Important Algorithm: Generate the summary
summary = _generate_summary(sentences, sentence_scores, 1.5 * threshold)

print(summary)



 Mahatma Gandhi was a great freedom fighter who spent his whole life in struggle for the independence of India. He was born in the Indian Hindu family on 2nd of October in 1869 in the Porbander, Gujarat. He lived his whole as a leader of the Indian people. His whole life story is a great inspiration for us. He is called as the Bapu or Rashtrapita as he spent his life in fighting against British rule for the freedom of us. While fighting with Britishers he took help of his great weapons like non-violence and Satyagraha movements to achieve freedom. Finally he forced Britishers to quit India forever through his mass movements on 15th of August in 1947. He was the great personality who served his whole life till death for the motherland. He enlightened our life with the true light of freedom from British rule. He proved that everything is possible with the non-violence and unity of people.
