In [8]:
import numpy as np
import networkx as nx
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import re

stop_words = stopwords.words('english')

In [30]:
with open("summ.txt", "r") as f:
    filedata = f.readlines()
article = filedata[0].split('. ')
sentences = []

for sentence in article:
    sentence = re.sub('[^a-zA-Z]', ' ', sentence).split(" ")
    sentence = [word for word in sentence if len(word) > 0]
    sentences.append(sentence[:-1])

In [20]:
sentences

['In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub which has been launched to empower the next generation of students with AI-ready skills',
 'Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100 institutions with AI infrastructure, course content and curriculum, developer support, development tools and give students access to cloud and AI services',
 'As part of the program, the Redmond giant which wants to expand its reach and is planning to build a strong developer ecosystem in India with the program will set up the core AI infrastructure and IoT Hub for the selected campuses',
 'The company will provide AI development tools and Azure AI services such as Microsoft Cognitive Services, Bot Services and Azure Machine Learning.According to Manish Prakash, Country General Manager-PS, Health and Education, Microsoft India, said, "With AI being the defining technology of our time, it is transforming lives and

In [35]:
def sentence_similarity(sentence1, sentence2):
    sent1 = [w.lower() for w in sentence1]
    sent2 = [w.lower() for w in sentence2]    
    
    all_words = list(set(sent1 + sent2))
    
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)    
    
    for w in sent1:
        if w in stop_words:
            continue
        vector1[all_words.index(w)] += 1

    for w in sent2:
        if w in stop_words:
            continue
        vector2[all_words.index(w)] += 1
    
    return 1 - cosine_distance(vector1, vector2)

In [36]:
sim_matrix = np.zeros((len(sentences), len(sentences)))

for id1 in range(len(sentences)):
    for id2 in range(len(sentences)):
        if id1 == id2:
            continue
        sim_matrix[id1][id2] = sentence_similarity(sentences[id1], sentences[id2])

In [37]:
sim_matrix

array([[0.        , 0.35273781, 0.1820063 , 0.23539595, 0.        ,
        0.21821789, 0.23904572, 0.25197632, 0.1820063 , 0.28171808],
       [0.35273781, 0.        , 0.26215206, 0.19374379, 0.        ,
        0.29934217, 0.16395646, 0.20739034, 0.11235088, 0.27824334],
       [0.1820063 , 0.26215206, 0.        , 0.11246431, 0.        ,
        0.0695048 , 0.26648545, 0.18057878, 0.13043478, 0.32302914],
       [0.23539595, 0.19374379, 0.11246431, 0.        , 0.06741999,
        0.13483997, 0.0492366 , 0.27247463, 0.11246431, 0.20889319],
       [0.        , 0.        , 0.        , 0.06741999, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.21821789, 0.29934217, 0.0695048 , 0.13483997, 0.        ,
        0.        , 0.06085806, 0.09622504, 0.0695048 , 0.17213259],
       [0.23904572, 0.16395646, 0.26648545, 0.0492366 , 0.        ,
        0.06085806, 0.        , 0.10540926, 0.1522774 , 0.18856181],
       [0.25197632, 0.20739034, 0.1805787

In [38]:
sentence_sim_graph = nx.from_numpy_array(sim_matrix)
scores = nx.pagerank(sentence_sim_graph)

In [40]:
ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
ranked_sentence

[(0.1366725900418558,
  ['In',
   'an',
   'attempt',
   'to',
   'build',
   'an',
   'AI',
   'ready',
   'workforce',
   'Microsoft',
   'announced',
   'Intelligent',
   'Cloud',
   'Hub',
   'which',
   'has',
   'been',
   'launched',
   'to',
   'empower',
   'the',
   'next',
   'generation',
   'of',
   'students',
   'with',
   'AI',
   'ready']),
 (0.13164646115731624,
  ['Envisioned',
   'as',
   'a',
   'three',
   'year',
   'collaborative',
   'program',
   'Intelligent',
   'Cloud',
   'Hub',
   'will',
   'support',
   'around',
   'institutions',
   'with',
   'AI',
   'infrastructure',
   'course',
   'content',
   'and',
   'curriculum',
   'developer',
   'support',
   'development',
   'tools',
   'and',
   'give',
   'students',
   'access',
   'to',
   'cloud',
   'and',
   'AI']),
 (0.12973102878438217,
  ['This',
   'program',
   'also',
   'included',
   'developer',
   'focused',
   'AI',
   'school',
   'that',
   'provided',
   'a',
   'bunch',
   'of',
  

In [43]:
summarize_text = []

for i in range(4):
    summarize_text.append(" ".join(ranked_sentence[i][1]))

In [44]:
print("Summarized Text:\n", " ".join(summarize_text))

Summarized Text:
 In an attempt to build an AI ready workforce Microsoft announced Intelligent Cloud Hub which has been launched to empower the next generation of students with AI ready Envisioned as a three year collaborative program Intelligent Cloud Hub will support around institutions with AI infrastructure course content and curriculum developer support development tools and give students access to cloud and AI This program also included developer focused AI school that provided a bunch of assets to help build AI The company will provide AI development tools and Azure AI services such as Microsoft Cognitive Services Bot Services and Azure Machine Learning According to Manish Prakash Country General Manager PS Health and Education Microsoft India said With AI being the defining technology of our time it is transforming lives and industry and the jobs of tomorrow will require a different
