In [2]:
pip install spacy

Collecting spacy
  Downloading spacy-3.8.7-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.13-cp313-cp313-win_amd64.whl.metadata (2.2 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.11-cp313-cp313-win_amd64.whl.metadata (8.8 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.10-cp313-cp313-win_amd64.whl.metadata (2.5 kB)
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Downloading thinc-8.3.6-cp313-cp313-win_amd64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Downloading wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)
Collecting srsly<3.0.0,>=2.4.3 (from spacy)
  Downloading srsly-2.5.1-cp313-cp313-win_amd6

In [64]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
from heapq import nlargest

In [65]:
#Loading the model (English) into spaCy

In [6]:
# Download the small English pipeline that spacy.load('en_core_web_sm') loads below.
# NOTE(review): `!python` runs whichever `python` the shell resolves, which may
# not be the interpreter backing this kernel — confirm on multi-env machines.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ------- -------------------------------- 2.4/12.8 MB 13.9 MB/s eta 0:00:01
     ------------------ --------------------- 6.0/12.8 MB 15.6 MB/s eta 0:00:01
     ---------------------------- ----------- 9.2/12.8 MB 15.3 MB/s eta 0:00:01
     ----------------------------------- --- 11.8/12.8 MB 14.8 MB/s eta 0:00:01
     --------------------------------------- 12.8/12.8 MB 14.1 MB/s eta 0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [7]:
# Load the English pipeline downloaded above; `nlp` is later called on the raw text.
nlp = spacy.load('en_core_web_sm')

In [66]:
# NOTE(review): machine-specific absolute path. The next cell reassigns
# file_path to the relative name "doc" before the file is opened, so this
# value is never read.
file_path = "F:/Text-Summarizer-Using-Spacy-main/Text-Summarizer-Using-Spacy-main/doc"


In [None]:
# Read the whole input document into memory as UTF-8 text.
file_path = "doc"
with open(file_path, mode="r", encoding="utf-8") as doc_file:
    text = doc_file.read()

# Preview only the first 200 characters rather than dumping the full text.
preview = text[:200]
print(preview)


Artificial Intelligence (AI) has rapidly transformed various industries by enabling machines to perform tasks that traditionally required human intelligence. From healthcare to finance, AI application


In [68]:
# Run the loaded spaCy pipeline over the raw text; later cells iterate over
# `doc` for its tokens and over `doc.sents` for its sentences.
doc = nlp(text)

In [69]:
#Finding the number of sentences in the document

In [70]:
# Count the sentences by consuming doc.sents once, without building a list.
sum(1 for _ in doc.sents)


10

In [71]:
#Filtering the tokens

In [72]:
# Collect the lower-cased lemma of every content word in the document.
#
# A token is kept when it is not a stop word, is not punctuation, and its
# coarse part-of-speech tag is one of `pos_tag`.
stopwords = list(STOP_WORDS)
pos_tag = ['PROPN','ADJ','NOUN','VERB']

# Frozen-set views give constant-time membership tests; testing against the
# `stopwords` list would rescan the whole list for every token in the document.
stopword_lookup = frozenset(stopwords)
wanted_pos = frozenset(pos_tag)

keyword = []
for token in doc:
    # `punctuation` is a str, so `in` is a substring test (kept as in the
    # original): any token that is a contiguous piece of that string is skipped.
    if token.text.lower() in stopword_lookup or token.text in punctuation:
        continue
    if token.pos_ in wanted_pos:
        keyword.append(token.lemma_.lower())


In [73]:
#Calculating the frequency of each keyword using the "Counter" class

In [74]:
# Tally how many times each keyword lemma was collected, then display the
# eight most frequent ones as (lemma, count) pairs.
freq_word = Counter(keyword)
freq_word.most_common(8)

[('ai', 9),
 ('human', 3),
 ('personalize', 3),
 ('intelligence', 2),
 ('healthcare', 2),
 ('detection', 2),
 ('financial', 2),
 ('development', 2)]

In [75]:
#Normalization

In [76]:
# Scale every count by the highest count so the most frequent keyword scores 1.0.
# `default=1` keeps this cell from raising IndexError when no keywords were
# collected (empty Counter); in that case the loop below simply does nothing.
max_freq = max(freq_word.values(), default=1)
for word in freq_word:
    # Only existing values are replaced, so the dict's size does not change
    # while it is being iterated.
    freq_word[word] = freq_word[word] / max_freq
freq_word.most_common(5)


[('ai', 1.0),
 ('human', 0.3333333333333333),
 ('personalize', 0.3333333333333333),
 ('intelligence', 0.2222222222222222),
 ('healthcare', 0.2222222222222222)]

In [77]:
#This is the core step: each sentence is weighted by summing the normalized frequencies of the keyword lemmas it contains. The result is stored in the dictionary sent_strength, whose keys are the sentences of doc (spaCy spans) and whose values are the weights of those sentences.

In [78]:
#Weighing Sentences

In [None]:
# Score each sentence by adding up the normalized frequency of every token
# whose lower-cased lemma is a known keyword. Sentences that contain no
# keyword never receive an entry.
sent_strength = {}
for sentence in doc.sents:
    for token in sentence:
        key = token.lemma_.lower()
        if key not in freq_word:
            continue
        sent_strength[sentence] = sent_strength.get(sentence, 0) + freq_word[key]

# Divide each raw score by the sentence's whitespace-separated word count plus
# one. Only values are replaced, so iterating the dict while updating is safe.
for sentence, raw_score in sent_strength.items():
    word_count = len(sentence.text.split())
    sent_strength[sentence] = raw_score / (word_count + 1)

print(sent_strength)


{Artificial Intelligence (AI) has rapidly transformed various industries by enabling machines to perform tasks that traditionally required human intelligence.: 0.13333333333333336, From healthcare to finance, AI applications have streamlined processes, improved accuracy, and reduced human effort.: 0.1527777777777778, In healthcare, AI is being used for early disease detection, personalized medicine, and drug discovery.: 0.14583333333333337, Financial institutions employ AI algorithms for fraud detection, risk assessment, and personalized financial advice.

: 0.1851851851851852, Education has also seen a major shift with AI-powered tools that provide personalized learning experiences, automate grading, and enhance student engagement.: 0.13131313131313133, Similarly, AI in transportation has led to the development of self-driving cars, predictive maintenance, and optimized logistics.: 0.12345679012345682, While the benefits of AI are enormous, challenges remain, including ethical concern

In [80]:
#Summarizing the string

In [None]:
# Pick the three highest-scoring sentences, then restore document order by
# sorting on each span's starting token index.
top_sentences = nlargest(3, sent_strength.keys(), key=lambda span: sent_strength[span])
summarized_sentences = sorted(top_sentences, key=lambda span: span.start)

print(summarized_sentences)


[From healthcare to finance, AI applications have streamlined processes, improved accuracy, and reduced human effort., Financial institutions employ AI algorithms for fraud detection, risk assessment, and personalized financial advice.

, Despite these challenges, the future of AI looks promising.]


In [82]:
# Sanity check: show the type of the first selected sentence. The recorded
# output is a spaCy Span, which is why `.text` is used when building the summary.
print(type(summarized_sentences[0]))

<class 'spacy.tokens.span.Span'>


In [None]:

# Order the selected spans by their position in the document.
final_sentences = sorted(summarized_sentences, key=lambda span: span.start)

# Join the stripped sentence texts with single spaces to form the summary.
summary = " ".join(span.text.strip() for span in final_sentences)

print("=== Summary ===")
print(summary)


=== Summary ===
From healthcare to finance, AI applications have streamlined processes, improved accuracy, and reduced human effort. Financial institutions employ AI algorithms for fraud detection, risk assessment, and personalized financial advice. Despite these challenges, the future of AI looks promising.
