In [29]:
# Importing necessary libraries
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

In [30]:
# Sample text
text ="""Indian shooter Manu Bhaker carved her name in the history books as she became the first Indian to win two Olympic medals in the single edition.
The 22-year-old Bhaker, who opened India's medal tally in Paris 2024 by clinching a bronze medal in women's 10m air pistol event on Sunday, claimed another medal - bronze - while teaming up with Sarabjot Singh in the 10 m air pistol mixed team on Tuesday.
"""

## **Tokenization**

In [31]:
sentences = sent_tokenize(text)
words = word_tokenize(text)

for sentence in sentences:
    print(sentence)

print("\n", words)

Indian shooter Manu Bhaker carved her name in the history books as she became the first Indian to win two Olympic medals in the single edition.
The 22-year-old Bhaker, who opened India's medal tally in Paris 2024 by clinching a bronze medal in women's 10m air pistol event on Sunday, claimed another medal - bronze - while teaming up with Sarabjot Singh in the 10 m air pistol mixed team on Tuesday.

 ['Indian', 'shooter', 'Manu', 'Bhaker', 'carved', 'her', 'name', 'in', 'the', 'history', 'books', 'as', 'she', 'became', 'the', 'first', 'Indian', 'to', 'win', 'two', 'Olympic', 'medals', 'in', 'the', 'single', 'edition', '.', 'The', '22-year-old', 'Bhaker', ',', 'who', 'opened', 'India', "'s", 'medal', 'tally', 'in', 'Paris', '2024', 'by', 'clinching', 'a', 'bronze', 'medal', 'in', 'women', "'s", '10m', 'air', 'pistol', 'event', 'on', 'Sunday', ',', 'claimed', 'another', 'medal', '-', 'bronze', '-', 'while', 'teaming', 'up', 'with', 'Sarabjot', 'Singh', 'in', 'the', '10', 'm', 'air', 'pisto

## **Sentiment Analysis**

In [32]:
# Initializing SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

# Perform sentiment analysis on each sentence
for sentence in sentences:
    sentiment = sia.polarity_scores(sentence)
    print(f"Sentiment: {sentiment}")

Sentiment: {'neg': 0.0, 'neu': 0.868, 'pos': 0.132, 'compound': 0.5859}
Sentiment: {'neg': 0.0, 'neu': 0.807, 'pos': 0.193, 'compound': 0.8519}


## **Processing Words and getting and removing stop-words**

In [33]:
# Remove punctuation
words = [word for word in words if word not in string.punctuation]

# Converting to lower case
words = [word.lower() for word in words]

# Getting stopwords
stop_words = set(stopwords.words('english'))
print(stop_words)

words = [word for word in words if word not in stop_words]

{'whom', 'themselves', 'but', 'than', 'over', 'ours', "she's", 'about', 'or', 'how', 'then', 'can', 'y', "hasn't", "it's", 'with', "you'll", "isn't", 'after', 'when', 'if', 'off', 'here', 'all', 't', "didn't", 'more', "doesn't", 'is', 'theirs', 'both', 'until', 'am', 'been', 'those', 'in', 'weren', 'her', 'during', 'why', 'between', 'are', 'do', 'did', 'you', 'himself', "don't", "mightn't", 'couldn', "needn't", 'most', 'each', 'now', 'some', 'any', 'through', 'under', 'other', "you've", "haven't", 'into', 'him', 'below', 'not', "weren't", 'ain', "hadn't", 'nor', 'them', 'hadn', 'of', 'didn', 'by', 'again', 'while', 'having', 're', 'our', 'an', 'has', 'i', 'further', 'same', 'will', 'mustn', 'aren', 'yours', "shan't", 'hasn', "you're", 'doesn', 'myself', 'where', 'wouldn', 'and', 'just', 'hers', 'few', "should've", 'its', 'they', 'before', 'up', 'yourselves', 'down', "wouldn't", 'very', 'was', 'for', 'no', 'only', 'doing', 'their', 'we', 'needn', 'isn', 'herself', 'should', "aren't", 'm

## **Lemmatization**

In [34]:
# Lemmatizing words
lemmatizer = WordNetLemmatizer()
words = [lemmatizer.lemmatize(word) for word in words]

print("\nProcessed Words:")
print(words)


Processed Words:
['indian', 'shooter', 'manu', 'bhaker', 'carved', 'name', 'history', 'book', 'became', 'first', 'indian', 'win', 'two', 'olympic', 'medal', 'single', 'edition', '22-year-old', 'bhaker', 'opened', 'india', "'s", 'medal', 'tally', 'paris', '2024', 'clinching', 'bronze', 'medal', 'woman', "'s", '10m', 'air', 'pistol', 'event', 'sunday', 'claimed', 'another', 'medal', 'bronze', 'teaming', 'sarabjot', 'singh', '10', 'air', 'pistol', 'mixed', 'team', 'tuesday']
