In [1]:
import pandas as pd
from nltk.corpus import stopwords
from nltk import pos_tag, word_tokenize, sent_tokenize
from collections import Counter

# Corpus of text
corpus = "As dawn broke over the sleepy town, a golden glow stretched across the horizon, slowly illuminating the rooftops and narrow cobbled streets. Birds began to chirp softly, their melody weaving through the gentle hum of early morning. In the marketplace, vendors set up their stalls, arranging fresh fruits and vegetables with care, while the smell of freshly baked bread wafted from a nearby bakery, tempting passersby. Soon, the streets filled with people of all kinds — shopkeepers greeting customers with a warm smile, children skipping along, and workers hurrying to catch the morning train. Dogs trotted by their owners’ sides, occasionally stopping to sniff at curious sights or sounds. As the clock struck eight, the church bells rang out, their chimes resonating across the square, bringing a sense of rhythm to the bustling scene. Amid the crowd, an elderly couple strolled hand-in-hand, sharing a quiet conversation, as a group of tourists snapped photos of the picturesque town. The day was only beginning, but already, the town was alive with a blend of sights, sounds, and stories waiting to unfold."

# Tokenize the corpus into sentences
sentences = sent_tokenize(corpus)

# Print each sentence with its index
for idx, sentence in enumerate(sentences):
    print(f"Sentence {idx+1}: {sentence}")

print('-' * 150)
print('-' * 150)

# Dictionary to store POS tag counts
pos_counts = Counter()

# Loop through each sentence to tokenize and tag POS
for i in range(len(sentences)):
    # Tokenize words and filter out stopwords
    words = word_tokenize(sentences[i])
    words = [word for word in words if word not in set(stopwords.words('english'))]
    
    # Generate POS tags
    pos_tagged = pos_tag(words)
    print("POS tags for sentence", i+1, ":", pos_tagged)
    
    # Count POS tags for each sentence
    sentence_pos_counts = Counter(tag for word, tag in pos_tagged)
    print("POS tag counts for sentence", i+1, ":", sentence_pos_counts)
    print('-' * 150)
    
    # Add to overall counts
    pos_counts.update(sentence_pos_counts)

# Convert overall POS tag counts to DataFrame
df_pos_counts = pd.DataFrame.from_dict(pos_counts, orient='index', columns=['Count'])
df_pos_counts = df_pos_counts.sort_values(by='Count', ascending=False).reset_index()
df_pos_counts.columns = ['POS Tag', 'Count']

# Display the DataFrame
print("\nTotal POS tag counts across all sentences in DataFrame format:")
df_pos_counts


Sentence 1: As dawn broke over the sleepy town, a golden glow stretched across the horizon, slowly illuminating the rooftops and narrow cobbled streets.
Sentence 2: Birds began to chirp softly, their melody weaving through the gentle hum of early morning.
Sentence 3: In the marketplace, vendors set up their stalls, arranging fresh fruits and vegetables with care, while the smell of freshly baked bread wafted from a nearby bakery, tempting passersby.
Sentence 4: Soon, the streets filled with people of all kinds — shopkeepers greeting customers with a warm smile, children skipping along, and workers hurrying to catch the morning train.
Sentence 5: Dogs trotted by their owners’ sides, occasionally stopping to sniff at curious sights or sounds.
Sentence 6: As the clock struck eight, the church bells rang out, their chimes resonating across the square, bringing a sense of rhythm to the bustling scene.
Sentence 7: Amid the crowd, an elderly couple strolled hand-in-hand, sharing a quiet conve

Unnamed: 0,POS Tag,Count
0,NN,33
1,NNS,25
2,",",21
3,JJ,16
4,VBG,13
5,VBD,10
6,RB,8
7,.,8
8,IN,5
9,VBP,2
