In [1]:
# pip install spacy
# python -m spacy download en_core_web_sm

import spacy

# Demo pipeline: tokenize one sentence, drop stopwords and non-alphabetic
# tokens, keep only nouns/verbs, and print the lemma of each survivor.

# Small English pipeline (tokenizer, POS tagger, lemmatizer, stopword list).
nlp = spacy.load("en_core_web_sm")

text = "John enjoys playing football while Mary loves reading books in the library."
doc = nlp(text)

# Step 1: surface form of every token, punctuation included.
tokens = [word.text for word in doc]

# Step 2: keep purely alphabetic tokens that are not stopwords.
content_tokens = [
    word
    for word in doc
    if word.is_alpha and not word.is_stop
]

# Steps 3 + 4: lemmatize (dictionary form, not a stem) and keep only tokens
# tagged as a verb, a common noun (NOUN) or a proper noun (PROPN).
# NOTE: the filter trusts the tagger's label; a token tagged with anything
# else (e.g. AUX) is dropped here.
keep_pos = {"VERB", "NOUN", "PROPN"}
filtered_lemmas = [
    (word.lemma_, word.pos_)
    for word in content_tokens
    if word.pos_ in keep_pos
]

# Plain-string view of the result, without the POS tags.
final_lemmas_only = [pair[0] for pair in filtered_lemmas]

# Report each stage: a heading line followed by the list on its own line.
print("Tokens:", tokens, sep="\n")
print(
    "\nAfter stopword removal (token/POS):",
    [(word.text, word.pos_) for word in content_tokens],
    sep="\n",
)
print("\nLemmatized + POS (only verbs & nouns):", filtered_lemmas, sep="\n")
print("\nFinal lemmas (verbs & nouns only):", final_lemmas_only, sep="\n")


Tokens:
['John', 'enjoys', 'playing', 'football', 'while', 'Mary', 'loves', 'reading', 'books', 'in', 'the', 'library', '.']

After stopword removal (token/POS):
[('John', 'PROPN'), ('enjoys', 'VERB'), ('playing', 'VERB'), ('football', 'NOUN'), ('Mary', 'PROPN'), ('loves', 'AUX'), ('reading', 'VERB'), ('books', 'NOUN'), ('library', 'NOUN')]

Lemmatized + POS (only verbs & nouns):
[('John', 'PROPN'), ('enjoy', 'VERB'), ('play', 'VERB'), ('football', 'NOUN'), ('Mary', 'PROPN'), ('read', 'VERB'), ('book', 'NOUN'), ('library', 'NOUN')]

Final lemmas (verbs & nouns only):
['John', 'enjoy', 'play', 'football', 'Mary', 'read', 'book', 'library']
