In [1]:
import spacy
import nltk

### Fundamental parts of speech of the English language
1. Noun
2. Pronoun
3. Verb
4. Interjection
5. Adjective
6. Adverb
7. Conjunction
8. Preposition

In [8]:
# Load the small English pipeline and run it over a sample sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp("I took a round trip to Mars, and say what you will next time I will bring along my paint supplies! :D")

# One row per token: token text | coarse POS label | spaCy's gloss for that label.
for token in doc:
    print(token, token.pos_, spacy.explain(token.pos_), sep=" | ")

I | PRON | pronoun
took | VERB | verb
a | DET | determiner
round | ADJ | adjective
trip | NOUN | noun
to | ADP | adposition
Mars | PROPN | proper noun
, | PUNCT | punctuation
and | CCONJ | coordinating conjunction
say | VERB | verb
what | PRON | pronoun
you | PRON | pronoun
will | AUX | auxiliary
next | ADJ | adjective
time | NOUN | noun
I | PRON | pronoun
will | AUX | auxiliary
bring | VERB | verb
along | ADP | adposition
my | PRON | pronoun
paint | NOUN | noun
supplies | NOUN | noun
! | PUNCT | punctuation
:D | PUNCT | punctuation


In [9]:
# Show the names of the components in the loaded pipeline, in pipeline order.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [11]:
# `tag_` is more specific than `pos_` (e.g. "verb, past tense" rather than
# just "verb"), so print it together with spaCy's explanation of each tag.
doc = nlp("I took a round trip to Mars, and say what you will next time I will bring along my paint supplies! :D")

for token in doc:
    print(token, token.tag_, spacy.explain(token.tag_), sep=" | ")

I | PRP | pronoun, personal
took | VBD | verb, past tense
a | DT | determiner
round | JJ | adjective (English), other noun-modifier (Chinese)
trip | NN | noun, singular or mass
to | IN | conjunction, subordinating or preposition
Mars | NNP | noun, proper singular
, | , | punctuation mark, comma
and | CC | conjunction, coordinating
say | VB | verb, base form
what | WP | wh-pronoun, personal
you | PRP | pronoun, personal
will | MD | verb, modal auxiliary
next | JJ | adjective (English), other noun-modifier (Chinese)
time | NN | noun, singular or mass
I | PRP | pronoun, personal
will | MD | verb, modal auxiliary
bring | VB | verb, base form
along | RP | adverb, particle
my | PRP$ | pronoun, possessive
paint | NN | noun, singular or mass
supplies | NNS | noun, plural
! | . | punctuation mark, sentence closer
:D | . | punctuation mark, sentence closer


In [12]:
# Sample passage (an NVIDIA fiscal-2026 second-quarter outlook excerpt) that the
# following cells parse. The leading/trailing newlines and the blank line are
# part of the string, so the parsed document also contains whitespace tokens.
text = '''
NVIDIA’s outlook for the second quarter of fiscal 2026 is as follows:

Revenue is expected to be $45.0 billion, plus or minus 2%. This outlook reflects a loss in H20 revenue of approximately $8.0 billion due to the recent export control limitations.
GAAP and non-GAAP gross margins are expected to be 71.8%, and 72.0%, respectively, plus or minus 50 basis points. The company is continuing to work toward achieving gross margins in the mid-70%, range late this year.
GAAP and non-GAAP operating expenses are expected to be approximately $5.7 billion and $4.0 billion, respectively. Full year fiscal 2026 operating expense growth is expected to be in the mid-30%, range.
GAAP and non-GAAP other income and expense are expected to be an income of approximately $450 million, excluding gains and losses from non-marketable and publicly-held equity securities.
GAAP and non-GAAP tax rates are expected to be 16.5%, plus or minus 1%, excluding any discrete items.
'''

In [16]:
# Parse the passage, then keep every token that is neither whitespace nor
# punctuation according to its coarse POS label.
doc = nlp(text)
filtered_tokens = [token for token in doc if token.pos_ not in ("SPACE", "PUNCT")]
# Preview only the first ten survivors.
filtered_tokens[:10]

[NVIDIA, ’s, outlook, for, the, second, quarter, of, fiscal, 2026]

In [17]:
# Count how many tokens in `doc` carry each coarse part-of-speech label.
# The resulting mapping is keyed by integer POS ids rather than by label
# names, so the ids still have to be translated before they are readable.
count_pos = doc.count_by(spacy.attrs.POS)
count_pos

{103: 7,
 96: 2,
 94: 8,
 92: 50,
 85: 11,
 90: 10,
 84: 20,
 93: 19,
 87: 14,
 98: 1,
 100: 16,
 97: 20,
 99: 5,
 89: 15,
 86: 7}

In [21]:
# Translate each integer POS id back into its label via the document's vocab
# and print it alongside the number of tokens that received that label.
for pos_id, n_tokens in count_pos.items():
    print(f"{doc.vocab[pos_id].text} | {n_tokens}")

SPACE | 7
PROPN | 2
PART | 8
NOUN | 50
ADP | 11
DET | 10
ADJ | 20
NUM | 19
AUX | 14
SCONJ | 1
VERB | 16
PUNCT | 20
SYM | 5
CCONJ | 15
ADV | 7
