# Tokenization

![spacy.png](attachment:spacy.png)

In [1]:
import spacy
from prettytable import PrettyTable

In [2]:
# Load the spaCy pipeline named 'en_core_web_sm'; every later cell runs text through `nlp`.
# The model package must already be installed (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load('en_core_web_sm')

In [3]:
# Sample sentence for the demos below: it contains names, a '!' punctuation mark,
# the contraction "let's", and the number 21, so each token flag has something to match.
text = "Hello Uditya Narayan Tiwari! let's learn NLP together i am 21"

![Tokenizer.png](attachment:Tokenizer.png)

In [4]:
# Run the loaded pipeline over the sample text; the resulting `doc` is reused by later cells.
doc = nlp(text)
# Bare expression so the notebook displays the processed document as the cell output.
doc

Hello Uditya Narayan Tiwari! let's learn NLP together i am 21

In [5]:
# Dump the document as a plain dict; the output shows 'text', 'array_head' and 'array_body' keys.
# NOTE(review): the 'array_body' output is large — consider inspecting only selected keys.
doc.to_dict()

{'text': "Hello Uditya Narayan Tiwari! let's learn NLP together i am 21",
 'array_head': (71, 81, 65, 67, 75, 79, 76, 77, 78, 452, 454, 73, 453, 74, 80),
 'array_body': array([[                   5,                    1, 15777305708150031551,
          5983625672228268878,  3252815442139690129,                    0,
          8206900633647566924,                    2,                    0,
                            0,                    0,  5983625672228268878,
                          456,                   91,                    1],
        [                   6,                    1,   621519759253969662,
         12044956042206692584, 15794550382381185553,                    2,
          7037928807040764755,                    3,                  380,
                            0,                    0,   621519759253969662,
         11292551915497242671,                   96, 18446744073709551615],
        [                   7,                    1, 17937943263439891957,
     

In [6]:
# Print one line per token: its text followed by the alphabetic, punctuation
# and number-like flags, in that order.
for token in doc:
    flags = (token.is_alpha, token.is_punct, token.like_num)
    print(token.text, *flags)

Hello True False False
Uditya True False False
Narayan True False False
Tiwari True False False
! False True False
let True False False
's False False False
learn True False False
NLP True False False
together True False False
i True False False
am True False False
21 False False True


In [7]:
# Collect the same per-token flags into a PrettyTable so they can be shown aligned.
columns = ['token', 'is alpha', 'is punct', 'is number']
table = PrettyTable()
table.field_names = columns
for token in doc:
    row = [token.text, token.is_alpha, token.is_punct, token.like_num]
    table.add_row(row)

In [8]:
print(table)  # PrettyTable renders the collected rows as an aligned, bordered text table

+----------+----------+----------+-----------+
|  token   | is alpha | is punct | is number |
+----------+----------+----------+-----------+
|  Hello   |   True   |  False   |   False   |
|  Uditya  |   True   |  False   |   False   |
| Narayan  |   True   |  False   |   False   |
|  Tiwari  |   True   |  False   |   False   |
|    !     |  False   |   True   |   False   |
|   let    |   True   |  False   |   False   |
|    's    |  False   |  False   |   False   |
|  learn   |   True   |  False   |   False   |
|   NLP    |   True   |  False   |   False   |
| together |   True   |  False   |   False   |
|    i     |   True   |  False   |   False   |
|    am    |   True   |  False   |   False   |
|    21    |  False   |  False   |    True   |
+----------+----------+----------+-----------+


# Part-of-Speech (POS) Tagging

![POS.png](attachment:POS.png)

In [9]:
# Re-display the document processed earlier; the POS table below is built from it.
doc

Hello Uditya Narayan Tiwari! let's learn NLP together i am 21

In [10]:
# Tabulate, for every token, its `pos_` label, its `tag_` label (shown under
# the 'details' column) and spaCy's textual explanation of that tag.
table = PrettyTable()
table.field_names = ['token', 'pos', 'details', 'explanation']

for token in doc:
    tag = token.tag_
    table.add_row([token.text, token.pos_, tag, spacy.explain(tag)])

print(table)

+----------+-------+---------+---------------------------------------+
|  token   |  pos  | details |              explanation              |
+----------+-------+---------+---------------------------------------+
|  Hello   |  INTJ |    UH   |              interjection             |
|  Uditya  | PROPN |   NNP   |         noun, proper singular         |
| Narayan  | PROPN |   NNP   |         noun, proper singular         |
|  Tiwari  | PROPN |   NNP   |         noun, proper singular         |
|    !     | PUNCT |    .    |   punctuation mark, sentence closer   |
|   let    |  VERB |    VB   |            verb, base form            |
|    's    |  PRON |   PRP   |           pronoun, personal           |
|  learn   |  VERB |    VB   |            verb, base form            |
|   NLP    | PROPN |   NNP   |         noun, proper singular         |
| together |  ADV  |    RB   |                 adverb                |
|    i     |  PRON |   PRP   |           pronoun, personal           |
|    a