# Named Entity Recognition (NER) with spaCy

## Imports

In [None]:
import numpy as np
import pandas as pd
import spacy

## Model Setup

In [None]:
# Build the shared spaCy pipeline from the large English model package;
# the cells below call `nlp` to turn raw text into an annotated document.
nlp = spacy.load(name="en_core_web_lg")

In [None]:
# Sample passage to analyse. The adjacent string literals are concatenated
# by Python into a single string.
text = (
    "When Sebastian Thrun started working on self-driving cars at "
    "Google in 2007, few people outside of the company took him "
    "seriously. “I can tell you very senior CEOs of major American "
    "car companies would shake my hand and turn away because I wasn’t "
    "worth talking to,” said Thrun, in an interview with Recode earlier "
    "this week."
)

# Run the pipeline once; the resulting `doc` is reused by the next cell.
doc = nlp(text)

# Syntax overview: the text of every noun chunk, then the lemma of every
# token whose coarse part-of-speech tag is VERB.
print(
    "Noun phrases:",
    [span.text for span in doc.noun_chunks],
)
print(
    "Verbs:",
    [tok.lemma_ for tok in doc if tok.pos_ == "VERB"],
)

# Named entities: one line per entity, "<text> <label>".
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

In [None]:
# Build a per-token table: one row per token in `doc`, one column per
# attribute read from the token.

# String-valued attributes.
tokens = [token.text for token in doc]

lemmas = [token.lemma_ for token in doc]

poss = [token.pos_ for token in doc]

tags = [token.tag_ for token in doc]

deps = [token.dep_ for token in doc]

shapes = [token.shape_ for token in doc]

# Boolean-valued attributes.
is_alphas = [token.is_alpha for token in doc]

is_stops = [token.is_stop for token in doc]

cols = ["Token", "Lemma", "Pos", "Tag", "Dep", "Shape", "Is alpha?", "Is stop?"]

# Transpose the column lists into row tuples with zip() rather than stacking
# them in a NumPy array: np.array() over mixed str/bool lists coerces every
# value to a string, which turned the boolean flags into "True"/"False" text.
# Row tuples keep the native Python values, so pandas infers a real boolean
# dtype for the "Is alpha?" / "Is stop?" columns and they stay filterable.
rows = list(zip(tokens, lemmas, poss, tags, deps, shapes, is_alphas, is_stops))

# Last expression of the cell, so the notebook renders the table.
pd.DataFrame(data=rows, columns=cols)