In [24]:
import spacy 
from spacy import displacy

In [25]:
# Sample passage on which the named-entity recognizer will be run.

sample_text="""The Top 100 Companies of the World: U.S. vs Everyone. When it comes to breaking down the top 100 companies of the world, the United States still commands the largest slice of the pie.
Throughout the 20th century and before globalization reached its current peaks, American companies made the country an economic powerhouse and the source of a majority of global market value.
But even as countries like China have made headway with multi-billion dollar companies of their own, and the market’s most important sectors have shifted, the U.S. has managed to stay on top.
"""

In [26]:
# Load spaCy's small English pipeline. Note that this loads the whole
# pipeline, not just the NER component: no components are excluded or
# disabled in this call. The entity recognizer is the part used below.

NER = spacy.load("en_core_web_sm")

In [27]:
# Run the loaded pipeline on the sample text. Nothing is trained ("fit")
# here; the call returns the processed document, whose `.ents` attribute
# is read in the next cell.

word = NER(sample_text)

In [28]:
# Print every named entity found in the sample text, followed by its label.

for entity in word.ents:
    print(entity.text, entity.label_)


U.S. GPE
100 CARDINAL
the United States GPE
the 20th century DATE
American NORP
China GPE
U.S. GPE


In [29]:
# If an entity label is unclear, look up its description with spacy.explain().

spacy.explain("GPE")

'Countries, cities, states'

In [30]:
# Look up the human-readable description of the "DATE" entity label.
spacy.explain("DATE")

'Absolute or relative dates or periods'

In [31]:
# Look up the human-readable description of the "NORP" entity label.
spacy.explain("NORP")

'Nationalities or religious or political groups'

In [32]:
# Look up the human-readable description of the "EVENT" entity label.
spacy.explain("EVENT")

'Named hurricanes, battles, wars, sports events, etc.'

In [33]:
# Look up the human-readable description of the "FAC" entity label.
spacy.explain("FAC")

'Buildings, airports, highways, bridges, etc.'

In [34]:
# Look up the human-readable description of the "PRODUCT" entity label.
spacy.explain("PRODUCT")

'Objects, vehicles, foods, etc. (not services)'

In [35]:
# Visualize the named entities highlighted inline in the text, using spaCy's
# displacy module (style="ent" selects the entity view; jupyter=True renders
# the result in the notebook).

displacy.render(word,style="ent",jupyter=True)

## Implementation of NER using NLTK 

In [None]:
# Import NLTK and download the resources this section relies on:
#   - 'words' and 'maxent_ne_chunker': used by nltk.ne_chunk
#   - 'punkt': used by nltk.word_tokenize
#   - 'averaged_perceptron_tagger': used by nltk.pos_tag
#   - 'state_union': the corpus the example text is taken from
import nltk
nltk.download('words')
nltk.download('punkt')
nltk.download('maxent_ne_chunker')
nltk.download('averaged_perceptron_tagger')
nltk.download('state_union')
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer

# Prepare the text: split one speech into sentences.
# `train_text` is the raw text of the whole State of the Union corpus and is
# handed to the Punkt tokenizer's constructor as its training text.
# NOTE: the next assignment rebinds `sample_text`, replacing the passage that
# was defined for the spaCy example earlier in this notebook; re-running the
# spaCy cells after this one will therefore process the 2006 speech instead.
train_text = state_union.raw()
sample_text = state_union.raw("2006-GWBush.txt")
custom_sent_tokenizer = PunktSentenceTokenizer(train_text)
# `tokenized` holds the sentences of the 2006 speech; get_named_entity() reads it.
tokenized = custom_sent_tokenizer.tokenize(sample_text)

# function
def get_named_entity(sentences=None):
    """Draw the named-entity chunk tree of each sentence.

    Every sentence is word-tokenized, part-of-speech tagged and passed to
    ``nltk.ne_chunk`` with ``binary=False``; the resulting tree is then
    displayed through its ``draw()`` method.

    Args:
        sentences: Iterable of sentence strings to process. When omitted,
            the notebook-level ``tokenized`` list is used, so the original
            zero-argument call keeps working unchanged.

    Returns:
        None. The function is called for its side effect of drawing trees.

    Notes:
        Processing stays best-effort: the first failure stops the loop.
        Unlike the previous bare ``except: pass``, the failure is reported
        as a ``RuntimeWarning`` instead of being discarded, and
        ``KeyboardInterrupt`` / ``SystemExit`` are no longer swallowed.
    """
    import warnings  # local import so the cell stays self-contained

    try:
        # Resolve the default inside the try block so that a missing
        # `tokenized` global is reported like any other failure.
        if sentences is None:
            sentences = tokenized
        for sentence in sentences:
            tokens = nltk.word_tokenize(sentence)
            tagged_tokens = nltk.pos_tag(tokens)
            entity_tree = nltk.ne_chunk(tagged_tokens, binary=False)
            entity_tree.draw()
    except Exception as exc:
        warnings.warn(
            f"Named-entity extraction stopped early: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )


In [None]:
# Run the NLTK named-entity extraction defined in the previous cell.
get_named_entity()