# Apply named entity recognition (NER) to identify entities such as names, organizations, and locations in a given text. Perform sentence segmentation on a paragraph and explain its importance in NLP tasks.

In [1]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the later cells reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [2]:
def show_ents(doc):
    """Print the named entities found in ``doc``, one per line.

    Each line has the form ``text - label - explanation``, where the
    explanation is whatever ``spacy.explain`` returns for the label.
    If ``doc.ents`` is empty, a single fallback message is printed instead.
    """
    # Guard clause: nothing to list, so report that and stop.
    if not doc.ents:
        print('No named entities found.')
        return

    for entity in doc.ents:
        label = entity.label_
        # str() keeps the line printable even when no explanation exists.
        description = str(spacy.explain(label))
        print(' - '.join((entity.text, label, description)))


In [5]:
# Run the pipeline over the lyrics (passed as one long string) and print
# every entity it reports via show_ents.
doc = nlp("""Found my home (Ooh, ooh) She made me leave the thrills at home and I'm fine Really think I found my home Shorty make me feel at home She made me leave the thrills at home And I'm fine with it She really made me lose control I'ma let my love unfold We're just two lost souls But we're fine with it There's love at my front door, short notice You're not like the same girls I notice Think I met my soul mate Yeah, I know it When it gets dark outside In you I confide You help me face my demons I won't hide, hide Girls like you are hard to find I hope you don't mind If I give you the time of your life, life, life Really think I found my home Shorty make me feel at home She made me leave the thrills at home And I'm fine with it She really made me lose control I'ma let my love unfold We're just two lost souls But we're fine with it Life is not the same With your pictures in my frame Now that you're here I want nothing to change You pick me up when I'm down I need you around You seen me through my darkest times Girl, is there something that you try to find? You brought meaning to my life All because of you, I do right Because of you, I have a purpose Fight for the world, because you're worth it Really think I found my home Shorty make me feel at home She made me leave the thrills at home And I'm fine with it She really made me lose control I'ma let my love unfold We're just two lost souls But we're fine with it""")
show_ents(doc)

Shorty - ORG - Companies, agencies, institutions, etc.
two - CARDINAL - Numerals that do not fall under another type
Shorty - ORG - Companies, agencies, institutions, etc.
two - CARDINAL - Numerals that do not fall under another type
Girl - PRODUCT - Objects, vehicles, foods, etc. (not services)
Fight - PERSON - People, including fictional
Shorty - ORG - Companies, agencies, institutions, etc.
two - CARDINAL - Numerals that do not fall under another type


In [11]:
from spacy.language import Language

# Custom pipeline component, registered under the name "remove_white_space".
@Language.component("remove_white_space")
def remove_white_space(doc):
    """Drop entities whose text consists only of whitespace.

    Reassigns ``doc.ents`` to the filtered list and returns the same ``doc``.
    """
    kept = []
    for span in doc.ents:
        if span.text.isspace():
            continue
        kept.append(span)
    doc.ents = kept
    return doc

# Insert the filter directly after the NER step. The membership check makes
# the cell safe to re-run: adding a component name that is already in the
# pipeline raises an error.
if 'remove_white_space' not in nlp.pipe_names:
    nlp.add_pipe('remove_white_space', after="ner")

# Parse the comma-separated lyric text, then print a list of
# (entity text, entity label) tuples for the entities found.
doc = nlp("Don't stay awake for too long,Don't go to bed,I'll make a cup of coffee for your head,I'll get you up and going out of bed,Yeah, I don't wanna fall asleep,I don't wanna pass away,I've been thinking of our future 'Cause I'll never see those days,I don't know why this has happened, but I probably deserve it,I tried to do my best, but you know that I'm not perfect")
print([(ent.text, ent.label_) for ent in doc.ents])


[("Don't", 'NORP'), ("I'll", 'GPE')]


In [12]:
from spacy import displacy

In [13]:
# Parse the lyric text again for visualisation. The legacy `u` string prefix
# is dropped: in Python 3 every str literal is already Unicode.
doc = nlp("""Don't stay awake for too long,Don't go to bed,I'll make a cup of coffee for your head,I'll get you up and going out of bed,Yeah, I don't wanna fall asleep,I don't wanna pass away,I've been thinking of our future 'Cause I'll never see those days,I don't know why this has happened, but I probably deserve it,I tried to do my best, but you know that I'm not perfect""")

# Show the entity visualisation (style='ent') in the notebook output.
displacy.render(doc, style='ent', jupyter=True)