# Part 4, Extracting Named Entities

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import re
import string
import ktrain

In [2]:
pd.set_option('display.max_colwidth', 500)

In [3]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
df_cleaned_news = pd.read_csv('df_cleaned_news.csv')
df_cleaned_news.head()

Unnamed: 0.1,Unnamed: 0,date,clean_text,clean_title
0,0,2022-01-06,chicago ap chicago school leaders canceled classes thursday second consecutive day failing reach agreement teachers union covid safety protocols nation third largest school district the chicago teachers union sought revert remote instruction latest surge infections sides hammer deal but chicago public schools leaders said remote learning work schools safely remain open protocols place the move cancel classes activities thursday affects roughly students ...,no deal school chicago cancels classes 2nd day
1,1,2022-01-06,chicago pd lies season episode promo voight employs new informant help solve tricky drug trafficking case atwater struggles reconcile personal professional life reaches decision jesseleesoffer marinasqu trspiridakos nbcchicagopd,watch chicago p d preview wednesday
2,2,2022-01-06,wise foolish the chicago tribune article titled chicago public schools cancels classes thursday deadlock union continues trump biden press secretary comment impasse reported trumps solution follows educate children person give every dollar spent education directly students get failing government schools trump said statement the purpose this post is relate ancient wisdom proverb trump solution king solomon go presence foolish man thou perceivest lips...,trump solution chicago public schools king solomon blog
3,3,2022-01-06,the neighbors near mashawn plummer portage park firehouse knew often sat outside friendly presence station he showed early shifts ready job thrilled friends family recalled chicago firefighter emt mashawn plummer engine chicago fire department hundreds chicago firefighters joined family friends thursday lay plummer rest died december battling apartment fire northwest side they remembered plummer gentle giant followed dream becoming chicago fi...,family friends colleagues gathering today south side church attend funeral services chicago firefighter mashawn plummer
4,4,2022-01-06,mashawn plummer died final month preventing fireplace town northwest aspect additionally killed civilian a visitation held plummer wednesday night time a remaining visitation probably held home hope church e 114th st related chicago firefighter amongst critically injured belmont central residence fireplace killed following ultimate visitation funeral companies start probably adopted procession oak woods cemetery 67th road the yea...,chicago firefighter funeral pals household say goodbye mashawn plummer


In [5]:
df_cleaned_news.isna().sum()

Unnamed: 0     0
date           0
clean_text     7
clean_title    6
dtype: int64

In [6]:
df_cleaned_news.dropna(inplace=True)

In [7]:
df_cleaned_news.reset_index(drop=True, inplace=True)

In [8]:
df_cleaned_news.drop(columns=df_cleaned_news.columns[0], inplace=True)

In [9]:
df_cleaned_news.shape

(165360, 3)

In [10]:
df_cleaned_news.head()

Unnamed: 0,date,clean_text,clean_title
0,2022-01-06,chicago ap chicago school leaders canceled classes thursday second consecutive day failing reach agreement teachers union covid safety protocols nation third largest school district the chicago teachers union sought revert remote instruction latest surge infections sides hammer deal but chicago public schools leaders said remote learning work schools safely remain open protocols place the move cancel classes activities thursday affects roughly students ...,no deal school chicago cancels classes 2nd day
1,2022-01-06,chicago pd lies season episode promo voight employs new informant help solve tricky drug trafficking case atwater struggles reconcile personal professional life reaches decision jesseleesoffer marinasqu trspiridakos nbcchicagopd,watch chicago p d preview wednesday
2,2022-01-06,wise foolish the chicago tribune article titled chicago public schools cancels classes thursday deadlock union continues trump biden press secretary comment impasse reported trumps solution follows educate children person give every dollar spent education directly students get failing government schools trump said statement the purpose this post is relate ancient wisdom proverb trump solution king solomon go presence foolish man thou perceivest lips...,trump solution chicago public schools king solomon blog
3,2022-01-06,the neighbors near mashawn plummer portage park firehouse knew often sat outside friendly presence station he showed early shifts ready job thrilled friends family recalled chicago firefighter emt mashawn plummer engine chicago fire department hundreds chicago firefighters joined family friends thursday lay plummer rest died december battling apartment fire northwest side they remembered plummer gentle giant followed dream becoming chicago fi...,family friends colleagues gathering today south side church attend funeral services chicago firefighter mashawn plummer
4,2022-01-06,mashawn plummer died final month preventing fireplace town northwest aspect additionally killed civilian a visitation held plummer wednesday night time a remaining visitation probably held home hope church e 114th st related chicago firefighter amongst critically injured belmont central residence fireplace killed following ultimate visitation funeral companies start probably adopted procession oak woods cemetery 67th road the yea...,chicago firefighter funeral pals household say goodbye mashawn plummer


In [11]:
import spacy 
from spacy import displacy

In [12]:
# !python -m spacy download en_core_web_md

In [13]:
nlp = spacy.load("en_core_web_md")

In [14]:
nlp.pipe_names

['tagger', 'parser', 'ner']

In [15]:
# Using spaCy to count the number of ORG entities and PERSON entities in each article

def entity_count(text):
    doc = nlp(text)

    entities_dict = {}
    business = 0
    person = 0

    for ent in doc.ents:
        if ent.label_ == "ORG":
            entities_dict[ent.text] = ent.label_
            business += 1
        elif ent.label_ == "PERSON":
            entities_dict[ent.text] = ent.label_
            person += 1

    return entities_dict, business, person

In [16]:
entity_count(df_cleaned_news['clean_text'][0])[0]

{'ap': 'ORG',
 'the chicago teachers union': 'ORG',
 'pedro martinez': 'PERSON',
 'white house': 'ORG',
 'jen psaki': 'PERSON',
 'joe biden': 'PERSON',
 'donald trump': 'PERSON',
 'jesse sharkey': 'PERSON',
 'cps': 'ORG',
 'payton': 'PERSON',
 'danelda craig': 'PERSON',
 'lincoln': 'ORG',
 'cps       ': 'ORG',
 'associated press': 'ORG',
 'sara burnett': 'PERSON',
 'rick callahan': 'PERSON',
 'sophia tareen': 'PERSON',
 'twitter    ': 'ORG',
 'the associated press': 'ORG'}

In [17]:
df_cleaned_news["entities_labels"] = [i for i in df_cleaned_news["clean_text"].apply(lambda x: entity_count(x)[0])]
df_cleaned_news["ORG_count"] = df_cleaned_news["clean_text"].apply(lambda x: entity_count(x)[1])
df_cleaned_news["PERSON_count"] = df_cleaned_news["clean_text"].apply(lambda x: entity_count(x)[2])

In [18]:
df_cleaned_news.head()

Unnamed: 0,date,clean_text,clean_title,entities_labels,ORG_count,PERSON_count
0,2022-01-06,chicago ap chicago school leaders canceled classes thursday second consecutive day failing reach agreement teachers union covid safety protocols nation third largest school district the chicago teachers union sought revert remote instruction latest surge infections sides hammer deal but chicago public schools leaders said remote learning work schools safely remain open protocols place the move cancel classes activities thursday affects roughly students ...,no deal school chicago cancels classes 2nd day,"{'ap': 'ORG', 'the chicago teachers union': 'ORG', 'pedro martinez': 'PERSON', 'white house': 'ORG', 'jen psaki': 'PERSON', 'joe biden': 'PERSON', 'donald trump': 'PERSON', 'jesse sharkey': 'PERSON', 'cps': 'ORG', 'payton': 'PERSON', 'danelda craig': 'PERSON', 'lincoln': 'ORG', 'cps ': 'ORG', 'associated press': 'ORG', 'sara burnett': 'PERSON', 'rick callahan': 'PERSON', 'sophia tareen': 'PERSON', 'twitter ': 'ORG', 'the associated press': 'ORG'}",9,13
1,2022-01-06,chicago pd lies season episode promo voight employs new informant help solve tricky drug trafficking case atwater struggles reconcile personal professional life reaches decision jesseleesoffer marinasqu trspiridakos nbcchicagopd,watch chicago p d preview wednesday,"{'chicago pd ': 'ORG', 'voight': 'ORG', 'jesseleesoffer': 'PERSON'}",2,1
2,2022-01-06,wise foolish the chicago tribune article titled chicago public schools cancels classes thursday deadlock union continues trump biden press secretary comment impasse reported trumps solution follows educate children person give every dollar spent education directly students get failing government schools trump said statement the purpose this post is relate ancient wisdom proverb trump solution king solomon go presence foolish man thou perceivest lips...,trump solution chicago public schools king solomon blog,"{'the chicago tribune article': 'ORG', 'biden press': 'ORG', 'solomon': 'PERSON', 'lightfoot': 'PERSON', 'obama': 'PERSON'}",2,3
3,2022-01-06,the neighbors near mashawn plummer portage park firehouse knew often sat outside friendly presence station he showed early shifts ready job thrilled friends family recalled chicago firefighter emt mashawn plummer engine chicago fire department hundreds chicago firefighters joined family friends thursday lay plummer rest died december battling apartment fire northwest side they remembered plummer gentle giant followed dream becoming chicago fi...,family friends colleagues gathering today south side church attend funeral services chicago firefighter mashawn plummer,"{'plummer portage': 'PERSON', 'emt': 'PERSON', 'plummer': 'PERSON', 'plummer ': 'PERSON'}",0,5
4,2022-01-06,mashawn plummer died final month preventing fireplace town northwest aspect additionally killed civilian a visitation held plummer wednesday night time a remaining visitation probably held home hope church e 114th st related chicago firefighter amongst critically injured belmont central residence fireplace killed following ultimate visitation funeral companies start probably adopted procession oak woods cemetery 67th road the yea...,chicago firefighter funeral pals household say goodbye mashawn plummer,"{'plummer': 'PERSON', 'englewood': 'PERSON', 'plummer ': 'PERSON', 'dolores johnson': 'PERSON', 'fireman johnson': 'PERSON', 'arnetta carr': 'PERSON', 'pat ': 'PERSON', 'gomez': 'ORG', 'johnson': 'PERSON', 'carr': 'PERSON', 'emt': 'PERSON'}",1,15


In [19]:
# Saving the extracted entities and counts to a csv

df_cleaned_news.to_csv("df_cleaned_news_ner.csv")