In [1]:
import spacy
import pandas as pd

In [2]:
# Load the spaCy pipeline named "en_core_web_sm"; `nlp` is called on raw text in later cells.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Sample passage that the following cells parse and tag.
# NOTE(review): the name `emma_ja` does not describe this resume-advice text — consider renaming.
emma_ja = "When listing these projects on your resume, be sure to provide a brief description of each project, including the problem statement, methodologies used, and key outcomes or results achieved. Additionally, highlight any specific skills or technologies utilized in each project, such as programming languages, libraries, frameworks, or machine learning algorithms."

In [4]:
# Echo the passage so the exact input text is visible in the notebook output.
emma_ja

'When listing these projects on your resume, be sure to provide a brief description of each project, including the problem statement, methodologies used, and key outcomes or results achieved. Additionally, highlight any specific skills or technologies utilized in each project, such as programming languages, libraries, frameworks, or machine learning algorithms.'

In [5]:
# Run the loaded pipeline over the passage; the result is iterated token by token below.
spacy_doc = nlp(emma_ja)

In [6]:
# Start from an empty frame that already carries the two expected columns.
pos_df = pd.DataFrame(columns=["token", "pos_tag"])

In [7]:
# Build the token/POS table with one constructor call instead of concatenating a
# one-row frame per token: the concat-in-a-loop form copies the whole frame on
# every iteration (quadratic) and keeps appending to the existing `pos_df`, so
# re-running the cell duplicated every row. Building from the doc each time makes
# the cell idempotent. `columns=` keeps both columns present even if the doc
# yields no tokens.
pos_df = pd.DataFrame(
    [{'token': token.text, 'pos_tag': token.pos_} for token in spacy_doc],
    columns=['token', 'pos_tag'],
)

In [8]:
# Preview the first ten token/POS rows.
pos_df.head(10)

Unnamed: 0,token,pos_tag
0,When,SCONJ
1,listing,VERB
2,these,DET
3,projects,NOUN
4,on,ADP
5,your,PRON
6,resume,NOUN
7,",",PUNCT
8,be,AUX
9,sure,ADJ


In [9]:
# Tally each distinct (token, pos_tag) pair and order the tally from most to
# least frequent.
pos_df_counts = (
    pos_df
    .groupby(['token', 'pos_tag'])
    .size()
    .reset_index(name='counts')
    .sort_values(by="counts", ascending=False)
)

In [10]:
# Show the ten most frequent (token, pos_tag) pairs.
pos_df_counts.head(10)

Unnamed: 0,token,pos_tag,counts
0,",",PUNCT,9
27,or,CCONJ,3
13,each,DET,2
31,project,NOUN,2
1,.,PUNCT,2
43,these,DET,1
42,the,DET,1
26,on,ADP,1
46,utilized,VERB,1
28,outcomes,NOUN,1


In [11]:
# For every POS tag, count the rows of the pair table, largest first. Because
# that table has one row per (token, pos_tag) pair, this is the number of
# distinct tokens carrying the tag, not the total number of occurrences.
pos_df_poscounts = (
    pos_df_counts
    .groupby(['pos_tag'])['token']
    .count()
    .sort_values(ascending=False)
)

In [12]:
# Show the ten POS tags with the highest per-tag count.
pos_df_poscounts.head(10)

pos_tag
NOUN     18
VERB      7
ADJ       5
DET       5
ADP       4
CCONJ     2
PUNCT     2
ADV       1
AUX       1
PART      1
Name: token, dtype: int64

In [13]:
# Keep only the NOUN rows of the frequency table and take the first ten of them.
nouns = pos_df_counts.loc[pos_df_counts['pos_tag'] == 'NOUN'].head(10)
nouns

Unnamed: 0,token,pos_tag,counts
31,project,NOUN,2
28,outcomes,NOUN,1
29,problem,NOUN,1
30,programming,NOUN,1
32,projects,NOUN,1
34,results,NOUN,1
35,resume,NOUN,1
38,statement,NOUN,1
41,technologies,NOUN,1
36,skills,NOUN,1


In [14]:
# Keep only the ADJ rows of the frequency table and take the first ten of them.
adj = pos_df_counts.loc[pos_df_counts['pos_tag'] == 'ADJ'].head(10)
adj

Unnamed: 0,token,pos_tag,counts
37,specific,ADJ,1
39,such,ADJ,1
40,sure,ADJ,1
11,brief,ADJ,1
18,key,ADJ,1


## NAMED ENTITY RECOGNITION

In [15]:
import spacy
from spacy import displacy
from spacy import tokenizer
import re

In [16]:
# NOTE(review): this loads the same "en_core_web_sm" pipeline name as the earlier
# load in this notebook and rebinds `nlp`; the reload looks redundant — confirm.
nlp = spacy.load("en_core_web_sm")

In [17]:
# Sample passage used as input for the named-entity cells below.
google_text = "Google's history is a testament to innovation and exponential growth. Founded in 1998 by Larry Page and Sergey Brin, Google began as a research project at Stanford University. Its pioneering search engine quickly revolutionized the way information is accessed online. Over the years, Google expanded its offerings to include a wide range of products and services, from Gmail to Google Maps, cementing its status as one of the most influential technology companies in the world. Today, Google continues to lead the way in shaping the digital landscape with its commitment to innovation and user-centric design."

In [18]:
# Echo the passage so the exact input text is visible in the notebook output.
google_text

"Google's history is a testament to innovation and exponential growth. Founded in 1998 by Larry Page and Sergey Brin, Google began as a research project at Stanford University. Its pioneering search engine quickly revolutionized the way information is accessed online. Over the years, Google expanded its offerings to include a wide range of products and services, from Gmail to Google Maps, cementing its status as one of the most influential technology companies in the world. Today, Google continues to lead the way in shaping the digital landscape with its commitment to innovation and user-centric design."

In [19]:
# Run the pipeline on the Google passage.
# NOTE(review): this rebinds `spacy_doc`, which earlier held the parsed resume
# passage; re-running the POS cells after this point would read this doc instead.
spacy_doc = nlp(google_text)

In [20]:
# List every entity the pipeline found, one "<text> <label>" pair per line.
for ent in spacy_doc.ents:
    print(f"{ent.text} {ent.label_}")

Google ORG
1998 DATE
Larry Page PERSON
Sergey Brin PERSON
Google ORG
Stanford University ORG
the years DATE
Google ORG
Gmail PERSON
Google Maps ORG
Today DATE
Google ORG


In [21]:
# Visualise the recognised entities inline in the notebook.
displacy.render(
    spacy_doc,
    style="ent",
    jupyter=True,
)

In [22]:
# Drop every character that is neither a word character nor whitespace, then
# lowercase what remains. Removing the characters outright fuses possessives and
# hyphenated words (e.g. "user-centric" becomes "usercentric").
google_text_clean = re.sub(r"[^\w\s]", '', google_text)
google_text_clean = google_text_clean.lower()
print(google_text_clean)

googles history is a testament to innovation and exponential growth founded in 1998 by larry page and sergey brin google began as a research project at stanford university its pioneering search engine quickly revolutionized the way information is accessed online over the years google expanded its offerings to include a wide range of products and services from gmail to google maps cementing its status as one of the most influential technology companies in the world today google continues to lead the way in shaping the digital landscape with its commitment to innovation and usercentric design


In [23]:
# Run the pipeline on the lowercased, punctuation-free text for comparison with the raw passage.
spacy_doc_clean = nlp(google_text_clean)

In [24]:
# List every entity found in the cleaned text, one "<text> <label>" pair per line.
for ent in spacy_doc_clean.ents:
    print(f"{ent.text} {ent.label_}")

1998 DATE
larry PERSON
sergey brin google ORG
stanford university ORG
the years DATE
google ORG
today DATE


In [25]:
# Visualise the entities recognised in the cleaned text inline in the notebook.
displacy.render(
    spacy_doc_clean,
    style="ent",
    jupyter=True,
)