# spaCy
[spaCy](https://spacy.io)

In [2]:
# Spacy 
import numpy as np
import pyarrow as pa
import spacy
import vaex
from spacy.cli import download
from spacy.language import Language
from goldilox import Pipeline

def download_nlp(lang='en_core_web_sm'):
    """Make sure the spaCy model ``lang`` is installed, downloading it if not.

    Args:
        lang: Name of the spaCy model package to check.

    Returns:
        True when the model could be loaded as-is; False when loading failed
        and a download was triggered instead.
    """
    try:
        # Only the success/failure of the load matters; the model is discarded.
        spacy.load(lang)
    except OSError:
        # spacy.load raises OSError when the model package cannot be found.
        # Catching only that keeps KeyboardInterrupt and unrelated bugs visible.
        download(lang)
        return False
    return True

# Check for the default model ('en_core_web_sm'), which is loaded further down,
# and download it when it is missing.
download_nlp()

True

In [13]:
# Build a spaCy entity-extraction pipeline
@Language.component("ents")
def ents(doc):
    """Pipeline component that hands back the entities found in ``doc``.

    NOTE(review): this returns ``doc.ents`` rather than ``doc`` itself, so a
    pipeline ending with this component yields the entity spans instead of the
    document. Confirm the installed spaCy version accepts a component that
    does not return the document.
    """
    found_entities = doc.ents
    return found_entities

# Load the small English model and append the custom 'ents' component as the
# last pipeline step.
nlp_entitie = spacy.load('en_core_web_sm')
nlp_entitie.add_pipe('ents', name='ents', last=True)

# Two sample sentences mentioning organisations and monetary amounts.
texts = [
    "Net income was $9.4 million compared to the prior year of $2.7 million. Apple is doing very well",
    "IBM had their revenue exceeded twelve billion dollars, with a loss of $1b.",
]

# Single-column dataframe holding the raw text.
df = vaex.from_arrays(text=texts)

@vaex.register_function()
def get_entities(ar, label='ORG'):
    """Extract the entity texts with a given label from each input text.

    Args:
        ar: A list of texts, or an array-like exposing ``tolist()``.
        label: Entity label to keep (compared against ``ent.label_``).

    Returns:
        A pyarrow array of type ``list<string>`` with one list of matching
        entity texts per input text.
    """
    if not isinstance(ar, list):
        ar = ar.tolist()
    entities = []
    for text in ar:
        parsed = nlp_entitie(text)
        # With the custom 'ents' component the pipeline yields the entity
        # spans directly; otherwise it yields a document exposing ``.ents``.
        # Accept both so the function does not depend on that component.
        spans = getattr(parsed, 'ents', parsed)
        entities.append([str(ent.text) for ent in spans if ent.label_ == label])
    # Pin the type: without it an all-empty batch would be inferred as
    # list<null> instead of list<string>.
    return pa.array(entities, type=pa.list_(pa.string()))
# Attach the function to this dataframe under the name 'get_entities'.
df.add_function('get_entities', get_entities)

# Define variables whose values equal their own names.
# NOTE(review): presumably so the string arguments 'ORG' / 'MONEY' passed
# below resolve to these values when the expression is evaluated — confirm
# against the vaex documentation.
df.variables['ORG'] = 'ORG'
df.variables['MONEY'] = 'MONEY'

# One derived column per entity label.
df['organisations'] = df.func.get_entities(df.text, 'ORG')
df['money'] = df.func.get_entities(df.text, 'MONEY')

# Wrap the dataframe in a goldilox Pipeline and run inference on
# ``pipeline.raw``; the table printed below is the result.
pipeline = Pipeline.from_vaex(df)
pipeline.inference(pipeline.raw)



#,text,organisations,money
0,'Net income was $9.4 million compared to the pri...,['Apple'],"['$9.4 million', '$2.7 million']"
