# NER with Spacy

Named Entity Recognition (NER) adalah teknik dalam NLP untuk memproses suatu kalimat dengan cara mengidentifikasi dan mengekstrak entitas. Entitas di sini dapat berupa objek, orang, lokasi, dan sebagainya. Tujuan utamanya adalah mengelompokkan entitas-entitas tersebut ke dalam kategori yang telah ditentukan.

In [3]:
import spacy

# Loaded pipelines keyed by model name, so repeated calls do not reload
# the model from disk.
_PIPELINE_CACHE = {}

# spaCy entity label -> key used in the returned dictionary.
_LABEL_TO_KEY = {
    'PERSON': "persons",
    'ORG': "organizations",
    'GPE': "locations",
}


def _get_pipeline(model_name):
    """Return the spaCy pipeline for *model_name*, loading it at most once."""
    pipeline = _PIPELINE_CACHE.get(model_name)
    if pipeline is None:
        pipeline = _PIPELINE_CACHE[model_name] = spacy.load(model_name)
    return pipeline


def extract_named_entities_and_parsed_trees(sentence, model_name='en_core_web_sm'):
    """Extract named entities from *sentence* and print its dependency parse.

    Args:
        sentence: Text to analyse with the spaCy pipeline.
        model_name: Name of the spaCy pipeline to load. Defaults to
            ``'en_core_web_sm'``.

    Returns:
        A dict with the keys ``"persons"``, ``"organizations"`` and
        ``"locations"``. Each value is a list of entity texts, in document
        order, whose label is ``PERSON``, ``ORG`` or ``GPE`` respectively.
        Entities with any other label are ignored.

    Side effects:
        Prints one line per token to stdout, showing the token text, its
        dependency relation, its head token and its part-of-speech tag.
    """
    doc = _get_pipeline(model_name)(sentence)

    # Create every key up front so the result always has all three groups,
    # even when the text contains no entity of a given kind.
    named_entities = {key: [] for key in _LABEL_TO_KEY.values()}
    for ent in doc.ents:
        key = _LABEL_TO_KEY.get(ent.label_)
        if key is not None:
            named_entities[key].append(ent.text)

    print("Formatted Dependency Parse Tree:")
    for token in doc:
        print(f"{token.text} <--{token.dep_}-- {token.head.text} ({token.pos_})")
    return named_entities

def main():
    """Run the extraction on a fixed sample text and print each entity group."""
    sample = "Apple is a company founded by Steve Jobs. It is headquartered in Cupertino, California."
    entities = extract_named_entities_and_parsed_trees(sample)

    print("\nExtracted Named Entities:")
    # (heading, result key) pairs, printed in this order.
    report_rows = (
        ("Persons: ", "persons"),
        ("Organizations: ", "organizations"),
        ("Locations: ", "locations"),
    )
    for heading, key in report_rows:
        print(heading, entities[key])


# Only run the demo when the file is executed directly, not when imported.
if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'spacy'

In [None]:
import spacy

# Load the English pipeline used to annotate the sample text below.
nlp = spacy.load('en_core_web_sm')

text = '''
Apple is a company founded by Steve Jobs. It is headquartered in Cupertino, California.
Apple Inc. is an American multinational technology company that specializes in consumer electronics, computer software, and online services.
Apple was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976 to develop and sell Wozniak's Apple I personal computer.
You can buy Apple products at their stores or on their website.
'''

doc = nlp(text)

# Group the entity texts by label. Use ``ent.label_`` (the string name)
# rather than ``ent.label`` (an integer ID) so the printed categories are
# human-readable.
categories = {}
for ent in doc.ents:
    categories.setdefault(ent.label_, []).append(ent.text)

print("Categories Named Entities:")
for label, entities in categories.items():
    print(f"{label}: {entities}")