# Einführungsbeispiele NLP

Angelehnt an D. Sarkar: Text Analytics with Python (2nd Edition)

Ergänzt und aktualisiert von Heiko Rölke


In [None]:
import nltk
import spacy
import numpy as np
import pandas as pd

# Load the spaCy pipeline "en_core_web_trf" once and bind it to `nlp`;
# the cells that process `sentence` call this object.
nlp = spacy.load('en_core_web_trf')

In [None]:
# English example sentence used by the tokenising, tagging and parsing cells.
sentence = (
    "The brown fox is quick and "
    "he is jumping over the lazy dog"
)
sentence

In [None]:
# Split the sentence on whitespace and shuffle the resulting word list in place.
words = sentence.split()
# Uses NumPy's global random state; no seed is set here, so the order varies.
np.random.shuffle(words)
print(words)

In [None]:
# Tag every whitespace-separated token with nltk.pos_tag and show the
# resulting (token, tag) pairs transposed, i.e. one column per token.
pos_tags = nltk.pos_tag(sentence.split())
pd.DataFrame(pos_tags).transpose()

In [None]:
# POS-tag the English sentence with spaCy. Each tuple holds the token text,
# its `tag_` label and its `pos_` label; the table is transposed so that
# every token gets its own column.
# `token.text` is stored (rather than the Token itself) so the DataFrame
# contains plain strings instead of spaCy Token objects.
spacy_pos_tagged = [(token.text, token.tag_, token.pos_) for token in nlp(sentence)]
pd.DataFrame(spacy_pos_tagged).T

In [None]:
# Chunk grammar for nltk.RegexpParser: every rule names a chunk type and
# gives a pattern over POS tags.
#   NP:   optional <DT>, optional <JJ>, then one tag starting with NN
#   ADJP: a single <JJ>
#   ADVP: a single tag starting with RB
#   PP:   a single <IN>
#   VP:   optional <MD> followed by one or more tags starting with VB
grammar = '''
            NP: {<DT>?<JJ>?<NN.*>}  
            ADJP: {<JJ>}
            ADVP: {<RB.*>}
            PP: {<IN>}      
            VP: {<MD>?<VB.*>+}
          '''

# Tag the whitespace-split sentence, parse the tagged tokens with the
# grammar above and print the resulting tree.
pos_tagged_sent = nltk.pos_tag(sentence.split())
rp = nltk.RegexpParser(grammar)
shallow_parsed_sent = rp.parse(pos_tagged_sent)
print(shallow_parsed_sent)

In [None]:
# visualize shallow parse tree
# !pip install svgling  # if necessary, e.g. when running in the browser...

# Leaving the tree as the cell's last expression lets the notebook display it
# (presumably drawn via svgling, hence the optional install above).
shallow_parsed_sent

In [None]:
from spacy import displacy

# Parse the English sentence and draw the result inline in the notebook,
# with custom arc spacing and arrow sizes.
displacy.render(
    nlp(sentence),
    jupyter=True,
    options={"distance": 100, "arrow_stroke": 1.5, "arrow_width": 8},
)


In [None]:
# Ask spaCy for the description of the label "amod"
# (presumably one of the arc labels in the parse rendered above).
spacy.explain("amod")

# Auf Deutsch geht es auch...

In [None]:
# Rebind `nlp` to the spaCy pipeline "de_dep_news_trf".
# NOTE: this replaces the English pipeline loaded at the top of the notebook,
# so re-running a cell that calls nlp(sentence) afterwards uses this model
# unless the English pipeline is loaded again first.
nlp = spacy.load("de_dep_news_trf")

In [None]:
# German example sentence.
satz = "Der braune Fuchs springt über den schlafenden Hund."

# POS-tag it with the pipeline currently bound to `nlp`. Each tuple holds the
# token text, its `tag_` label and its `pos_` label; the table is transposed
# so that every token gets its own column.
# `token.text` is stored (rather than the Token itself) so the DataFrame
# contains plain strings instead of spaCy Token objects.
spacy_pos_tagged = [(token.text, token.tag_, token.pos_) for token in nlp(satz)]
pd.DataFrame(spacy_pos_tagged).T

In [None]:
# Build a table with one row per token of `satz`: the token text, its `pos_`
# and `tag_` labels, and spacy.explain()'s description of each label.
# The two description columns carry distinct labels: with both of them named
# "Erklärung", df["Erklärung"] would select two columns at once instead of
# a single one.
cols = ["Wort", "Wortart", "Erklärung Wortart", "Wort-Tag", "Erklärung Wort-Tag"]
rows = [
    (
        token.text,
        token.pos_,
        spacy.explain(token.pos_),
        token.tag_,
        spacy.explain(token.tag_),
    )
    for token in nlp(satz)
]
df = pd.DataFrame(rows, columns=cols)
df

In [None]:
# Parse the German sentence and render it with the same arc spacing and
# arrow sizes as the English example.
spacy.displacy.render(
    nlp(satz),
    options={"distance": 100, "arrow_stroke": 1.5, "arrow_width": 8},
)

In [None]:
# Ask spaCy for the description of the label "nk"
# (presumably an arc label from the German parse rendered above).
spacy.explain("nk")

In [None]:
# Ask spaCy for the description of the label "sb"
# (presumably an arc label from the German parse rendered above).
spacy.explain("sb")