In [5]:
import en_core_web_md
# Load the en_core_web_md spaCy pipeline. The cells below call it through the
# global name `nlp` until the displaCy cell rebinds that name to en_core_web_sm.
nlp = en_core_web_md.load()

## 1

In [6]:
from nltk.stem.snowball import SnowballStemmer
# Stem a family of "truth" derivatives with NLTK's English Snowball stemmer.
# Tokenisation and lemmas come from the spaCy pipeline bound to `nlp`.
stemmer = SnowballStemmer("english")
template = "{:15}{:15}{:15}{:15}"  # four 15-character columns for the optional table
quote = nlp("truth truthful truthfulness countertruth untruth untruthful truthology")

original = [tok.text.lower() for tok in quote]
lemmas = [tok.lemma_ for tok in quote]
stems = list(map(stemmer.stem, original))
lemmas_stems = [stemmer.stem(lemma.lower()) for lemma in lemmas]

print(*stems, sep=", ")
# Optional side-by-side table (original / lemma / stem / stem of lemma):
# print(template.format("original: ", "lemmas: ", "stems: ", "lemmas+stems: "))
# for o, l, s, ls in zip(original, lemmas, stems, lemmas_stems):
#     print(template.format(o, l, s, ls))

truth, truth, truth, countertruth, untruth, untruth, trutholog


In [7]:
# Same experiment for derivatives of "flaw". This cell reuses whatever
# `stemmer` is currently bound in the kernel; it does not create its own.
quote = nlp("flaw flaws flawed flawless flawlessness flawlessly")

original = [tok.text.lower() for tok in quote]
lemmas = [tok.lemma_ for tok in quote]
stems = list(map(stemmer.stem, original))
lemmas_stems = [stemmer.stem(lemma.lower()) for lemma in lemmas]

print(*stems, sep=", ")
# Optional side-by-side table (original / lemma / stem / stem of lemma):
# print(template.format("original: ", "lemmas: ", "stems: ", "lemmas+stems: "))
# for o, l, s, ls in zip(original, lemmas, stems, lemmas_stems):
#     print(template.format(o, l, s, ls))

flaw, flaw, flaw, flawless, flawless, flawless


In [8]:
# Switch the global `stemmer` to Russian (the next cell relies on this binding)
# and stem a family of words built on "лес". `nlp` still supplies the tokens
# and lemmas.
stemmer = SnowballStemmer("russian")
quote = nlp("лес лесной лесник лесничий лесничество пролесье")

original = [tok.text.lower() for tok in quote]
lemmas = [tok.lemma_ for tok in quote]
stems = list(map(stemmer.stem, original))
lemmas_stems = [stemmer.stem(lemma.lower()) for lemma in lemmas]

print(*stems, sep=", ")
# Optional side-by-side table (original / lemma / stem / stem of lemma):
# print(template.format("original: ", "lemmas: ", "stems: ", "lemmas+stems: "))
# for o, l, s, ls in zip(original, lemmas, stems, lemmas_stems):
#     print(template.format(o, l, s, ls))

лес, лесн, лесник, леснич, лесничеств, пролес


In [9]:
# Same experiment for words built on "окно". This cell reuses whatever
# `stemmer` is currently bound in the kernel; it does not create its own.
quote = nlp("окно окошко подоконник оконный окнище")

original = [tok.text.lower() for tok in quote]
lemmas = [tok.lemma_ for tok in quote]
stems = list(map(stemmer.stem, original))
lemmas_stems = [stemmer.stem(lemma.lower()) for lemma in lemmas]

print(*stems, sep=", ")
# Optional side-by-side table (original / lemma / stem / stem of lemma):
# print(template.format("original: ", "lemmas: ", "stems: ", "lemmas+stems: "))
# for o, l, s, ls in zip(original, lemmas, stems, lemmas_stems):
#     print(template.format(o, l, s, ls))

окн, окошк, подоконник, окон, окнищ


## 2

In [12]:
import re
import spacy
def pos_father_tag(sentence):
    """Describe the word marked with ``{...}`` in `sentence`.

    The braces are stripped, the cleaned sentence is parsed with the global
    `nlp` pipeline, and the token that starts at the position of the first
    ``{`` is looked up.

    Returns:
        ``"<sentence>: <part of speech>, <head>"`` where the part of speech is
        ``spacy.explain(token.pos_)`` and the head is the text of the token's
        syntactic head, or the literal ``ROOT`` when the token is the root.
        The sentence is echoed with its braces intact.

    Raises:
        ValueError: if `sentence` has no ``{`` marker, or if the marked
            position is not the start of a token in the parsed sentence.
    """
    idx = sentence.find("{")
    if idx == -1:
        raise ValueError("no '{{' marker in sentence: {!r}".format(sentence))

    # Characters before the first "{" are untouched by the substitution, so
    # `idx` is also the character offset of the marked word in the cleaned text.
    processed = nlp(re.sub('[{}]', '', sentence))

    # A default is passed to next() so that a miss cannot leak a bare
    # StopIteration into the caller's loop or comprehension.
    token = next((tok for tok in processed if tok.idx == idx), None)
    if token is None:
        raise ValueError(
            "no token starts at the marked position {} in: {!r}".format(idx, sentence)
        )

    pos_description = spacy.explain(token.pos_)
    head = 'ROOT' if token.dep_ == 'ROOT' else token.head.text
    return "{}: {}, {}".format(sentence, pos_description, head)

In [13]:
# One sentence per line; the word under inspection is wrapped in {braces}.
# The same surface forms (but / way / down) recur across the sentences.
raw_text = """
We can {but} hope that everything will be fine.
It's sad {but} true.
Jack brings nothing {but} trouble.
Let's do it this {way}!
This is {way} too much!
The prices are going {down}.
Someone pushed him and he fell {down} the stairs.
I’ve been feeling rather {down} lately.
It's not easy to {down} a cup of coffee in one gulp.
Bring a {down} jacket and a pair of gloves, and you'll be fine.
"""
text = raw_text.strip().split("\n")
# Build the full list first so nothing is printed if any sentence fails.
print(*[pos_father_tag(line) for line in text], sep="\n")

We can {but} hope that everything will be fine.: coordinating conjunction, hope
It's sad {but} true.: coordinating conjunction, sad
Jack brings nothing {but} trouble.: adposition, nothing
Let's do it this {way}!: noun, do
This is {way} too much!: adverb, much
The prices are going {down}.: adverb, going
Someone pushed him and he fell {down} the stairs.: adposition, fell
I’ve been feeling rather {down} lately.: adverb, feeling
It's not easy to {down} a cup of coffee in one gulp.: adposition, 's
Bring a {down} jacket and a pair of gloves, and you'll be fine.: adjective, jacket


In [14]:
# Mark each word of one short sentence in turn.
raw_text = """
{I} love turtles.
I {love} turtles.
I love {turtles}.
"""
text = raw_text.strip().split("\n")
# Build the full list first so nothing is printed if any sentence fails.
print(*[pos_father_tag(line) for line in text], sep="\n")

{I} love turtles.: pronoun, love
I {love} turtles.: verb, ROOT
I love {turtles}.: noun, love


## 3

In [39]:
import pymorphy2
import tokenize_uk
# Ukrainian morphological analyser (lang='uk'); explain_ru reads the
# part-of-speech tag from its first parse of a word.
morph = pymorphy2.MorphAnalyzer(lang='uk')

# Ukrainian display names for part-of-speech tags, keyed by the value that
# explain_ru reads from `tag.POS` of a pymorphy2 parse.
grammeme = {
    "NOUN": "іменник",               # noun
    "ADJF": "прикметник",            # adjective
    "ADJS": "прикметник",            # adjective
    "COMP": "компаратив",            # comparative
    "VERB": "дієслово",              # verb
    "INFN": "дієслово (інфінітив)",  # verb (infinitive)
    "PRTF": "дієприкметник",         # participle
    "PRTS": "дієприкметник",         # participle
    "GRND": "дієприслівник",         # adverbial participle
    "NUMR": "числівник",             # numeral
    "ADVB": "прислівник",            # adverb
    "NPRO": "займенник",             # pronoun
    "PRED": "предикатив",            # predicative (was the garbled "предікатів")
    "PREP": "прийменник",            # preposition
    "CONJ": "сполучник",             # conjunction (was "союз", the Russian word)
    "PRCL": "частка",                # particle
    "INTJ": "вигук",                 # interjection
}

def explain_ru(text):
    """Return the Ukrainian part-of-speech label for a single word.

    The word is analysed on its own, without sentence context, and only the
    first parse returned by `morph` is used. Homonyms therefore always get the
    same label.

    Returns:
        The `grammeme` entry for the parse's ``tag.POS``. When there is no such
        entry, the tag itself is returned as a string instead of raising
        ``KeyError``.
    """
    tag = morph.parse(text)[0].tag
    # NOTE(review): per the pymorphy2 docs, tag.POS is None for tokens that
    # carry no part of speech (e.g. punctuation, numbers); a bare
    # grammeme[...] lookup raises KeyError in that case.
    return grammeme.get(tag.POS, str(tag))

def pos_father_tag_ru(sentence):
    """Describe the word marked with ``{...}`` using Ukrainian POS labels.

    The braces are stripped, the cleaned sentence is parsed with the global
    `nlp` pipeline, and the token that starts at the position of the first
    ``{`` is looked up. The part-of-speech label comes from
    ``explain_ru(token.text)``; the head comes from the `nlp` parse.

    NOTE(review): the head is only as good as the pipeline bound to `nlp`.
    Confirm that pipeline supports the language of `sentence`.

    Returns:
        ``"<sentence>: <part of speech>, <head>"`` where the head is the text
        of the token's syntactic head, or the literal ``ROOT`` when the token
        is the root. The sentence is echoed with its braces intact.

    Raises:
        ValueError: if `sentence` has no ``{`` marker, or if the marked
            position is not the start of a token in the parsed sentence.
    """
    idx = sentence.find("{")
    if idx == -1:
        raise ValueError("no '{{' marker in sentence: {!r}".format(sentence))

    # Characters before the first "{" are untouched by the substitution, so
    # `idx` is also the character offset of the marked word in the cleaned text.
    processed = nlp(re.sub('[{}]', '', sentence))

    # A default is passed to next() so that a miss cannot leak a bare
    # StopIteration into the caller's loop or comprehension.
    token = next((tok for tok in processed if tok.idx == idx), None)
    if token is None:
        raise ValueError(
            "no token starts at the marked position {} in: {!r}".format(idx, sentence)
        )

    pos_description = explain_ru(token.text)
    head = 'ROOT' if token.dep_ == 'ROOT' else token.head.text
    return "{}: {}, {}".format(sentence, pos_description, head)

In [40]:
# Mark each word of one short Ukrainian sentence in turn.
raw_text = """
{Я} люблю черепашок.
Я {люблю} черепашок.
Я люблю {черепашок}.
"""
text = raw_text.strip().split("\n")
# Build the full list first so nothing is printed if any sentence fails.
print(*[pos_father_tag_ru(line) for line in text], sep="\n")

{Я} люблю черепашок.: займенник, черепашок
Я {люблю} черепашок.: дієслово, черепашок
Я люблю {черепашок}.: іменник, ROOT


In [29]:
# Ukrainian sentences in which the marked surface forms (віз, повз, край)
# recur. explain_ru labels the marked word on its own, so each form gets one
# label regardless of the sentence it appears in.
raw_text = """
Рада міністрів Європейського союзу затвердила угоду про спрощений порядок видачі {віз} для України.
Батько Себастьяна {віз} на санях їх театральний гурт до Львова.
А ще дивний елемент інтер’єру – {віз} із продукцією одного з херсонських виробників.
У цю мить {повз} Євгена пролетів останній вагон товарняка.
Кліпнув очима і побачив малого песика, який саме пробігав {повз} у бік села.
Степанко перестав кричати, тільки ламкий стогін {повз} йому із грудей.
Ось присіла на {край} ліжка.
Поставив ту кузню не {край} дороги, як було заведено, а на Красній горі, біля Прадуба.
Розповідаючи про передній {край} лінґвістики, фон Лібіх, як завжди, мислив широко і глобально.
Не {край} мені серце.
"""
text = raw_text.strip().split("\n")
# Build the full list first so nothing is printed if any sentence fails.
print(*[pos_father_tag_ru(line) for line in text], sep="\n")

Рада міністрів Європейського союзу затвердила угоду про спрощений порядок видачі {віз} для України.: іменник, для
Батько Себастьяна {віз} на санях їх театральний гурт до Львова.: іменник, на
А ще дивний елемент інтер’єру – {віз} із продукцією одного з херсонських виробників.: іменник, ROOT
У цю мить {повз} Євгена пролетів останній вагон товарняка.: дієслово, ROOT
Кліпнув очима і побачив малого песика, який саме пробігав {повз} у бік села.: дієслово, ROOT
Степанко перестав кричати, тільки ламкий стогін {повз} йому із грудей.: дієслово, кричати
Ось присіла на {край} ліжка.: дієслово, присіла
Поставив ту кузню не {край} дороги, як було заведено, а на Красній горі, біля Прадуба.: дієслово, ROOT
Розповідаючи про передній {край} лінґвістики, фон Лібіх, як завжди, мислив широко і глобально.: дієслово, лінґвістики
Не {край} мені серце.: дієслово, серце


In [46]:
# These appear to be English renderings of the Ukrainian homonym sentences,
# run through the English tagger for comparison.
raw_text = """
The Council of Ministers of the European Union has approved an agreement on a simplified procedure for issuing {visas} for Ukraine.
Sebastian's father {cart} on a sleigh their theatrical group to Lviv.
And a strange element of the interior is the {cart} with the products of one of the Kherson producers.
At that moment, the last wagon of the trucker flew {past} Eugene.
He glanced up and saw a small dog walking just {past} the village.
Stepanko stopped yelling, only a fragile groan {crawling} from his chest.
Here she sat on the {edge} of the bed.
He set up that forge not on the {edge} of road, as it was started, but on the Red Mountain, near Pradub.
In discussing the {forefront} of linguistics, von Liebig, as always, thought broadly and globally.
Don't {torment} my heart.
"""

text = raw_text.strip().split("\n")
# Build the full list first so nothing is printed if any sentence fails.
print(*[pos_father_tag(line) for line in text], sep="\n")

The Council of Ministers of the European Union has approved an agreement on a simplified procedure for issuing {visas} for Ukraine.: noun, issuing
Sebastian's father {cart} on a sleigh their theatrical group to Lviv.: noun, ROOT
And a strange element of the interior is the {cart} with the products of one of the Kherson producers.: noun, is
At that moment, the last wagon of the trucker flew {past} Eugene.: adposition, flew
He glanced up and saw a small dog walking just {past} the village.: adposition, walking
Stepanko stopped yelling, only a fragile groan {crawling} from his chest.: noun, groan
Here she sat on the {edge} of the bed.: noun, on
He set up that forge not on the {edge} of road, as it was started, but on the Red Mountain, near Pradub.: noun, on
In discussing the {forefront} of linguistics, von Liebig, as always, thought broadly and globally.: noun, discussing
Don't {torment} my heart.: verb, ROOT


## 4

In [54]:
from spacy import displacy

# NOTE: this rebinds the global `nlp` (loaded from en_core_web_md at the top of
# the notebook) to en_core_web_sm. Any earlier cell re-run after this one will
# parse with the small pipeline.
nlp = spacy.load("en_core_web_sm")
# `text` held a list of sentences in earlier cells; here it is a single string.
text = "The girl was standing where she was, trying to tidy up her tousled hair, extremely angry that that was seen by the drivers that were waiting at the crossing."
doc = nlp(text)
# jupyter=False asks displacy.render for the markup as a string instead of
# displaying it inline; the string is kept in `svg` and written to a file in
# the next cell.
svg = displacy.render(doc, style='dep', jupyter=False)

In [56]:
# Save the dependency-parse markup held in `svg` as doc.svg in the current
# working directory, overwriting any existing file.
with open("doc.svg", "w", encoding="utf-8") as f:
    f.write(svg)

## 5

In [61]:
from nltk.corpus import wordnet 
# Every WordNet synset for the surface form "age"; later cells index into this list.
syns = wordnet.synsets("age")

In [62]:
# Show the whole synset list so the indices used below can be read off it.
print(syns)

[Synset('age.n.01'), Synset('historic_period.n.01'), Synset('age.n.03'), Synset('long_time.n.01'), Synset('old_age.n.01'), Synset('age.v.01'), Synset('senesce.v.01'), Synset('age.v.03')]


In [73]:
def print_info(s):
    """Print a synset's definition, then each lemma's name and antonyms.

    `s` must provide ``definition()`` and ``lemmas()``; each lemma must provide
    ``name()`` and ``antonyms()``. Output is one value per line: the
    definition first, then a name line and an antonym-list line per lemma.
    Nothing is returned.
    """
    print(s.definition())
    for lemma in s.lemmas():
        print(lemma.name())
        print(lemma.antonyms())
# syns[0] is listed as age.n.01 in the synset list printed above.
s = syns[0]
print_info(s)

how long something has existed
age
[]


In [74]:
# syns[1] is listed as historic_period.n.01 in the synset list printed above.
s = syns[1]
print_info(s)

an era of history having some distinctive feature
historic_period
[]
age
[]


In [75]:
# syns[5] is listed as age.v.01, the first verb sense in the list printed above.
s = syns[5]
print_info(s)

begin to seem older; get older
age
[]
