In [1]:
import spacy
# Load the English pipeline once; later cells call this shared `nlp` object to parse text.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Lemmas of the "say"-like verbs; a verb token is only considered when its
# lemma is a member of this set.
synonyms = {
    "say", "tell", "speak", "claim", "communicate",
    "assert", "convey", "reply", "declare", "express",
}

In [3]:
# Debug aid: parse a sample sentence and dump the annotations of each token.
dbg = nlp("he expressed this strictly, angrily and very badly.")

# One row per token: text [lemma] (part of speech) - dependency label - head text.
for tok in dbg:
  print("{} [{}] ({}) - {} - {}".format(tok.text, tok.lemma_, tok.pos_, tok.dep_, tok.head.text))

he [-PRON-] (PRON) - nsubj - expressed
expressed [express] (VERB) - ROOT - expressed
this [this] (DET) - dobj - expressed
strictly [strictly] (ADV) - advmod - expressed
, [,] (PUNCT) - punct - strictly
angrily [angrily] (ADV) - conj - strictly
and [and] (CCONJ) - cc - angrily
very [very] (ADV) - advmod - badly
badly [badly] (ADV) - conj - angrily
. [.] (PUNCT) - punct - expressed


In [12]:
def bump_freq(d, verb_lemma, adv_lemma):
    """Add one occurrence of ``adv_lemma`` to the counts kept for ``verb_lemma``.

    d          -- nested ``{verb_lemma: {adv_lemma: count}}`` dict, updated in place
    verb_lemma -- outer key; its inner dict is created on first use
    adv_lemma  -- inner key; its count starts at 0 before the increment

    Returns nothing.
    """
    if verb_lemma not in d:
        d[verb_lemma] = {}
    counts = d[verb_lemma]
    counts[adv_lemma] = counts.get(adv_lemma, 0) + 1

def is_ly_adv(tok):
    """Return True when ``tok`` is tagged as an adverb and its lemma ends in "ly".

    tok -- object exposing ``pos_`` and ``lemma_`` string attributes; ``lemma_``
           is only read when ``pos_`` is ``'ADV'``.
    """
    if tok.pos_ != 'ADV':
        return False
    return tok.lemma_.endswith('ly')
    
def collect_through_conj(d, verb_lemma, adv_tok):
    """Count the -ly adverbs coordinated with ``adv_tok`` under ``verb_lemma``.

    Recursively follows the ``conj`` children of ``adv_tok``, so every link of
    a chain such as "strictly, angrily and very badly" is reached. Only
    children accepted by ``is_ly_adv`` are counted or descended into, so a
    conjunct that is not an -ly adverb ends that branch.

    d          -- nested ``{verb_lemma: {adv_lemma: count}}`` dict, updated in place
    verb_lemma -- lemma of the verb the adverbs are attributed to
    adv_tok    -- token whose children are inspected; it is not counted itself
    """
    for conjunct in adv_tok.children:
        # Skip anything that is not an -ly adverb attached by coordination.
        if not is_ly_adv(conjunct) or conjunct.dep_ != 'conj':
            continue
        bump_freq(d, verb_lemma, conjunct.lemma_)
        collect_through_conj(d, verb_lemma, conjunct)

def adverb_freqs(sents):
    """Count the -ly adverbs that modify the verbs listed in ``synonyms``.

    Each text is parsed with the module-level ``nlp`` pipeline. For every
    token tagged ``VERB`` whose lemma is in ``synonyms``, each direct
    ``advmod`` child accepted by ``is_ly_adv`` is counted, followed by the
    -ly adverbs coordinated with it (see ``collect_through_conj``).

    sents -- iterable of text strings to parse

    Returns a nested dict ``{verb_lemma: {adverb_lemma: count}}``; verbs with
    no matching adverb do not appear.

    NOTE(review): an ``advmod`` child that fails ``is_ly_adv`` is skipped
    together with any adverbs conjoined to it.
    """
    result = {}
    # nlp.pipe parses the texts in batches and yields the Docs in input order,
    # which avoids paying the per-call overhead of nlp(sent) on a large corpus.
    for doc in nlp.pipe(sents):
        for tok in doc:
            if tok.pos_ != 'VERB' or tok.lemma_ not in synonyms:
                continue
            for child in tok.children:
                if is_ly_adv(child) and child.dep_ == 'advmod':
                    bump_freq(result, tok.lemma_, child.lemma_)
                    collect_through_conj(result, tok.lemma_, child)

    return result

In [13]:
# Sanity check on the sample sentence: the three coordinated -ly adverbs should
# each be counted once under "express".
adverb_freqs(["he expressed this strictly, angrily and very badly."])

{'express': {'strictly': 1, 'angrily': 1, 'badly': 1}}

In [6]:
def find_adverbs_in_sent(sent):
    """Return the set of adverb lemmas that ``adverb_freqs`` finds in one text.

    sent -- a single text string; it is passed to ``adverb_freqs`` as a
            one-element list.

    The verbs and the counts are discarded; only the adverb lemmas are kept.
    """
    per_verb = adverb_freqs([sent])
    return {adv for adv_counts in per_verb.values() for adv in adv_counts}

# Same sample sentence: only the adverb lemmas come back, without verbs or counts.
find_adverbs_in_sent("he expressed this strictly, angrily and very badly.")

{'angrily', 'badly', 'strictly'}

In [7]:
# Read the blog corpus as a list of lines; readlines() keeps each trailing "\n".
# The encoding is passed explicitly because open() otherwise falls back to the
# platform's locale encoding, which makes the result machine-dependent.
# NOTE(review): assumes the corpus file is UTF-8 — confirm.
with open("../../../tasks/02-structural-linguistics/data/blog2008.txt", encoding="utf-8") as f:
    blog = f.readlines()

blog[:5]

['Happy New Year .\n',
 'And just for another thing to celebrate , Dr. David Whitehouse , the British astronomer and former science editor of the BBC , tells us : " \' The fact is that the global temperature of 2007 is statistically the same as 2006 and every year since 2001 \' " Global warming stopped ?\n',
 'Surely not .\n',
 'What heresy is this ?\n',
 "Have n't we been told that the science of global warming is settled beyond doubt and that all that 's left to the so-called sceptics is the odd errant glacier that refuses to melt ?\n"]

In [8]:
# Count adverbs per verb over every line of the blog corpus.
blog_adv_freqs = adverb_freqs(blog)

# Print one "verb - adverb: count" row per pair, with the adverbs of each verb
# ordered by descending count. sorted() is stable, so adverbs with equal counts
# keep their relative order from the dict.
for verb, d in blog_adv_freqs.items():
    for adv, freq in sorted(d.items(), key=lambda item: item[1], reverse=True):
        print("{} - {}: {}".format(verb, adv, freq))

claim - falsely: 76
claim - previously: 9
claim - repeatedly: 8
claim - recently: 4
claim - initially: 3
claim - actually: 3
claim - absurdly: 3
claim - originally: 3
claim - credibly: 3
claim - publicly: 3
claim - laughably: 3
claim - baselessly: 3
claim - surely: 2
claim - apparently: 2
claim - bizarrely: 2
claim - indignantly: 2
claim - immediately: 2
claim - erroneously: 2
claim - consistently: 2
claim - incorrectly: 2
claim - shamelessly: 2
claim - ironically: 2
claim - disingenuously: 2
claim - inaccurately: 2
claim - ludicrously: 2
claim - frequently: 2
claim - vaguely: 2
claim - formerly: 2
claim - brazenly: 1
claim - solemnly: 1
claim - oddly: 1
claim - fiercely: 1
claim - constantly: 1
claim - annually: 1
claim - piously: 1
claim - preposterously: 1
claim - widely: 1
claim - clearly: 1
claim - probably: 1
claim - illegally: 1
claim - righteously: 1
claim - explicitly: 1
claim - tearfully: 1
claim - patronizingly: 1
claim - openly: 1
claim - remotely: 1
claim - certainly: 1
cl