In [1]:
import json
from transformers import T5Tokenizer, T5ForConditionalGeneration
import spacy
from utils import run_model, match_dates_based_on_precision, parse_date

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# spaCy pipeline shared by the whole notebook; it is handed to parse_date()
# whenever a model answer is converted into a date.
nlp = spacy.load('en_core_web_trf')

In [3]:
# Checkpoint identifiers passed to from_pretrained() below, one per model size
# (base, large, 3b) of the UnifiedQA-v2 T5 family.
base_model_v2 = 'allenai/unifiedqa-v2-t5-base-1251000'
large_model_v2  = 'allenai/unifiedqa-v2-t5-large-1251000'
threeb_model_v2 = 'allenai/unifiedqa-v2-t5-3b-1251000'

In [4]:
# Tokenizer and weights for the base-size checkpoint (both come from the same
# identifier so the vocabulary matches the model).
base_tokenizer = T5Tokenizer.from_pretrained(base_model_v2)
base_model = T5ForConditionalGeneration.from_pretrained(base_model_v2)

In [5]:
# Tokenizer and weights for the large-size checkpoint.
large_tokenizer = T5Tokenizer.from_pretrained(large_model_v2)
large_model = T5ForConditionalGeneration.from_pretrained(large_model_v2)

In [6]:
# Tokenizer and weights for the 3b-size checkpoint.
threeb_tokenizer = T5Tokenizer.from_pretrained(threeb_model_v2)
threeb_model = T5ForConditionalGeneration.from_pretrained(threeb_model_v2)

In [7]:
# Build `annotated`: column 0 of each TSV row -> column 3 of that row, keeping
# only rows whose column 5 equals 'yes'. (Column 0 is later looked up with the
# question text; column 3 is later compared against retrieved passages.)
annotated = {}
# The context manager closes the file even if a row raises part-way through.
with open('../data/unified-qa-metrics-annotated.tsv') as annotated_f:
    for line in annotated_f:
        vals = line.strip().split('\t')
        # strip() also removes trailing tabs, so a row whose last columns are
        # empty comes back with fewer than six fields; such a row cannot be
        # marked 'yes', so skip it instead of raising IndexError.
        if len(vals) <= 5:
            continue
        answerable = vals[5]
        if answerable == 'yes':
            annotated[vals[0]] = vals[3]

In [8]:
# Sanity check: how many rows flagged 'yes' were loaded.
len(annotated)

100

In [48]:
# Spot-check the annotated text stored for a single question.
annotated['when did Joe Scarborough marry Mika?']

'Joe Scarborough and they were all around the age of my son or younger. So, something happened in 1989."" In October 2001 Scarborough married his second wife, Susan Waren, a former aide to Florida governor Jeb Bush and a former congressional committee staffer. Their daughter was born in August 2003; their son was born in May 2008. Scarborough and Waren were divorced in January 2013. Scarborough currently resides in New Canaan, Connecticut, an affluent suburb near New York City. In early 2017 during a trip to Antibes, France, he became engaged to his co-host Mika Brzezinski. They married on November 24, 2018'

In [99]:
# Collect, for every annotated question, the retrieved passages that were not
# flagged `uqa_matched`, so they can be re-run through other checkpoints.
#   - question with exactly one passage: keep that passage if it is unmatched;
#   - question with several passages: keep only unmatched passages whose text
#     equals the annotated passage for that question.
c = 0  # unmatched passages kept via the single-passage branch
n = 0  # records in the file whose question is in `annotated`
not_answered = []
# JSON text is UTF-8, so decode it explicitly; the context manager closes the
# file even if a line fails to parse.
with open('../data/dpr-post-process-unified-qa-matched.jl', encoding='utf-8') as unified_f:
    for line in unified_f:
        j = json.loads(line.strip())
        # Non-annotated questions can never contribute a passage; skip early
        # instead of re-testing membership for every passage.
        if j['q'] not in annotated:
            continue
        n += 1
        mda = j['matched_dpr_answers']
        single_passage = len(mda) == 1
        # Double quotes are stripped from the passage before comparing, so they
        # must be stripped from the annotation too; otherwise an annotation
        # that contains a '"' could never compare equal.
        gold_passage = annotated[j['q']].replace('"', '')
        dpr_with_answer = []
        for ma in mda:
            if ma['uqa_matched']:
                continue
            if single_passage:
                c += 1
                dpr_with_answer.append(ma)
            elif ma['dpr_answer'].replace('\n', ' ').replace('"', '') == gold_passage:
                dpr_with_answer.append(ma)
        if dpr_with_answer:
            # Narrow the record to the passages selected above.
            j['matched_dpr_answers'] = dpr_with_answer
            not_answered.append(j)
print(c, n)

39 100


In [100]:
# Number of annotated questions that still have at least one unmatched passage.
len(not_answered)

40

In [105]:
def run_model_v2(not_answered, tokenizer, model):
    """Re-ask each unanswered question and report the newly matched ones.

    For every record, the second newline-separated line of the first passage's
    ``dpr_answer`` is appended to the question and sent through ``run_model``.
    The first returned answer is parsed with ``parse_date``; when a date is
    found and ``match_dates_based_on_precision`` accepts it against the
    record's ``a`` / ``precision`` fields, one line is printed with the
    question, gold answer, new answers, previous answers, match flag and the
    second value returned by the matcher.

    Args:
        not_answered: iterable of records with keys ``q``, ``a``,
            ``precision`` and ``matched_dpr_answers`` (a non-empty list of
            dicts holding ``dpr_answer`` and ``unified_qa_answers``).
        tokenizer: tokenizer forwarded unchanged to ``run_model``.
        model: model forwarded unchanged to ``run_model``.

    Returns:
        None. The number of new matches is printed as the final line.
    """
    hits = 0
    for item in not_answered:
        top = item['matched_dpr_answers'][0]
        passage = top['dpr_answer'].split('\n')[1]
        earlier_answers = top['unified_qa_answers']

        answers = run_model(f"{item['q']}\n{passage}", tokenizer, model)
        dates = parse_date(answers[0], nlp)
        if len(dates) == 0:
            # Nothing date-like in the answer, so there is nothing to compare.
            continue

        matched, prov = match_dates_based_on_precision(
            item['a'], item['precision'], dates[0]
        )
        if not matched:
            continue

        hits += 1
        print(item['q'], item['a'], answers, earlier_answers, matched, prov)
    print(hits)

In [106]:
# Re-ask the unanswered annotated questions with the base checkpoint; prints
# each new match followed by the total count.
run_model_v2(not_answered, base_tokenizer, base_model)

  date_obj = stz.localize(date_obj)


when did Joshua Kushner marry Karlie? 2018-10-01 ['October 18, 2018'] ['July 2018, after six years of dating'] True month
when did Benedict Cumberbatch marry Sophie Irene? 2015-02-14 ['14 february 2015'] ['15 February 2015'] True day
when did Amber Heard marry Johnny? 2015-02-03 ['february 3, 2015'] ['2015'] True day
when did Cameron Diaz marry Benji? 2015-01-05 ['January 5, 2015'] ['June 2016.'] True day
when did Tatiana Navka marry Dmitry? 2015-01-01 ['2015'] ['2000'] True year
when did Hans Heinrich Thyssen-Bornemisza marry Carmen? 1985-08-16 ['16 august 1985'] ['15 August 1985'] True day
when did Seiko Matsuda marry Masaki? 1985-01-01 ['1985'] ['1985 to 1997'] True year
when did Richard Belzer marry Harlee? 1985-01-01 ['1985'] ['1981'] True year
when did John Thaw marry Sheila? 1973-12-24 ['24 December 1973'] ['1974'] True day
when did John Smith marry Elizabeth Smith, Baroness Smith of? 1967-07-05 ['5 July 1967'] ['1995'] True day
when did Thomas Beecham, 2nd Baronet marry Shirley

In [107]:
# Same re-evaluation with the large checkpoint.
run_model_v2(not_answered, large_tokenizer, large_model)

  date_obj = stz.localize(date_obj)


when did Joshua Kushner marry Karlie? 2018-10-01 ['October 18, 2018'] ['July 2018, after six years of dating'] True month
when did Chiara Ferragni marry Fedez? 2018-09-01 ['September 1, 2018'] ['on may 6, 2017 during his concert in Verona'] True day
when did Morena Baccarin marry Ben? 2017-06-02 ['June 2, 2017'] ['2017'] True day
when did Ashley Hinshaw marry Topher? 2016-05-29 ['may 29, 2016'] ['2015'] True day
when did Benedict Cumberbatch marry Sophie Irene? 2015-02-14 ['14 february 2015'] ['15 February 2015'] True day
when did Jay Chou marry Hannah? 2015-01-01 ['2015'] ['December 2014'] True year
when did Tatiana Navka marry Dmitry? 2015-01-01 ['2015'] ['2000'] True year
when did Hans Heinrich Thyssen-Bornemisza marry Carmen? 1985-08-16 ['16 august 1985'] ['15 August 1985'] True day
when did Seiko Matsuda marry Masaki? 1985-01-01 ['1985'] ['1985 to 1997'] True year
when did Richard Belzer marry Harlee? 1985-01-01 ['1985'] ['1981'] True year
when did Richard Hell marry Patty? 1985-0

In [111]:
# Same re-evaluation with the 3b checkpoint.
run_model_v2(not_answered, threeb_tokenizer, threeb_model)

when did Joe Scarborough marry Mika? 2018-11-24 ['November 24, 2018'] ['1989'] True day
when did Joshua Kushner marry Karlie? 2018-10-01 ['October 18, 2018'] ['July 2018, after six years of dating'] True month
when did Morena Baccarin marry Ben? 2017-06-02 ['June 2, 2017'] ['2017'] True day
when did Ashley Hinshaw marry Topher? 2016-05-29 ['may 29, 2016'] ['2015'] True day
when did Benedict Cumberbatch marry Sophie Irene? 2015-02-14 ['14 February 2015'] ['15 February 2015'] True day
when did Amber Heard marry Johnny? 2015-02-03 ['february 3, 2015'] ['2015'] True day
when did Jay Chou marry Hannah? 2015-01-01 ['17 January 2015'] ['December 2014'] True year
when did Hannah Quinlivan marry Jay? 2015-01-01 ['17 January 2015'] ['December 2014.'] True year
when did Tatiana Navka marry Dmitry? 2015-01-01 ['2015'] ['2000'] True year
when did Guillaume, Hereditary Grand Duke of Luxembourg marry Stéphanie, Hereditary Grand Duchess of? 2012-10-19 ['19 October 2012'] ['26 April 2012'] True day
whe

In [9]:
def run_unified_qa(tokenizer, model, path='../data/dpr-post-process-unified-qa-matched.jl'):
    """Run a QA model over every record of a JSONL file and keep the hits.

    Each non-blank line of ``path`` is parsed as a JSON object. The record's
    passages (``matched_dpr_answers``) are tried in order: the second
    newline-separated line of ``dpr_answer`` is appended to the question
    ``q`` and sent through ``run_model``; the first returned answer is parsed
    with ``parse_date``. The first passage whose parsed date is accepted by
    ``match_dates_based_on_precision`` against the record's ``a`` /
    ``precision`` fields marks the record as answered, and the remaining
    passages of that record are skipped.

    Args:
        tokenizer: tokenizer forwarded unchanged to ``run_model``.
        model: model forwarded unchanged to ``run_model``.
        path: JSONL file to read. Defaults to the file this notebook has
            always used, so existing two-argument calls behave as before.

    Returns:
        list of the parsed records that were answered, each with
        ``unified_qa_matched`` set to ``True``. Records appear in file order.

    Raises:
        OSError: if ``path`` cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        IndexError: if a ``dpr_answer`` contains no newline.
    """
    new_answered = []
    # JSON text is UTF-8, so decode it explicitly; the context manager closes
    # the file even when the model or the parser raises mid-run.
    with open(path, encoding='utf-8') as unified_f:
        for line in unified_f:
            line = line.strip()
            if not line:
                # Tolerate blank separator lines instead of failing in json.loads.
                continue
            j = json.loads(line)
            for ma in j['matched_dpr_answers']:
                dpr_a = ma['dpr_answer'].split('\n')[1]
                unified_ans = run_model(f"{j['q']}\n{dpr_a}", tokenizer, model)
                parsed_date = parse_date(unified_ans[0], nlp)
                if len(parsed_date) == 0:
                    continue
                # The second return value is not needed here.
                matched, _ = match_dates_based_on_precision(
                    j['a'], j['precision'], parsed_date[0]
                )
                if matched:
                    j['unified_qa_matched'] = True
                    new_answered.append(j)
                    # One correct passage is enough for this question.
                    break
    return new_answered

In [None]:
# Run the large checkpoint over every record of the matched JSONL file and keep
# the records for which it produces a date that matches the gold answer.
large_answers  = run_unified_qa(large_tokenizer, large_model)

  date_obj = stz.localize(date_obj)
