In [11]:
#packages
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, T5Config, T5ForConditionalGeneration
from torch import nn

#local utilities
import extraction as ex
import indexing
import t5mod
import utils
from samples import text_samples

In [12]:
# Load the (model, tokenizer) pair used by every masking call below.
# t5mod.get_model receives two paths; presumably the first is the fine-tuned
# weights file and the second the T5 config -- TODO confirm against t5mod.
model, tokenizer = t5mod.get_model('model/t5mask_improved_2.pt', 'model/t5_config.json')

In [13]:
def fix_masks(unmasked, masked):
    """Rectify the placeholders found in ``masked`` and rewrite the text to match.

    The pairs extracted by ``ex.extract_masked_info`` are passed through
    ``indexing.rectify``; the first element of every rectified pair is then
    handed to ``ex.replace_masks`` as the replacement for the masks in
    ``masked``.

    This is a best-effort step: if rectification or replacement raises an
    ordinary ``Exception``, the error is reported on stdout and the text and
    pairs are returned exactly as they were before rectification, so the two
    return values always describe the same text.

    Args:
        unmasked: The original text.
        masked: The masked version of ``unmasked``.

    Returns:
        A ``(text, pairs)`` tuple: the text with rectified placeholders and
        the rectified pairs on success, or ``masked`` and the pairs as
        extracted when the rectification step failed.
    """
    pairs = ex.extract_masked_info(unmasked, masked)
    try:
        rectified = indexing.rectify(pairs)
        replacements = [pair[0] for pair in rectified]
        fixed = ex.replace_masks(masked, replacements)
    except Exception as exc:
        # Only ordinary errors are handled here: a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and make the cell impossible
        # to interrupt. The failure is reported instead of being hidden.
        print(f'Exception occurred while fixing masks: {exc!r}')
        # Fall back to the un-rectified text *and* pairs together so that the
        # placeholders in the pairs still match the returned text.
        return masked, pairs
    return fixed, rectified

def mask(unmasked, model, tokenizer):
    """Run the full masking pipeline on one text.

    Steps, in order: model inference via ``t5mod.infer``, repair of the raw
    prediction against the input via ``utils.correct_corrupted``, and
    placeholder rectification via ``fix_masks``.

    Args:
        unmasked: The text to mask.
        model: Model object forwarded to ``t5mod.infer``.
        tokenizer: Tokenizer object forwarded to ``t5mod.infer``.

    Returns:
        The ``(text, pairs)`` tuple produced by ``fix_masks``.
    """
    # 1) raw model output for the input text
    raw_prediction = t5mod.infer(model, tokenizer, unmasked)
    # 2) repair the prediction using the original text as reference
    repaired = utils.correct_corrupted(unmasked, raw_prediction)
    # 3) rectify placeholders and collect the pairs
    masked_text, placeholder_pairs = fix_masks(unmasked, repaired)
    return masked_text, placeholder_pairs
    

In [14]:
# Sample texts supplied by the local `samples` module; indexed by position below.
texts = text_samples()

In [39]:
# Choose the sample to run through the masking pipeline and display it.
# Index 3 is the Monet biography shown in the output below.
cur_text = texts[3]
cur_text

'Oscar-Claude Monet ; (14 November 1840 – 5 December 1926) was a French painter and founder of impressionist painting who is seen as a key precursor to modernism, especially in his attempts to paint nature as he perceived it.(1) During his long career, he was the most consistent and prolific practitioner of impressionism\'s philosophy of expressing one\'s perceptions before nature, especially as applied to plein air (outdoor) landscape painting.(2) The term "Impressionism" is derived from the title of his painting Impression, soleil levant, exhibited in 1874 (the "exhibition of rejects") initiated by Monet and his associates as an alternative to the Salon. Monet was raised in Le Havre, Normandy, and became interested in the outdoors and drawing from an early age. Although his mother, Louise-Justine Aubrée Monet, supported his ambitions to be a painter, his father, Claude-Adolphe, disapproved and wanted him to pursue a career in business. He was very close to his mother, but she died in

In [21]:
# Alternative short sample for quick manual checks; uncomment to override cur_text:
#cur_text = "Aizada Marat is originally from Kyrgyzstan. Ms. Marat currently lives in California."

In [40]:
# Run the masking pipeline on the selected text: `span` receives the masked
# text and `pairs` the list of placeholder/original pairs returned by mask().
span, pairs = mask(cur_text, model, tokenizer)

In [41]:
# Display the masked text (detected entities appear as [[...]] placeholders).
span

'[[FULL_NAME_1]] ; ([[DOB]] – [[DOB]]) was a French painter and founder of impressionist painting who is seen as a key precursor to modernism, especially in his attempts to paint nature as he perceived it.(1) During his long career, he was the most consistent and prolific practitioner of impressionism\'s philosophy of expressing one\'s perceptions before nature, especially as applied to plein air (outdoor) landscape painting.(2) The term "Impressionism" is derived from the title of his painting Impression, [[FULL_NAME_2]], exhibited in [[DOB]] (the "exhibition of rejects") initiated by [[FIRST_NAME_3]] and his associates as an alternative to the Salon. [[FIRST_NAME_3]] was raised in [[FULL_NAME_4]], Normandy, and became interested in the outdoors and drawing from an early age. Although his mother,[[FULL_NAME_5]], supported his ambitions to be a painter, his father, [[FULL_NAME_6]], disapproved and wanted him to pursue a career in business. He was very close to his mother, but she died 

In [42]:
# Display the (placeholder, original text) pairs produced alongside `span`.
pairs

[('[[FULL_NAME_1]]', 'Oscar-Claude Monet'),
 ('[[DOB]]', '14 November 1840'),
 ('[[DOB]]', '5 December 1926'),
 ('[[FULL_NAME_2]]', 'soleil levant'),
 ('[[DOB]]', '1874'),
 ('[[FIRST_NAME_3]]', 'Monet'),
 ('[[FIRST_NAME_3]]', 'Monet'),
 ('[[FULL_NAME_4]]', 'Le Havre'),
 ('[[FULL_NAME_5]]', ' Louise-Justine Aubrée Monet'),
 ('[[FULL_NAME_6]]', 'Claude-Adolphe'),
 ('[[DOB]]', 'January 1857'),
 ('[[FULL_NAME_7]]', 'Marie-Jeanne Lecadre'),
 ('[[FULL_NAME_8]]', 'Charles Gleyre'),
 ('[[FULL_NAME_9]]', 'Auguste Renoir'),
 ('[[FULL_NAME_10]]', 'Eugène Boudin'),
 ('[[FIRST_NAME_3]]', 'Monet'),
 ('[[FIRST_NAME_11]]', 'Giverny')]

In [37]:
# Pass the masked text and its pairs to ex.recover; presumably this puts the
# original values back in place of the placeholders -- TODO confirm in extraction.
recovered = ex.recover(span, pairs)

In [38]:
# Display the recovered text.
# NOTE(review): the saved output below is stale. This cell was last executed
# (In [37]/[38]) before `span` and `pairs` were recomputed (In [40]-[42]), so
# it shows a different text than the one displayed above. Restart the kernel
# and run all cells to refresh it.
recovered

'Charlie, Marnie and , Charlie Siang was his name, Siang born -- 12/21/2002, and Marnie Martindale (June 22, 1995) Martindale and Mr. Godfried the baker.  Godfried was not related to Marnie Martindale'