In [1]:
#packages
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, T5Config, T5ForConditionalGeneration
from torch import nn

#local utilities
import extraction as ex
import indexing
import t5mod
import utils

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the model/tokenizer pair through the project helper `t5mod.get_model`.
# Judging by the file names, the first path is presumably the fine-tuned
# checkpoint and the second the T5 config -- TODO confirm against t5mod.
MODEL_WEIGHTS_PATH = 'model/t5mask_improved_2.pt'
MODEL_CONFIG_PATH = 'model/t5_config.json'
model, tokenizer = t5mod.get_model(MODEL_WEIGHTS_PATH, MODEL_CONFIG_PATH)

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-large automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [23]:
def fix_masks(unmasked, masked):
    """Rectify the mask labels found in `masked` and rewrite the text to match.

    The masked spans are extracted with `ex.extract_masked_info`, passed through
    `indexing.rectify`, and the first element of every rectified pair is
    substituted back into the text with `ex.replace_masks`.

    This is a best-effort step: if rectification or replacement fails, the
    failure is reported on stdout and the inputs are handed back unchanged.

    Args:
        unmasked: the original text.
        masked: the masked version of that text.

    Returns:
        A `(text, pairs)` tuple.
        On success: the rewritten text and the rectified pairs.
        On failure: the original `masked` text and the object returned by
        `ex.extract_masked_info`, so the text and the pairs are never returned
        in a half-updated combination.
    """
    extracted = ex.extract_masked_info(unmasked, masked)
    try:
        rectified = indexing.rectify(extracted)
        replacements = [pair[0] for pair in rectified]
        fixed_text = ex.replace_masks(masked, replacements)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the notebook, and say what
        # actually went wrong instead of printing a generic message.
        print(f'Exception Occurred in fix_masks: {type(exc).__name__}: {exc}')
        return masked, extracted
    return fixed_text, rectified

def mask(unmasked, model, tokenizer):
    """Run the whole masking pipeline on `unmasked`.

    Steps, in order:
      1. `t5mod.infer` produces the model's masked prediction.
      2. `utils.correct_corrupted` post-processes that prediction against the
         original text.
      3. `fix_masks` rectifies the mask labels.

    Args:
        unmasked: the text to mask.
        model: model object forwarded to `t5mod.infer`.
        tokenizer: tokenizer object forwarded to `t5mod.infer`.

    Returns:
        The `(text, masks)` tuple produced by `fix_masks`.
    """
    raw_prediction = t5mod.infer(model, tokenizer, unmasked)
    repaired_prediction = utils.correct_corrupted(unmasked, raw_prediction)
    return fix_masks(unmasked, repaired_prediction)
    

In [3]:
# Sample input 1: an encyclopedia-style passage about Claude Monet. It contains
# bracketed citation markers ([1], [2]), escaped double quotes, an en dash and
# accented characters.
txt1 = "Oscar-Claude Monet ; (14 November 1840 – 5 December 1926) was a French painter and founder of impressionist painting who is seen as a key precursor to modernism, especially in his attempts to paint nature as he perceived it.[1] During his long career, he was the most consistent and prolific practitioner of impressionism's philosophy of expressing one's perceptions before nature, especially as applied to plein air (outdoor) landscape painting.[2] The term \"Impressionism\" is derived from the title of his painting Impression, soleil levant, exhibited in 1874 (the \"exhibition of rejects\") initiated by Monet and his associates as an alternative to the Salon. Monet was raised in Le Havre, Normandy, and became interested in the outdoors and drawing from an early age. Although his mother, Louise-Justine Aubrée Monet, supported his ambitions to be a painter, his father, Claude-Adolphe, disapproved and wanted him to pursue a career in business. He was very close to his mother, but she died in January 1857 when he was sixteen years old, and he was sent to live with his childless, widowed but wealthy aunt, Marie-Jeanne Lecadre. He went on to study at the Académie Suisse, and under the academic history painter Charles Gleyre, where he was a classmate of Auguste Renoir. His early works include landscapes, seascapes, and portraits, but attracted little attention. A key early influence was Eugène Boudin who introduced him to the concept of plein air painting. From 1883, Monet lived in Giverny, also in northern France, where he purchased a house and property and began a vast landscaping project, including a water-lily pond."

In [4]:
# Sample input 2: a short encyclopedia-style passage about Barack Obama
# (a name, a birth date and several year ranges).
txt2 = "Barack Obama (born August 4, 1961) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. Obama previously served as a U.S. senator representing Illinois from 2005 to 2008, as an Illinois state senator from 1997 to 2004, and as a civil rights lawyer and university lecturer."

In [24]:
# Run the full masking pipeline on the Obama sample. `span` receives the text
# and `pairs` the mask information that mask() returns.
span, pairs = mask(txt2, model, tokenizer)

[[FULL_NAME_1]] (born [[DOB]]) is an American politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president. [[LAST_NAME_1]] previously served as a U.S. senator representing Illinois from 2005 to 2008, as an Illinois state senator from 1997 to 2004, and as a civil rights lawyer and university lecturer.


In [14]:
# Display the masked text.
# NOTE(review): the saved output under this cell is the masked Monet passage
# (txt1), but the only visible assignment to `span` masks txt2, and the
# execution counts are out of order -- the output looks stale. Re-run with
# Restart Kernel -> Run All before relying on it.
span

'[[FULL_NAME_1]] ; ([[DOB]] – [[DOB]]) was a French painter and founder of impressionist painting who is seen as a key precursor to modernism, especially in his attempts to paint nature as he perceived it.(1) During his long career, he was the most consistent and prolific practitioner of impressionism\'s philosophy of expressing one\'s perceptions before nature, especially as applied to plein air (outdoor) landscape painting.(2) The term "Impressionism" is derived from the title of his painting Impression, [[FULL_NAME_2]], exhibited in [[DOB]] (the "exhibition of rejects") initiated by [[FIRST_NAME_1]] and his associates as an alternative to the Salon. [[FIRST_NAME_1]] was raised in [[FULL_NAME_3]], Normandy, and became interested in the outdoors and drawing from an early age. Although his mother,[[FULL_NAME_1]], supported his ambitions to be a painter, his father, [[FULL_NAME_2]], disapproved and wanted him to pursue a career in business. He was very close to his mother, but she died 

In [6]:
# Raw model prediction for the Monet sample, before utils.correct_corrupted or
# fix_masks are applied.
masked = t5mod.infer(model, tokenizer, txt1)

In [9]:
# Inspect the raw prediction returned by t5mod.infer for txt1.
masked

'[[FULL_NAME_1]] ; ([[DOB]] – [[DOB]]) was a French painter and founder of impressionist painting who is seen as a key precursor to modernism, especially in his attempts to paint nature as he perceived it.(1) During his long career, he was the most consistent and prolific practitioner of impressionism\'s philosophy of expressing one\'s perceptions before nature, especially as applied to plein air (outdoor) landscape painting.(2) The term "Impressionism" is derived from the title of his painting Impression, [[FULL_NAME_2]], exhibited in [[DOB]] (the "exhibition of rejects") initiated by [[FIRST_NAME_1]] and his associates as an alternative to the Salon. [[FIRST_NAME_1]] was raised in [[FULL_NAME_3]], Normandy, and became interested in the outdoors and drawing from an early age. Although his mother,[[FULL_NAME_1]], supported his ambitions to be a painter, his father, [[FULL_NAME_2]], disapproved and wanted him to pursue a career in business. He was very close to his mother, but she died 

In [10]:
# Normalise punctuation spacing in both the source text and the raw prediction.
# NOTE(review): txt1 is overwritten in place, so re-running this cell feeds the
# already-normalised text back through utils.rm_space_b4_punc.
txt1, output = utils.rm_space_b4_punc(txt1), utils.rm_space_b4_punc(masked)
# Feed the normalised prediction (`output`) into the correction step. Passing
# the raw `masked` here instead would throw away the normalised prediction
# computed on the line above and compare a normalised source against an
# un-normalised prediction.
# NOTE(review): confirm utils.correct_corrupted expects both arguments to be
# normalised the same way.
output = utils.correct_corrupted(txt1, output)

In [11]:
# Show txt1 (reassigned from utils.rm_space_b4_punc in the previous cell) next
# to the result of utils.correct_corrupted.
txt1, output

('Oscar-Claude Monet; (14 November 1840 – 5 December 1926) was a French painter and founder of impressionist painting who is seen as a key precursor to modernism, especially in his attempts to paint nature as he perceived it.(1) During his long career, he was the most consistent and prolific practitioner of impressionism\'s philosophy of expressing one\'s perceptions before nature, especially as applied to plein air (outdoor) landscape painting.(2) The term "Impressionism" is derived from the title of his painting Impression, soleil levant, exhibited in 1874 (the "exhibition of rejects") initiated by Monet and his associates as an alternative to the Salon. Monet was raised in Le Havre, Normandy, and became interested in the outdoors and drawing from an early age. Although his mother, Louise-Justine Aubrée Monet, supported his ambitions to be a painter, his father, Claude-Adolphe, disapproved and wanted him to pursue a career in business. He was very close to his mother, but she died in

In [None]:
# NOTE(review): `a` is not assigned in any visible cell and this cell has no
# execution count; on a fresh kernel it raises NameError. TODO: define `a` or
# drop this scratch cell.
a

In [None]:
# NOTE(review): neither `a` nor `b` is assigned in any visible cell, so this
# call raises NameError on a fresh kernel. TODO: pass the intended arguments to
# ex.recover (its expected inputs are not visible from this notebook) or drop
# this scratch cell.
ex.recover(a,b)