In [1]:
from contract import ContractPipeline
from ner.clf_ner import CLF_NER
from ner.regex_ner import RegexNER
from normalization.date_normalizer import DateNorm
from normalization.gov_normalizer import GovNorm
from normalization.lang_normalizer import LangNorm
from classification.transformer_classifier import TransformersClassifier
from classification.sklearn_classifier import SklearnClassifier
import pandas as pd

  hasattr(torch, "has_mps")
  and torch.has_mps  # type: ignore[attr-defined]
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Create the pipeline and register the governing-law extractor as the
# "governing_law" pipe.
contract_pipeline = ContractPipeline()

# The keyword list previously contained the misspelling "jurisdicition", which
# can never equal the word "jurisdiction" in a contract.
# NOTE(review): presumably CLF_NER uses these keywords to pre-select candidate
# text before classification — confirm in ner/clf_ner.py.
gov_law_ner = CLF_NER(
    keywords=["law", "jurisdiction", "governing"],
    model="sguarnaccio/gov_law_clf_ner",
    normalizer=GovNorm(),
)
contract_pipeline.add_pipe(name="governing_law", component=gov_law_ner)




In [3]:
# Regex rules for the effective-date extractor.
#
# `effective_date_rules` is a list of (pattern, label) tuples; every rule uses
# the label "EFFECTIVE_DATE". The patterns are assembled from the shared
# fragments below so the month list, day, ordinal suffix and year are written
# once instead of being copy-pasted into every rule.

# Lead-in shared by every rule: "effective" or "dated", one space, a lazily
# repeated "as of"/"on", then one more literal space.
# NOTE(review): because of that trailing literal space, text without a
# preposition only matches when two spaces follow the lead-in word, so
# "dated May 27, 2020" is NOT matched — confirm whether that is intended.
_DATE_PREFIX = r"(?:effective|dated) (?:as of|on)*? "

# Month names: full and three-letter forms, title case and upper case.
_MONTH = (
    r"(January|February|March|April|May|June|July|August|September|October|November|December"
    r"|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"
    r"|JANUARY|FEBRUARY|MARCH|APRIL|MAY|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER"
    r"|JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)"
)

# Day of month 1-31, not embedded in a longer run of digits.
_DAY = r"((?<!\d)([1-9]|([12][0-9])|(3[01]))(?!\d))"

# Optional ordinal marker chosen from the last digit of the day (st/nd/rd/th),
# or a stray quote / degree sign.
# NOTE(review): the lookbehinds only inspect the last digit, so "11th", "12th"
# and "13th" are not accepted by this fragment.
_ORDINAL = r"((?<=1)st|(?<=2)nd|(?<=3)rd|(?<=[0456789])th|\"|°)?"

# Four-digit year starting with 1 or 2, not embedded in a longer run of digits.
_YEAR = r"(?<!\d)([12][0-9]{3})(?!\d)"

# A comma or full stop, with optional surrounding whitespace.
_COMMA_OR_DOT = r"\s*[,\.]\s*"

# Permissive catch-all date: optional day, optional month name, then one or
# more short digit groups ending in a 2-4 digit group. The whole date is
# capture group 1.
# Fix: the last month alternative was misspelled "Decmebter".
# NOTE(review): "[-/th|st|nd|rd\s]" is a character class (single characters,
# including a literal "|"), not an alternation of suffixes; left as written
# because changing it would alter what the rule matches.
_LOOSE_DATE = (
    r"((?:\d{1,2}[-/th|st|nd|rd\s]*)?"
    r"(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec"
    r"|January|February|March|April|May|June|July|August|September|October|November|December)?"
    r"[a-z\s,.]*(?:\d{1,2}[-/th|st|nd|rd)\s,]*)+(?:\d{2,4})+)"
)

_EFFECTIVE_DATE_PATTERNS = [
    # Rule 1: permissive catch-all, e.g. "dated as of May 27, 2020".
    _DATE_PREFIX + _LOOSE_DATE,
    # Rule 2: day month year, e.g. "dated as of 27 May 2020".
    _DATE_PREFIX + _DAY + _ORDINAL + r"\s*" + _MONTH + r"\s*" + _YEAR,
    # Rule 3: day month, year, e.g. "dated as of 27 May, 2020".
    _DATE_PREFIX + _DAY + _ORDINAL + r"\s*" + _MONTH + _COMMA_OR_DOT + _YEAR,
    # Rule 4: "<day> day of <month>, <year>", e.g. "effective as of 1st day of June, 2021".
    _DATE_PREFIX + _DAY + _ORDINAL + r"\s*(day)\s*(of)\s*" + _MONTH + _COMMA_OR_DOT + _YEAR,
    # Rule 5: month day, year, e.g. "dated as of May 27, 2020".
    _DATE_PREFIX + _MONTH + r"\s*" + _DAY + _ORDINAL + _COMMA_OR_DOT + _YEAR,
    # Rule 6: "<day> of <month>, <year>", e.g. "dated as of 27 of May, 2020".
    # Fix: this rule used to require TWO spaces after the preposition, so
    # normally spaced text never matched.
    _DATE_PREFIX + _DAY + _ORDINAL + r"\s*of\s*" + _MONTH + _COMMA_OR_DOT + _YEAR,
    # Rule 7: month, year without a day, e.g. "effective as of May, 2020".
    # Fix: same double-space problem as rule 6.
    _DATE_PREFIX + _MONTH + _COMMA_OR_DOT + _YEAR,
]

effective_date_rules = [(pattern, "EFFECTIVE_DATE") for pattern in _EFFECTIVE_DATE_PATTERNS]

# Build the regex-based effective-date extractor, with DateNorm supplied as its
# normalizer, and register it as the "effective_date" pipe.
eff_date_ner = RegexNER(
    rules=effective_date_rules,
    normalizer=DateNorm(),
)
# NOTE(review): the rules are passed to the constructor and then loaded again
# here — confirm whether both steps are required.
eff_date_ner.load_raw_rules(effective_date_rules)
contract_pipeline.add_pipe(
    name="effective_date",
    component=eff_date_ner,
)

In [4]:
# Disabled alternative: transformer-based document-type classifier.
# The active notebook registers a SklearnClassifier under the same variable and
# pipe name ("document_type_classifier") instead.
# NOTE(review): both variants share one pipe name — presumably only one of them
# should be registered at a time.
# document_type_classifier = TransformersClassifier(
#     model="sguarnaccio/document_type_identification",
#     attribute="document_type",
#     method = "lines",
#     positive_class="LABEL_1"
# )
# contract_pipeline.add_pipe(
#   name="document_type_classifier",
#    component=document_type_classifier,
#     params={"text_range":(0,15)})

In [5]:
# Document-type classifier backed by a pretrained model file; its result is
# stored under the "document_type" attribute.
# NOTE(review): the model is a .pkl file — presumably unpickled by
# SklearnClassifier, so only load files from a trusted source.
document_type_classifier = SklearnClassifier(
    model="./classification/pretrained/document_type_model.pkl",
    method="lines",
    positive_class=1,
    attribute="document_type",
)

# The loaded object is a mapping keyed by estimator name; keep only the
# estimator stored under the "Linear SVM" entry.
doc_type_bundle = document_type_classifier.model
document_type_classifier.model = doc_type_bundle["Linear SVM"]["model"]

contract_pipeline.add_pipe(
    name="document_type_classifier",
    component=document_type_classifier,
    params={"text_range": (0, 15)},
)

In [6]:
# Language classifier backed by a pretrained model file; its result is stored
# under the "language" attribute, with LangNorm supplied as the normalizer.
language_classifier = SklearnClassifier(
    model="./classification/pretrained/document_language_model.pkl",
    attribute="language",
    method="lines",
    positive_class="multi",
    normalizer=LangNorm(),
)

# The loaded object is a mapping keyed by estimator name; pull out the
# "Linear SVM" entry and attach its estimator and label encoder.
model = language_classifier.model["Linear SVM"]
language_classifier.model = model["model"]
language_classifier.label_encoder = model["label_encoder"]

# Insert ahead of the "tokenizer" pipe rather than appending at the end.
contract_pipeline.add_pipe(
    name="language_classifier",
    component=language_classifier,
    before="tokenizer",
    params={"text_range": (0, 10)},
)


In [7]:
# Disabled alternative: transformer-based language classifier.
# The active notebook registers a SklearnClassifier under the same variable and
# pipe name ("language_classifier") instead.
# NOTE(review): unlike the active version, this call passes no `before=`
# argument and uses text_range (0,15) — confirm before re-enabling.
# language_classifier = TransformersClassifier(
#     model="papluca/xlm-roberta-base-language-detection",
#     attribute="language",
#     method = "lines",
#     positive_class="multi",
#     normalizer=LangNorm()
# )
# contract_pipeline.add_pipe(
#   name="language_classifier",
#    component=language_classifier,
#     params={"text_range":(0,15)})

In [8]:
# Display the registered pipeline entries (component, name, optional params) to
# check where each add_pipe call landed, e.g. that "language_classifier" sits
# ahead of "tokenizer".
contract_pipeline.pipeline

[{'component': <utils.clean_text.TextCleaner at 0x18bb948a090>,
  'name': 'clean_text',
  'params': {'lower': False,
   'remove_num': False,
   'add_stop_words': None,
   'remove_stop_words': None}},
 {'component': <classification.sklearn_classifier.SklearnClassifier at 0x18bd4944b50>,
  'name': 'language_classifier',
  'params': {'text_range': (0, 10)}},
 {'component': <tokenization.tokenizer.Tokenizer at 0x18bb8e63150>,
  'name': 'tokenizer'},
 {'component': <tokenization.sentence.SentenceTokenizer at 0x18bb925ccd0>,
  'name': 'sentence_tokenizer'},
 {'component': <tokenization.segments.SectionSegmenter at 0x18bb9205050>,
  'name': 'section_segmenter'},
 {'component': <definitions.definitions.DefinitionFinder at 0x18bb936c350>,
  'name': 'definition_finder'},
 {'component': <ner.clf_ner.CLF_NER at 0x18bb925f1d0>,
  'name': 'governing_law'},
 {'component': <ner.regex_ner.RegexNER at 0x18bec866c90>,
  'name': 'effective_date'},
 {'component': <classification.sklearn_classifier.SklearnC

In [49]:
# Read the sample contract (UTF-8) that is run through the pipeline.
with open("./tests/test6.txt", encoding="utf-8") as contract_file:
    text = contract_file.read()


In [50]:
# Run the assembled pipeline on the raw text; the cells that follow read
# segments, glossary, ents, language, document_type and text from the result.
doc = contract_pipeline(text)

In [51]:
# Flatten the document segments into one row each, drop rows in which every
# column is empty, and write the table to an Excel file for manual review.
segment_rows = [
    (segment.section, segment.subsection, segment.title, segment.text)
    for segment in doc.segments
]
segments_frame = pd.DataFrame(
    segment_rows,
    columns=["Section", "Subsection", "Title", "Text"],
)
segments_frame.dropna(how="all").to_excel('./tests/test.xlsx')

In [52]:
# Tabulate the glossary: one row per entry with its term, definition and the
# phrase attribute of that entry.
glossary_rows = [
    (entry.term, entry.definition, entry.phrase)
    for entry in doc.glossary
]
pd.DataFrame(glossary_rows, columns=["Term", "Definition", "Phrase"])

Unnamed: 0,Term,Definition,Phrase
0,Covenant Relief Period,the period from the First Amendment Effective ...,"The Borrower, the Lenders and the Administrati..."
1,First Amendment,the period from the First Amendment Effective ...,"The Borrower, the Lenders and the Administrati..."
2,First Amendment Effective Date,the period from the First Amendment Effective ...,"The Borrower, the Lenders and the Administrati..."
3,Short Term Debt,", as of any date with respect to any Person, a...",""""""" Short Term Debt"" means, as of any date wit..."
4,Consolidated EBITDA,", for any period, Consolidated Operating Incom...","""( b) The definition of"" Consolidated EBITDA"" ..."
5,Pricing Grid,as follows: Level Index Debt Ratings Applicabl...,"( c) The definition of"" Pricing Grid"" in the C..."


In [53]:
# Print each extracted entity as "<normalized value> <label>", one per line.
entity_rows = ((ent.normalized, ent.label) for ent in doc.ents)
for normalized_value, label in entity_rows:
    print(normalized_value, label)

United States, New York gov_law
2020-05-27 EFFECTIVE_DATE
2020-03-17 EFFECTIVE_DATE
2020-05-27 EFFECTIVE_DATE


In [54]:
# Show the `language` attribute, the attribute name configured on the
# language_classifier pipe.
doc.language

'English'

In [55]:
# Show the `document_type` attribute, the attribute name configured on the
# document_type_classifier pipe.
doc.document_type

'FIRST AMENDMENT'

In [56]:
# Print the document's text attribute in full for manual inspection.
# NOTE(review): this dumps the entire contract into the cell output; consider
# printing a slice before sharing the notebook.
print(doc.text)

EX-10.243 14 d911381 dex10243.htm EX-10.243
Exhibit 10.243
Execution Version
FIRST AMENDMENT
FIRST AMENDMENT (this "Amendment"), dated as of May 27, 2020, to the 364-Day Credit Agreement, dated as of March 17, 2020, among FEDEX CORPORATION, a Delaware corporation (the "Borrower"), the several lenders party thereto (the "Lenders"), JPMORGAN CHASE BANK, N.A., as administrative agent (in such capacity, the "Administrative Agent") and each other party thereto (as amended, restated, supplemented or otherwise modified prior to the date hereof, the "Credit Agreement").
W I T N E S S E T H :
WHEREAS, the Borrower, the Lenders and the Administrative Agent originally entered into the Credit Agreement, pursuant to which the Lenders made and may make certain loans and other extensions of credit to the Borrower;
WHEREAS, the Borrower and the Lenders wish to make certain amendments to the Credit Agreement as described herein; and
WHEREAS, in furtherance thereof, each party hereto hereby consents to 