<a href="https://colab.research.google.com/github/velusrinath/GenAIPoc/blob/main/Gen_AI_Backend_Integration%20v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Variable declaration

In [None]:
# Fields shared by every sub-type of a given agreement type.
common_variables = {
    "rental_agreement": [
        "agreement_date",
        "landlord_name",
        "landlord_address",
        "tenant_name",
        "tenant_address",
        "property_address",
        "start_date",
        "end_date",
        "monthly_rent_amount",
        "security_deposit_amount",
        "list_of_utilities",
        "notice_period",
        "state",
    ],
}

# Additional fields required only by a particular agreement sub-type.
specific_variables = {
    "Residential Lease": ["due_date"],
    "Commercial Lease": ["permitted_use", "renewal_terms"],
}

In [None]:
# Human-readable label for every field key that can appear in an agreement.
rental_headers = dict(
    agreement_date="Agreement Date",
    landlord_name="Landlord Name",
    landlord_address="Landlord Address",
    tenant_name="Tenant Name",
    tenant_address="Tenant Address",
    property_address="Property Address",
    start_date="Start Date",
    end_date="End Date",
    due_date="Due Date",
    monthly_rent_amount="Monthly Rent Amount",
    security_deposit_amount="Security Deposit Amount",
    list_of_utilities="List of Utilities",
    notice_period="Notice Period",
    state="State",
    permitted_use="Permitted Use",
    renewal_terms="Renewal Terms",
)

# Retrieving specific variables for each agreement type

In [None]:
# Retrieving fields required for agreement

def get_fields(agreement_type, agreement_sub_type):
    """Return the fields required for an agreement, without duplicates.

    Args:
        agreement_type: Key into ``common_variables`` (e.g. "rental_agreement").
        agreement_sub_type: Key into ``specific_variables``
            (e.g. "Residential Lease").

    Returns:
        list: The common fields followed by the sub-type specific fields,
        each appearing once, in the order they are declared.

    Raises:
        KeyError: If either key is not present in its mapping.
    """
    combined = common_variables[agreement_type] + specific_variables[agreement_sub_type]
    # dict.fromkeys drops duplicates while keeping first-seen order; going
    # through a plain set would return the fields in an arbitrary order that
    # can differ from one run to the next.
    return list(dict.fromkeys(combined))

In [None]:
# Example: list every field required for a residential lease.
get_fields("rental_agreement", "Residential Lease")

['landlord_address',
 'list_of_utilities',
 'state',
 'due_date',
 'landlord_name',
 'start_date',
 'monthly_rent_amount',
 'tenant_address',
 'notice_period',
 'security_deposit_amount',
 'property_address',
 'tenant_name',
 'agreement_date',
 'end_date']

# Prompt Engineering

## Inputs from Front End

In [None]:
# Example front-end input for a residential lease.
# agreement_name is read as a global by create_instruction.
agreement_name = "Residential Lease" # or "Commercial Lease"

# Field values keyed by the field names used in rental_headers.
user_values = {
    #'agreement_name' : "Residential Lease",
    'agreement_date': "April 19, 2024",
    'landlord_name': "Valerie Murphy",
    'landlord_address': "5647 Western Sunny Apt. 283\n East Holly, TN 32011",
    'tenant_name': "Mackenzie Baker",
    'tenant_address': "42408 Judy Divide Suite 732\n Porterstad, OK 20877",
    'property_address': "56607 Danielle Spring\n North Deborah, MS 72045",
    'start_date': "August 03, 2024",
    'end_date': "September 20, 2027",
    'monthly_rent_amount': "8083",
    'security_deposit_amount': "3358",
    'list_of_utilities': "Electricity, gas, water, sewer, internet, security system",
    'notice_period': "27",
    'state': "Georgia",
    'due_date': "5",
}

In [None]:
# Alternative example input for a commercial lease.
# Running this cell replaces the agreement_name and user_values assigned in
# the previous cell; run only the cell for the agreement you want to generate.
agreement_name = "Commercial Lease"

# Field values keyed by the field names used in rental_headers.
user_values = {
    #'agreement_name' : "Commercial Lease",
    'agreement_date': "April 19, 2024",
    'landlord_name': "Valerie Murphy",
    'landlord_address': "5647 Western Sunny Apt. 283\n East Holly, TN 46674",
    'tenant_name': "Mackenzie Baker",
    'tenant_address': "26485 Judy Divide Suite 732\n Porterstad, OK 56867",
    'property_address': "85736 Danielle Spring\n North Deborah, MS 92724",
    'start_date': "August 03, 2024",
    'end_date': "September 20, 2027",
    'monthly_rent_amount': "8083",
    'security_deposit_amount': "3358",
    'list_of_utilities': "Electricity, gas, water, sewer, internet, security system",
    'notice_period': "27",
    'state': "Georgia",
    'permitted_use': "operating a manufacturing facility for a clothing company",
    'renewal_terms': "not available for subleased spaces",
}

## Prompt Design

In [None]:
# Labels used when building the prompt from the user-supplied values.
# Values coming from the front end must be keyed by the field names below.
# NOTE: this repeats the rental_headers mapping from the "Variable declaration"
# section; the two definitions must be kept in sync.
rental_headers = dict(
    agreement_date="Agreement Date",
    landlord_name="Landlord Name",
    landlord_address="Landlord Address",
    tenant_name="Tenant Name",
    tenant_address="Tenant Address",
    property_address="Property Address",
    start_date="Start Date",
    end_date="End Date",
    due_date="Due Date",
    monthly_rent_amount="Monthly Rent Amount",
    security_deposit_amount="Security Deposit Amount",
    list_of_utilities="List of Utilities",
    notice_period="Notice Period",
    state="State",
    permitted_use="Permitted Use",
    renewal_terms="Renewal Terms",
)

In [None]:
def create_instruction(user_values):
    """Build the instruction prompt that is sent to the language model.

    Reads the notebook-level ``agreement_name`` and ``rental_headers``.

    Args:
        user_values: Dict mapping field names (keys of ``rental_headers``)
            to the values entered by the user.

    Returns:
        str: A header line naming the agreement, followed by one
        "<Label> : <value> " line per entry of ``user_values``.

    Raises:
        KeyError: If a field name has no entry in ``rental_headers``.
    """
    header = f"### Instruction : Generate a {agreement_name} agreement for a residential property with the following terms:\n"
    term_lines = [
        f"{rental_headers[field]!s} : {value} \n"
        for field, value in user_values.items()
    ]
    return header + "".join(term_lines)

In [None]:
# Build the prompt from the front-end values and display it.
llm_prompt = create_instruction(user_values)
llm_prompt

'### Instruction : Generate a Residential Lease agreement for a residential property with the following terms:\nAgreement Date : April 19, 2024 \nLandlord Name : Valerie Murphy \nLandlord Address : 5647 Western Sunny Apt. 283\n East Holly, TN 32011 \nTenant Name : Mackenzie Baker \nTenant Address : 42408 Judy Divide Suite 732\n Porterstad, OK 20877 \nProperty Address : 56607 Danielle Spring\n North Deborah, MS 72045 \nStart Date : August 03, 2024 \nEnd Date : September 20, 2027 \nMonthly Rent Amount : 8083 \nSecurity Deposit Amount : 3358 \nList of Utilities : Electricity, gas, water, sewer, internet, security system \nNotice Period : 27 \nState : Georgia \nDue Date : 5 \n'

# Agreement Generation

## Installing and Importing required libraries

In [None]:
!pip install transformers -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m38.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

## Connecting to Hugging face

In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Never embed the access token in the notebook: it is taken from the HF_TOKEN
# environment variable, or requested interactively when that is not set.
# A read-access token is sufficient.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token (read access): ")
login(token=hf_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# Hugging Face Hub repository of the model used for agreement generation.
model_name = "velusrinath/gpt2_rental_mini_2"
# The tokenizer is loaded from the base "gpt2" checkpoint, not from model_name.
# NOTE(review): presumably the model keeps the stock GPT-2 vocabulary - confirm.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained(model_name)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/908 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/498M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

## Creating Agreement

In [None]:
def generate_agreement(llm_prompt, max_length=650, stop_text="### END"):
    """Generate the agreement text for an instruction prompt.

    Uses the notebook-level ``model`` and ``tokenizer``.

    Args:
        llm_prompt: Instruction prompt, e.g. the result of ``create_instruction``.
        max_length: Value passed to ``model.generate`` as ``max_length``
            (it bounds prompt tokens plus generated tokens).
        stop_text: Marker after which the generated text is discarded.
            Pass an empty string to keep everything.

    Returns:
        str: The generated text with the prompt removed, cut at ``stop_text``
        and with surrounding whitespace trimmed. An empty prompt yields "".
    """
    chunk_size = 1024
    # NOTE(review): the prompt is split by characters, not tokens. Prompts of up
    # to chunk_size characters are processed in a single pass; for longer
    # prompts the prompt no longer appears contiguously in the decoded text, so
    # the prompt removal below will not match - confirm whether multi-chunk
    # prompts need to be supported.
    input_chunks = [llm_prompt[i:i + chunk_size] for i in range(0, len(llm_prompt), chunk_size)]

    lease_agreement = ''

    for chunk in input_chunks:
        input_text = lease_agreement + chunk
        encoded = tokenizer(input_text, return_tensors='pt', add_special_tokens=False)

        # Passing the attention mask and an explicit pad token avoids the
        # "attention mask and the pad token id were not set" warning; GPT-2 has
        # no pad token, so the end-of-sequence token is used, which is what the
        # library falls back to anyway.
        output = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=max_length,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )
        lease_agreement += tokenizer.decode(output[0], skip_special_tokens=True)

    # The decoded output echoes the prompt; keep only what the model added.
    generated = lease_agreement.replace(llm_prompt, "")

    if stop_text:
        generated = generated.split(stop_text)[0]

    # The original called .strip(''), which removes nothing; trim whitespace.
    return generated.strip()

In [None]:
# Generate the agreement for the prompt built earlier and display it.
lease_agreement = generate_agreement(llm_prompt)
lease_agreement

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'Tenant Name : Jennifer L.\n### Response :\n\nResidential Lease Agreement\n\nThis Residential Lease Agreement (the "Agreement") is made and entered into on August 03, 2024 by and between:\n\nLandlord: Valerie Murphy \nAddress : 5647 Western Sunny Apt. 283\nEast Holly, TN 32011 \n\nTen'

# Final Pipeline

In [None]:
def run_pipeline(user_input, headers):
    """Build the prompt from the user's values and generate the agreement.

    Args:
        user_input: Field values handed to ``create_instruction``.
        headers: Accepted but not used by this function; ``create_instruction``
            reads the notebook-level ``rental_headers`` instead.

    Returns:
        The text returned by ``generate_agreement`` for the built prompt.
    """
    return generate_agreement(create_instruction(user_input))

In [None]:
# Run prompt creation and generation in one call and display the result.
lease_agreement = run_pipeline(user_input = user_values, headers = rental_headers)
lease_agreement

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'Landlord Address : 745 W. W. W. Suite 845\nWest Holly, TN 32011 \nTenant Name : Mackenzie Baker \nTenant Address : 42408 Judy Divide Suite 732\n Porterstad, OK 20877 \n\n1. Property Description :   The Landlord agrees to lease the following property to the Tenant:    Property Address : 3185 Alicia Coves Apt. 283\nEast Holly, TN 32011 \n\n2. Lease Term :   The term of this lease shall begin on April 19, 2024 and end on September 20, 2027.\n\n3. Rent :   The monthly rent is 8083 due on the 12 of each month.\n\n4. Security Deposit :   The Tenant shall provide a security deposit of 3358 upon signing this lease.\n\n5. Maintenance and Repairs :   The Tenant is responsible for ordinary maintenance, while the Landlord is responsible for structural repairs.\n\n6. Utilities :   The Tenant shall pay for all utilities, including Electricity, gas, water, sewer, internet, security system.\n\n7. Termination :   Either party may terminate this lease with written notice of 27 days.\n\n8. Governing Law

# Integration code

## Pre-requisite

The code below downloads the model and stores it locally, so that the model does not need to be downloaded again each time we connect to it.

In [None]:
!pip install transformers -q

In [None]:
import os

from huggingface_hub import login
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# The access token must not be embedded in the code: it is read from the
# HF_TOKEN environment variable (a read-access token is sufficient).
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "Set the HF_TOKEN environment variable to a Hugging Face read-access token."
    )
login(token=hf_token)

# Hugging Face Hub repository of the model used for agreement generation.
model_name = "velusrinath/gpt2_rental_mini_2"
# The tokenizer comes from the base "gpt2" checkpoint, not from model_name.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained(model_name)

If needed as a function:

In [None]:
def download_llm_model(token=None):
    """Log in to Hugging Face and load the model and tokenizer into globals.

    Sets the module-level names ``model`` and ``tokenizer``.

    Args:
        token: Hugging Face access token (read access is sufficient). When
            omitted, the HF_TOKEN environment variable is used.

    Raises:
        RuntimeError: If no token is passed and HF_TOKEN is not set.
    """
    global model, tokenizer

    import os

    # The access token must not be embedded in the source code. Fail fast,
    # before importing the heavy libraries, when no token is available.
    hf_token = token or os.environ.get("HF_TOKEN")
    if not hf_token:
        raise RuntimeError(
            "No Hugging Face token: pass token=... or set the HF_TOKEN environment variable."
        )

    from huggingface_hub import login
    from transformers import GPT2LMHeadModel, GPT2Tokenizer

    login(token=hf_token)

    model_name = "velusrinath/gpt2_rental_mini_2"
    # The tokenizer comes from the base "gpt2" checkpoint, not from model_name.
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained(model_name)

## Agreement generation

In [None]:
# Example input for agreement_generate: field values keyed by the field names
# that agreement_generate knows how to label.
user_values = {
    #'agreement_name' : "Residential Lease",
    'agreement_date': "April 19, 2024",
    'landlord_name': "Valerie Murphy",
    'landlord_address': "5647 Western Sunny Apt. 283\n East Holly, TN 32011",
    'tenant_name': "Mackenzie Baker",
    'tenant_address': "42408 Judy Divide Suite 732\n Porterstad, OK 20877",
    'property_address': "56607 Danielle Spring\n North Deborah, MS 72045",
    'start_date': "August 03, 2024",
    'end_date': "September 20, 2027",
    'monthly_rent_amount': "8083",
    'security_deposit_amount': "3358",
    'list_of_utilities': "Electricity, gas, water, sewer, internet, security system",
    'notice_period': "27",
    'state': "Georgia",
    'due_date': "5",
}

In [None]:
def agreement_generate(user_values, agreement_name = "Residential Lease", max_length=650, stop_text="### END"):
    """Build the prompt from the user's values and generate the agreement.

    Self-contained variant of the notebook pipeline; it only relies on the
    module-level ``model`` and ``tokenizer``.

    Args:
        user_values: Dict mapping field names (see ``rental_headers`` below)
            to the values entered by the user.
        agreement_name: Agreement name inserted into the prompt.
        max_length: Value passed to ``model.generate`` as ``max_length``
            (it bounds prompt tokens plus generated tokens).
        stop_text: Marker after which the generated text is discarded.
            Pass an empty string to keep everything.

    Returns:
        str: The generated text with the prompt removed, cut at ``stop_text``
        and with surrounding whitespace trimmed.

    Raises:
        KeyError: If ``user_values`` contains a field name that has no label.
    """
    # Labels used when building the prompt from the user-supplied values.
    # Values from the front end must be keyed by the field names below.
    rental_headers = {
        'agreement_date': 'Agreement Date',
        'landlord_name': 'Landlord Name',
        'landlord_address': 'Landlord Address',
        'tenant_name': 'Tenant Name',
        'tenant_address': 'Tenant Address',
        'property_address': 'Property Address',
        'start_date': 'Start Date',
        'end_date': 'End Date',
        'due_date' : 'Due Date',
        'monthly_rent_amount': 'Monthly Rent Amount',
        'security_deposit_amount': 'Security Deposit Amount',
        'list_of_utilities': 'List of Utilities',
        'notice_period': 'Notice Period',
        'state': 'State',
        'permitted_use': 'Permitted Use',
        'renewal_terms': 'Renewal Terms'
    }

    # Report all unsupported fields at once instead of failing on the first.
    unknown_fields = [field for field in user_values if field not in rental_headers]
    if unknown_fields:
        raise KeyError(f"Unsupported field(s): {', '.join(map(str, unknown_fields))}")

    # NOTE(review): the wording "for a residential property" is used for every
    # agreement_name; presumably it matches the prompt format the model
    # expects - confirm before changing it.
    llm_prompt = f"### Instruction : Generate a {agreement_name} agreement for a residential property with the following terms:\n"
    llm_prompt += "".join(
        str(rental_headers[field]) + f" : {value} \n"
        for field, value in user_values.items()
    )

    chunk_size = 1024
    # NOTE(review): the prompt is split by characters, not tokens. Prompts of up
    # to chunk_size characters are processed in a single pass; for longer
    # prompts the prompt no longer appears contiguously in the decoded text, so
    # the prompt removal below will not match - confirm whether multi-chunk
    # prompts need to be supported.
    input_chunks = [llm_prompt[i:i + chunk_size] for i in range(0, len(llm_prompt), chunk_size)]

    lease_agreement = ''

    for chunk in input_chunks:
        input_text = lease_agreement + chunk
        encoded = tokenizer(input_text, return_tensors='pt', add_special_tokens=False)

        # Passing the attention mask and an explicit pad token avoids the
        # "attention mask and the pad token id were not set" warning; GPT-2 has
        # no pad token, so the end-of-sequence token is used, which is what the
        # library falls back to anyway.
        output = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=max_length,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )
        lease_agreement += tokenizer.decode(output[0], skip_special_tokens=True)

    # The decoded output echoes the prompt; keep only what the model added.
    generated = lease_agreement.replace(llm_prompt, "")

    if stop_text:
        generated = generated.split(stop_text)[0]

    # The original called .strip(''), which removes nothing; trim whitespace.
    return generated.strip()