# AI Contract Auditor

# Initial Setup

In [1]:
import os
import shutil
import fitz
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI

In [2]:
# All working paths are resolved relative to the directory the notebook runs in.
ABSOLUTE_PATH = os.path.abspath(os.getcwd())
PDF_DIR = os.path.join(ABSOLUTE_PATH, "pdf")
PROMPT_DIR = os.path.join(ABSOLUTE_PATH, "prompts")

# Fixed file locations: the stored upload and the two system prompt files.
PDF_PATH = os.path.join(PDF_DIR, "document.pdf")
SYSTEM_PROMPT_PATH = os.path.join(PROMPT_DIR, "system_prompt.txt")
CLEANING_SYSTEM_PROMPT_PATH = os.path.join(PROMPT_DIR, "cleaning_system_prompt.txt")

# Create any missing working directory and report each one that had to be made.
for _directory in (PDF_DIR, PROMPT_DIR):
    if os.path.exists(_directory):
        continue
    os.makedirs(_directory)
    print(f"{_directory} created.")

In [3]:
# Load variables from a .env file; override=True is passed so that values from
# the file take precedence over variables already set in the environment.
load_dotenv(override=True)

# API Keys
# The OpenAI and Google keys fall back to a placeholder string when the
# environment variable is missing.
OLLAMA_API_KEY = "ollama"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "type-your-api-key-here")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "type-your-api-key-here")

# BASE URL
# Both endpoints are accessed through the OpenAI client class below.
OLLAMA_BASE_URL = "http://localhost:11434/v1"
GOOGLE_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"

# Client
# NOTE(review): openai_client is built without arguments, so OPENAI_API_KEY
# above is never passed to it; presumably the SDK reads the key from the
# environment itself -- confirm. The placeholder fallback therefore has no
# effect on this client.
ollama_client = OpenAI(api_key=OLLAMA_API_KEY, base_url=OLLAMA_BASE_URL)
openai_client = OpenAI()
google_client = OpenAI(api_key=GOOGLE_API_KEY, base_url=GOOGLE_BASE_URL)

# Models
# Model identifiers, one per client above. Only `gpt` is used by the cleaning
# and analysis functions visible in this notebook.
llama = "llama3.2"
gpt = "gpt-4o-mini"
gemini = "models/gemini-2.0-flash"

# Prompting

In [4]:
def get_system_prompt(system_prompt_path: str) -> str:
    """Read a system prompt file and return its full contents.

    The file is decoded explicitly as UTF-8 so that prompts containing
    non-ASCII characters load identically on every platform instead of
    depending on the locale's default encoding.

    Args:
        system_prompt_path: Path to the prompt text file.

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(system_prompt_path, "r", encoding="utf-8") as f:
        return f.read()

# Cleaning text
def generate_cleaning_user_prompt(language: str, contract: str) -> str:
    """Compose the user prompt for the contract-cleaning request.

    The prompt has three parts, in order: the cleaning instructions, the
    target language, and the raw contract text.

    Args:
        language: Language whose content should be kept.
        contract: Raw contract text to be cleaned.

    Returns:
        The assembled prompt string.
    """
    parts = (
        f"Instructions: Remove headers/footers/page numbers and extract only {language} content. Preserve all contract terms and structure.\n",
        f"Target Language: {language}\n",
        f"Raw Contract:\n{contract}",
    )
    return "".join(parts)

def generate_cleaning_messages(language: str, contract: str) -> list:
    """Build the chat messages for the contract-cleaning request.

    Args:
        language: Language whose content should be kept.
        contract: Raw contract text to be cleaned.

    Returns:
        A two-element list of message dicts: the system message (loaded from
        CLEANING_SYSTEM_PROMPT_PATH) followed by the user message.
    """
    return [
        {"role": "system", "content": get_system_prompt(CLEANING_SYSTEM_PROMPT_PATH)},
        {"role": "user", "content": generate_cleaning_user_prompt(language, contract)}
    ]

# Analyze text
def generate_user_prompt(job_title: str, industry :str, location :str, language: str, contract: str) -> str:
    """Compose the user prompt for the contract-analysis request.

    The prompt has three parts, in order: a one-line summary of the position,
    the target language, and the contract text.

    Args:
        job_title: Position the contract is for.
        industry: Industry of the employer.
        location: Work location.
        language: Target language for the analysis.
        contract: Contract text to analyze.

    Returns:
        The assembled prompt string.
    """
    parts = (
        f"Position: {job_title} | Industry: {industry} | Location: {location}\n",
        f"Target Language: {language}\n",
        f"CONTRACT:\n{contract}",
    )
    return "".join(parts)

def generate_messages(job_title: str, industry :str, location :str, language: str, contract: str) -> list:
    """Build the chat messages for the contract-analysis request.

    Returns:
        A two-element list of message dicts: the system message (loaded from
        SYSTEM_PROMPT_PATH) followed by the user message.
    """
    system_message = {"role": "system", "content": get_system_prompt(SYSTEM_PROMPT_PATH)}
    user_message = {
        "role": "user",
        "content": generate_user_prompt(job_title, industry, location, language, contract),
    }
    return [system_message, user_message]

In [5]:
# Testing: build the cleaning messages for a sample contract and print the
# user-role message (index 1) to inspect the generated prompt.
test = generate_cleaning_messages(
    language="Bahasa Indonesia",
    contract="This is contract"
)
print(test[1]['content'])

Instructions: Remove headers/footers/page numbers and extract only Bahasa Indonesia content. Preserve all contract terms and structure.
Target Language: Bahasa Indonesia
Raw Contract:
This is contract


In [6]:
# Testing: build the analysis messages for a sample contract and print the
# user-role message (index 1) to inspect the generated prompt.
test = generate_messages(
    job_title="ML Engineer",
    industry="Technology",
    location="Jakarta, Indonesia",
    language="Bahasa Indonesia",
    contract="This is contract"
)
print(test[1]['content'])

Position: ML Engineer | Industry: Technology | Location: Jakarta, Indonesia
Target Language: Bahasa Indonesia
CONTRACT:
This is contract


# Processing

## Extracting

In [7]:
def save_pdf(pdf, target_path: str=PDF_PATH):
    """Move an uploaded PDF to the application's storage location.

    Args:
        pdf: Filesystem path of the uploaded PDF.
        target_path: Destination path; defaults to PDF_PATH.

    Raises:
        ValueError: If no path was given or the path does not exist.
        RuntimeError: If the file could not be moved. The underlying
            exception is chained as the cause.
    """
    if not pdf:
        raise ValueError("No PDF uploaded.")
    if not os.path.exists(pdf):
        raise ValueError("PDF does not exist in internal directory.")
    try:
        # Make sure the destination directory is present so the move does not
        # fail when it was removed (or never created) before this call.
        target_dir = os.path.dirname(target_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        shutil.move(pdf, target_path)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"Error: Failed to save PDF: {str(e)}") from e

def extract_pdf(pdf_path: str=PDF_PATH):
    """Extract the text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF to read; defaults to PDF_PATH.

    Returns:
        A list with one text string per page, in page order. If anything goes
        wrong while opening or reading the file, no exception is raised;
        instead a string starting with "Error reading PDF: " is returned, so
        callers must check the type of the result.
    """
    try:
        with fitz.open(pdf_path) as document:
            return [page.get_text() for page in document]
    except Exception as err:
        return f"Error reading PDF: {str(err)}"

In [8]:
# Testing
# test = extract_pdf("D:/Learn/LLM/llm_engineering/week4/pdf/pkwt.pdf")
# print(test)

## Cleaning

In [9]:
def clean_contract(language: str, raw_contract: str) -> str:
    """Send the raw contract through the cleaning prompt and return the reply.

    The request goes to `openai_client` using the `gpt` model with
    temperature 0.

    Args:
        language: Language whose content should be kept.
        raw_contract: Contract text as extracted or pasted.

    Returns:
        The message content of the first choice in the completion.
    """
    prompt_messages = generate_cleaning_messages(language, raw_contract)
    completion = openai_client.chat.completions.create(
        model=gpt,
        messages=prompt_messages,
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [20]:
# Testing
# clean_contract(
#     "Bahasa Indonesia",
#     raw_contract="""
#     PIHAK PERTAMA berkewajiban untuk melakukan
# pembayaran gaji kepada PIHAK KEDUA dengan
# ketentuan gaji dihitung dan diberikan kepada PIHAK
# KEDUA pada hari kerja setiap awal bulan maksimal
# dibayarkan pada tanggal 1 setiap bulannya.

# The FIRST PARTY is obliged to make payment of
# salaries to the SECOND PARTY provided that the
# salary is calculated and given to the SECOND PARTY
# on the working day of the beginning of each month,
# the maximum is paid on the 1
# st of each month.
#     """
# )

'PIHAK PERTAMA berkewajiban untuk melakukan\npembayaran gaji kepada PIHAK KEDUA dengan\nketentuan gaji dihitung dan diberikan kepada PIHAK\nKEDUA pada hari kerja setiap awal bulan maksimal\ndibayarkan pada tanggal 1 setiap bulannya.'

## Analyzing

In [10]:
def analyze_contract(job_title: str, industry: str, location: str, language: str, contract: str) -> str:
    """Send the contract through the analysis prompt and return the reply.

    The request goes to `openai_client` using the `gpt` model with
    temperature 0.2.

    Args:
        job_title: Position the contract is for.
        industry: Industry of the employer.
        location: Work location.
        language: Target language for the analysis.
        contract: Contract text to analyze.

    Returns:
        The message content of the first choice in the completion.
    """
    prompt_messages = generate_messages(job_title, industry, location, language, contract)
    completion = openai_client.chat.completions.create(
        model=gpt,
        messages=prompt_messages,
        temperature=0.2,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

## Wrap-Up

In [11]:
def process_pdf(job_title: str, industry: str, location: str, language: str, pdf: str):
    """Run the full pipeline for an uploaded PDF contract.

    Steps: validate the form fields, store the upload, extract its text,
    clean the text, then analyze it.

    Args:
        job_title: Position the contract is for.
        industry: Industry of the employer.
        location: Work location.
        language: Target language for cleaning and analysis.
        pdf: Filesystem path of the uploaded PDF.

    Returns:
        The analysis text on success, otherwise a string starting with
        "Error: " that describes what went wrong. This function never raises.
    """
    try:
        if not job_title or not industry or not location:
            raise ValueError("Please fill in your job title, industry and work location first.")

        # Save uploaded pdf locally
        save_pdf(pdf)

        # Extract contract from pdf
        pages = extract_pdf()
        # extract_pdf signals failure by returning an error string instead of
        # raising; surface it here rather than sending it to the model as if
        # it were the contract.
        if isinstance(pages, str):
            raise ValueError(pages)

        # Merge the per-page texts into one string so the prompt contains the
        # contract itself, not the repr of a Python list.
        raw_contract = "\n".join(pages).strip()
        if not raw_contract:
            # Also covers PDFs whose pages exist but contain no extractable text.
            raise ValueError("The pdf is empty.")

        # Clean contract
        try:
            contract = clean_contract(language, raw_contract)
        except Exception as e:
            raise ValueError(f"Failed to clean your employment contract - {str(e)}") from e

        # Analyze contract
        try:
            analysis_result = analyze_contract(job_title, industry, location, language, contract)
        except Exception as e:
            raise ValueError(f"Failed to analyze your employment contract - {str(e)}") from e

        return analysis_result

    except Exception as e:
        # UI boundary: report every failure as text in the result box.
        return f"Error: {str(e)}"


def process_text(job_title: str, industry: str, location: str, language: str, text: str):
    """Run the pipeline for a contract pasted as plain text.

    Steps: validate the form fields and the text, clean the text, then
    analyze it.

    Args:
        job_title: Position the contract is for.
        industry: Industry of the employer.
        location: Work location.
        language: Target language for cleaning and analysis.
        text: Contract text pasted by the user.

    Returns:
        The analysis text on success, otherwise a string starting with
        "Error: " that describes what went wrong. This function never raises.
    """
    try:
        if not job_title or not industry or not location:
            raise ValueError("Please fill in your job title, industry and work location first.")
        if not text:
            raise ValueError("Please paste your employment contract first.")

        # Clean contract
        try:
            contract = clean_contract(language, text)
        except Exception as e:
            raise ValueError(f"Failed to clean your employment contract - {str(e)}") from e

        # Analyze text
        try:
            # `language` must be forwarded: analyze_contract takes five
            # arguments, and omitting it made every text analysis fail.
            analysis_result = analyze_contract(job_title, industry, location, language, contract)
        except Exception as e:
            raise ValueError(f"Failed to analyze your employment contract - {str(e)}") from e

        return analysis_result

    except Exception as e:
        # UI boundary: report every failure as text in the result box.
        return f"Error: {str(e)}"

# UI

In [12]:
def add_br():
    """Add a Markdown component containing a single `<br>` as a vertical spacer."""
    gr.Markdown("<br>")

# Page layout: a header row, then a two-column row with the input form on the
# left and the analysis result on the right.
with gr.Blocks() as demo:
    add_br()
    with gr.Row():
        with gr.Column():
            gr.Markdown("# AI Contract Auditor")
            gr.Markdown("### An AI-powered app to analyze and review your employment contract.")
            gr.Markdown("**Note:** This app only provide informational analysis, not legal advice. Complex situations require professional legal consultation.")
    add_br()
    with gr.Row():
        with gr.Column():
            # Context about the position; all three are required by
            # process_pdf / process_text.
            with gr.Column():
                job_title = gr.Textbox(label="Job Title", placeholder="E.g. Director, Manager, Staff")
                industry = gr.Textbox(label="Industry", placeholder="E.g. Technology, Financial, Education")
                location = gr.Textbox(label="Location", placeholder="E.g. Jakarta, Indonesia; New York, US; Tokyo, Japan")

            # Input mode and contract language selectors.
            with gr.Column():
                doc_type = gr.Dropdown(["PDF", "Text"], value="PDF", label="Document type", interactive=True)
                language = gr.Dropdown(["English", "Bahasa Indonesia"], value="English", label="Language", interactive=True)

            # Only one of the two input containers is visible at a time; PDF
            # is the initial mode, matching the dropdown default above.
            with gr.Column(visible=True) as pdf_input:
                pdf = gr.File(label="Upload your employment contract (.pdf)", file_types=[".pdf"], type="filepath")

            with gr.Column(visible=False) as text_input:
                text = gr.Textbox(label="Paste your employment contract", lines=10)

            submit_btn = gr.Button("Analysis")
        with gr.Column():
            result = gr.Text(label="Analysis results", lines=35, show_copy_button=True)

    # Update UI
    def update_ui(doc_type):
        """Return updates for (pdf_input, text_input, result) for the chosen type.

        Shows the container matching `doc_type`, hides the other one, and
        clears the result box by returning an empty string for it.
        """
        return (
            gr.update(visible=doc_type == "PDF"),
            gr.update(visible=doc_type == "Text"),
            ""
        )

    # Re-evaluate visibility whenever the document type dropdown changes.
    doc_type.change(
        fn=update_ui,
        inputs=doc_type,
        outputs=[pdf_input, text_input, result]
    )

    # Analyze
    def analyze(job_title: str, industry: str, location: str, doc_type: str, language: str, pdf: str, text: str):
        """Dispatch to the PDF or text pipeline according to `doc_type`.

        Returns the string produced by process_pdf / process_text, or
        "Unknown input type." when `doc_type` is neither "PDF" nor "Text".
        """
        if doc_type == "PDF":
            return process_pdf(job_title, industry, location, language, pdf)
        elif doc_type == "Text":
            return process_text(job_title, industry, location, language, text)
        else:
            return "Unknown input type."

    # The order of `inputs` must match the parameter order of `analyze`.
    submit_btn.click(
        fn=analyze,
        inputs=[job_title, industry, location, doc_type, language, pdf, text],
        outputs=result
    )

demo.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


