## Install the necessary SDK packages


In [None]:
!pip install --upgrade gradio
!pip install --upgrade google-cloud-aiplatform
!pip install --upgrade google-cloud-storage
!pip install --upgrade google-cloud-documentai
!pip install --upgrade google-auth

## Import all the modules

In [None]:
import gradio as gr
import pandas

import gcp_functions.storage as StorageHelper
from gcp_functions.docai import process_document
from gcp_functions.config import SummaryParserConfig, ContractParserConfig, get_project_id
from gcp_functions.gemini import gemini_response
from typing import Callable

from google.oauth2.service_account import Credentials
import json
from urllib.parse import urlparse

## Summarizer Component

In [None]:
def handle_summary_upload(file_url: str):
    """Upload a local file and run it through the Document AI summarizer.

    The file is copied to the Cloud Storage upload bucket named by
    ``SummaryParserConfig``, submitted to the configured processor through
    ``process_document`` (a batch request, so larger files can be handled),
    and the summary and OCR text are then pulled out of the JSON results.

    NOTE: ``process_document`` can be pointed at any DocAI parser, but the
    extraction step assumes the JSON result comes from the Summarizer parser.

    Args:
        file_url: Path of the local file to upload (passed in by the Gradio
            upload button).

    Returns:
        A 3-tuple ``(f, summary, text)``: the first value returned by
        ``StorageHelper.file_upload`` (presumably the uploaded file's name;
        confirm against the helper), the summary portion, and the OCR text.

    Raises:
        IndexError: If the batch metadata carries no per-document status,
            i.e. there is no output location to read results from.
    """
    # Copy the local file into the Cloud Storage upload bucket; the returned
    # URI is what the processor reads from.
    f, gcs_input_uri = StorageHelper.file_upload(
        file_url, SummaryParserConfig.upload_bucket()
    )

    # Request processing of the uploaded file.
    metadata = process_document(
        project_id=get_project_id(),
        location=SummaryParserConfig.location(),
        processor_id=SummaryParserConfig.processor_id(),
        mime_type=SummaryParserConfig.mime_type(),
        field_mask=SummaryParserConfig.field_mask(),
        gcs_input_uri=gcs_input_uri,
        gcs_output_uri=f"gs://{SummaryParserConfig.output_bucket()}",
    )

    # Only one document was submitted, so only the first status matters.
    # Fail with a readable message instead of a bare "list index out of range".
    statuses = metadata.individual_process_statuses
    if not statuses:
        raise IndexError(
            "Document AI returned no individual_process_statuses for "
            f"{gcs_input_uri}; no output location to read the summary from"
        )

    # The JSON result URI is returned by the helper as well, but it is not
    # surfaced in the UI.
    _, summary, text = StorageHelper.extract_from_summary_output(
        statuses[0].output_gcs_destination
    )

    return f, summary, text
    
def summary_component(full_text: gr.components.textbox.Textbox,
                      handle_func: Callable):
    """Build the "Summarize" tab of the UI.

    The tab holds a one-line textbox for the uploaded file, a PDF-only upload
    button, and a large textbox for the summary. When an upload completes,
    ``handle_func`` is called with the uploaded file and its three return
    values are written to the file textbox, the summary textbox, and
    ``full_text`` respectively.

    Args:
        full_text: Textbox created by the caller that receives the third
            value returned by ``handle_func``.
        handle_func: Upload handler; takes the uploaded file and returns
            three values in the order described above.
    """
    with gr.Tab("Summarize"):
        with gr.Row():
            uploaded_file_box = gr.Textbox(label="Upload File", lines=1)

        with gr.Row():
            pdf_picker = gr.UploadButton(
                "Click to upload",
                file_count="single",
                file_types=[".pdf"],
            )

        with gr.Row():
            summary_box = gr.Textbox(label="Summary", lines=20)

        # The upload button is both the trigger and the sole input.
        pdf_picker.upload(
            fn=handle_func,
            inputs=[pdf_picker],
            outputs=[uploaded_file_box, summary_box, full_text],
        )

## Contract Parser Component

In [None]:
def handle_contract_upload(file_url: str, *, upload_bucket: str = 'gnw-contracts'):
    """Upload a local file and run it through the Document AI contract parser.

    The file is copied to a Cloud Storage bucket, submitted to the processor
    configured in ``ContractParserConfig`` through ``process_document`` (a
    batch request, so larger files can be handled), and the entities and OCR
    text are then pulled out of the JSON results.

    NOTE: ``process_document`` can be pointed at any DocAI parser, but the
    extraction step assumes the JSON result comes from the Contract parser.

    NOTE: The contract parser is hosted in another project, so the upload
    and the processing request are both made with service-account
    ``credentials``.

    NOTE(review): ``credentials`` is read from module scope but is not
    defined in any visible cell of this notebook; it has to be created
    before this handler runs, otherwise the call fails with ``NameError``.
    The result extraction call does not receive ``credentials`` -- confirm
    that the output bucket is readable with the default identity.

    Args:
        file_url: Path of the local file to upload (passed in by the Gradio
            upload button).
        upload_bucket: Name of the Cloud Storage bucket the file is uploaded
            to. Keyword-only; defaults to the bucket this notebook has
            always used.

    Returns:
        A 3-tuple ``(f, df_entities, text)``: the first value returned by
        ``StorageHelper.file_upload`` (presumably the uploaded file's name;
        confirm against the helper), a ``pandas.DataFrame`` built from the
        extracted entities, and the OCR text.

    Raises:
        IndexError: If the batch metadata carries no per-document status,
            i.e. there is no output location to read results from.
    """
    # Copy the local file into the Cloud Storage bucket; the returned URI is
    # what the processor reads from.
    f, gcs_input_uri = StorageHelper.file_upload(file_url, upload_bucket, credentials)

    # Request processing of the uploaded file.
    metadata = process_document(
        project_id=get_project_id(),
        location=ContractParserConfig.location(),
        processor_id=ContractParserConfig.processor_id(),
        mime_type=ContractParserConfig.mime_type(),
        field_mask=ContractParserConfig.field_mask(),
        gcs_input_uri=gcs_input_uri,
        gcs_output_uri=f"gs://{ContractParserConfig.output_bucket()}",
        credentials=credentials,
    )

    # Only one document was submitted, so only the first status matters.
    # Fail with a readable message instead of a bare "list index out of range".
    statuses = metadata.individual_process_statuses
    if not statuses:
        raise IndexError(
            "Document AI returned no individual_process_statuses for "
            f"{gcs_input_uri}; no output location to read the entities from"
        )

    # The JSON result URI is returned by the helper as well, but it is not
    # surfaced in the UI.
    _, entities, text = StorageHelper.extract_from_contract_output(
        statuses[0].output_gcs_destination
    )

    # Wrapped in a DataFrame so the entities can be rendered as a table.
    df_entities = pandas.DataFrame(entities)

    return f, df_entities, text

def contract_component(full_text: gr.components.textbox.Textbox,
                       handle_func: Callable):
    """Build the "Contracts" tab of the UI.

    The tab holds a one-line textbox for the uploaded contract, a PDF-only
    upload button, and an entities table with ``type`` and ``mentionText``
    columns. When an upload completes, ``handle_func`` is called with the
    uploaded file and its three return values are written to the file
    textbox, the entities table, and ``full_text`` respectively.

    Args:
        full_text: Textbox created by the caller that receives the third
            value returned by ``handle_func``.
        handle_func: Upload handler; takes the uploaded file and returns
            three values in the order described above.
    """
    with gr.Tab("Contracts"):
        with gr.Row():
            uploaded_file_box = gr.Textbox(label="Upload Contract", lines=1)

        with gr.Row():
            pdf_picker = gr.UploadButton(
                "Click to upload",
                file_count="single",
                file_types=[".pdf"],
            )

        with gr.Row():
            entity_table = gr.DataFrame(
                label="Entities",
                headers=['type', 'mentionText'],
                column_widths=['200px'],
                wrap=True,
            )

        # The upload button is both the trigger and the sole input.
        pdf_picker.upload(
            fn=handle_func,
            inputs=[pdf_picker],
            outputs=[uploaded_file_box, entity_table, full_text],
        )


## QA Chat Component

In [None]:
def qa_component(full_text_component: gr.components.textbox.Textbox):
    """Build the QA chat UI: a chatbot pane above a message textbox.

    NOTE: ``full_text_component`` should be HIDDEN in the main UI; it is
    used as a way to cache the document and keep it available as prompt
    context.

    Args:
        full_text_component: Textbox whose current value is passed to the
            model together with each question.
    """
    def respond(message, history, document_text):
        # Ask the model, record the exchange, and clear the input box.
        answer = gemini_response(message, history, document_text)
        history.append((message, answer))
        return "", history

    with gr.Row():
        chat_pane = gr.Chatbot()

    with gr.Row():
        question_box = gr.Textbox()

    # Submitting sends the message, the chat history and the cached document
    # text to `respond`; its outputs reset the textbox and refresh the chat.
    question_box.submit(
        respond,
        [question_box, chat_pane, full_text_component],
        [question_box, chat_pane],
    )
    
    


## Main UI

In [None]:
# ---------------------------------------------------------------------
# Main UI
#
# Left column: the Summarize and Contracts tabs. Right column: the QA
# chatbot. A hidden textbox is shared by all three components.
# ---------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        # Hidden textbox used as a cache: the upload handlers write the
        # document's full text here and the QA chatbot reads it back as
        # prompt context.
        full_text = gr.Textbox(label="Full Text", lines=20, visible=False)

    with gr.Row():
        with gr.Column():
            # Document upload tabs (summary first, then contracts).
            summary_component(full_text, handle_summary_upload)
            contract_component(full_text, handle_contract_upload)

        with gr.Column():
            # Chat over the most recently uploaded document.
            qa_component(full_text)

# NOTE(review): share=True asks Gradio for a publicly reachable link --
# confirm that is intended for an app that handles contract documents.
demo.launch(debug=True, share=True)