In [8]:
!pip install paddlepaddle
!pip install paddleocr
!pip install cohere
!pip install gradio
!pip install opencv-python-headless
!pip install matplotlib
!pip install pandas




In [9]:
import io
import os
import re

import cohere
import cv2
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from paddleocr import PaddleOCR
from PIL import Image


In [10]:
# Initialize PaddleOCR and Cohere API
paddle_ocr = PaddleOCR(use_angle_cls=True, lang='en')  # PaddleOCR for English with angle classification

# Read the Cohere API key from the environment instead of committing it to the
# notebook. NOTE(review): a key was previously hardcoded in this cell; treat it
# as exposed and revoke it in the Cohere dashboard.
cohere_api_key = os.environ.get("COHERE_API_KEY")
if not cohere_api_key:
    raise RuntimeError(
        "Set the COHERE_API_KEY environment variable before running this cell."
    )
co = cohere.Client(cohere_api_key)


[2024/12/13 13:13:17] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\DELLL/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\DELLL/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=

In [11]:
# OCR and data extraction function
def process_documents(document_type, uploaded_files):
    """Run OCR on each uploaded image and ask Cohere to extract key fields.

    Args:
        document_type: "Salary Slip", "Profit and Loss Statement" or "Check";
            selects which fields the prompt asks for. Any other value sends
            the prompt without a field list.
        uploaded_files: Iterable of items that ``PIL.Image.open`` accepts.
            ``None`` or an empty iterable is treated as "nothing uploaded".

    Returns:
        A list with one dict per input file. Each dict maps the stripped key
        of every "key: value" line in the model's reply to its stripped
        value. A file in which OCR finds no text contributes an empty dict.
    """
    field_requests = {
        "Salary Slip": "Find and return the values for: Net Salary, Gross Salary, Basic Salary.",
        "Profit and Loss Statement": "Find and return the values for: Total Revenue, Net Income.",
        "Check": "Find and return the values for: Account Number, Amount (in Rupees), Bank Name.",
    }
    extracted_data = []

    # `or []` makes a missing upload (None) behave like an empty upload.
    for uploaded_file in uploaded_files or []:
        # Materialise the pixels inside the context manager so the underlying
        # file handle is released as soon as the array has been built.
        with Image.open(uploaded_file) as image:
            image_array = np.array(image)

        # OCR processing
        paddle_results = paddle_ocr.ocr(image_array)
        # NOTE(review): some PaddleOCR releases appear to return [None] when no
        # text is detected; treat that (and an empty result) as "no lines".
        page_lines = (paddle_results[0] if paddle_results else None) or []
        extracted_text = " ".join(line[1][0] for line in page_lines)

        if not extracted_text.strip():
            # Nothing to extract from; skip the API call rather than asking
            # the model to find values in an empty text.
            extracted_data.append({})
            continue

        # Cohere API call
        prompt = f"Extract the following information from the text: {extracted_text}\n\n"
        prompt += field_requests.get(document_type, "")

        response = co.generate(model='command', prompt=prompt, max_tokens=200)
        result_text = response.generations[0].text.strip()

        # Parse "key: value" lines of the reply; split on the first colon only
        # so values that themselves contain ':' are kept intact.
        temp_data = {}
        for line in result_text.split("\n"):
            if ":" in line:
                key, value = line.split(":", 1)
                temp_data[key.strip()] = value.strip()
        extracted_data.append(temp_data)

    return extracted_data


In [12]:
def _parse_amount(raw_value):
    """Return the first number in ``raw_value`` as a float, or None if there is none."""
    # Take the first numeric token (optional leading minus, comma grouping,
    # optional decimals). The minus is honoured only when it is not glued to a
    # preceding word character, so "FY-2024" is read as 2024, not -2024.
    match = re.search(r"(?:(?<!\w)-)?\d[\d,]*(?:\.\d+)?", raw_value)
    if match is None:
        return None
    return float(match.group(0).replace(",", ""))


def visualize_results(document_type, uploaded_files, plot_type):
    """Extract fields from the uploaded documents and plot the numeric ones.

    Args:
        document_type: Document kind forwarded to ``process_documents``; also
            used in the plot title.
        uploaded_files: Uploaded image files forwarded to
            ``process_documents``. ``None`` or empty yields the "no data"
            message without running OCR.
        plot_type: "Pie Chart" draws a pie chart; any other value (including
            ``None``) draws a bar plot.

    Returns:
        A ``(text, image)`` tuple. ``text`` lists every extracted
        "key: value" pair, followed by a note when no chart could be drawn.
        ``image`` is a PIL image of the chart, or ``None`` when there is
        nothing to plot.
    """
    if not uploaded_files:
        return "No data extracted. Please check your input files.", None

    extracted_data = process_documents(document_type, uploaded_files)

    if not extracted_data:
        return "No data extracted. Please check your input files.", None

    # Display extracted data in text format
    extracted_info = "\n".join(
        f"{key}: {value}" for data in extracted_data for key, value in data.items()
    )

    def with_note(note):
        # Append a note to the extracted text without a leading blank line
        # when there is no extracted text at all.
        return "\n\n".join(part for part in (extracted_info, note) if part)

    # Sum numeric values per field across all documents
    flattened_data = {}
    for data in extracted_data:
        for key, value in data.items():
            numeric_value = _parse_amount(value)
            if numeric_value is not None:
                flattened_data[key] = flattened_data.get(key, 0) + numeric_value

    if not flattened_data:
        # The second output feeds an image component, so it must be None
        # rather than a message string.
        return with_note("No numeric data available for visualization."), None

    labels = list(flattened_data.keys())
    values = list(flattened_data.values())

    if plot_type == "Pie Chart" and (min(values) < 0 or sum(values) == 0):
        # Pie wedges cannot represent negative values or a zero total.
        return with_note(
            "A pie chart needs non-negative values with a non-zero total. "
            "Select Bar Plot instead."
        ), None

    # Create the plot
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        if plot_type == "Pie Chart":
            ax.pie(values, labels=labels, autopct="%1.1f%%", startangle=140, colors=plt.cm.Paired.colors)
            ax.set_title(f"{document_type} - Pie Chart")
        else:
            # Bar plot is also the fallback when no plot type was selected.
            ax.bar(labels, values, color="skyblue", edgecolor="black")
            ax.set_title(f"{document_type} - Bar Plot")
            ax.set_ylabel("Amount")
            ax.set_xlabel("Fields")
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

        # Save plot to a buffer
        buf = io.BytesIO()
        fig.tight_layout()
        fig.savefig(buf, format="png")
    finally:
        # Release the figure; otherwise every call leaves one open in pyplot.
        plt.close(fig)

    # Convert the buffer to an image that Gradio can process
    buf.seek(0)
    image = Image.open(buf)
    image.load()  # decode now so the image no longer depends on the buffer

    return extracted_info, image


In [13]:
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Document OCR, Analysis, and Visualization")

    # Pre-select a document type: with no selection the callback receives
    # None and the extraction prompt is sent without a field list.
    document_type = gr.Radio(
        ["Salary Slip", "Profit and Loss Statement", "Check"],
        value="Salary Slip",
        label="Select Document Type",
    )
    uploaded_files = gr.File(file_types=["image"], file_count="multiple", label="Upload Document Images")
    # Pre-select a plot type so a chart is always drawn.
    plot_type = gr.Radio(["Bar Plot", "Pie Chart"], value="Bar Plot", label="Select Plot Type")

    result = gr.Textbox(label="Extracted Information", lines=10, interactive=False)
    visualization_output = gr.Image(label="Visualization")

    submit_button = gr.Button("Process and Visualize")

    # Wire the button: the three inputs are passed positionally to
    # visualize_results, whose two return values fill the two outputs.
    submit_button.click(
        visualize_results,
        inputs=[document_type, uploaded_files, plot_type],
        outputs=[result, visualization_output]
    )

# Launch Gradio Interface
demo.launch()


* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




[2024/12/13 13:13:57] ppocr DEBUG: dt_boxes num : 41, elapsed : 1.42803955078125
[2024/12/13 13:13:57] ppocr DEBUG: cls num  : 41, elapsed : 0.440380334854126
[2024/12/13 13:14:03] ppocr DEBUG: rec_res num  : 41, elapsed : 6.015445947647095
