In [None]:
!pip install -q -U google-generativeai

In [None]:
import google.generativeai as genai

## SET API KEY

In [None]:
# Configure the Gemini client with an API key read from Colab's secret store,
# so the key never has to be written into the notebook itself.
from google.colab import userdata

import google.generativeai as genai

# The secret must be stored under the name 'GOOGLE_API_KEY' in Colab.
# Previously this name was only assigned in commented-out code, so the
# configure() call below failed with NameError on a fresh kernel.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

genai.configure(api_key=GOOGLE_API_KEY)

## LIST OF MODELS

In [None]:
# Print the name of every model that can serve generateContent requests.
for m in genai.list_models():
    if 'generateContent' not in m.supported_generation_methods:
        continue
    print(m.name)


models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash
models/gemini-1.5-flash-001-tuning


In [None]:
# Generation parameters handed to the model as its generation_config.
MODEL_CONFIG = dict(
    temperature=0.2,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

# Safety settings: one entry per harm category, all using the same threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

## LOAD GEMINI MODEL WITH MODEL CONFIGURATIONS

In [None]:
# Instantiate the Gemini model with the generation and safety settings
# defined earlier in the notebook.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=MODEL_CONFIG,
    safety_settings=safety_settings,
)

## DEFINE IMAGE FORMAT TO INPUT IN GEMINI

In [None]:
from pathlib import Path

# File extension -> MIME type for the image formats this helper recognises.
_IMAGE_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".webp": "image/webp",
}


def image_format(image_path):
    """Load an image file and wrap it as an inline image part for the prompt.

    Args:
        image_path: Path (str or Path) of the image file to read.

    Returns:
        A one-element list holding a dict with the keys ``"mime_type"`` and
        ``"data"`` (the raw bytes of the file).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # Derive the MIME type from the extension (case-insensitive) instead of
    # labelling every file as PNG; unknown extensions keep the old
    # "image/png" default so existing callers behave as before.
    mime_type = _IMAGE_MIME_TYPES.get(img.suffix.lower(), "image/png")

    return [{"mime_type": mime_type, "data": img.read_bytes()}]


## GEMINI MODEL OUTPUT

In [None]:
def gemini_output(image_path, system_prompt, user_prompt):
    """Ask the model a question about a single image and return its text reply.

    The prompt is sent as ``[system_prompt, <image part>, user_prompt]``,
    where the image part is the first element returned by ``image_format``.
    """
    image_part = image_format(image_path)[0]
    reply = model.generate_content([system_prompt, image_part, user_prompt])
    return reply.text

In [None]:
!pip install pymupdf




In [None]:
import fitz


def extract_images_from_pdf(pdf_path):
    """Collect every embedded image of a PDF as inline image parts.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        A list of dicts with the keys ``"mime_type"`` (``"image/<ext>"``,
        where ``<ext>`` is the extension reported by ``extract_image``) and
        ``"data"`` (the image bytes), in page order. The list is empty when
        no page references an image.
    """
    images = []
    # Open the document in a ``with`` block so it is closed even if
    # extraction fails part-way; the original never closed it.
    with fitz.open(pdf_path) as doc:
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            for img in page.get_images(full=True):
                xref = img[0]  # first entry of each image record is its xref
                base_image = doc.extract_image(xref)
                images.append(
                    {
                        "mime_type": f"image/{base_image['ext']}",
                        "data": base_image["image"],
                    }
                )
    return images

def gemini_output_from_pdf(pdf_path, system_prompt, user_prompt):
    """Ask the model a question about the images embedded in a PDF.

    The prompt is sent as ``[system_prompt, <image parts...>, user_prompt]``
    and the text of the model's reply is returned.

    Raises:
        ValueError: If ``extract_images_from_pdf`` finds no images.
    """
    pdf_images = extract_images_from_pdf(pdf_path)
    if not pdf_images:
        raise ValueError("No images found in the PDF.")

    reply = model.generate_content([system_prompt, *pdf_images, user_prompt])
    return reply.text


# from requests.exceptions import ReadTimeout

# def extract_images_from_pdf(pdf_path):
#     doc = fitz.open(pdf_path)
#     images = []
#     for page_num in range(len(doc)):
#         page = doc.load_page(page_num)
#         image_list = page.get_images(full=True)
#         for img_index, img in enumerate(image_list):
#             xref = img[0]
#             base_image = doc.extract_image(xref)
#             image_bytes = base_image["image"]
#             image_mime = base_image["ext"]
#             images.append({"mime_type": f"image/{image_mime}", "data": image_bytes})
#     return images

# def gemini_output_from_pdf(pdf_path, system_prompt, user_prompt, timeout=60):
#     image_infos = extract_images_from_pdf(pdf_path)
#     if not image_infos:
#         raise ValueError("No images found in the PDF.")

#     input_prompt = [system_prompt]
#     input_prompt.extend(image_infos)
#     input_prompt.append(user_prompt)

#     try:
#         response = model.generate_content(input_prompt, timeout=timeout)
#         return response.text
#     except ReadTimeout:
#         print(f"Request timed out after {timeout} seconds.")
#         return None

### EXTRACTING PART OF THE INFORMATION FROM INVOICE

In [None]:
# Input files: the invoice PDF that is sent to the model, plus a screenshot
# path that is only used by the commented-out image call below.
pdf_path = "/content/image131551472.pdf"
image_path = "/content/Screenshot 2024-08-09 17153AGASH.png"

# Role instruction placed first in the prompt.
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

# The extraction request placed after the images in the prompt.
user_prompt = "Extract the Table data with that long word description each line in the PDF as a json format like description and extended amount in single object and also include invoice total,customer name,invoice no,invoice date seperate object  "

# Image-only alternative:
# gemini_output(image_path, system_prompt, user_prompt)

# Send the PDF's images to the model and show the reply.
output = gemini_output_from_pdf(pdf_path, system_prompt, user_prompt)
print(output)

```json
{"description": "SAUCE, SOY, FLST JUG SHLF STAI 1 GR/1 GR", "extended amount": "1.60"},
{"description": "SEASONING, RDSH BLEND TF 8/4 GR 4/24 D", "extended amount": "1.60"},
{"description": "REFRIGERATED", "extended amount": null},
{"description": "CHEESE, ATHER WH 3 LOAF EZ TREIT 6/5 LB", "extended amount": "18.16"},
{"description": "CHEESE, ATHER HV 35% BUTTR 1 UHT 12/12 GR", "extended amount": "69.00"},
{"description": "CREAM", "extended amount": null},
{"description": "FROZEN", "extended amount": null},
{"description": "CALAMARI RING -15 TINCT 5/8 MISC 10/2", "extended amount": "38.69"},
{"description": "SHRIMP, RAW 1/3 -15 WHTL 1/ON PISC 10/2", "extended amount": "11.16"},
{"description": "LOBSTER, ARTH CDN 4 KL HEAT - 5/3 LB", "extended amount": "16.00"},
{"description": "STORAGE LOCATION RECAP (IN):", "extended amount": null},
{"description": "** AUTO DEDUCTIN", "extended amount": null},
{"description": "** INVOICE SUMMARY", "extended amount": null},
{"description": "PIE

## EXTRACTING WHOLE DATA IN JSON FROM INVOICE

In [None]:
# system_prompt = """
#                You are a specialist in comprehending receipts.
#                Input images in the form of receipts will be provided to you,
#                and your task is to respond to questions based on the content of the input image.
#                """
# #system_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "
# image_path = "/content/handwritten.png"
# user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "

# output = gemini_output(image_path, system_prompt, user_prompt)

In [None]:
# from IPython.display import Markdown
# Markdown(output)

In [None]:
# system_prompt = """
# You are a specialist in comprehending receipts and invoices.
# You will be provided with input images or PDFs of receipts and invoices,
# and your task is to extract and structure the data as JSON.
# The JSON should include the following fields:
# - VendorName
# - VendorAddress
# - InvoiceTotal
# - InvoiceDate
# - InvoiceId
# - ProductCode
# - TotalTax
# For each line item, extract:
# - Description (Ensure this is a valid, meaningful description and ignore any descriptions that are just placeholders like '**'.)
# - ProductCode
# - Amount
# - Quantity
# - UnitPrice
# If any fields or line items are missing, represent them with null values.
# """

# user_prompt = """
# Extract the data from the provided PDF and format it as a JSON object.
# Make sure to extract fields like VendorName, InvoiceTotal, InvoiceDate, and TotalTax, along with line items.
# """

# # Example PDF path
# pdf_path = "/content/image131551472.pdf"

# # Get the response from the model
# output = gemini_output_from_pdf(pdf_path, system_prompt, user_prompt)
# print(output)


In [None]:
# system_prompt = """
# You are a specialist in comprehending receipts and invoices.
# You will be provided with input images or PDFs of receipts and invoices,
# and your task is to extract and structure the data as JSON.
# The JSON should include the following fields:
# - VendorName
# - VendorAddress
# - InvoiceTotal
# - InvoiceDate
# - InvoiceId
# - ProductCode
# - TotalTax
# For each line item, extract:
# - Description (Ensure this is a valid, meaningful description and ignore any descriptions that are just placeholders like '**'.)
# - ProductCode
# - Amount
# - Quantity
# - UnitPrice
# Ensure that the JSON includes the 'confidence' score for each field extracted.
# If any fields or line items are missing, represent them with null values.
# """

# user_prompt = """
# Extract the data from the provided PDF and format it as a JSON object.
# Make sure to extract fields like VendorName, InvoiceTotal, InvoiceDate, and TotalTax, along with line items.
# Each field should include a 'confidence' score.
# Ignore any descriptions that are placeholders like '**' or do not provide meaningful information.
# If any fields or line items are missing, represent them with null values.
# """

# # Example PDF path
# pdf_path = "/content/image131551472.pdf"

# # Get the response from the model
# output = gemini_output_from_pdf(pdf_path, system_prompt, user_prompt)
# print(output)

In [None]:
# import google.generativeai as genai
# import fitz  # PyMuPDF for PDF handling
# from google.colab import userdata
# from time import sleep

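# # Read the API key from Colab secrets; never hard-code it in the notebook.
# # NOTE: a literal key was previously written on this line; treat it as leaked and rotate it.
# GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')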
# genai.configure(api_key=GOOGLE_API_KEY)

# # Model Configuration
# MODEL_CONFIG = {
#   "temperature": 0.2,
#   "top_p": 1,
#   "top_k": 32,
#   "max_output_tokens": 4096,
# }

# # Safety Settings of Model
# safety_settings = [
#   {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
#   {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
#   {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
#   {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
# ]

# model = genai.GenerativeModel(
#     model_name="gemini-1.5-flash",
#     generation_config=MODEL_CONFIG,
#     safety_settings=safety_settings
# )

# def extract_images_from_pdf(pdf_path):
#     doc = fitz.open(pdf_path)
#     images = []
#     for page_num in range(len(doc)):
#         page = doc.load_page(page_num)
#         image_list = page.get_images(full=True)
#         for img_index, img in enumerate(image_list):
#             xref = img[0]
#             base_image = doc.extract_image(xref)
#             image_bytes = base_image["image"]
#             image_mime = base_image["ext"]
#             images.append({"mime_type": f"image/{image_mime}", "data": image_bytes})
#     return images

# def gemini_output_from_pdf(pdf_path, system_prompt, user_prompt, retries=3, delay=10):
#     image_infos = extract_images_from_pdf(pdf_path)
#     if not image_infos:
#         raise ValueError("No images found in the PDF.")

#     input_prompt = [system_prompt]
#     input_prompt.extend(image_infos)
#     input_prompt.append(user_prompt)

#     for attempt in range(retries):
#         try:
#             response = model.generate_content(input_prompt)
#             return response.text
#         except Exception as e:
#             print(f"Attempt {attempt + 1} failed: {e}")
#             if attempt < retries - 1:
#                 sleep(delay)  # Wait before retrying
#             else:
#                 print("All retry attempts failed.")
#                 return None

# # Example usage
# pdf_path = "/content/image131551472.pdf"
# output = gemini_output_from_pdf(pdf_path, system_prompt, user_prompt)
# if output:
#     print(output)
