In [10]:
# Install the OCR client into the environment of the running kernel.
# The explicit %pip magic does not depend on IPython's automagic setting and
# cannot be shadowed by a variable named `pip`; -q keeps the cell output short.
%pip install -q ollama-ocr


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:
import numpy as np
import cv2

def rotate_to_horizontal(image, min_angle=45.0):
    """Rotate ``image`` by the median angle of its detected line segments.

    Edges are extracted with Canny, straight segments are found with the
    probabilistic Hough transform, and the median segment angle (degrees,
    measured in image coordinates) is used as the skew estimate.

    Args:
        image: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``),
            e.g. the result of ``cv2.imread``.
        min_angle: The image is rotated only when the absolute median angle
            is strictly greater than this many degrees. Defaults to 45,
            the threshold that was previously hard-coded.

    Returns:
        The rotated image when the skew exceeds ``min_angle``; otherwise the
        input ``image`` object itself, unchanged. The input is also returned
        unchanged when no line segment is detected.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 100, minLineLength=100, maxLineGap=10)

    # HoughLinesP yields None (not an empty array) when no segment qualifies;
    # iterating over it would raise TypeError, so there is nothing to estimate.
    if lines is None or len(lines) == 0:
        return image

    # Each detected segment is (x1, y1, x2, y2); flatten to one row per segment.
    segments = np.asarray(lines).reshape(-1, 4)
    delta_x = segments[:, 2] - segments[:, 0]
    delta_y = segments[:, 3] - segments[:, 1]
    median_angle = float(np.median(np.degrees(np.arctan2(delta_y, delta_x))))

    if abs(median_angle) > min_angle:
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)
        # The output canvas keeps the input's width and height.
        return cv2.warpAffine(image, rotation_matrix, (w, h), flags=cv2.INTER_CUBIC)
    return image

# Correct orientation
# cv2.imread signals a missing or undecodable file by returning None rather
# than raising, so check it here instead of failing later inside cvtColor.
source_image = cv2.imread('test.JPG')
if source_image is None:
    raise OSError("Could not read image 'test.JPG' (file missing or not a decodable image)")

corrected_image = rotate_to_horizontal(source_image)

# cv2.imwrite reports failure through its boolean return value.
if not cv2.imwrite("corrected.jpg", corrected_image):  # Save corrected image
    raise OSError("Could not write image 'corrected.jpg'")

True

In [None]:
# Prompt for the vision model; it is handed to `ocr.process_image` as
# `custom_prompt` in the OCR cell further down.
# The `...` / `[...]` entries in the JSON skeleton are placeholders for the
# model to fill in, so the skeleton itself is not parseable JSON.
# NOTE(review): instruction 3 mentions IGST and `total` has `igst_total`, but
# the per-line-item schema only lists `cgst` and `sgst` — confirm whether an
# `igst` entry per line item is needed for inter-state invoices.
text = """
You are an intelligent parser that extracts structured invoice data from raw OCR text and returns it as JSON.

Your responsibilities:
1. Parse all relevant fields:
   - Vendor details (name, address, contact, GSTIN)
   - Invoice number and date
   - Customer billing and shipping info
   - Bank account details (bank name, account number, IFSC, UPI ID if available)
   - Final invoice totals (subtotal, tax breakdown, grand total)
   - Line items with full breakdown

2. Do not miss any line items. Each must include:
   - description
   - hsn
   - uom
   - quantity
   - rate
   - taxable_value
   - cgst (rate, amount)
   - sgst (rate, amount)

3. All tax amounts (CGST, SGST, IGST) and totals must match what's present in the text.

Return only valid JSON, following this structure exactly:
{
  "vendor": {
    "name": ...,
    "address": ...,
    "contact": [...],
    "gstin": ...
  },
  "invoice": {
    "number": ...,
    "date": ...
  },
  "bill_to": {
    "name": ...,
    "address": ...
  },
  "ship_to": {
    "name": ...,
    "address": ...
  },
  "bank_details": {
    "bank_name": ...,
    "account_number": ...,
    "ifsc": ...,
    "upi_id": ...
  },
  "line_items": [
    {
      "sno": ...,
      "description": ...,
      "hsn": ...,
      "uom": ...,
      "quantity": ...,
      "rate": ...,
      "taxable_value": ...,
      "cgst": {
        "rate": ...,
        "amount": ...
      },
      "sgst": {
        "rate": ...,
        "amount": ...
      }
    }
  ],
  "total": {
    "subtotal": ...,
    "cgst_total": ...,
    "sgst_total": ...,
    "igst_total": ...,
    "grand_total": ...
  }
}
"""

In [23]:


from ollama_ocr import OCRProcessor

# Build the OCR processor around a vision model served by Ollama
# (any vision model available on Ollama can be named here).
ocr = OCRProcessor(
    model_name='llama3.2-vision:11b',
    max_workers=4,
)

# Keyword arguments for the single-image run, gathered in one place.
ocr_request = {
    "image_path": "../ocr_parser/2.jpg",  # file to run OCR on
    "format_type": "text",  # listed options: markdown, text, json, structured, key_value
    "preprocess": True,
    "custom_prompt": text,  # invoice-extraction prompt defined in an earlier cell
    "language": "English",  # language of the text in the image
}

# Run the OCR and show what the model returned.
result = ocr.process_image(**ocr_request)
print(result)

Using custom prompt: 
You are an intelligent parser that extracts structured invoice data from raw OCR text and returns it as JSON.

Your responsibilities:
1. Parse all relevant fields:
   - Vendor details (name, address, contact, GSTIN)
   - Invoice number and date
   - Customer billing and shipping info
   - Bank account details (bank name, account number, IFSC, UPI ID if available)
   - Final invoice totals (subtotal, tax breakdown, grand total)
   - Line items with full breakdown

2. Do not miss any line items. Each must include:
   - description
   - hsn
   - uom
   - quantity
   - rate
   - taxable_value
   - cgst (rate, amount)
   - sgst (rate, amount)

3. All tax amounts (CGST, SGST, IGST) and totals must match what's present in the text.

Return only valid JSON, following this structure exactly:
{
  "vendor": {
    "name": ...,
    "address": ...,
    "contact": [...],
    "gstin": ...
  },
  "invoice": {
    "number": ...,
    "date": ...
  },
  "bill_to": {
    "name": ...,


KeyboardInterrupt: 