# Parser demo

Quick check that invoice HTML is parsed as expected (using the Entersoft/Sklavenitis sample).

In [8]:
# Install optional dependencies if running outside the container:
# %pip install fastapi uvicorn

In [9]:
from pathlib import Path
import json
import app

# Load the bundled Entersoft sample and run it through the parser.
sample_path = Path("data/samples/entersoft.mhtml")
html_text = sample_path.read_text(encoding="utf-8")
invoice = app.parse_invoice(html_text)

# Header fields to report, as (printed label, key in the parsed invoice).
summary_fields = (
    ("Parser:", "parser"),
    ("Supplier:", "supplier_name"),
    ("Invoice number:", "invoice_number"),
    ("Invoice date:", "invoice_date"),
    ("Payment method:", "payment_method"),
    ("Total amount:", "total_amount"),
)
for label, key in summary_fields:
    print(label, invoice.get(key))

# Line items: report the count, then pretty-print the first three
# (ensure_ascii=False keeps the Greek descriptions readable).
parsed_items = invoice.get("items", [])
print("Items parsed:", len(parsed_items))
print("\nFirst 3 items:\n", json.dumps(parsed_items[:3], ensure_ascii=False, indent=2))

[96m---- DEBUG ----[0m mhtml html parts found=2 using_len=42490


[96m---- DEBUG ----[0m mhtml html parts found=2 using_len=42490


[96m---- DEBUG ----[0m mhtml detected -> extracted html len=42490


[96m---- DEBUG ----[0m mhtml detected -> extracted html len=42490


Parser: entersoft
Supplier: ΕΛΛΗΝΙΚΕΣ ΥΠΕΡΑΓΟΡΕΣ ΣΚΛΑΒΕΝΙΤΗΣ Α.Ε.Ε.
Invoice number: 03 65983
Invoice date: 20-12-2025
Payment method: POS / e-POS
Total amount: 163.15
Items parsed: 41

First 3 items:
 [
  {
    "id": "3d169e6760",
    "description": "ΞΥΛΟΚΑΡΒΟΥΝΑ 5kg SKLA5",
    "quantity": 1.0,
    "price": 4.86,
    "total": 6.0,
    "participants": []
  },
  {
    "id": "3aaa4aa094",
    "description": "ΗΛΙΕΛΑΙΟ ΜΑΡΑΤΑ 2L",
    "quantity": 1.0,
    "price": 3.45,
    "total": 3.9,
    "participants": []
  },
  {
    "id": "91ccb621d5",
    "description": "ΓΑΛΑ ΦΡΕΣΚΟ ΜΑΡΑΤΑ 3,5%ΛΙΠ.100% ΕΛΛ.1L",
    "quantity": 1.0,
    "price": 0.93,
    "total": 1.05,
    "participants": []
  }
]


In [10]:
# Helper: parse any HTML file path and view summary
def summarize_invoice(path_str: str):
    """Parse the invoice HTML stored at ``path_str`` and print a short summary.

    The file is read as UTF-8 text and handed to ``app.parse_invoice``. The
    parser name, supplier name, item count and total amount are printed.

    Args:
        path_str: Path to the HTML file to parse.

    Returns:
        The object returned by ``app.parse_invoice`` for the file's contents.
    """
    source_file = Path(path_str)
    parsed = app.parse_invoice(source_file.read_text(encoding="utf-8"))
    print("Parsed with:", parsed.get('parser'))
    print("Supplier:", parsed.get('supplier_name'))
    print("Items: %s, total: %s" % (len(parsed.get('items', [])), parsed.get('total_amount')))
    return parsed

# Example usage:
# summarize_invoice("uploads/receipt-xxxx.html")

## QR debug helper
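Run the cell below with the path to your QR image. It first tries a local decode (Pillow/pyzbar) and, with `try_remote=True`, also sends the image to the remote API. If the decoded payload is a URL and `fetch_if_url=True`, the page is fetched; when HTML is available, a summary of the parsed invoice is shown.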


In [11]:
from pathlib import Path
import urllib.error
import app

def inspect_qr_image(path_str: str, try_remote: bool = False, fetch_if_url: bool = False):
    """Decode a QR image and, when invoice HTML is available, parse it.

    Progress is printed at each stage (load, local decode, optional remote
    decode, optional fetch, parse summary).

    Args:
        path_str: Path to the image file containing the QR code.
        try_remote: When True, also pass the image bytes to
            ``app.post_qr_for_data``; failures are printed, not raised.
        fetch_if_url: When True and the decoded payload is an http(s) URL,
            download it with ``app.fetch_html`` and parse the result. When
            False, a URL payload is only reported and never handed to the
            parser (a URL is not invoice HTML).

    Returns:
        ``None`` if the file does not exist; the result of
        ``app.parse_invoice`` when there is text to parse; otherwise a dict
        with the raw decode results under ``local``, ``remote`` and
        ``local_err``.
    """
    path = Path(path_str)
    if not path.exists():
        print(f'File not found: {path}')
        return None
    data = path.read_bytes()
    print(f'Loaded {path} ({len(data)} bytes)')

    decoded_local, local_err = app.decode_qr_locally(data)
    if decoded_local:
        print('Local decode: success')
        print('  preview:', decoded_local[:200])
    else:
        print(f'Local decode: none (err={local_err})')

    decoded_remote = None
    if try_remote:
        try:
            decoded_remote = app.post_qr_for_data(data, filename=path.name)
            print('Remote decode: success')
            print('  preview:', str(decoded_remote)[:200])
        except urllib.error.URLError as e:
            print('Remote decode URLError:', e)
        except Exception as e:
            # Best-effort debugging aid: report any remote failure and carry on.
            print('Remote decode error:', e)

    # The local result wins when both decoders produced something.
    qr_payload = decoded_local or decoded_remote
    html_text = None
    if isinstance(qr_payload, str):
        if qr_payload.startswith(('http://', 'https://')):
            if fetch_if_url:
                try:
                    html_text = app.fetch_html(qr_payload)
                    print(f'Fetched HTML from QR URL (len={len(html_text)})')
                except urllib.error.URLError as e:
                    print('Fetch HTML URLError:', e)
            else:
                # Do not feed the URL string itself to the HTML parser.
                print('QR payload is a URL; pass fetch_if_url=True to fetch and parse it')
        else:
            # Non-URL text payloads are parsed as-is.
            html_text = qr_payload

    if html_text:
        parsed = app.parse_invoice(html_text)
        print('Parsed invoice summary:')
        print('  parser:', parsed.get('parser'))
        print('  supplier:', parsed.get('supplier_name'))
        print('  items:', len(parsed.get('items', [])), 'total:', parsed.get('total_amount'))
        return parsed
    return {'local': decoded_local, 'remote': decoded_remote, 'local_err': local_err}

# Example: inspect_qr_image('path/to/receipt.jpg', try_remote=False, fetch_if_url=False)


In [12]:
# Re-execute app.py in place so later cells pick up edits to its source
# without a kernel restart.
import importlib
import app

importlib.reload(app)


<module 'app' from '/root/project-application-invoice-splitter/app.py'>

In [13]:
# Decode the sample QR photo locally only (no remote API call) and, because
# fetch_if_url=True, download and parse the invoice if the payload is a URL.
inspect_qr_image('./data/samples/entersoft.jpg', try_remote=False, fetch_if_url=True)


Loaded data/samples/entersoft.jpg (121179 bytes)
[96m---- DEBUG ----[0m qr decode local: success variant=thresh size=(716, 669) symbols=1 payload_len=94


[96m---- DEBUG ----[0m qr decode local: success variant=thresh size=(716, 669) symbols=1 payload_len=94


Local decode: success
  preview: https://e-invoicing.gr/edocuments/ViewInvoice/-1/b1655ed3-5453-4b5a-a3db-be01d5fd50e2_6ee4ik98
[96m---- DEBUG ----[0m fetch_html basic len=3037


[96m---- DEBUG ----[0m fetch_html basic len=3037


[96m---- DEBUG ----[0m fetch_html basic len=46990


[96m---- DEBUG ----[0m fetch_html basic len=46990


[96m---- DEBUG ----[0m fetch_html entersoft iframe resolved url=https://e-invoicing.gr/api/GetInvoice?contentType=PEPPOL&hashToken=6ee4ik98&intRefDocID=b1655ed3-5453-4b5a-a3db-be01d5fd50e2&isPreview=True&ofenm=-1 len=46990


[96m---- DEBUG ----[0m fetch_html entersoft iframe resolved url=https://e-invoicing.gr/api/GetInvoice?contentType=PEPPOL&hashToken=6ee4ik98&intRefDocID=b1655ed3-5453-4b5a-a3db-be01d5fd50e2&isPreview=True&ofenm=-1 len=46990


Fetched HTML from QR URL (len=46990)
Parsed invoice summary:
  parser: entersoft
  supplier: ΕΛΛΗΝΙΚΕΣ ΥΠΕΡΑΓΟΡΕΣ ΣΚΛΑΒΕΝΙΤΗΣ Α.Ε.Ε.
  items: 41 total: 163.15


{'supplier_name': 'ΕΛΛΗΝΙΚΕΣ ΥΠΕΡΑΓΟΡΕΣ ΣΚΛΑΒΕΝΙΤΗΣ Α.Ε.Ε.',
 'supplier_vat': '800764388',
 'invoice_number': '03 65983',
 'invoice_date': '20-12-2025',
 'currency': 'EUR',
 'total_amount': 163.15,
 'payment_method': 'POS / e-POS',
 'items': [{'id': '079dfb414c',
   'description': 'ΞΥΛΟΚΑΡΒΟΥΝΑ 5kg SKLA5',
   'quantity': 1.0,
   'price': 4.86,
   'total': 6.0,
   'participants': []},
  {'id': 'aa7942f617',
   'description': 'ΗΛΙΕΛΑΙΟ ΜΑΡΑΤΑ 2L',
   'quantity': 1.0,
   'price': 3.45,
   'total': 3.9,
   'participants': []},
  {'id': '274c015b55',
   'description': 'ΓΑΛΑ ΦΡΕΣΚΟ ΜΑΡΑΤΑ 3,5%ΛΙΠ.100% ΕΛΛ.1L',
   'quantity': 1.0,
   'price': 0.93,
   'total': 1.05,
   'participants': []},
  {'id': 'f71626bb81',
   'description': 'ΓΑΛΑ ΦΡΕΣΚΟ ΜΑΡΑΤΑ 3,5%ΛΙΠ.100% ΕΛΛ.1L',
   'quantity': 1.0,
   'price': 0.93,
   'total': 1.05,
   'participants': []},
  {'id': 'c22032fee5',
   'description': 'ΡΕΓΓΕΣ ΚΑΠΝΙΣΤΕΣ ΚΑΡΑΓΚΟΥΝΗ FAO27..',
   'quantity': 0.322,
   'price': 7.85714,
   'total': 2.86,
   '

In [14]:
# Fetch the invoice page behind the sample QR URL. Only a bounded preview is
# printed: the full document is ~47 KB and would flood the notebook output.
# The complete text stays available in `html` for further inspection.
html = app.fetch_html("https://e-invoicing.gr/edocuments/ViewInvoice/-1/b1655ed3-5453-4b5a-a3db-be01d5fd50e2_6ee4ik98")
print(f"Fetched {len(html)} characters; first 2000 shown below")
print(html[:2000])

[96m---- DEBUG ----[0m fetch_html basic len=3037


[96m---- DEBUG ----[0m fetch_html basic len=3037


[96m---- DEBUG ----[0m fetch_html basic len=46990


[96m---- DEBUG ----[0m fetch_html basic len=46990


[96m---- DEBUG ----[0m fetch_html entersoft iframe resolved url=https://e-invoicing.gr/api/GetInvoice?contentType=PEPPOL&hashToken=6ee4ik98&intRefDocID=b1655ed3-5453-4b5a-a3db-be01d5fd50e2&isPreview=True&ofenm=-1 len=46990


[96m---- DEBUG ----[0m fetch_html entersoft iframe resolved url=https://e-invoicing.gr/api/GetInvoice?contentType=PEPPOL&hashToken=6ee4ik98&intRefDocID=b1655ed3-5453-4b5a-a3db-be01d5fd50e2&isPreview=True&ofenm=-1 len=46990


<!DOCTYPE html><html>
  <head>
    <META http-equiv="Content-Type" content="text/html; charset=utf-16">
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1"><script src="https://kendo.cdn.telerik.com/2022.2.510/js/jquery.min.js"></script><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
    <style>
          .cf th, .cf tr, .cf td{
          border:0!important;
          }


          @media print {
          .printingButton {
          display: none;
          }

          .backgrey{
          background-color:white
          }

          .fontSize12pt{font-size: 18px!important;}
          .fontSize8pt{font-size: 13px!important;}
          .fonSize10pt{font-size: 15px!important;}
          .fontSize6pt{font-size: 12px!important;}
          }

          @media only screen and (max-w