In [3]:
import json
from collections import defaultdict

def extract_first_article_entity_order(receipt):
    """
    Extracts the order of entity types for only the first 'RECEIPT_ITEM' in the receipt.

    Collection starts at the first entity whose type is 'RECEIPT_ITEM' and
    stops just before the next 'RECEIPT_ITEM'. Entities that appear before
    the first item are skipped so they cannot be mistaken for the item itself.

    Args:
        receipt: Mapping with an "entities" key holding an iterable of
            entities. Index 2 of each entity is read as its type
            (presumably (start, end, label) triples -- confirm against
            the dataset).

    Returns:
        A tuple of entity types beginning with 'RECEIPT_ITEM', or None when
        the receipt contains no 'RECEIPT_ITEM' entity.

    Note:
        If the receipt has only one 'RECEIPT_ITEM', every entity after it is
        included, because only another 'RECEIPT_ITEM' ends the collection.
    """
    first_item = []

    for entity in receipt["entities"]:
        entity_type = entity[2]

        if entity_type == "RECEIPT_ITEM":
            if first_item:
                # A second RECEIPT_ITEM marks the end of the first one.
                break
            first_item.append(entity_type)
        elif first_item:
            # Only collect once the first item has started; anything seen
            # earlier does not belong to an item.
            first_item.append(entity_type)

    if first_item:
        return tuple(first_item)
    return None  # No RECEIPT_ITEM was found

def analyze_receipts(json_data):
    """
    Tallies how often each first-item entity sequence occurs across all receipts.

    Every receipt in json_data is passed to
    extract_first_article_entity_order; falsy results (no sequence found)
    are left out of the tally.

    Returns a defaultdict(int) mapping each sequence tuple to its count.
    """
    tally = defaultdict(int)

    # Evaluated lazily, so receipts are still handled one at a time, in order.
    sequences = (extract_first_article_entity_order(entry) for entry in json_data)
    for sequence in sequences:
        if not sequence:
            continue  # Nothing extracted for this receipt
        tally[sequence] += 1

    return tally

def main():
    """
    Loads 'ner-swiss-receipts.json' from the working directory and prints how
    often each first-item entity sequence occurs.

    Load failures (missing file, bytes that are not valid UTF-8, invalid
    JSON) are reported on stdout and end the run without raising.
    """
    # Read the JSON data from a file. The encoding is explicit so decoding
    # does not depend on the platform's locale default.
    try:
        with open('ner-swiss-receipts.json', 'r', encoding='utf-8') as file:
            json_data = json.load(file)
    except FileNotFoundError:
        print("The file was not found. Please check the file path and name.")
        return
    except UnicodeDecodeError:
        print("The file is not valid UTF-8 text. Please check the file encoding.")
        return
    except json.JSONDecodeError:
        print("The file contains invalid JSON. Please check the file content.")
        return

    # Analyze the receipts
    receipt_order_count = analyze_receipts(json_data)

    # Print the results
    print("First article entity order occurrences:")
    for receipt_order, count in receipt_order_count.items():
        print(f"{receipt_order}: {count}")

# Entry point: run the analysis only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()

First article entity order occurrences:
('RECEIPT_ITEM', 'RECEIPT_ITEM_NAME', 'RECEIPT_ITEM_PRICE'): 29
('RECEIPT_ITEM', 'RECEIPT_ITEM_QUANTITY', 'RECEIPT_ITEM_NAME', 'RECEIPT_ITEM_PRICE'): 51
('RECEIPT_ITEM', 'RECEIPT_ITEM_NAME'): 1
('RECEIPT_ITEM', 'RECEIPT_ITEM_QUANTITY', 'RECEIPT_ITEM_NAME'): 1
('RECEIPT_ITEM', 'RECEIPT_ITEM_QUANTITY', 'RECEIPT_ITEM_NAME', 'RECEIPT_ITEM_PRICE', 'RECEIPT_ITEM_PRICE'): 1
