In [None]:
! pip install toonstream=1.0.1

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Import the library
import toonstream
import json

## 1. Basic Usage - Simple Data Types

TOON supports all standard JSON data types: strings, numbers, booleans, null, objects, and arrays.

In [2]:
# Flat record with one value of each scalar kind: str, int, float, bool, None
simple_data = dict(
    name="John Doe",
    age=30,
    salary=75000.50,
    is_active=True,
    department=None,
)

print("Original Python object:", simple_data, sep="\n")

Original Python object:
{'name': 'John Doe', 'age': 30, 'salary': 75000.5, 'is_active': True, 'department': None}


In [3]:
# Serialize the record to TOON with indent=0 (compact form)
toon_compact = toonstream.encode(simple_data, indent=0)

print("TOON format (compact):", toon_compact, sep="\n")

TOON format (compact):
name: "John Doe"
age: 30
salary: 75000.5
is_active: true
department: null


In [4]:
# Serialize the same record to TOON with indent=2 (pretty-printed form)
toon_pretty = toonstream.encode(simple_data, indent=2)

print("TOON format (pretty):", toon_pretty, sep="\n")

TOON format (pretty):
name: "John Doe"

age: 30

salary: 75000.5

is_active: true

department: null


In [5]:
# Parse the pretty-printed TOON text and check it equals the original dict
decoded = toonstream.decode(toon_pretty)

print("Decoded back to Python:", decoded, sep="\n")
print(f"\nLossless conversion: {simple_data == decoded}")

Decoded back to Python:
{'name': 'John Doe', 'age': 30, 'salary': 75000.5, 'is_active': True, 'department': None}

Lossless conversion: True


## 2. Arrays of Objects - TOON's Strength

TOON excels at encoding arrays of objects by using a tabular format, significantly reducing token usage.

In [6]:
# Employee data - a uniform list of records (same four keys in every row)
employees = {
    "employees": [
        {"id": emp_id, "name": full_name, "role": role, "salary": salary}
        for emp_id, (full_name, role, salary) in enumerate(
            [
                ("Alice Johnson", "Engineer", 95000),
                ("Bob Smith", "Designer", 85000),
                ("Carol Williams", "Manager", 110000),
                ("David Brown", "Engineer", 92000),
                ("Eve Davis", "Designer", 87000),
            ],
            start=1,
        )
    ]
}

print("Employee data:", json.dumps(employees, indent=2), sep="\n")

Employee data:
{
  "employees": [
    {
      "id": 1,
      "name": "Alice Johnson",
      "role": "Engineer",
      "salary": 95000
    },
    {
      "id": 2,
      "name": "Bob Smith",
      "role": "Designer",
      "salary": 85000
    },
    {
      "id": 3,
      "name": "Carol Williams",
      "role": "Manager",
      "salary": 110000
    },
    {
      "id": 4,
      "name": "David Brown",
      "role": "Engineer",
      "salary": 92000
    },
    {
      "id": 5,
      "name": "Eve Davis",
      "role": "Designer",
      "salary": 87000
    }
  ]
}


In [7]:
# Serialize the employee list to TOON; the printed result uses the tabular layout
toon_employees = toonstream.encode(employees, indent=2)

print("TOON format (tabular):", toon_employees, sep="\n")

TOON format (tabular):
employees[5]{id,name,role,salary}:
1,Alice Johnson,Engineer,95000
2,Bob Smith,Designer,85000
3,Carol Williams,Manager,110000
4,David Brown,Engineer,92000
5,Eve Davis,Designer,87000


In [8]:
# Count tokens for compact JSON vs. TOON using the cl100k_base encoding
import tiktoken

encoder = tiktoken.get_encoding("cl100k_base")

json_compact = json.dumps(employees, separators=(",", ":"))
json_tokens, toon_tokens = (
    len(encoder.encode(text)) for text in (json_compact, toon_employees)
)

# Share of the JSON token count that TOON avoids, as a percentage
savings = (json_tokens - toon_tokens) / json_tokens * 100

print(
    f"JSON tokens: {json_tokens}",
    f"TOON tokens: {toon_tokens}",
    f"Token savings: {savings:.1f}%",
    sep="\n",
)

JSON tokens: 97
TOON tokens: 63
Token savings: 35.1%


In [9]:
# Round-trip check: decode the TOON text and compare with the source dict
decoded_employees = toonstream.decode(toon_employees)

print(
    "Decoded successfully:",
    f"Lossless: {employees == decoded_employees}",
    sep="\n",
)

Decoded successfully:
Lossless: True


## 3. Nested Structures

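TOON handles nested objects and arrays as well. In the example below, the nested `departments` array and `location` object are emitted inline in JSON-style syntax rather than in the tabular layout.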

In [10]:
# Nested structure: top-level scalars, a list of departments (each holding its
# own list of teams), and a nested location object
company_data = {
    "company": "TechCorp",
    "founded": 2010,
    "departments": [
        {
            "name": dept_name,
            "head": dept_head,
            "teams": [
                {"name": team_name, "size": team_size}
                for team_name, team_size in dept_teams
            ],
        }
        for dept_name, dept_head, dept_teams in [
            ("Engineering", "Alice Johnson", [("Backend", 8), ("Frontend", 6)]),
            ("Design", "Bob Smith", [("UI/UX", 4), ("Graphics", 3)]),
        ]
    ],
    "location": dict(city="San Francisco", state="CA", country="USA"),
}

print("Company data:", json.dumps(company_data, indent=2), sep="\n")

Company data:
{
  "company": "TechCorp",
  "founded": 2010,
  "departments": [
    {
      "name": "Engineering",
      "head": "Alice Johnson",
      "teams": [
        {
          "name": "Backend",
          "size": 8
        },
        {
          "name": "Frontend",
          "size": 6
        }
      ]
    },
    {
      "name": "Design",
      "head": "Bob Smith",
      "teams": [
        {
          "name": "UI/UX",
          "size": 4
        },
        {
          "name": "Graphics",
          "size": 3
        }
      ]
    }
  ],
  "location": {
    "city": "San Francisco",
    "state": "CA",
    "country": "USA"
  }
}


In [11]:
# Serialize the nested company structure to TOON with indent=2
toon_company = toonstream.encode(company_data, indent=2)

print("TOON format:", toon_company, sep="\n")

TOON format:
company: "TechCorp"

founded: 2010

departments: [{"name": "Engineering", "head": "Alice Johnson", "teams": [{"name": "Backend", "size": 8}, {"name": "Frontend", "size": 6}]}, {"name": "Design", "head": "Bob Smith", "teams": [{"name": "UI/UX", "size": 4}, {"name": "Graphics", "size": 3}]}]

location: {"city": "San Francisco", "state": "CA", "country": "USA"}


In [12]:
# Round-trip the nested structure and confirm nothing was lost
decoded_company = toonstream.decode(toon_company)

print("Lossless conversion:", company_data == decoded_company)

Lossless conversion: True


## 4. Smart Optimization

The encoder includes smart array detection that only uses tabular format when beneficial.

In [15]:
# ToonEncoder exposes the smart_optimize switch demonstrated below
from toonstream import ToonEncoder

# Two-record array (not worth tabular format)
small_array = {
    "users": [
        {"id": user_id, "name": user_name}
        for user_id, user_name in enumerate(("Alice", "Bob"), start=1)
    ]
}

# smart_optimize=False - legacy mode, the array is always written as a table
toon_standard = ToonEncoder(smart_optimize=False).encode(small_array)
print("Legacy mode (always tabular):", toon_standard, "", sep="\n")

# smart_optimize=True (recommended) - tabular form is used only when beneficial
toon_optimized = ToonEncoder(smart_optimize=True).encode(small_array)
print("Smart optimization (recommended):", toon_optimized, sep="\n")

Legacy mode (always tabular):
users[2]{id,name}:
1,Alice
2,Bob

Smart optimization (recommended):
users: [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]


## 5. Performance Comparison

Compare token counts for TOON vs. JSON (compact and pretty-printed) on datasets of different sizes.

In [22]:
import time

def compare_formats(data, description):
    """Print token counts for ``data`` as compact JSON, pretty JSON, and TOON.

    Each serialization is tokenized with tiktoken's ``cl100k_base`` encoding,
    and TOON's percentage savings relative to both JSON forms are reported.
    Only token counts are measured; nothing is timed.

    Args:
        data: Object to serialize with ``json.dumps`` and ``toonstream.encode``.
        description: Heading printed above the results.

    Returns:
        None. The report is written to stdout.
    """
    tokenizer = tiktoken.get_encoding("cl100k_base")

    def count_tokens(text):
        # Number of tokens the encoding produces for `text`
        return len(tokenizer.encode(text))

    # Token counts for the three serializations (TOON uses indent=0)
    compact_count = count_tokens(json.dumps(data, separators=(",", ":")))
    pretty_count = count_tokens(json.dumps(data, indent=2))
    toon_count = count_tokens(toonstream.encode(data, indent=0))

    def percent_saved(baseline):
        # Share of `baseline` tokens that TOON avoids, as a percentage
        return (baseline - toon_count) / baseline * 100

    report_lines = [
        f"\n{description}",
        "-" * 60,
        f"JSON Compact: {compact_count} tokens",
        f"JSON Pretty:  {pretty_count} tokens",
        f"TOON:         {toon_count} tokens",
        f"Savings vs Compact: {percent_saved(compact_count):.1f}%",
        f"Savings vs Pretty:  {percent_saved(pretty_count):.1f}%",
    ]
    print("\n".join(report_lines))

# Baseline run: the 5-employee array defined in section 2
compare_formats(data=employees, description="Array of Objects (5 employees)")


Array of Objects (5 employees)
------------------------------------------------------------
JSON Compact: 97 tokens
JSON Pretty:  182 tokens
TOON:         63 tokens
Savings vs Compact: 35.1%
Savings vs Pretty:  65.4%


In [23]:
# Synthetic dataset: 50 employee records with ids 1..50, all sharing one schema
large_employees = {
    "employees": [
        dict(
            id=n,
            name=f"Employee {n}",
            email=f"employee{n}@company.com",
            # Cycle through the four departments by id
            department=("Engineering", "Design", "Marketing", "Sales")[n % 4],
            salary=50000 + (n * 1000),
            # Every third employee is inactive
            active=bool(n % 3),
        )
        for n in range(1, 51)
    ]
}

compare_formats(
    data=large_employees,
    description="Array of Objects (50 employees)",
)


Array of Objects (50 employees)
------------------------------------------------------------
JSON Compact: 1554 tokens
JSON Pretty:  2609 tokens
TOON:         914 tokens
Savings vs Compact: 41.2%
Savings vs Pretty:  65.0%


## Summary

**When to use TOON:**
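- ✅ Arrays of objects (3+ items): 38-55% token savings (the examples in this notebook measured 35-41% vs. compact JSON and about 65% vs. pretty-printed JSON)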
- ✅ Flat tabular data: Matches CSV efficiency
- ✅ LLM applications where token count matters
- ✅ API responses with repeated structures

**When to stick with JSON:**
- Small arrays of objects (< 3 items)
- Deeply nested configurations
- When standard JSON tooling is required

**Key Features:**
- Lossless conversion (100% compatible with JSON)
- Fast encoding/decoding (1.62x faster average)
- Clean, readable format
- Full Unicode support
- Comprehensive error handling