## 1. Basic Setup

Import required modules and initialize the container engine.


In [None]:
from pathlib import Path

from dbx_container.data.scraper import RuntimeScraper
from dbx_container.engine import RuntimeContainerEngine
from dbx_container.utils.logging import get_logger

# Logger used by every cell below
logger = get_logger(__name__)

# Options handed to the container engine, collected in one place.
# NOTE(review): verify_ssl=False presumably disables TLS certificate checks -
# confirm this is intended outside of local experiments.
engine_settings = {
    "data_dir": Path("../data/"),
    "max_workers": 5,
    "verify_ssl": False,
    "latest_lts_count": 3,  # Only build latest 3 LTS versions
}

# Create the engine the remaining cells call into
engine = RuntimeContainerEngine(**engine_settings)

logger.info("Container engine initialized successfully")

## 2. Fetch Runtime Information

Fetch available runtimes from Databricks documentation.


In [None]:
# Fetch the supported runtimes, keep the non-ML LTS ones, and list the newest three.
import re

logger.info("Fetching runtime information from Databricks...")

scraper = RuntimeScraper(verify_ssl=False)
runtimes = scraper.get_supported_runtimes()

logger.info(f"Successfully fetched {len(runtimes)} runtimes")


def _version_sort_key(runtime):
    """Return the numeric parts of ``runtime.version`` as a tuple of ints.

    Sorting the raw version strings compares them character by character,
    which ranks "9.1 LTS" above "17.3 LTS". Comparing (9, 1) with (17, 3)
    orders them numerically. A version without digits yields an empty tuple
    and therefore sorts as the oldest.
    """
    return tuple(int(part) for part in re.findall(r"\d+", runtime.version))


# Filter LTS runtimes (non-ML), newest version first
lts_runtimes = sorted(
    (r for r in runtimes if "LTS" in r.version and not r.is_ml),
    key=_version_sort_key,
    reverse=True,
)

logger.info(f"Found {len(lts_runtimes)} LTS runtimes")
logger.info("\nLatest 3 LTS runtimes:")
for i, runtime in enumerate(lts_runtimes[:3], 1):
    logger.info(
        f"  {i}. {runtime.version} (Python {runtime.system_environment.python_version}, {runtime.system_environment.operating_system})"
    )

## 3. Generate Dockerfiles for a Specific Runtime

Build all image types for a specific runtime version.


In [None]:
# Look up one LTS runtime by its exact version label and build every image type for it
target_version = "17.3 LTS"
target_runtime = next(
    (candidate for candidate in lts_runtimes if candidate.version == target_version),
    None,  # no LTS runtime carries this label
)

if not target_runtime:
    logger.warning(f"Runtime {target_version} not found")
else:
    logger.info(f"\n[bold]Building images for {target_version}[/bold]")

    # Build all images for this runtime
    generated_files = engine.build_all_images_for_runtime(
        runtime=target_runtime,
        registry=None,  # Use local tag, or specify registry like "ghcr.io/username"
    )

    # Overall count first, then a short per-image-type listing
    total_files = sum(map(len, generated_files.values()))
    logger.info(f"\nSuccessfully generated {total_files} files across {len(generated_files)} image types")

    for image_type, files in generated_files.items():
        if not files:
            continue
        logger.info(f"\n{image_type}:")
        # Only the first two paths are printed; the rest are summarized as a count
        hidden_count = len(files) - 2
        for path in files[:2]:
            logger.info(f"  - {path}")
        if hidden_count > 0:
            logger.info(f"  ... and {hidden_count} more")

## 4. Build Non-Runtime-Specific Images

Generate base images that don't depend on specific runtime versions.


In [None]:
# Generate the base images (minimal, minimal-gpu) that do not depend on a runtime version
logger.info("\n[bold]Building non-runtime-specific images...[/bold]")

base_files = engine.build_non_runtime_specific_images(registry=None)

# The result maps each image type to the files generated for it
total_base_files = sum(map(len, base_files.values()))
logger.info(f"\nGenerated {total_base_files} base image files")

# List every generated path, grouped by image type
for image_type, paths in base_files.items():
    logger.info(f"\n{image_type}: {len(paths)} file(s)")
    for path in paths:
        logger.info(f"  - {path}")

## 5. Build All Images for All Runtimes

Generate Dockerfiles for all image types across the latest LTS runtimes.


In [None]:
# Build everything (respecting latest_lts_count=3) and print a per-runtime summary
logger.info("\n[bold]Starting comprehensive Dockerfile generation...[/bold]")
logger.info("This will generate Dockerfiles for all image types across latest 3 LTS runtimes\n")

all_generated_files = engine.build_all_images_for_all_runtimes(registry=None)

# Display summary
# The emoji and bullet characters below were stored as mis-decoded UTF-8
# (mojibake); they are restored to the intended code points.
logger.print("\n" + "=" * 60)
logger.print("[bold cyan]📊 Build Summary[/bold cyan]")
logger.print("=" * 60)

# The result maps runtime key -> image type -> generated files
total_files_count = 0
for runtime_key, image_types in all_generated_files.items():
    runtime_file_count = sum(len(files) for files in image_types.values())
    total_files_count += runtime_file_count

    logger.info(f"\n[bold]{runtime_key}[/bold] ({runtime_file_count} files)")
    for image_type, files in image_types.items():
        # Image types that produced no files are left out of the summary
        if files:
            logger.info(f"  • {image_type}: {len(files)} file(s)")

logger.print(f"\n[bold green]✅ Total: {total_files_count} files generated[/bold green]")

## 6. Generate Build Matrix for CI/CD

Create a build matrix JSON file for GitHub Actions or other CI/CD pipelines.


In [None]:
import json

# Produce several build-matrix variants and persist the Python one for CI/CD
def _config_count(matrix):
    """Return how many entries sit under the matrix's "include" key (0 if absent)."""
    return len(matrix.get("include", []))


logger.info("\n[bold]Generating build matrices for CI/CD...[/bold]")

# Every runtime, every image type
matrix_full = engine.generate_build_matrix(only_lts=False, image_type=None, latest_lts_count=None)
logger.info(f"\nFull matrix: {_config_count(matrix_full)} build configurations")

# LTS runtimes only, every image type
matrix_lts = engine.generate_build_matrix(only_lts=True, image_type=None, latest_lts_count=None)
logger.info(f"LTS-only matrix: {_config_count(matrix_lts)} build configurations")

# GPU images for the latest 3 LTS runtimes
matrix_gpu = engine.generate_build_matrix(only_lts=True, image_type="gpu", latest_lts_count=3)
logger.info(f"Latest 3 LTS GPU matrix: {_config_count(matrix_gpu)} build configurations")

# Python images for the latest 3 LTS runtimes
matrix_python = engine.generate_build_matrix(only_lts=True, image_type="python", latest_lts_count=3)
logger.info(f"Latest 3 LTS Python matrix: {_config_count(matrix_python)} build configurations")

# Show the first Python entry as an example, if there is one
python_entries = matrix_python.get("include")
if python_entries:
    logger.print("\n[bold]Sample Matrix Entry (Python):[/bold]")
    logger.print(json.dumps(python_entries[0], indent=2))

# Write the Python matrix next to the generated image directories
matrix_path = Path("../data/build_matrix.json")
matrix_json = json.dumps(matrix_python, indent=2)
matrix_path.write_text(matrix_json)
logger.info(f"\nBuild matrix saved to: {matrix_path}")

## 7. Inspect Generated Files

Examine the generated Dockerfiles and requirements files.


In [None]:
# Summarize the directories under ../data and preview one Python image's
# Dockerfile and requirements.txt.
data_dir = Path("../data")
# A missing data directory is treated as "nothing generated yet" instead of
# letting iterdir() raise FileNotFoundError.
image_type_dirs = (
    [d for d in data_dir.iterdir() if d.is_dir() and d.name not in ["__pycache__", "dbfsfuse"]]
    if data_dir.is_dir()
    else []
)

logger.info("\n[bold]Generated image type directories:[/bold]")
for img_dir in sorted(image_type_dirs):
    runtime_dirs = [d for d in img_dir.iterdir() if d.is_dir()]
    logger.info(f"  • {img_dir.name}: {len(runtime_dirs)} runtime(s)")

# Inspect a Python Dockerfile
python_dir = data_dir / "python"
if python_dir.is_dir():
    # Keep sub-directories only (a stray file cannot contain a Dockerfile) and
    # sort them so the previewed variant is the same on every run; iterdir()
    # yields entries in arbitrary order.
    runtime_variants = sorted(d for d in python_dir.iterdir() if d.is_dir())
    if runtime_variants:
        variant_dir = runtime_variants[0]
        dockerfile = variant_dir / "Dockerfile"

        if dockerfile.exists():
            logger.info(f"\n[bold]Dockerfile Preview:[/bold] {dockerfile.relative_to(data_dir)}")
            content = dockerfile.read_text()
            lines = content.split("\n")

            # Show first 25 lines
            logger.print("\n" + "\n".join(lines[:25]))
            if len(lines) > 25:
                logger.print(f"\n... ({len(lines) - 25} more lines)")

            logger.info(f"\nTotal: {len(content)} bytes, {len(lines)} lines")

        # Check for requirements.txt
        requirements = variant_dir / "requirements.txt"
        if requirements.exists():
            logger.info(f"\n[bold]Requirements Preview:[/bold] {requirements.relative_to(data_dir)}")
            content = requirements.read_text()
            # Drop blank lines and comment lines, including indented comments
            lines = [
                line
                for line in content.split("\n")
                if line.strip() and not line.lstrip().startswith("#")
            ]

            # Show first 15 packages
            logger.print("\n" + "\n".join(lines[:15]))
            if len(lines) > 15:
                logger.print(f"\n... ({len(lines) - 15} more packages)")

            logger.info(f"\nTotal packages: {len(lines)}")
else:
    logger.warning("No python directory found. Run the build examples first.")

## 8. Advanced: Generate Single Dockerfile

Generate a single Dockerfile for maximum control over the process.


In [None]:
# Walk through the individual engine calls that produce one Python Dockerfile
if lts_runtimes:
    # Work with the first entry of the LTS list
    target_runtime = lts_runtimes[0]
    logger.info(f"\n[bold]Generating single Python Dockerfile for {target_runtime.version}[/bold]")

    # Configuration entry registered for the "python" image type
    config = engine.image_types.get("python")

    # Variations of this runtime (different OS/Python combinations)
    variations = engine.get_runtime_variations(target_runtime)
    logger.info(f"Found {len(variations)} variations for this runtime")

    if not variations:
        logger.warning("No variations found for this runtime")
    else:
        variation = variations[0]
        logger.info(f"Using variation: {variation['suffix']}")

        # Arguments shared by the generate and save calls
        image_selection = {
            "runtime": target_runtime,
            "image_type": "python",
            "variation": variation,
        }

        # Render the Dockerfile text
        dockerfile_content = engine.generate_dockerfile_for_image_type(
            config=config,
            registry=None,  # or "ghcr.io/myorg"
            **image_selection,
        )

        # Write it out through the engine
        saved_path = engine.save_dockerfile(dockerfile_content=dockerfile_content, **image_selection)

        logger.info(f"\nDockerfile saved to: {saved_path}")
        line_total = len(dockerfile_content.splitlines())
        logger.info(f"Dockerfile size: {len(dockerfile_content)} bytes, {line_total} lines")

        # Print the head of the file and note how much was left out
        content_lines = dockerfile_content.split("\n")
        remaining = len(content_lines) - 20
        logger.print("\n[bold]Preview (first 20 lines):[/bold]")
        logger.print("\n".join(content_lines[:20]))
        if remaining > 0:
            logger.print(f"\n... ({remaining} more lines)")

## 9. Understanding Image Dependencies

Visualize the dependency chain between different image types.


In [None]:
# Print the hard-coded image dependency chains, then the details of every
# image type registered on the engine.
logger.print("\n" + "=" * 60)
logger.print("[bold cyan]Image Dependency Chains[/bold cyan]")
logger.print("=" * 60 + "\n")

# Each chain is (heading, layers listed from base image to final image).
# The arrow and bullet characters were stored as mis-decoded UTF-8 (mojibake);
# they are restored to the intended code points.
dependency_chains = [
    (
        "[bold]Standard Chain (CPU):[/bold]",
        [
            "ubuntu:24.04",
            "minimal (adds Java)",
            "standard (adds FUSE, SSH)",
            "python (adds Python env, packages)",
        ],
    ),
    (
        "\n[bold]GPU Chain:[/bold]",
        [
            "nvidia/cuda:11.8.0",
            "minimal-gpu (adds Java)",
            "standard-gpu (adds FUSE, SSH)",
            "python-gpu (adds Python env, packages)",
        ],
    ),
    (
        "\n[bold]Standalone GPU:[/bold]",
        [
            "nvidia/cuda:11.8.0",
            "gpu (adds Java, Spark, Python)",
        ],
    ),
]

for heading, layers in dependency_chains:
    logger.info(heading)
    for position, layer in enumerate(layers):
        # An arrow separates consecutive layers; none is printed before the first
        if position:
            logger.info("    ↓")
        logger.info(f"  {layer}")

logger.info("\n[bold]Image Type Details:[/bold]")
for img_type, config in engine.image_types.items():
    runtime_specific = "Yes" if config["runtime_specific"] else "No"
    # A missing or empty "depends_on" entry is shown as "None"
    depends_on = config.get("depends_on") or "None"
    logger.info(f"\n  • {img_type}")
    logger.info(f"    Description: {config['description']}")
    logger.info(f"    Depends on: {depends_on}")
    logger.info(f"    Runtime-specific: {runtime_specific}")

## Next Steps

Now you can:

- Use the generated Dockerfiles to build container images
- Integrate the build matrix into your CI/CD pipeline
- Customize image types by modifying the engine configuration
- See `list.ipynb` for runtime analysis and discovery examples
