# CKAN to ESS-DIVE transfer (with Tapis helpers)

Use this notebook to pull datasets from the TACC CKAN instance, evaluate required metadata, and publish the package to ESS-DIVE. The widgets below keep secrets (API keys and tokens) out of the code cells.


## Notebook goals
- Authenticate against CKAN, ESS-DIVE, and (optionally) a Tapis system for staging files
- Browse CKAN datasets, preview their metadata, and spot gaps that ESS-DIVE requires
- Map CKAN metadata to an ESS-DIVE payload you can edit before submission
- Stage resource files locally or on a Tapis Files system and push metadata to ESS-DIVE


## Prerequisites
- CKAN API key (only needed for private datasets; public data works anonymously)
- ESS-DIVE dataset API token (https://docs.ess-dive.lbl.gov/programmatic-tools/ess-dive-dataset-api)
- Optional: Tapis base URL, access token, and a Files system ID + path where staged data should land
- This notebook expects the CKAN instance at `https://ckan.tacc.utexas.edu/` by default


In [1]:
import copy
import importlib
import json
import logging
from typing import Any, Dict, List, Optional

import ipywidgets as widgets
from IPython.display import Markdown, display

import ckan_essdive
importlib.reload(ckan_essdive)
from ckan_essdive import (
    CkanEssDiveClient,
    REQUIRED_FIELDS,
    fetch_ckan_token_via_tapis,
    test_ckan_status,
)

# Emit log records at INFO level and above, formatted as "<LEVEL> <message>".
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")




## Configure endpoints and tokens
Fill in the endpoints and tokens you want to use.
- CKAN: provide the site URL and fetch a CKAN token via Tapis username/password (same flow as Ckan-metadata-netcdf).
- ESS-DIVE: paste your ESS-DIVE dataset API token.
- Staging: local directory where CKAN resources are downloaded.


In [None]:

# Values pre-filled in the form; each one can be changed in its widget.
DEFAULT_CKAN_URL = "https://ckan.tacc.utexas.edu"
DEFAULT_ESS_URL = "https://data.ess-dive.lbl.gov/metadata/api"
DEFAULT_STAGE_DIR = "./staging"

# Token obtained by the login button; stays empty until a fetch succeeds.
ckan_token_value = ""


def _fixed_width(width: str) -> widgets.Layout:
    """Return a new Layout with the given CSS width (one instance per widget)."""
    return widgets.Layout(width=width)


# CKAN endpoint plus the credentials used to request a CKAN token.
ckan_url_input = widgets.Text(
    value=DEFAULT_CKAN_URL,
    description="CKAN URL",
    layout=_fixed_width("500px"),
)
ckan_auth_base_input = widgets.Text(
    value="https://portals.tapis.io",
    description="Auth URL",
    layout=_fixed_width("300px"),
)
ckan_user_input = widgets.Text(
    value="",
    description="CKAN user",
    layout=_fixed_width("250px"),
)
ckan_pass_input = widgets.Password(
    value="",
    description="Password",
    layout=_fixed_width("250px"),
)
ckan_login_button = widgets.Button(
    description="Get CKAN token",
    button_style="warning",
)
ckan_login_status = widgets.Label(value="")
ckan_auth_output = widgets.Output()
ckan_token_preview = widgets.Label(value="")

# Connectivity check against the configured CKAN site.
ckan_test_button = widgets.Button(
    description="Test CKAN access",
    button_style="info",
)
ckan_test_output = widgets.Output()

# ESS-DIVE endpoint and token (masked input).
ess_url_input = widgets.Text(
    value=DEFAULT_ESS_URL,
    description="ESS-DIVE API",
    layout=_fixed_width("500px"),
)
ess_token_input = widgets.Password(
    value="",
    description="ESS token",
    layout=_fixed_width("300px"),
)

# Directory used for staged files.
local_stage_input = widgets.Text(
    value=DEFAULT_STAGE_DIR,
    description="Local stage",
    layout=_fixed_width("500px"),
)

# Ticked by default, so nothing is written to ESS-DIVE until it is unticked.
dry_run_toggle = widgets.Checkbox(
    value=True,
    description="Dry-run (skip ESS-DIVE write)",
)


# Helpers

def build_client() -> CkanEssDiveClient:
    """Create a client from the values currently held by the configuration form.

    The CKAN key is the module-level ``ckan_token_value``; every other setting is
    read from its widget at call time, so each call reflects the latest edits.
    """
    settings = {
        "ckan_url": ckan_url_input.value,
        "ckan_key": ckan_token_value,
        "ess_url": ess_url_input.value,
        "ess_token": ess_token_input.value,
        "local_stage": local_stage_input.value,
        "dry_run": dry_run_toggle.value,
    }
    return CkanEssDiveClient(**settings)


def on_fetch_ckan_token(_=None):
    """Button handler: request a CKAN token through Tapis and remember it.

    Reads the username, password and auth base URL from the login widgets and
    passes them to ``fetch_ckan_token_via_tapis``. On success the token is stored
    in the module-level ``ckan_token_value`` (which ``build_client`` reads) and a
    shortened preview is shown. On any failure, including an empty token, the
    stored token and the preview are both cleared, so the form cannot show
    "no token" while an older token is still being used.

    The single argument is whatever ``on_click`` passes in; it is not used.
    """
    global ckan_token_value
    ckan_login_status.value = "Requesting CKAN token via Tapis..."
    ckan_auth_output.clear_output()
    try:
        token = fetch_ckan_token_via_tapis(
            username=ckan_user_input.value,
            password=ckan_pass_input.value,
            base_url=ckan_auth_base_input.value,
        )
        if not token:
            # An empty/None token is not a usable credential: treat it as a failure
            # rather than announcing success.
            raise ValueError("no CKAN token was returned")
    except Exception as exc:
        # Keep the stored state in step with the cleared preview.
        ckan_token_value = ""
        ckan_login_status.value = "⚠️ Token fetch failed"
        ckan_token_preview.value = ""
        with ckan_auth_output:
            print(f"Error: {exc}")
        return

    ckan_token_value = token
    # Head (6) + tail (4) would reveal a token of 10 characters or fewer in full,
    # so only abbreviate longer tokens and fully mask short ones.
    preview = token[:6] + "..." + token[-4:] if len(token) > 10 else "..."
    ckan_token_preview.value = f"CKAN token (preview): {preview}"
    ckan_login_status.value = "✅ CKAN token fetched via Tapis"
    with ckan_auth_output:
        print("Token retrieved via Tapis")


def on_test_ckan(_=None):
    """Button handler: run the CKAN status check and show the outcome.

    Any exception raised while building the client or running the check is
    reported as text in ``ckan_test_output`` instead of propagating.
    """
    ckan_test_output.clear_output()
    try:
        report = f"CKAN status: {test_ckan_status(build_client())}"
    except Exception as exc:
        report = f"Error talking to CKAN: {exc}"
    with ckan_test_output:
        print(report)


# Connect each button to its click handler.
for _button, _handler in (
    (ckan_login_button, on_fetch_ckan_token),
    (ckan_test_button, on_test_ckan),
):
    _button.on_click(_handler)

# Build the form section by section; child order is the on-screen order.
ckan_section = [
    widgets.HTML("<b>CKAN</b>"),
    ckan_url_input,
    widgets.HBox([ckan_auth_base_input, ckan_user_input, ckan_pass_input, ckan_login_button]),
    ckan_login_status,
    ckan_auth_output,
    ckan_token_preview,
    widgets.HBox([ckan_test_button]),
    ckan_test_output,
]
ess_section = [
    widgets.HTML("<b>ESS-DIVE</b>"),
    widgets.HBox([ess_url_input, ess_token_input]),
]
staging_section = [
    widgets.HTML("<b>Staging</b>"),
    local_stage_input,
    dry_run_toggle,
]
config_box = widgets.VBox(ckan_section + ess_section + staging_section)

# ipywidgets is required; run.sh installs extensions to avoid model-not-found issues.
# The bare expression below must stay last so the cell renders the form.
config_box


VBox(children=(HTML(value='<b>CKAN</b>'), Text(value='https://ckan.tacc.utexas.edu', description='CKAN URL', l…

## Browse CKAN and inspect metadata
Click **Load CKAN datasets** to fetch the dataset list; text in the search box narrows it down. Selecting a dataset shows the mapped payload plus any fields ESS-DIVE requires that are still missing.


In [None]:
# Selection state shared with the transfer and payload-editing cells:
# the CKAN package as fetched, and the ESS-DIVE payload mapped from it.
current_package: Optional[Dict[str, Any]] = None
current_payload: Optional[Dict[str, Any]] = None

# Controls for finding a dataset and the area where its preview is rendered.
search_input = widgets.Text(
    value="",
    description="Search",
    layout=widgets.Layout(width="400px"),
)
refresh_button = widgets.Button(
    description="Load CKAN datasets",
    button_style="info",
)
dataset_dropdown = widgets.Dropdown(
    options=[],
    description="Dataset",
    layout=widgets.Layout(width="500px"),
)
metadata_output = widgets.Output()


def refresh_datasets(_=None):
    """Button handler: reload the dataset dropdown from CKAN.

    Lists packages through a freshly built client, using the search box text as
    the filter (an empty box is passed as ``None``). Each dropdown entry is
    labelled with the package title, falling back to its name, and carries the
    package name as its value. Errors and empty results are reported in
    ``metadata_output``.

    The single argument is whatever ``on_click`` passes in; it is not used.
    """
    metadata_output.clear_output()
    try:
        # Building the client is inside the try so a configuration problem is
        # shown to the user like any other loading error.
        client = build_client()
        packages = client.list_ckan_packages(
            search=search_input.value or None,
        )
    except Exception as exc:
        with metadata_output:
            print(f"Error loading CKAN datasets: {exc}")
        return

    # Tolerate a None result the same way as an empty list.
    options = [
        (pkg.get("title") or pkg.get("name"), pkg.get("name"))
        for pkg in (packages or [])
    ]
    dataset_dropdown.options = options
    if options:
        dataset_dropdown.value = options[0][1]
    else:
        # Written after the options are replaced so the note is the last thing shown.
        with metadata_output:
            print("No CKAN datasets matched the search.")


def on_dataset_selected(change):
    """Dropdown observer: load the chosen dataset, map it and preview the result.

    ``change["new"]`` is the newly selected dropdown value (a CKAN package
    name). On success the package and its mapped payload are stored in
    ``current_package`` / ``current_payload`` and a summary, the missing
    fields and the resource count are rendered in ``metadata_output``.

    The stored selection is reset before anything else happens, so a cleared
    dropdown or a failed load cannot leave the previous dataset in place for
    the transfer step.
    """
    global current_package, current_payload
    current_package = None
    current_payload = None
    metadata_output.clear_output()

    selected = change["new"]
    if not selected:
        return

    try:
        client = build_client()
        pkg = client.get_ckan_package(selected)
        payload = client.map_ckan_to_essdive(pkg)
        missing = client.find_missing_metadata(payload)
    except Exception as exc:
        with metadata_output:
            print(f"Error loading dataset: {exc}")
        return

    current_package = pkg
    current_payload = payload
    with metadata_output:
        display(Markdown(f"### {payload.get('title')}"))
        summary = client.summarize_payload(payload)
        # Fenced block so the summary keeps its line breaks when rendered.
        display(Markdown(f"**Mapped metadata:**\n```\n{summary}\n```"))
        if missing:
            display(Markdown("<b>Missing metadata for ESS-DIVE:</b> " + ", ".join(missing)))
        else:
            display(Markdown("All required fields are present."))
        if payload.get("resources"):
            display(Markdown(f"Resources to stage: {len(payload['resources'])}"))

# A new dropdown value reloads the preview; the button reloads the dataset list.
dataset_dropdown.observe(on_dataset_selected, names="value")
refresh_button.on_click(refresh_datasets)

# Search row on top, then the dataset picker, then the metadata preview.
widgets.VBox(children=[
    widgets.HBox(children=[search_input, refresh_button]),
    dataset_dropdown,
    metadata_output,
])


## Transfer to ESS-DIVE and stage data
Click the transfer button to validate metadata, optionally stage files via Tapis, and create or update the ESS-DIVE dataset. With `Dry-run` checked, the notebook only validates the metadata and stages the files; nothing is written to ESS-DIVE.


In [None]:
# Button that starts the transfer, and the area its progress messages go to.
transfer_button = widgets.Button(
    description="Validate and transfer",
    button_style="success",
)
transfer_output = widgets.Output()


def on_transfer(_=None):
    """Button handler: validate the selected dataset, stage its files, then submit.

    Works on the module-level ``current_package`` / ``current_payload``. The
    steps run in order: report missing required fields, stage the package's
    resources, and, unless the client is in dry-run mode, submit the payload
    to ESS-DIVE. Missing fields are reported but do not stop the run. Every
    step prints its progress into ``transfer_output``; a failure in any step
    is printed there as well and ends the run.

    The single argument is whatever ``on_click`` passes in; it is not used.
    """
    transfer_output.clear_output()
    with transfer_output:
        if not current_payload or not current_package:
            print("Select a CKAN dataset first.")
            return

        print("Validating metadata...")
        try:
            client = build_client()
            missing = client.find_missing_metadata(current_payload)
        except Exception as exc:
            print(f"Metadata validation failed: {exc}")
            return
        if missing:
            print("Missing required fields:", ", ".join(missing))
        else:
            print("Metadata check passed.")

        print("Staging CKAN resources...")
        try:
            staged = client.stage_resources(current_package)
        except Exception as exc:
            # Do not go on to submit when the files could not be staged.
            print(f"Staging failed: {exc}")
            return
        print(f"Staged {len(staged)} file(s) locally at {local_stage_input.value}")

        if client.dry_run:
            print("Dry-run enabled: skipping ESS-DIVE write.")
            return
        try:
            response = client.submit_to_essdive(current_payload)
            print("ESS-DIVE response:", json.dumps(response, indent=2))
        except Exception as exc:
            print(f"ESS-DIVE submission failed: {exc}")

# Clicking the button runs the whole validate / stage / submit sequence.
transfer_button.on_click(on_transfer)

# Button on top, with its messages directly underneath.
widgets.VBox(children=[transfer_button, transfer_output])


## Finalize ESS-DIVE payload
Use these cells to edit a working copy of `current_payload` (named `payload`), export it, and submit it to ESS-DIVE without using the button UI.


In [None]:

# copy, json and Markdown are provided by the import cell at the top of the notebook.
if not current_payload:
    raise RuntimeError("Load a CKAN dataset first (above) before editing the payload.")

# Deep copy, so the edits in the following cells never alter current_payload,
# which is what the transfer button submits.
payload = copy.deepcopy(current_payload)
print("Working copy created: payload")
print("Title:", payload.get("title"))
# "or []" also covers a payload whose "resources" entry is present but None.
print("Resources:", len(payload.get("resources") or []))


### Add/adjust metadata
Fill in fields required by your ESS-DIVE profile. Examples below are placeholders; replace with your real values.


In [None]:
# Example edits — replace with your project-specific metadata.
# setdefault leaves any value that is already present in the payload untouched.
payload.setdefault("fundingReferences", [
    {
        "funderName": "Example Funder",
        "awardNumber": "12345",
        "awardTitle": "Example project",
    }
])

payload.setdefault("relatedIdentifiers", [
    {
        "relationType": "IsDerivedFrom",
        "relatedIdentifier": "doi:10.1234/example",
        "relatedIdentifierType": "DOI",
    }
])


def _prompt_for_person(field, heading, name_label, email_label):
    """Show name/email inputs and mirror them into ``payload[field]``.

    ``payload[field]`` starts as an empty list and is replaced by a one-entry
    list ``[{"name": ..., "email": ...}]`` every time either input changes.
    ``payload`` is looked up when the callback runs, not captured here.

    Returns the ``(name, email)`` widgets.
    """
    name_box = widgets.Text(description=name_label)
    email_box = widgets.Text(description=email_label)
    display(widgets.VBox([widgets.HTML(f"<b>{heading}</b>"), name_box, email_box]))
    payload[field] = []

    def _save(change=None):
        payload[field] = [{"name": name_box.value, "email": email_box.value}]

    name_box.observe(_save, names='value')
    email_box.observe(_save, names='value')
    return name_box, email_box


# Spatial coverage prompt
if not payload.get("spatialCoverage"):
    spatial_input = widgets.Textarea(
        value='{"type": "Polygon", "coordinates": [[[ -96.1453778, 32.0541064 ], [ -96.1409515, 29.5311765 ], [ -93.20217, 29.4952382 ], [ -93.1143694, 32.0172335 ], [ -96.1453778, 32.0541064 ]]]}',
        description="Spatial JSON",
        layout=widgets.Layout(width="700px", height="120px")
    )
    display(widgets.VBox([widgets.HTML("<b>Spatial coverage required</b>"), spatial_input]))

    def _save_spatial(change=None):
        """Store the textarea content: parsed JSON when valid, raw text otherwise."""
        try:
            payload["spatialCoverage"] = json.loads(spatial_input.value)
        except json.JSONDecodeError:
            # Best effort: keep what was typed so nothing is lost while the
            # text is not (yet) valid JSON.
            payload["spatialCoverage"] = spatial_input.value

    spatial_input.observe(_save_spatial, names='value')
    # The observer only fires on edits; store the pre-filled polygon now so the
    # payload matches what the box shows even if it is never touched.
    _save_spatial()

# Prompt for contacts/creators if missing
if not payload.get("contacts"):
    contact_name, contact_email = _prompt_for_person(
        "contacts", "Contacts required", "Contact name", "Contact email"
    )

if not payload.get("creators"):
    creator_name, creator_email = _prompt_for_person(
        "creators", "Creators required", "Creator name", "Creator email"
    )

# Fill temporal if missing (placeholder dates — replace before submitting)
if not payload.get("temporalCoverage"):
    payload["temporalCoverage"] = {"startDate": "YYYY-MM-DD", "endDate": "YYYY-MM-DD"}

print("Updated payload fields. Review below:")
payload


### Export payload for audit

In [None]:

# Save the edited payload as indented JSON in the working directory for later review.
export_path = "ess-dive-payload.json"
with open(export_path, "w") as export_file:
    json.dump(payload, export_file, indent=2)
print("Wrote ess-dive-payload.json")


### Submit to ESS-DIVE via API
Set `dry_run_toggle.value = False` (or untick the Dry-run checkbox) to actually submit; otherwise this cell only prints a reminder and sends nothing.


In [None]:

# Honour the dry-run checkbox: nothing is sent while it is ticked.
if dry_run_toggle.value:
    print("Dry-run is enabled. Set dry_run_toggle.value = False and rerun to submit.")
else:
    client = build_client()
    response = client.submit_to_essdive(payload)
    # The line break must be written as an escape; a literal newline inside a
    # single-quoted string is a SyntaxError.
    print("ESS-DIVE response:\n", json.dumps(response, indent=2))
