# JSON Repair Notebook (Juno iOS)

This notebook lets you **repair a JSON file** so it **strictly validates** against an uploaded JSON Schema.

**Rules enforced:**
- You never type JSON syntax (no quotes, no brackets).
- Enum fields use pickers only.
- Date fields use a date picker only.
- Object fields are rendered as sub-fields, never as raw text.
- Schema is authoritative; no hardcoded fields.
- All changes are undoable.

**Workflow**
1. Upload the JSON Schema.
2. Upload the JSON instance to repair.
3. Use the UI to fix validation errors.
4. Export only when validation passes.

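**Append mode** (optional): Upload a second, already **valid** JSON file as the append target. Once the repaired file passes validation, its `exercises[]` entries are appended to the target's `exercises[]` and the merged file is offered for download. Appending is refused if any `exercise_id` appears in both files.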


In [None]:
import ast
import base64
import copy
import html
import json
import re
from datetime import date

import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from jsonpointer import resolve_pointer
from jsonschema import Draft7Validator, RefResolver


In [None]:
# ---------- File upload helpers (Juno-safe) ----------

def _extract_upload_items(uploader_value):
    if uploader_value is None:
        return []
    if isinstance(uploader_value, dict):
        return list(uploader_value.values())
    if isinstance(uploader_value, tuple):
        return list(uploader_value)
    if hasattr(uploader_value, "values"):
        return list(uploader_value.values())
    return []


def _extract_bytes(file_item):
    if file_item is None:
        return None
    content = None
    if isinstance(file_item, dict):
        content = file_item.get("content")
    else:
        content = getattr(file_item, "content", None)
    if isinstance(content, memoryview):
        content = content.tobytes()
    if isinstance(content, bytearray):
        content = bytes(content)
    if isinstance(content, bytes):
        return content
    return None


def load_uploaded_json(uploader_widget):
    """Parse the first file held by an upload widget as JSON.

    Args:
        uploader_widget: object whose ``value`` attribute holds the uploaded
            file item(s).

    Returns:
        The decoded JSON document.

    Raises:
        ValueError: if nothing was uploaded or the file bytes cannot be read
            (``json.JSONDecodeError`` for malformed JSON is a subclass).
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    items = _extract_upload_items(uploader_widget.value)
    if not items:
        raise ValueError("No file uploaded.")
    raw = _extract_bytes(items[0])
    if raw is None:
        raise ValueError("Unable to read uploaded file bytes.")
    # "utf-8-sig" strips a leading byte-order mark when present; with plain
    # "utf-8" the BOM stays in the text and json.loads rejects the document.
    text = raw.decode("utf-8-sig")
    return json.loads(text)


In [None]:
# ---------- Schema helpers ----------

def make_resolver(schema):
    """Create the ``RefResolver`` used to look up ``$ref`` targets in *schema*."""
    resolver = RefResolver.from_schema(schema)
    return resolver


def resolve_schema(schema, resolver):
    """Return *schema* with any ``$ref`` replaced by the schema it points to.

    Keywords written next to ``$ref`` are overlaid on a deep copy of the
    referenced schema, so the resolver's stored documents are never mutated.
    A reference that points at another reference is followed until a
    concrete schema is reached; a reference cycle stops the walk instead of
    looping forever.

    Args:
        schema: a schema node. Non-dict nodes (boolean schemas, tuple-form
            lists) carry no keywords the editor can use and are treated as
            the empty schema.
        resolver: object whose ``resolve(ref)`` returns a pair with the
            referenced schema as its second element.

    Returns:
        The same object when *schema* is a dict without ``$ref``; otherwise
        a new merged dict.
    """
    if not isinstance(schema, dict):
        return {}

    visited = set()
    while "$ref" in schema:
        ref = schema["$ref"]
        if ref in visited:
            # Cyclic reference chain: give up and return what we have.
            break
        visited.add(ref)
        resolved = resolver.resolve(ref)[1]
        merged = copy.deepcopy(resolved) if isinstance(resolved, dict) else {}
        # Sibling keywords of "$ref" take precedence over the target's.
        merged.update((k, v) for k, v in schema.items() if k != "$ref")
        schema = merged
    return schema


def schema_allows_null(schema):
    """Tell whether the schema's ``type`` keyword permits ``null``.

    ``type`` may be a single name or a list of names; a schema without
    ``type`` is reported as not allowing null.
    """
    declared = schema.get("type")
    if not isinstance(declared, list):
        return declared == "null"
    return "null" in declared


def schema_primary_type(schema):
    """Return the type name the editor should use for *schema*.

    For a list-valued ``type`` this is the first entry other than
    ``"null"`` (or ``"null"`` if there is none); otherwise the ``type``
    value itself, which is ``None`` when the keyword is absent.
    """
    declared = schema.get("type")
    if not isinstance(declared, list):
        return declared
    return next((name for name in declared if name != "null"), "null")


def default_for_schema(schema):
    """Return an empty placeholder value matching the schema's type.

    Nullable schemas get ``None``. Otherwise the primary type selects a
    fresh empty value (``{}``, ``[]``, ``""``, ``0.0``, ``0`` or ``False``);
    unknown or missing types also yield ``None``.
    """
    if schema_allows_null(schema):
        return None

    # Constructors rather than literals so every call gets a new container.
    empty_value_factories = {
        "object": dict,
        "array": list,
        "string": str,
        "number": float,
        "integer": int,
        "boolean": bool,
    }
    kind = schema_primary_type(schema)
    factory = empty_value_factories.get(kind) if isinstance(kind, str) else None
    if factory is None:
        return None
    return factory()


def get_schema_for_path(root_schema, resolver, path_segments):
    """Walk *root_schema* along an instance path and return the sub-schema.

    Each segment descends one level:

    * object schemas: the matching ``properties`` entry, else a dict-valued
      ``additionalProperties``; if neither applies the result is ``{}``.
    * array schemas: ``items``; for list-form ``items`` the positional entry
      is used when the segment is an in-range int, otherwise the last entry
      (or ``{}`` when the list is empty).
    * any other type: the walk stops and the current schema is returned.

    ``$ref`` is resolved at every step and on the final result.
    """
    current = resolve_schema(root_schema, resolver)
    for step in path_segments:
        current = resolve_schema(current, resolver)
        kind = schema_primary_type(current)

        if kind == "object":
            properties = current.get("properties", {})
            if step in properties:
                current = properties[step]
                continue
            extras = current.get("additionalProperties", {})
            if not isinstance(extras, dict):
                return {}
            current = extras
        elif kind == "array":
            item_spec = current.get("items", {})
            if not isinstance(item_spec, list):
                current = item_spec
            elif isinstance(step, int) and step < len(item_spec):
                current = item_spec[step]
            elif item_spec:
                current = item_spec[-1]
            else:
                current = {}
        else:
            # Scalars (or untyped schemas) have nothing further to descend into.
            return current
    return resolve_schema(current, resolver)


In [None]:
# ---------- State ----------
# Shared mutable notebook state, read and written by the callbacks below.
state = {
    # Parsed JSON Schema document (set by on_load).
    "schema": None,
    # RefResolver built from the schema (set by on_load).
    "resolver": None,
    # Draft7Validator built from the schema (set by on_load).
    "validator": None,
    # The JSON instance being repaired; edited in place by the UI.
    "data": None,
    # Undo stack of snapshots pushed by record_history().
    "history": [],
    # JSON-pointer strings of required keys that were auto-added and still
    # need a real value; export is blocked while this is non-empty.
    "pending_required": set(),
    # Second document that repaired exercises get appended to (optional).
    "append_target": None,
}


def record_history(action):
    """Push an undo snapshot of the document onto the history stack.

    The snapshot holds deep copies of ``state["data"]`` and
    ``state["pending_required"]`` together with the *action* label. Nothing
    is recorded while no document is loaded.
    """
    document = state["data"]
    if document is None:
        return
    state["history"].append({
        "action": action,
        "data": copy.deepcopy(document),
        "pending_required": copy.deepcopy(state["pending_required"]),
    })


def undo_last(_=None):
    """Restore the most recent history snapshot and redraw the UI.

    Does nothing when the history stack is empty. The argument is the
    button instance passed by ``on_click`` and is ignored.
    """
    history = state["history"]
    if history:
        previous = history.pop()
        state["data"] = previous["data"]
        state["pending_required"] = previous["pending_required"]
        rebuild_ui()


In [None]:
# ---------- Error classification ----------

def path_to_pointer(path_segments):
    """Render a list of path segments as a JSON-pointer string.

    Integer segments are written as-is; every other segment is converted
    with ``str`` and escaped (``~`` -> ``~0``, ``/`` -> ``~1``). An empty
    path gives the empty string.
    """
    def encode(segment):
        if isinstance(segment, int):
            return f"/{segment}"
        return "/" + str(segment).replace("~", "~0").replace("/", "~1")

    if not path_segments:
        return ""
    return "".join(encode(segment) for segment in path_segments)


def extract_missing_property(message):
    """Pull the property name out of a "... is a required property" message.

    Returns ``None`` when the message does not have that form.
    """
    found = re.search(r"'(.+?)' is a required property", message)
    return found.group(1) if found else None


def extract_additional_properties(message):
    """Extract the offending keys from an ``additionalProperties`` message.

    The expected layout is
    ``Additional properties are not allowed ('a', 'b' were unexpected)``:
    a parenthesised, comma-separated list of quoted keys followed by
    ``was unexpected`` / ``were unexpected``.

    The trailing verb phrase is removed before parsing; without that the
    last key would come back as ``"b' were unexpected"`` and never match a
    real key in the document. The remaining list is parsed as Python string
    literals so keys containing commas or quotes survive intact.

    Args:
        message: the validation error message.

    Returns:
        List of key names; ``[]`` when the message has no parenthesised part.
    """
    match = re.search(r"\((.+)\)", message)
    if not match:
        return []
    listing = re.sub(r"\s+(?:was|were) unexpected\s*$", "", match.group(1))

    try:
        parsed = ast.literal_eval(f"({listing},)")
    except (ValueError, SyntaxError, TypeError):
        parsed = None
    if isinstance(parsed, tuple) and all(isinstance(key, str) for key in parsed):
        return list(parsed)

    # Unrecognised layout: best-effort split on commas and strip quotes.
    parts = [p.strip().strip("'").strip('"') for p in listing.split(",")]
    return [p for p in parts if p]


def validate_data():
    """Validate the working document and return its errors as plain dicts.

    Returns ``[]`` when no validator or document is loaded. Errors are
    ordered by their instance path. Each dict carries the failing keyword
    (``validator``), the message, the instance and schema paths, the
    missing property name parsed from the message (if any), the illegal
    keys for ``additionalProperties`` errors, and the expected type(s) for
    ``type`` errors.
    """
    validator = state["validator"]
    document = state["data"]
    if validator is None or document is None:
        return []

    def describe(err):
        keyword = err.validator
        if keyword == "additionalProperties":
            illegal = extract_additional_properties(err.message)
        else:
            illegal = []
        return {
            "validator": keyword,
            "message": err.message,
            "path": list(err.path),
            "schema_path": list(err.schema_path),
            "missing_property": extract_missing_property(err.message),
            "illegal_keys": illegal,
            "expected": err.validator_value if keyword == "type" else None,
        }

    ordered = sorted(validator.iter_errors(document), key=lambda e: e.path)
    return [describe(err) for err in ordered]


def exercise_info_for_path(path):
    """Look up the exercise an error path belongs to.

    For paths of the form ``["exercises", <int>, ...]`` whose index refers
    to an existing dict entry, returns that entry's
    ``(exercise_id, canonical_name)``. Every other case gives
    ``(None, None)``.
    """
    if not path or len(path) < 2:
        return (None, None)
    if path[0] != "exercises" or not isinstance(path[1], int):
        return (None, None)

    document = state["data"]
    entries = document.get("exercises", []) if isinstance(document, dict) else []
    position = path[1]
    if not 0 <= position < len(entries):
        return (None, None)

    entry = entries[position]
    if not isinstance(entry, dict):
        return (None, None)
    return entry.get("exercise_id"), entry.get("canonical_name")


In [None]:
# ---------- UI rendering ----------

# Holds the editor widget tree; refilled by rebuild_ui().
editor_container = widgets.VBox([])
# Holds one widget per validation error; refilled by rebuild_ui().
errors_container = widgets.VBox([])
# Output area for status lines and load/export messages.
status_output = widgets.Output()


def set_value_at_path(data, path, value):
    """Assign *value* at *path* inside the nested container *data*.

    All segments but the last are followed by indexing; the last segment is
    the key or index that gets assigned. *path* must be non-empty
    (``IndexError`` otherwise); missing intermediate containers raise the
    usual ``KeyError`` / ``IndexError``.
    """
    final_key = path[-1]
    container = data
    for step in path[:-1]:
        container = container[step]
    container[final_key] = value


def delete_key_at_path(data, path):
    """Delete the entry addressed by *path* from the nested container *data*.

    All segments but the last are followed by indexing; the last segment is
    removed with ``del``. *path* must be non-empty (``IndexError``
    otherwise).
    """
    final_key = path[-1]
    container = data
    for step in path[:-1]:
        container = container[step]
    del container[final_key]


def with_required_warning(widget, path):
    """Prefix *widget* with a "Required" notice when its path is pending.

    If the JSON pointer for *path* is in ``state["pending_required"]`` the
    widget is returned inside a ``VBox`` below a warning label; otherwise
    the widget is returned unchanged.
    """
    if path_to_pointer(path) not in state["pending_required"]:
        return widget
    notice = widgets.HTML('<span style="color:#b45309"><b>Required:</b> choose a value</span>')
    return widgets.VBox([notice, widget])


def _store_value(path, value):
    """Write *value* into the working document at *path*.

    An empty path addresses the document root, which has no parent
    container to index into, so the root reference in ``state["data"]`` is
    replaced instead.
    """
    if path:
        set_value_at_path(state["data"], path, value)
    else:
        state["data"] = value


def _always_settled(_value):
    """Predicate for fields where any edit satisfies a pending requirement."""
    return True


def _bind_value(widget, path, action, convert=None, settles=None):
    """Make edits of *widget* update the document at *path*.

    On every change of the widget's ``value`` an undo snapshot labelled
    ``"<action> <pointer>"`` is recorded, the (optionally converted) value
    is stored, and the path is removed from the pending-required set when
    ``settles(new_value)`` is true. With ``settles=None`` the pending set is
    never touched.
    """
    pointer = path_to_pointer(path)

    def _on_change(change):
        if change["name"] != "value":
            return
        record_history(f"{action} {pointer}")
        new_val = change["new"] if convert is None else convert(change["new"])
        _store_value(path, new_val)
        if settles is not None and settles(new_val):
            state["pending_required"].discard(pointer)

    widget.observe(_on_change, names="value")


def build_editor(value, schema, path, label=None):
    """Build the widget (tree) that edits *value* according to *schema*.

    The schema decides the control, checked in this order: ``enum``
    (multi-select for array-typed schemas, dropdown otherwise), ``format:
    date`` (date picker), object (one sub-editor per present or pending
    property), array (multi-select for enum items, one-line-per-entry text
    area for string items, otherwise a row per item with Remove/Add
    buttons), then string / integer / number / boolean scalars, and finally
    a plain text box.

    Args:
        value: current value at *path* in ``state["data"]``.
        schema: schema node for the value; ``$ref`` is resolved here.
        path: list of keys/indices locating the value in the document.
        label: caption for the control; defaults to the last path segment,
            or ``"root"`` for the empty path.

    Returns:
        An ipywidgets widget. Object and array values of the wrong type are
        replaced in the document by ``{}`` / ``[]`` while rendering.
    """
    # Boolean schemas carry no keywords to drive an editor.
    if not isinstance(schema, dict):
        schema = {}
    schema = resolve_schema(schema, state["resolver"])
    schema_type = schema_primary_type(schema)
    enum = schema.get("enum")
    fmt = schema.get("format")

    title = label if label is not None else (path[-1] if path else "root")

    # Enum handling
    if enum is not None:
        if schema_type == "array":
            current = value if isinstance(value, list) else []
            # SelectMultiple rejects values that are not among its options,
            # and a file under repair may well contain such values.
            selected = tuple(v for v in current if v in enum)
            w = widgets.SelectMultiple(options=enum, value=selected, description=title)
            _bind_value(w, path, "Set enum array", convert=list, settles=_always_settled)
            return with_required_warning(w, path)
        options = [("Select…", None)] + [(str(v), v) for v in enum]
        current = value if value in enum else None
        w = widgets.Dropdown(options=options, value=current, description=title)
        _bind_value(w, path, "Set enum", settles=lambda v: v is not None)
        return with_required_warning(w, path)

    # Date handling
    if fmt == "date":
        current = None
        if isinstance(value, str):
            try:
                parts = [int(p) for p in value.split("-")]
                current = date(parts[0], parts[1], parts[2])
            except (ValueError, IndexError, OverflowError):
                # Unparseable date text: start the picker empty.
                current = None
        w = widgets.DatePicker(description=title, value=current)
        _bind_value(
            w,
            path,
            "Set date",
            convert=lambda picked: picked.isoformat() if picked else None,
            settles=bool,
        )
        return with_required_warning(w, path)

    # Object handling
    if schema_type == "object":
        if not isinstance(value, dict):
            value = {}
            _store_value(path, value)
        props = schema.get("properties", {})
        items = []
        # render known properties that exist or were auto-added as required
        for key, sub_schema in props.items():
            if key in value or path_to_pointer(path + [key]) in state["pending_required"]:
                items.append(build_editor(value.get(key), sub_schema, path + [key], label=key))
        # render extra properties if allowed and present
        extra_keys = [k for k in value if k not in props]
        addl = schema.get("additionalProperties", False)
        if extra_keys and addl:
            extra_schema = addl if isinstance(addl, dict) else {}
            for key in extra_keys:
                items.append(build_editor(value.get(key), extra_schema, path + [key], label=key))
        return widgets.VBox(items)

    # Array handling
    if schema_type == "array":
        items_schema = schema.get("items", {})
        if not isinstance(items_schema, dict):
            # Tuple-form or boolean "items": no single item schema to use.
            items_schema = {}
        if isinstance(value, list):
            current = value
        else:
            current = []
            _store_value(path, current)

        items_schema_resolved = resolve_schema(items_schema, state["resolver"])
        items_type = schema_primary_type(items_schema_resolved)

        # array of enum values (items.enum)
        if items_schema_resolved.get("enum"):
            options = items_schema_resolved["enum"]
            selected = tuple(v for v in current if v in options)
            w = widgets.SelectMultiple(options=options, value=selected, description=title)
            _bind_value(w, path, "Set enum array", convert=list, settles=_always_settled)
            return with_required_warning(w, path)

        # array of strings: one entry per line
        if items_type == "string" and "enum" not in items_schema_resolved:
            # Entries of the wrong type are shown as text so the user can fix them.
            text = "\n".join("" if item is None else str(item) for item in current)
            w = widgets.Textarea(value=text, description=title, layout=widgets.Layout(width="100%"))
            _bind_value(
                w,
                path,
                "Set array of strings",
                convert=lambda typed: typed.splitlines(),
                settles=bool,
            )
            return with_required_warning(w, path)

        # array of objects or mixed
        def make_remove(ix):
            def _remove(_):
                record_history(f"Remove item {path_to_pointer(path + [ix])}")
                current.pop(ix)
                rebuild_ui()
            return _remove

        row_widgets = []
        for idx, item in enumerate(current):
            item_editor = build_editor(item, items_schema, path + [idx], label=f"{title}[{idx}]")
            remove_btn = widgets.Button(description="Remove", button_style="danger")
            remove_btn.on_click(make_remove(idx))
            row_widgets.append(widgets.HBox([item_editor, remove_btn]))

        def on_add(_):
            record_history(f"Add item {path_to_pointer(path)}")
            current.append(default_for_schema(items_schema_resolved))
            rebuild_ui()

        add_btn = widgets.Button(description="Add item", button_style="success")
        add_btn.on_click(on_add)
        return widgets.VBox(row_widgets + [add_btn])

    # Scalar handling
    if schema_type == "string":
        w = widgets.Text(value=value if isinstance(value, str) else "", description=title)
        _bind_value(w, path, "Set string", settles=bool)
        return with_required_warning(w, path)

    if schema_type == "integer":
        w = widgets.IntText(value=value if isinstance(value, int) else 0, description=title)
        _bind_value(w, path, "Set integer", settles=_always_settled)
        return with_required_warning(w, path)

    if schema_type == "number":
        w = widgets.FloatText(value=value if isinstance(value, (int, float)) else 0.0, description=title)
        _bind_value(w, path, "Set number", settles=_always_settled)
        return with_required_warning(w, path)

    if schema_type == "boolean":
        w = widgets.Checkbox(value=bool(value), description=title)
        _bind_value(w, path, "Set boolean", settles=_always_settled)
        return with_required_warning(w, path)

    # Fallback: untyped value edited as text; never clears a pending flag.
    w = widgets.Text(value=str(value) if value is not None else "", description=title)
    _bind_value(w, path, "Set value")
    return w


def _build_main_editor():
    """Build the editor for the loaded document.

    A document whose ``exercises`` value is a non-empty list is shown as an
    accordion with one section per exercise; anything else gets the generic
    schema-driven editor for the whole document.
    """
    root_schema = state["schema"]
    resolver = state["resolver"]
    data = state["data"]
    top_schema = resolve_schema(root_schema, resolver)

    exercises = data.get("exercises") if isinstance(data, dict) else None
    # A broken file may hold a non-list under "exercises"; enumerating it
    # would fail or iterate keys, so fall back to the generic editor.
    if not isinstance(exercises, list) or not exercises:
        return build_editor(data, top_schema, [])

    exercises_schema = get_schema_for_path(root_schema, resolver, ["exercises"])
    items = exercises_schema.get("items", {})
    item_schema = resolve_schema(items if isinstance(items, dict) else {}, resolver)

    panels = []
    titles = []
    for idx, ex in enumerate(exercises):
        panels.append(build_editor(ex, item_schema, ["exercises", idx], label=f"exercise[{idx}]"))
        exercise_id = ex.get("exercise_id") if isinstance(ex, dict) else None
        name = ex.get("canonical_name") if isinstance(ex, dict) else None
        titles.append(f"{idx}: {exercise_id or 'exercise'}" + (f" — {name}" if name else ""))

    acc = widgets.Accordion(children=panels)
    for i, title in enumerate(titles):
        acc.set_title(i, title)
    return acc


def _delete_key_in_all_exercises(relative_path, key):
    """Delete *key* from the object at *relative_path* inside every exercise.

    Exercises in which the relative path does not lead to a dict holding
    *key* are left untouched.
    """
    data = state["data"]
    exercises = data.get("exercises", []) if isinstance(data, dict) else []
    if not isinstance(exercises, list):
        return
    for ex in exercises:
        node = ex
        try:
            for seg in relative_path:
                node = node[seg]
        except (KeyError, IndexError, TypeError):
            continue
        if isinstance(node, dict) and key in node:
            del node[key]


def _build_error_widget(err):
    """Render one validation error (as produced by validate_data) as a widget.

    The widget shows a header, the path and reason, and repair buttons:
    per-key delete buttons for ``additionalProperties`` errors and a
    "replace with empty value" button for ``type`` errors.
    """
    # Messages and values originate from the uploaded files; escape them so
    # characters such as "<" display literally instead of becoming markup.
    esc = lambda text: html.escape(str(text))

    path = err["path"]
    ex_id, ex_name = exercise_info_for_path(path)
    pointer = path_to_pointer(path) or "(root)"
    header = f"{err['validator']}: {pointer}"
    if ex_id or ex_name:
        header = f"{ex_id or ''} {f'- {ex_name}' if ex_name else ''} | {header}".strip()

    detail_lines = [
        f"<b>Path:</b> {esc(pointer)}",
        f"<b>Reason:</b> {esc(err['message'])}",
    ]
    if ex_id:
        detail_lines.append(f"<b>exercise_id:</b> {esc(ex_id)}")
    if ex_name:
        detail_lines.append(f"<b>canonical_name:</b> {esc(ex_name)}")

    def make_delete(key):
        def _delete(_):
            record_history(f"Delete additionalProperties {path_to_pointer(path + [key])}")
            delete_key_at_path(state["data"], path + [key])
            rebuild_ui()
        return _delete

    def make_delete_all(key):
        def _delete_all(_):
            record_history(f"Delete additionalProperties {key} for all exercises")
            # path[2:] is the location of the offending object relative to
            # its exercise, so nested objects are targeted correctly.
            _delete_key_in_all_exercises(path[2:], key)
            rebuild_ui()
        return _delete_all

    def _reset(_):
        schema = get_schema_for_path(state["schema"], state["resolver"], path)
        record_history(f"Reset type {path_to_pointer(path)}")
        replacement = default_for_schema(schema)
        if path:
            set_value_at_path(state["data"], path, replacement)
        else:
            # A type error on the root has no parent container to assign into.
            state["data"] = replacement
        rebuild_ui()

    actions = []
    if err["validator"] == "additionalProperties":
        inside_exercise = len(path) >= 2 and path[0] == "exercises" and isinstance(path[1], int)
        for bad_key in err["illegal_keys"]:
            try:
                current_val = resolve_pointer(state["data"], path_to_pointer(path + [bad_key]))
            except Exception:
                # Display-only lookup: show "None" rather than fail the redraw.
                current_val = None
            detail_lines.append(f"<b>Illegal key:</b> {esc(bad_key)} = {esc(current_val)}")

            del_btn = widgets.Button(description=f"Delete '{bad_key}'", button_style="danger")
            del_btn.on_click(make_delete(bad_key))
            actions.append(del_btn)

            if inside_exercise:
                del_all_btn = widgets.Button(
                    description=f"Delete '{bad_key}' for ALL exercises",
                    button_style="danger",
                )
                del_all_btn.on_click(make_delete_all(bad_key))
                actions.append(del_all_btn)

    if err["validator"] == "type":
        expected = err.get("expected")
        expected_type = None
        if isinstance(expected, list):
            expected_type = next((t for t in expected if t != "null"), None)
        elif isinstance(expected, str):
            expected_type = expected
        if expected_type:
            reset_btn = widgets.Button(
                description=f"Replace with empty {expected_type}",
                button_style="warning",
            )
            reset_btn.on_click(_reset)
            actions.append(reset_btn)

    details = widgets.HTML("<br/>".join(detail_lines))
    body = widgets.VBox([details, widgets.VBox(actions)]) if actions else details
    return widgets.VBox([widgets.HTML(f"<b>{esc(header)}</b>"), body])


def rebuild_ui():
    """Redraw the editor, the validation-error list and the status line.

    Clears all three areas first; when no schema or document is loaded they
    stay empty. The editor is built before validation runs because building
    it may normalise wrongly-typed containers in the document.
    """
    editor_container.children = []
    errors_container.children = []
    status_output.clear_output()
    if state["data"] is None or state["schema"] is None:
        return

    editor_container.children = [_build_main_editor()]

    errors = validate_data()
    errors_container.children = [_build_error_widget(err) for err in errors]

    with status_output:
        clear_output()
        if errors or state["pending_required"]:
            display(HTML(f"<b>Validation errors:</b> {len(errors)} | Pending required fields: {len(state['pending_required'])}"))
        else:
            display(HTML("<b>Validation:</b> ✅ No schema errors"))


def add_missing_required(_=None):
    """Button handler: insert every missing required key, then redraw.

    Records one undo snapshot before the document is changed. Does nothing
    when no document is loaded.
    """
    document = state["data"]
    if document is None:
        return
    record_history("Add missing required keys")
    root_schema = resolve_schema(state["schema"], state["resolver"])
    _add_missing_required_recursive(document, root_schema, [])
    rebuild_ui()


def _add_missing_required_recursive(data_node, schema_node, path):
    """Insert placeholders for missing required keys below *data_node*.

    For every object whose schema lists ``required`` keys, absent keys are
    added with an empty value of the right type and their JSON pointers are
    added to ``state["pending_required"]``. The walk then continues into
    all properties that have a schema (including the ones just added) and
    into array items.

    Args:
        data_node: the part of the document being inspected; mutated in place.
        schema_node: schema for *data_node*; ``$ref`` is resolved here.
        path: list of segments locating *data_node* in the document.
    """
    resolver = state["resolver"]
    if not isinstance(schema_node, dict):
        # Boolean schemas impose no required keys.
        return
    schema_node = resolve_schema(schema_node, resolver)
    t = schema_primary_type(schema_node)

    if t == "object" and isinstance(data_node, dict):
        props = schema_node.get("properties", {})
        for key in schema_node.get("required", []):
            if key in data_node:
                continue
            sub_schema = props.get(key, {})
            # Resolve before choosing the placeholder: a bare {"$ref": ...}
            # has no "type", which would always yield None instead of an
            # empty object/array/string.
            if isinstance(sub_schema, dict):
                sub_schema = resolve_schema(sub_schema, resolver)
            else:
                sub_schema = {}
            data_node[key] = default_for_schema(sub_schema)
            state["pending_required"].add(path_to_pointer(path + [key]))
        for key, val in list(data_node.items()):
            sub_schema = props.get(key)
            if sub_schema is not None:
                _add_missing_required_recursive(val, sub_schema, path + [key])
        return

    if t == "array" and isinstance(data_node, list):
        items_schema = schema_node.get("items", {})
        for idx, item in enumerate(data_node):
            if isinstance(items_schema, list):
                # Tuple form: each position has its own schema; positions
                # beyond the list have none to apply here.
                if idx >= len(items_schema):
                    break
                per_item = items_schema[idx]
            else:
                per_item = items_schema
            _add_missing_required_recursive(item, per_item, path + [idx])


In [None]:
# ---------- Export + Append ----------

def can_export():
    """Return ``True`` only when the document is ready to be exported.

    That requires a loaded document, no pending required fields, and no
    validation errors.
    """
    if state["data"] is None or state["pending_required"]:
        return False
    return not validate_data()


def _make_download_widget(payload, filename, description):
    """Return a widget that lets the user save *payload* as *filename*.

    ``widgets.FileDownload`` is used when the installed ipywidgets provides
    it. Otherwise the bytes are embedded in an HTML link as a base64
    ``data:`` URI, which needs nothing beyond the stock ``HTML`` widget.
    """
    file_download = getattr(widgets, "FileDownload", None)
    if file_download is not None:
        return file_download(
            data=lambda: payload,
            filename=filename,
            description=description,
        )
    encoded = base64.b64encode(payload).decode("ascii")
    return widgets.HTML(
        f'<a download="{html.escape(filename, quote=True)}" '
        f'href="data:application/json;base64,{encoded}">{html.escape(description)}</a>'
    )


def export_json(_=None):
    """Button handler: offer the repaired document for download.

    Export is refused with a message while validation errors or pending
    required fields remain. All output goes to ``status_output`` so it is
    visible regardless of where the callback's own output would land.
    """
    with status_output:
        if not can_export():
            display(HTML("<b>Export blocked:</b> Please resolve all validation errors and required fields."))
            return
        data_bytes = json.dumps(state["data"], ensure_ascii=False, indent=2).encode("utf-8")
        display(_make_download_widget(data_bytes, "repaired.json", "Save repaired JSON"))


def append_and_export(_=None):
    """Button handler: append repaired exercises to the target and export.

    The repaired ``exercises`` are deep-copied onto a copy of the append
    target's ``exercises`` list and the merged document is offered for
    download. The operation is refused when no target is loaded, the
    repaired document is not exportable, either side is not shaped as an
    object with an ``exercises`` array, or an ``exercise_id`` occurs in
    both files.
    """
    with status_output:
        if state["append_target"] is None:
            display(HTML("<b>Append blocked:</b> upload a valid target JSON first."))
            return
        if not can_export():
            display(HTML("<b>Append blocked:</b> resolve all errors first."))
            return

        repaired = state["data"]
        target = copy.deepcopy(state["append_target"])
        if not isinstance(target, dict):
            display(HTML("<b>Append failed:</b> the append target must be a JSON object."))
            return

        repaired_ex = repaired.get("exercises", []) if isinstance(repaired, dict) else []
        target_ex = target.get("exercises", [])
        if not isinstance(repaired_ex, list) or not isinstance(target_ex, list):
            display(HTML("<b>Append failed:</b> <code>exercises</code> must be an array in both files."))
            return

        def collect_ids(exercises):
            # Entries without an id cannot collide, so None is not an id.
            return {
                ex["exercise_id"]
                for ex in exercises
                if isinstance(ex, dict) and ex.get("exercise_id") is not None
            }

        dupes = collect_ids(repaired_ex) & collect_ids(target_ex)
        if dupes:
            # Ids may be non-strings; stringify before sorting and joining.
            listing = ", ".join(sorted(str(d) for d in dupes))
            display(HTML(f"<b>Append failed:</b> duplicate exercise_id(s): {html.escape(listing)}"))
            return

        target_ex.extend(copy.deepcopy(repaired_ex))
        target["exercises"] = target_ex

        data_bytes = json.dumps(target, ensure_ascii=False, indent=2).encode("utf-8")
        display(_make_download_widget(data_bytes, "appended.json", "Save appended JSON"))


In [None]:
# ---------- Loaders + Controls ----------

# Upload widgets: one file each, restricted to .json in the file chooser.
schema_uploader = widgets.FileUpload(accept=".json", multiple=False, description="Upload schema")
instance_uploader = widgets.FileUpload(accept=".json", multiple=False, description="Upload broken JSON")
append_uploader = widgets.FileUpload(accept=".json", multiple=False, description="Upload valid JSON (append)")

# Action buttons; their click handlers are attached further down.
load_btn = widgets.Button(description="Load schema + JSON", button_style="primary")
load_append_btn = widgets.Button(description="Load append target", button_style="info")
add_required_btn = widgets.Button(description="Add missing required keys", button_style="warning")
undo_btn = widgets.Button(description="Undo last change", button_style="")
validate_btn = widgets.Button(description="Re-validate", button_style="")
export_btn = widgets.Button(description="Export repaired JSON", button_style="success")
append_btn = widgets.Button(description="Append & Export", button_style="success")


def on_load(_):
    """Button handler: load the uploaded schema and instance, then redraw.

    Both files are parsed and the resolver and validator are built before
    any state is touched, so a failure at any step is reported in
    ``status_output`` and leaves the previously loaded session intact. On
    success the undo history and pending-required set are reset.
    """
    with status_output:
        clear_output()
        try:
            schema = load_uploaded_json(schema_uploader)
            data = load_uploaded_json(instance_uploader)
            if not isinstance(schema, dict):
                # The editor reads keywords from the schema, so it must be
                # a JSON object (e.g. the two uploads were not swapped).
                raise ValueError("Schema file must contain a JSON object.")
            resolver = make_resolver(schema)
            validator = Draft7Validator(schema, resolver=resolver)
        except Exception as e:
            # UI boundary: surface any load problem instead of losing it in
            # the callback. The text may quote file content, so escape it.
            display(HTML(f"<b>Load failed:</b> {html.escape(str(e))}"))
            return

    state["schema"] = schema
    state["resolver"] = resolver
    state["validator"] = validator
    state["data"] = data
    state["history"] = []
    state["pending_required"] = set()
    rebuild_ui()


def on_load_append(_):
    """Button handler: load the uploaded append-target document.

    Clears the status area, then either reports the load failure or stores
    the parsed document in ``state["append_target"]`` and confirms it.
    """
    with status_output:
        clear_output()
        try:
            loaded = load_uploaded_json(append_uploader)
        except Exception as e:
            display(HTML(f"<b>Append load failed:</b> {e}"))
            return
        else:
            state["append_target"] = loaded
            display(HTML("<b>Append target loaded.</b>"))


def on_validate(_):
    """Button handler: re-run validation by redrawing the whole UI."""
    rebuild_ui()


# Attach each button to its handler.
load_btn.on_click(on_load)
load_append_btn.on_click(on_load_append)
add_required_btn.on_click(add_missing_required)
undo_btn.on_click(undo_last)
validate_btn.on_click(on_validate)
export_btn.on_click(export_json)
append_btn.on_click(append_and_export)

# Control panel laid out in workflow order: upload, optional append target,
# repair actions, export.
controls = widgets.VBox([
    widgets.HTML("<b>Step 1: Upload files</b>"),
    schema_uploader,
    instance_uploader,
    load_btn,
    widgets.HTML("<hr/><b>Optional: Append mode</b>"),
    append_uploader,
    load_append_btn,
    widgets.HTML("<hr/><b>Step 2: Repair</b>"),
    widgets.HBox([add_required_btn, undo_btn, validate_btn]),
    widgets.HTML("<hr/><b>Step 3: Export</b>"),
    export_btn,
    append_btn,
])

# Render the page: controls, status line, editor, then the error list.
display(controls)
display(status_output)
display(widgets.HTML("<h3>Editor</h3>"))
display(editor_container)
display(widgets.HTML("<h3>Validation Errors</h3>"))
display(errors_container)
