In [1]:
#| default_exp notebook_context

## Jupyter Kernels detailed technical implementation

https://chatgpt.com/share/692bea08-4510-8004-b9ab-c02feeb97c08

## Jupyterlab extension development tutorial

https://jupyterlab.readthedocs.io/en/latest/extension/extension_tutorial.html

## Build this extension

### Edit wordslab_notebooks_lib/__init__.py

Add this function definition:

```python
def _jupyter_labextension_paths():
    return [{
        "src": "labextension",
        "dest": "wordslab-notebooks-extension"
    }]
```

This is what JupyterLab uses in development to find the JupyterLab extension inside a Python module.

### Edit settings.ini

Add this classifier for PyPI:

> Framework :: Jupyter :: JupyterLab :: Extensions :: Prebuilt

This is what JupyterLab uses in production to find Python packages that contain JupyterLab extensions.

### Build everything

Open a Terminal

```bash

cd $WORDSLAB_WORKSPACE/wordslab-notebooks-lib

source ../../jupyterlab/.venv/bin/activate

# Install dependencies
jlpm clean
jlpm install

# Build TypeScript extension
jlpm build

source .venv/bin/activate

# Export notebooks to Python modules
nbdev_export

# Prepare for release
nbdev_prepare
```

### Test locally

```bash
source ../../jupyterlab/.venv/bin/activate

# Install in development mode
uv pip install -e .

# Register the extension with JupyterLab during development
jupyter labextension develop . --overwrite

# Verify extension is found
jupyter labextension list

# Start JupyterLab
jupyter lab
```

### When ready to publish:

```bash
source ../../jupyterlab/.venv/bin/activate

# Make sure extension is built
jlpm build

source .venv/bin/activate

# Publish to PyPI
nbdev_pypi
```

In [14]:
#| export
from ipykernel.comm import Comm
import asyncio

async def get_notebook_data(timeout=0.1):
    """Ask the frontend for the notebook data over a Jupyter comm.

    Opens a comm on the 'wordslab_notebook_comm' target, sends a
    'get_notebook_data' request and waits for the first message sent back.

    Args:
        timeout: maximum number of seconds to wait for the reply.

    Returns:
        The `data` payload of the first message received on the comm.

    Raises:
        TimeoutError: if no reply arrives within `timeout` seconds.
    """
    reply = asyncio.get_running_loop().create_future()

    def on_msg(msg):
        # Only the first reply is kept; anything arriving later is ignored
        if not reply.done():
            reply.set_result(msg['content']['data'])

    comm = Comm(target_name='wordslab_notebook_comm', show_warning=False)
    comm.on_msg(on_msg)
    try:
        comm.send({'request': 'get_notebook_data'})
        return await asyncio.wait_for(reply, timeout=timeout)
    except asyncio.TimeoutError:
        # `from None`: the asyncio timeout adds nothing to the message below
        raise TimeoutError("Failed to receive notebook context from frontend") from None
    finally:
        # A new comm is opened on every call: close it so that open comms
        # do not accumulate over the life of the kernel
        comm.close()

# The first call always fails, I didn't find a way around it
try:
    await get_notebook_data()
except:
    ...

In [15]:
data =  await get_notebook_data()
data["cell_id"]

'21bd5abf-7507-4f84-874e-52bfdf53ba1c'

In [16]:
data =  await get_notebook_data()
data["cell_id"]

'7d2d4d5c-c7f4-4325-a4dc-60eca3090763'

In [17]:
# NOTE(review): presumably a deliberate failure, kept so that the notebook contains
# an `error` output for the conversion code below to process - confirm
1/0

ZeroDivisionError: division by zero

In [18]:
data =  await get_notebook_data()
notebook_content_dict = data["notebook"]
executing_cell_id = data["cell_id"]
executing_cell_id

'51a7be30-8758-4fdb-ae44-af3e1db751c6'

https://nbformat.readthedocs.io/en/latest/format_description.html

In [None]:
from IPython.display import Markdown, display
import nbformat

# Dump every cell located above the currently executing one, with its outputs.
nb = nbformat.from_dict(notebook_content_dict)

code_language = nb.metadata.language_info.name
print("> " + code_language + " notebook")

separator = "---------------------"
# Tag printed around each kind of rich output
rich_output_tags = {"display_data": "display", "execute_result": "result"}

for cell in nb.cells:
    # Stop at the cell that requested the notebook data
    if cell.id == executing_cell_id: break

    is_code = cell.cell_type == "code"

    print(separator)
    print("cell", cell.id, cell.cell_type)
    print(separator)
    if cell.cell_type == "markdown":
        display(Markdown(cell.source))
    elif is_code:
        # Render the source as a fenced code block in the notebook language
        display(Markdown(f"```{code_language}\n" + cell.source + "\n```"))
    elif cell.cell_type == "raw":
        print(cell.source)

    # execution_count is None for a code cell that was never run:
    # comparing None with 0 would raise a TypeError
    was_executed = is_code and (cell.execution_count or 0) > 0
    if was_executed and len(cell.outputs) > 0:
        print(separator)
        print("cell outputs", cell.id, cell.execution_count)
        print(separator)
        for output in cell.outputs:
            if output.output_type == "stream":
                print(f"<{output.name}>")
                print(output.text)
                print(f"</{output.name}>")
            elif output.output_type in rich_output_tags:
                tag = rich_output_tags[output.output_type]
                print(f"<{tag}>")
                for field in ("data", "metadata"):
                    if field in output:
                        print(f"  <{field}>")
                        # print the content: a bare repr(...) call discards its result
                        print(output[field])
                        print(f"  </{field}>")
                print(f"</{tag}>")
            elif output.output_type == "error":
                print("<error>")
                print(output.ename)
                print(output.evalue)
                for frame in output.traceback:
                    print(frame)
                print("</error>")
        print(separator)

Notebook to compact XML conversion, inspired by toolslm from AnswerDotAI:

https://github.com/AnswerDotAI/toolslm/blob/main/00_xml.ipynb

In [28]:
#| exports
from fastcore.utils import *
from fastcore.xml import to_xml, Src, Source,Out,Outs,Cell

In [29]:
#| exports
def get_mime_text(data):
    "Return the text of a MIME bundle, markdown taking precedence over plain text (None if neither is present)"
    for mime in ('text/markdown', 'text/plain'):
        if mime in data:
            # The value may be a single string or a sequence of string fragments
            return ''.join(data[mime])
    return None

In [30]:
#| exports
def cell2out(o):
    "Turn one notebook output into an `Out` XML element, or None when it carries no usable text"
    # Outputs with a MIME bundle: keep its markdown or plain text, if any
    if hasattr(o, 'data'):
        body = get_mime_text(o.data)
        if body:
            mime = 'markdown' if 'text/markdown' in o.data else 'plain'
            return Out(body, mime=mime)
    # Outputs with a `text` field, given as one string or a list of fragments
    if hasattr(o, 'text'):
        body = ''.join(o.text) if not isinstance(o.text, str) else o.text
        return Out(body, type='stream', name=o.get('name', 'stdout'))
    # Error outputs: keep only the exception name and value
    if hasattr(o, 'ename'):
        return Out(f"{o.ename}: {o.evalue}", type='error')
    return None

In [31]:
#| exports
def cell2xml(cell):
    "Build the concise XML element of a notebook cell: its source followed by its outputs"
    # Source element, only for cells with a non-empty source
    has_source = hasattr(cell, 'source') and cell.source
    source_el = Source(''.join(cell.source)) if has_source else None
    # Convert each output and drop the ones that produced nothing
    converted = L(getattr(cell, 'outputs', [])).map(cell2out).filter()
    outs_el = Outs(*converted) if converted else []
    children = [el for el in (source_el, outs_el) if el]
    return Cell(*children, type=cell.cell_type)

In [46]:
#| exports
def nb2xml(nb, until_cell_id=None):
    """Convert the code and markdown cells of a notebook to concise XML.

    Args:
        nb: notebook object exposing a `cells` sequence.
        until_cell_id: id of the first cell to leave out; conversion stops
            right before it. With the default `None`, every cell is converted.

    Returns:
        The XML of the converted cells joined with newlines
        ('' when no cell was converted).
    """
    cells_xml = []
    for c in nb.cells:
        # Cells may carry no `id` (ids only exist since nbformat 4.5):
        # such cells never match the stop id
        cell_id = getattr(c, 'id', None)
        if until_cell_id is not None and cell_id == until_cell_id: break
        # Raw cells (and any other cell type) are left out
        if c.cell_type in ('code','markdown'):
            # do_escape=False: the cell text is emitted as-is, not XML-escaped
            cells_xml.append(to_xml(cell2xml(c), do_escape=False))
    return '\n'.join(cells_xml)

In [50]:
# Compact XML of the code and markdown cells located above the captured cell id
nb2xml(nb, executing_cell_id)



In [42]:
# `cell_id` value captured from get_notebook_data() in an earlier cell
executing_cell_id

'51a7be30-8758-4fdb-ae44-af3e1db751c6'