# `ipython`
> Set of utility functions to be used in Jupyter and Jupyter Lab notebooks.


In [None]:
#|default_exp ipython

In [None]:
#| export
from __future__ import annotations
from IPython.core.getipython import get_ipython
from IPython.display import display, Markdown, display_markdown
from pathlib import Path

import numpy as np
import pandas as pd
import sys

In [None]:
#| hide
from nbdev import show_doc, nbdev_export

In [None]:
#| export
def nb_setup(autoreload:bool = True,              # True to set autoreload in this notebook
             paths:list[Path|str]|None = None     # Paths to add to `sys.path`; `../src` when None and it exists
            ):
    """Use in first cell of notebook to set autoreload, and paths

    When `paths` is None, `../src` is added if that directory exists, otherwise no path is added.
    Entries may be `str` or `Path`; a single `str`/`Path` is treated as a one-element list.
    Each entry is inserted at position 1 of `sys.path`, one after the other.
    """
    # Add paths. Default is 'src' if it exists
    if paths is None:
        default_src = Path('../src').resolve().absolute()
        paths = [default_src] if default_src.is_dir() else []
    elif isinstance(paths, (str, Path)):
        # A bare path would otherwise be iterated character by character
        paths = [paths]

    # Convert once: `sys.path` entries and `str.join` both require strings, not `Path` objects
    path_strs = [str(p) for p in paths]
    if path_strs:
        for p in path_strs:
            sys.path.insert(1, p)
        print(f"Added following paths: {','.join(path_strs)}")

    # Setup auto reload
    if autoreload:
        ipshell = get_ipython()
        if ipshell is None:
            # `get_ipython()` returns None when the code is not run from an IPython session
            print('Not running inside IPython, autoreload not set')
        else:
            ipshell.run_line_magic('load_ext',  'autoreload')
            ipshell.run_line_magic('autoreload', '2')
            print('Set autoreload mode')

By default, `ipython.nb_setup()` 
- loads and sets `autoreload`
- adds the path of a directory named `src` when it exists at the same level as the notebook directory. If no such `src` directory exists, no path is added

`ipython.nb_setup` assumes the following file structure:

```
    project_directory
          | --- notebooks
          |        | --- current_nb.ipynb
          |        | --- ...
          |
          |--- src
          |     | --- scripts_to_import.py
          |     | --- ...
          |
          |--- data
          |     |
          |     | ...
```

For other file structures, specify paths as a `list` of `Path`

In [None]:
#| export
def colab_install_project_code(
    package_name:str # project package name, e.g. git+https://github.com/vtecftwy/metagentools.git@main
):
    """When nb is running on colab, pip install the project code package

    Colab is detected by the presence of the `google.colab` module.
    Returns True when running on colab (after pip completed), False otherwise.
    Raises `subprocess.CalledProcessError` when pip exits with a non-zero status.
    """
    import subprocess   # local import, only needed by this function

    # Only the import is guarded, so that a ModuleNotFoundError raised while installing
    # cannot be mistaken for "not running on colab"
    try:
        import google.colab  # noqa: F401
    except ModuleNotFoundError:
        print('The notebook is running locally, will not automatically install project code')
        return False

    print('The notebook is running on colab')
    print('Installing project code')
    # Argument list (no shell) run with the kernel's own interpreter, so that the package
    # is installed in the environment this notebook is running in
    subprocess.run([sys.executable, '-m', 'pip', 'install', '-U', package_name], check=True)
    return True

When using colab, the project code must be installed every time from its GitHub repo or from the Python Package Index (PyPI).

When running locally, the project code should be pre-installed as part of the environment

In [None]:
# Example: when `google.colab` cannot be imported, nothing is installed and False is returned
colab_install_project_code(package_name='git+https://github.com/vtecftwy/metagentools.git@main')

The notebook is running locally, will not automatically install project code


False

In [None]:
#| export
def files_in_tree(
    path: str|Path,               # path to the directory to scan
    pattern: str|None = None      # pattern (glob style) to match in file name to filter the content
):
    """List files in directory and its subdirectories, print tree starting from parent directory

    Only `path` itself and its direct subdirectories are scanned (no deeper recursion).
    `pattern` is wrapped as `*pattern*`, i.e. it matches anywhere in the file name.
    Returns the list of matching files as `Path`; the number printed after each file name
    is the index of that file in the returned list.
    """
    path = Path(path)    # accept `str` as well, as advertised by the signature
    pattern = '*' if pattern is None else f"*{pattern}*"
    paths = []
    pad = ' ' * 2

    def _list_files(directory, prefix):
        """Print and collect the files of `directory` matching `pattern`"""
        for f in [p for p in directory.glob(pattern) if p.is_file()]:
            print(f"{prefix}|--{f.name} ({len(paths)})")
            paths.append(f)

    # `path.parent` never fails, whereas `path.parents[0]` raises for a path without parent (e.g. '.')
    print(f"{path.parent.name}")
    print(f"{pad}|--{path.name}")
    _list_files(path, prefix=f"{pad}|{pad*2}")
    for d in [p for p in path.iterdir() if p.is_dir()]:
        print(f"{pad}|{pad*2}|--{d.name}")
        _list_files(d, prefix=f"{pad}|{pad*2}|{pad*2}")
    return paths

In [None]:
# Absolute path of the current working directory
p2dir = Path('').resolve()

# No pattern: every file in the directory and in its direct subdirectories is listed
files = files_in_tree(p2dir)
print(f"List of {len(files)} files when unfiltered")

ec-utils
  |--nbs-dev
  |    |--0_02_plotting.ipynb (0)
  |    |--2_01_image_utils.ipynb (1)
  |    |--1_01_eda_stats_utils.ipynb (2)
  |    |--0_01_ipython.ipynb (3)
  |    |--.last_checked (4)
  |    |--sidebar.yml (5)
  |    |--1_02_ml.ipynb (6)
  |    |--index.ipynb (7)
  |    |--nbdev.yml (8)
  |    |--9_01_dev_utils.ipynb (9)
  |    |--styles.css (10)
  |    |--_quarto.yml (11)
  |    |--.ipynb_checkpoints
  |    |    |--0_02_plotting-checkpoint.ipynb (12)
  |    |    |--9_01_dev_utils-checkpoint.ipynb (13)
  |    |    |--0_01_ipython-checkpoint.ipynb (14)
  |    |    |--1_01_eda_stats_utils-checkpoint.ipynb (15)
  |    |    |--index-checkpoint.ipynb (16)
  |    |    |--2_01_image_utils-checkpoint.ipynb (17)
  |    |    |--1_02_ml-checkpoint.ipynb (18)
List of 19 files when unfiltered


In [None]:
# Same directory, keeping only the files whose name contains 'ipynb'
files = files_in_tree(p2dir, pattern='ipynb')
print(f"List of {len(files)} files when filtered")

ec-utils
  |--nbs-dev
  |    |--0_02_plotting.ipynb (0)
  |    |--2_01_image_utils.ipynb (1)
  |    |--1_01_eda_stats_utils.ipynb (2)
  |    |--0_01_ipython.ipynb (3)
  |    |--1_02_ml.ipynb (4)
  |    |--index.ipynb (5)
  |    |--9_01_dev_utils.ipynb (6)
  |    |--.ipynb_checkpoints
  |    |    |--0_02_plotting-checkpoint.ipynb (7)
  |    |    |--9_01_dev_utils-checkpoint.ipynb (8)
  |    |    |--0_01_ipython-checkpoint.ipynb (9)
  |    |    |--1_01_eda_stats_utils-checkpoint.ipynb (10)
  |    |    |--index-checkpoint.ipynb (11)
  |    |    |--2_01_image_utils-checkpoint.ipynb (12)
  |    |    |--1_02_ml-checkpoint.ipynb (13)
List of 14 files when filtered


In [None]:
#| export
def display_mds(
    *strings:str # any number of strings with text in markdown format
):
    """Render each of the passed markdown formatted strings, in the order received"""
    for md_text in strings:
        # Wrap the raw text into a `Markdown` object, then publish its markdown representation
        md_object = Markdown(data=md_text)
        display_markdown(md_object)

In [None]:
# A single markdown string
display_mds('**bold** and _italic_')

**bold** and _italic_

In [None]:
# Several strings: each one is displayed separately, in the order passed
display_mds('**bold** and _italic_',
            '- bullet',
            '- bullet',
            '> Note: this is a note'
           )

**bold** and _italic_

- bullet

- bullet

> Note: this is a note

In [None]:
#| export
def display_dfs(*dfs:pd.DataFrame       # any number of Pandas DataFrames
               ):
    """Show every passed DataFrame, one after the other, in the output of a single cell"""
    for frame in dfs:
        # One `display` call per DataFrame, in the order received
        display(frame)

In [None]:
# Two DataFrames of different shapes filled with normally distributed random values
# (no seed is set in this cell, so the values differ on every run)
df1 = pd.DataFrame(data=np.random.normal(size=(10,5)))
df2 = pd.DataFrame(data=np.random.normal(size=(20,10)))

# Show the first three rows of both DataFrames in the same cell output
display_dfs(df1.head(3), df2.head(3))

Unnamed: 0,0,1,2,3,4
0,-0.932433,-0.192335,0.089406,0.908922,1.467167
1,0.808964,0.276916,1.869681,0.642074,1.134207
2,-0.155523,1.014947,1.785617,-0.330721,-0.645049


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.37732,1.026669,-1.38686,-1.767088,-1.118104,0.770374,1.507177,-0.381338,-0.650765,-2.525749
1,1.750157,-1.667336,0.696466,0.165477,-0.192437,-0.26017,0.934496,-0.302568,-0.924098,-0.010387
2,-0.958004,-2.562483,-0.761086,0.929342,1.623859,0.624701,0.524622,1.529208,0.788544,0.000719


In [None]:
#| hide
# nbdev: export the notebooks to Python modules (this notebook sets `default_exp ipython`)
nbdev_export()