# `ipython`
> Set of utility functions to be used in Jupyter and Jupyter Lab notebooks.


In [None]:
#|default_exp ipython

In [None]:
#| export
from __future__ import annotations
from fastcore.test import test_fail
from functools import wraps
from IPython.core.getipython import get_ipython
from IPython.display import display, Markdown, display_markdown
from pathlib import Path
from typing import Any, List, Callable, Optional, Union
from eccore.core import safe_path, path_to_parent_dir, is_type, CurrentMachine

import numpy as np
import pandas as pd
import sys
import subprocess
import warnings

In [None]:
#| hide
from nbdev import show_doc, nbdev_export

# System and CLI

In [None]:
#| export

# TODO: replace with fastcore run

def run_cli(cmd:str = 'ls -l'   # command to execute in the cli
           ):
    """Runs a cli command from jupyter notebook and print the shell output message

    Uses `subprocess.run` with `shell=True`: `cmd` is interpreted by the system shell, so only
    pass commands coming from a trusted source.

    Both stdout and stderr of the command are captured and printed, so that error messages are
    part of the printed output. Nothing is returned.
    """
    # stderr is merged into stdout, otherwise the error messages of a failing command are not captured
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    # errors='replace': output that is not valid UTF-8 is still printed instead of raising UnicodeDecodeError
    print(p.stdout.decode('utf-8', errors='replace'))

In [None]:
run_cli('pwd')

/home/vtec/projects/ec-packages/eccore/nbs-dev



# Notebook setup

To be updated using fastcore functions, incl. `walk`, `in_notebook`, ...

In [None]:
#| export

def nb_setup(
    autoreload:bool = True,       # True to set autoreload in this notebook
    paths: list[Union[str,Path]] = None   # Paths to add to the path environment variable
    ):
    """Use in first cell of notebook to set autoreload, and add system paths

    Always add a path to the directory `src` if a `src` directory exists at the same level as the `nbs` directory.

    When the notebook is not located in a tree including the name `nbs`, `src` directory is searched at the same level
    as the directory in which the notebook is located.

    `paths` can also be a single `str` or `Path`; it is then handled as a list with one element.

    When no IPython shell is available, autoreload is skipped with a message instead of raising an error.
    """
    # Handle paths
    # Add src if it exists
    nbs_root = path_to_parent_dir('nbs')
    p2src = (nbs_root / '../src').resolve()     # `resolve` already returns an absolute path
    if p2src.is_dir():
        p = str(p2src)
        if p not in sys.path:
            sys.path.insert(0, p)
            print(f"Added path: {p}")

    # Add passed paths
    if paths:
        # A lone str would otherwise be iterated character by character
        if isinstance(paths, (str, Path)): paths = [paths]
        for p in paths:
            if isinstance(p, Path): p = str(p.resolve())
            if p not in sys.path:
                # Position 1 keeps the first entry of sys.path in place
                sys.path.insert(1, p)
                print(f"Added path: {p}")

    # Setup auto reload
    if autoreload:
        ipshell = get_ipython()
        # `get_ipython` returns None when the code does not run inside an IPython shell
        if ipshell is None:
            print('No IPython shell available, autoreload mode not set')
            return
        ipshell.run_line_magic('load_ext',  'autoreload')
        ipshell.run_line_magic('autoreload', '2')
        print('Set autoreload mode')

In [None]:
show_doc(nb_setup)

---

### nb_setup

>      nb_setup (autoreload:bool=True,
>                paths:list[typing.Union[str,pathlib.Path]]=None)

*Use in first cell of notebook to set autoreload, and add system paths

Always add a path to the directoruy 'src' if `srs` directory exists at the same level as the `nbs` directory.

When the notebook is not located in a tree including the name `nbs`, `src` directory is searched at the same level
as the directory in which the notebook is located.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| autoreload | bool | True | True to set autoreload in this notebook |
| paths | list[Union[str, Path]] | None | Paths to add to the path environment variable |

By default, `ipython.nb_setup()`:
- loads and sets `autoreload`
- adds a path to a directory named `src` when it exists at the same level as the notebook directory. If no such `src` directory exists, no path is added

`ipython.nb_setup` assumes the following file structure:

```
    project_directory
          |--- nbs
          |     | --- current_nb.ipynb
          |     | --- ...
          |
          |--- src
          |     | --- module_to_import.py
          |     | --- ...
          |
          |--- data
          |     |
          |     | ...
```

For other file structures, specify the paths to add as a `list` of `str` or `Path`.

Before running `nb_setup`, `sys.path` does not include the path to the local source directory. After running it, it will be added, unless the directory does not exist.

In [None]:
sys.path

['/home/vtec/projects/ec-packages/eccore/nbs-dev',
 '/home/vtec/projects/lewagon/1286-Jul-2023/1_data-challenges/data-context-and-setup',
 '/home/vtec/projects/ec-packages/eccore/nbs-dev',
 '/home/vtec/miniconda3/envs/eccore/lib/python310.zip',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/lib-dynload',
 '',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/site-packages',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/site-packages/PyQt5_sip-12.11.0-py3.10-linux-x86_64.egg',
 '/home/vtec/projects/ec-packages/eccore']

In [None]:
nb_setup(autoreload=False)

In [None]:
sys.path

['/home/vtec/projects/ec-packages/eccore/nbs-dev',
 '/home/vtec/projects/lewagon/1286-Jul-2023/1_data-challenges/data-context-and-setup',
 '/home/vtec/projects/ec-packages/eccore/nbs-dev',
 '/home/vtec/miniconda3/envs/eccore/lib/python310.zip',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/lib-dynload',
 '',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/site-packages',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/site-packages/PyQt5_sip-12.11.0-py3.10-linux-x86_64.egg',
 '/home/vtec/projects/ec-packages/eccore']

We can also add other specific paths:

In [None]:
# Absolute path to `../nbs`, passed to `nb_setup` as a `str`
path_to_add = str(Path('../nbs').resolve().absolute())
nb_setup(autoreload=False, paths=[path_to_add])

Added path: /home/vtec/projects/ec-packages/eccore/nbs


In [None]:
sys.path

['/home/vtec/projects/ec-packages/eccore/nbs-dev',
 '/home/vtec/projects/ec-packages/eccore/nbs',
 '/home/vtec/projects/lewagon/1286-Jul-2023/1_data-challenges/data-context-and-setup',
 '/home/vtec/projects/ec-packages/eccore/nbs-dev',
 '/home/vtec/miniconda3/envs/eccore/lib/python310.zip',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/lib-dynload',
 '',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/site-packages',
 '/home/vtec/miniconda3/envs/eccore/lib/python3.10/site-packages/PyQt5_sip-12.11.0-py3.10-linux-x86_64.egg',
 '/home/vtec/projects/ec-packages/eccore']

In [None]:
#| export
def install_code_on_cloud(
    package_name:str, # project package name, e.g. metagentools or git+https://github.com/repo.git@main
    quiet:bool=False # install quietly when True
):
    """pip install the project code package, when nb is running in the cloud.

    The package is installed when `CurrentMachine` reports colab, kaggle, or a machine that is not local.
    Nothing is installed when the machine is reported as local.

    pip is run as a module of the interpreter running this code (`sys.executable -m pip`), so that the
    package is installed for that interpreter. The final message reflects the exit status of pip.
    """
    import shlex    # only needed here, to split `package_name` the way a shell would

    machine = CurrentMachine()

    if machine.is_colab:
        on_cloud = True
        print('The notebook is running on colab.', end=' ')
        print(f'Will install {package_name}')
    elif machine.is_kaggle:
        on_cloud = True
        print('The notebook is running on kaggle.', end=' ')
        print(f'Will install {package_name}')
    elif machine.is_local:
        on_cloud = False
        print('The notebook is running locally, will not automatically install project code')
    else:
        on_cloud = True
        print('The notebook is running on a cloud VM or the machine was not registered as local')
        print(f'Will install {package_name}')

    if not on_cloud:
        return

    print(f'Installing project code {package_name}')
    cmd = [sys.executable, '-m', 'pip', 'install', '-U']
    if quiet: cmd.append('-qq')
    # No shell is involved: a specifier such as `pkg>=1.0` is passed to pip as is,
    # instead of `>` being handled as a redirection
    cmd.extend(shlex.split(package_name))

    # `run_cli` does not expose the exit status, so pip is run directly to detect failures
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print(p.stdout.decode('utf-8', errors='replace'))

    if p.returncode == 0:
        print((f"{package_name} is installed."))
    else:
        print(f"Installation of {package_name} failed (pip exit code {p.returncode}).")

In [None]:
show_doc(install_code_on_cloud)

---

### install_code_on_cloud

>      install_code_on_cloud (package_name:str, quiet:bool=False)

*pip install the project code package, when nb is running in the cloud.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| package_name | str |  | project package name, e.g. metagentools or git+https://github.com/repo.git@main |
| quiet | bool | False | install quietly with Trud |

When using colab, kaggle or another cloud VM, the specific project code must be installed every time, either from the Python Package Index (PyPI) or from its GitHub repo.

When running locally, the project code should be pre-installed as part of the environment

In [None]:
install_code_on_cloud(package_name='metagentools');

The notebook is running locally, will not automatically install project code


# Improve output cell formats

In [None]:
#| export
def display_mds(
    *strings:str|tuple[str] # any number of strings with text in markdown format
):
    """Render each passed string as markdown, in the order received"""
    # Generator: each `Markdown` object is built right before it is displayed
    md_objects = (Markdown(data=text) for text in strings)
    for md in md_objects:
        display_markdown(md)

In [None]:
show_doc(display_mds)

---

### display_mds

>      display_mds (*strings:str|tuple[str])

*Display one or several strings formatted in markdown format*

In [None]:
display_mds('**bold** and _italic_')

**bold** and _italic_

In [None]:
display_mds('**bold** and _italic_',
            '- bullet',
            '- bullet',
            '> Note: this is a note'
)

**bold** and _italic_

- bullet

- bullet

> Note: this is a note

In [None]:
#| export
def display_dfs(*dfs:pd.DataFrame       # any number of Pandas DataFrames
               ):
    """Show every passed `pd.DataFrame`, one after the other, in the output of a single cell"""
    for frame in dfs: display(frame)

In [None]:
show_doc(display_dfs)

---

### display_dfs

>      display_dfs (*dfs:pandas.core.frame.DataFrame)

*Display one or several `pd.DataFrame` in a single cell output*

In [None]:
# Two frames of random values, with different shapes
df1 = pd.DataFrame(data=np.random.normal(size=(10,5)))
df2 = pd.DataFrame(data=np.random.normal(size=(20,10)))

# Only the first three rows of each frame are displayed
display_dfs(df1.head(3), df2.head(3))

Unnamed: 0,0,1,2,3,4
0,1.181754,0.615892,-1.18646,-0.294719,-0.49517
1,-0.493491,0.380444,0.782281,-0.105894,-1.223624
2,-0.311006,1.246225,0.046744,0.050204,-1.087755


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.939571,0.394111,-0.140105,0.125774,-1.155984,0.285345,-1.539391,1.678055,-1.670574,-1.002644
1,0.301599,-0.462982,-0.559338,-0.726182,-0.711528,-0.984906,-0.607021,0.007495,-0.791158,-0.136291
2,-1.613467,0.405388,-0.600787,-0.29188,-1.57987,0.0646,1.236128,-0.128559,0.131884,0.173675


In [None]:
#| export
class pandas_nrows_ncols:
    """Context manager that sets the max number of rows and cols to apply to any output within the context

    On entering, the current values of `display.max_rows` and `display.max_columns` are saved and
    returned as a tuple `(max_rows, max_columns)`. They are restored when leaving the context,
    whether or not an exception was raised inside it.
    """
    def __init__(
        self, 
        nrows:int|None=None, # max number of rows to show; show all rows if `None`
        ncols:int|None=None, # max number of columns to show; show all columns if `None`
    ):
        self.nrows = nrows
        self.ncols = ncols
    
    def __enter__(self):
        # Save the current values so that they can be restored later
        self.max_rows = pd.options.display.max_rows
        self.max_cols = pd.options.display.max_columns
        try:
            pd.options.display.max_rows = self.nrows
            pd.options.display.max_columns = self.ncols
        except Exception:
            # `__exit__` is not called when `__enter__` raises: restore here, otherwise an
            # invalid `ncols` would leave `max_rows` modified
            self._restore()
            raise
        return self.max_rows, self.max_cols

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Nothing truthy is returned: an exception raised inside the context propagates
        self._restore()

    def _restore(self):
        """Set both display options back to the values saved in `__enter__`"""
        pd.options.display.max_rows = self.max_rows
        pd.options.display.max_columns = self.max_cols

In [None]:
show_doc(pandas_nrows_ncols)

---

### pandas_nrows_ncols

>      pandas_nrows_ncols (nrows:int|None=None, ncols:int|None=None)

*Context manager that sets the max number of rows and cols to apply to any output within the context*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| nrows | int \| None | None | max number of rows to show; show all rows if `None` |
| ncols | int \| None | None | max number of columns to show; show all columns if `None` |

Without the context manager, pandas objects are displayed with a maximum of 60 rows and 20 columns.

In [None]:
df = pd.DataFrame(np.random.randint(low=0, high=100, size=(3,50)))
display(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,35,70,23,27,0,1,23,55,80,30,...,39,39,83,19,51,49,23,26,41,38
1,74,78,0,16,18,37,18,21,27,45,...,19,50,29,65,87,16,5,39,79,13
2,64,71,83,74,54,75,80,6,2,2,...,87,41,91,46,79,37,6,94,81,77


Using the context manager, all rows and columns will be displayed

In [None]:
with pandas_nrows_ncols():
    display(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
0,35,70,23,27,0,1,23,55,80,30,78,12,69,89,44,57,30,0,16,49,29,2,14,51,48,61,85,19,71,87,30,94,27,59,57,29,88,71,41,0,39,39,83,19,51,49,23,26,41,38
1,74,78,0,16,18,37,18,21,27,45,80,6,71,38,5,7,76,19,71,35,8,36,98,69,64,42,34,1,39,53,42,27,22,47,88,81,26,94,44,86,19,50,29,65,87,16,5,39,79,13
2,64,71,83,74,54,75,80,6,2,2,6,61,8,22,64,45,57,67,23,62,96,10,61,63,92,8,39,26,53,79,5,98,41,16,99,89,14,41,9,0,87,41,91,46,79,37,6,94,81,77


It is also possible to specifically define the number of rows and columns to display

In [None]:
with pandas_nrows_ncols(nrows=2, ncols=6):
    display(df)

Unnamed: 0,0,1,2,...,47,48,49
0,35,70,23,...,26,41,38
...,...,...,...,...,...,...,...
2,64,71,83,...,94,81,77


In [None]:
# The limits set by the context manager also apply to the plain-text output of `print`
with pandas_nrows_ncols(2,6):
    print(df)

    0   1   2   ...  47  48  49
0   35  70  23  ...  26  41  38
..  ..  ..  ..  ...  ..  ..  ..
2   64  71  83  ...  94  81  77

[3 rows x 50 columns]


> ### Technical background
> 
> The context manager uses the pandas [`options API`](https://pandas.pydata.org/docs/user_guide/options.html).

In [None]:
pd.options.display.max_rows, pd.options.display.max_columns

(60, 20)

In [None]:
pd.get_option('display.max_rows'), pd.get_option('display.max_columns')

(60, 20)

In [None]:
pd.describe_option('display.max_rows')

display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 60]


In [None]:
# Change the option, then bring it back to its default value with `reset_option`
pd.options.display.max_rows = 10
pd.reset_option('display.max_rows')
pd.options.display.max_rows

60

In [None]:
#| export
def display_full_df(
    df:pd.DataFrame|pd.Series,  # `DataFrame` or `Series` to display
):
    """Show a pandas `DataFrame` or `Series` without truncating any row or column

    Raises a `TypeError` when `df` is neither a `DataFrame` nor a `Series`."""
    is_pandas_obj = (
        is_type(df, pd.DataFrame, raise_error=False)
        or is_type(df, pd.Series, raise_error=False)
    )
    # Guard clause: reject any other type before displaying anything
    if not is_pandas_obj:
        raise TypeError(f"df must me a pandas `DataFrame` or `Series`, not a {type(df)}")

    # No limits passed: both display options are set to `None` within the context
    with pandas_nrows_ncols():
        display(df)

In [None]:
show_doc(display_full_df)

---

### display_full_df

>      display_full_df
>                       (df:pandas.core.frame.DataFrame|pandas.core.series.Serie
>                       s)

*Display a pandas `DataFrame` or `Series` showing all rows and columns*

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| df | pd.DataFrame \| pd.Series | `DataFrame` or `Series` to display |

In [None]:
df = pd.DataFrame(np.random.randint(low=0, high=100, size=(3,50)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,61,70,53,42,29,48,99,7,22,12,...,9,21,81,91,3,72,92,44,47,41
1,6,27,8,89,14,66,58,37,57,19,...,40,42,23,10,38,83,0,5,74,44
2,91,1,65,57,5,23,90,33,15,64,...,36,79,62,97,72,44,0,67,63,85


In [None]:
display_full_df(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
0,61,70,53,42,29,48,99,7,22,12,24,42,67,81,49,63,39,89,13,76,7,63,2,69,95,47,40,37,56,73,56,19,48,52,12,39,75,48,1,50,9,21,81,91,3,72,92,44,47,41
1,6,27,8,89,14,66,58,37,57,19,16,31,55,45,11,81,27,6,49,87,62,29,26,59,80,45,8,4,57,25,61,86,4,10,80,52,32,65,87,4,40,42,23,10,38,83,0,5,74,44
2,91,1,65,57,5,23,90,33,15,64,31,53,39,4,17,48,31,13,92,42,28,85,61,47,53,29,27,95,13,2,89,92,8,30,42,83,13,22,1,84,36,79,62,97,72,44,0,67,63,85


In [None]:
#| hide
display_full_df(df.loc[0, :].T)

0     61
1     70
2     53
3     42
4     29
5     48
6     99
7      7
8     22
9     12
10    24
11    42
12    67
13    81
14    49
15    63
16    39
17    89
18    13
19    76
20     7
21    63
22     2
23    69
24    95
25    47
26    40
27    37
28    56
29    73
30    56
31    19
32    48
33    52
34    12
35    39
36    75
37    48
38     1
39    50
40     9
41    21
42    81
43    91
44     3
45    72
46    92
47    44
48    47
49    41
Name: 0, dtype: int64

In [None]:
# Negative test: `display_full_df` is called with a `str` and is expected to fail
msg = 'should raise a TypeError'
# Text expected in the message of the exception raised by `display_full_df`
contains = 'df must me a pandas `DataFrame` or `Series`'

test_fail(display_full_df, args=['a string'], msg=msg, contains=contains)

In [None]:
#| hide
nbdev_export()