# `ipython`
> Set of utility functions to be used in Jupyter and Jupyter Lab notebooks.


In [None]:
#|default_exp ipython

In [None]:
#| export
from __future__ import annotations

import configparser
import os
import subprocess
import sys
import warnings
from functools import wraps
from pathlib import Path
from typing import Any, Callable, Optional

import numpy as np
import pandas as pd
from fastcore.test import test_fail
from IPython.core.getipython import get_ipython
from IPython.display import display, Markdown, display_markdown

from ecutilities.core import validate_path, validate_type, IsLocalMachine

In [None]:
#| hide
from nbdev import show_doc, nbdev_export

# System and CLI

In [None]:
#| export
def run_cli(cmd:str = 'ls -l'   # command to execute in the cli
           ):
    """Run a cli command from a jupyter notebook and print the shell output message

    Uses `subprocess.run` with `shell=True`: `cmd` is interpreted by the system shell,
    so only pass commands you trust.

    Both stdout and stderr are captured and printed, so that error messages of a
    failing command are visible in the cell output. Returns `None`.
    """
    # stderr is merged into stdout so that errors show up in the notebook, in order
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    # errors='replace': never fail on output that is not valid UTF-8
    print(p.stdout.decode('utf-8', errors='replace'))

In [None]:
run_cli('pwd')

/home/vtec/projects/ec-packages/ecutilities/nbs-dev



# Notebook setup

In [None]:
#| export
def nb_setup(autoreload:bool = True,   # True to set autoreload in this notebook
             paths:list(Path) = None   # Paths to add to the path environment variable
            ):
    """Use in first cell of notebook to set autoreload, and paths"""
#   Add paths. Default is 'src' if it exists
    if paths is None:
        p = Path('../src').resolve().absolute()
        if p.is_dir():
            paths = [str(p)]
        else:
            paths=[]
    if paths:
        for p in paths:
            sys.path.insert(1, str(p))
        print(f"Added following paths: {','.join(paths)}")

#   Setup auto reload
    if autoreload:
        ipshell = get_ipython()
        ipshell.run_line_magic('load_ext',  'autoreload')
        ipshell.run_line_magic('autoreload', '2')
        print('Set autoreload mode')

In [None]:
show_doc(nb_setup)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/ipython.py#L33){target="_blank" style="float:right; font-size:smaller"}

### nb_setup



Use in first cell of notebook to set autoreload, and paths

In [None]:
nb_setup()

Set autoreload mode


By default, `ipython.nb_setup()` 
- loads and sets `autoreload`
- adds a path to a directory named `src` when it exists at the same level as where the notebook directory is located. If no such `src` directory exists, no path is added

`ipython.nb_setup` assumes the following file structure:

```
    project_directory
          | --- notebooks
          |        | --- current_nb.ipynb
          |        | --- ...
          |
          |--- src
          |     | --- scripts_to_import.py
          |     | --- ...
          |
          |--- data
          |     |
          |     | ...
```

For other file structures, specify `paths` as a `list` of `Path`.

In [None]:
#| export
def cloud_install_project_code(
    package_name:str # project package name, e.g. metagentools or git+https://github.com/repo.git@main
):
    """Pip install the project code package when the notebook does not run on a local machine

    Returns `True` when the notebook runs locally (nothing is installed), `False` otherwise.
    """
    # Colab is detected by trying to import one of its modules
    try:
        from google.colab import drive
    except ModuleNotFoundError:
        # not on colab: check whether this machine is registered as a local machine
        is_local = IsLocalMachine().is_local()
        if is_local:
            print('The notebook is running locally, will not automatically install project code')
        else:
            print('The notebook is running on a cloud VM or the machine was not registered as local')
    else:
        is_local = False
        print('The notebook is running on colab')

    if is_local:
        return is_local

    # cloud environment: the package must be (re)installed
    print(f'Installing project code {package_name}')
    run_cli(f"pip install -U {package_name}")
    print(f"{package_name} is installed.")
    return is_local

When using colab or another cloud VM, project code must be installed every time from the Python Package Index (PyPI) or its GitHub repo.

When running locally, the project code should be pre-installed as part of the environment

In [None]:
cloud_install_project_code(package_name='metagentools');

The notebook is running locally, will not automatically install project code


# Improve output cell formats

In [None]:
#| export
def display_mds(
    *strings:str|tuple[str] # any number of strings with text in markdown format
):
    """Display one or several strings formatted in markdown format"""
    for string in strings:
        display_markdown(Markdown(data=string))

In [None]:
show_doc(display_mds)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/ipython.py#L86){target="_blank" style="float:right; font-size:smaller"}

### display_mds

>      display_mds (*strings:str|tuple[str])

Display one or several strings formatted in markdown format

In [None]:
display_mds('**bold** and _italic_')

**bold** and _italic_

In [None]:
display_mds('**bold** and _italic_',
            '- bullet',
            '- bullet',
            '> Note: this is a note'
)

**bold** and _italic_

- bullet

- bullet

> Note: this is a note

In [None]:
#| export
def display_dfs(*dfs:pd.DataFrame       # any number of Pandas DataFrames
               ):
    """Show every passed `pd.DataFrame`, in order, within the same cell output"""
    for frame in dfs:
        display(frame)

In [None]:
show_doc(display_dfs)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/ipython.py#L94){target="_blank" style="float:right; font-size:smaller"}

### display_dfs

>      display_dfs (*dfs:pandas.core.frame.DataFrame)

Display one or several `pd.DataFrame` in a single cell output

In [None]:
# Two frames of random normal values with different shapes: (10, 5) and (20, 10)
df1 = pd.DataFrame(data=np.random.normal(size=(10,5)))
df2 = pd.DataFrame(data=np.random.normal(size=(20,10)))

# Only the first 3 rows of each frame are displayed, both in this single cell output
display_dfs(df1.head(3), df2.head(3))

Unnamed: 0,0,1,2,3,4
0,0.834359,0.85049,-0.538193,0.028271,1.393566
1,0.510553,0.459742,0.120589,-2.075388,-1.895673
2,0.967313,-1.139417,0.880035,0.009709,1.018101


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.073694,-0.587213,-1.125889,1.197766,-0.604013,1.920884,0.531634,-0.573419,-2.129225,-1.667402
1,-0.068742,0.782907,0.779117,-1.441317,0.135539,-1.302849,-0.376867,-0.497717,1.089608,-1.884657
2,1.075957,0.600451,-0.227211,-0.743412,-0.27733,1.062011,-1.114491,-0.778327,2.048844,0.106391


In [None]:
#| export
class pandas_nrows_ncols:
    """Context manager set max number of rows and cols to apply to any output within the context"""
    def __init__(
        self, 
        nrows:int|None=None, # max number of rows to show; show all rows if `None`
        ncols:int|None=None, # max number of columns to show; show all columns if `None`
    ):
        self.nrows = nrows
        self.ncols = ncols
    
    def __enter__(self):
        self.max_rows = pd.options.display.max_rows
        self.max_cols = pd.options.display.max_columns
        pd.options.display.max_rows = self.nrows
        pd.options.display.max_columns = self.ncols
        return self.max_rows, self.max_cols

    def __exit__(self, exc_type, exc_value, exc_tb):
        pd.options.display.max_rows = self.max_rows
        pd.options.display.max_columns = self.max_cols

In [None]:
show_doc(pandas_nrows_ncols)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/ipython.py#L101){target="_blank" style="float:right; font-size:smaller"}

### pandas_nrows_ncols

>      pandas_nrows_ncols (nrows:int|None=None, ncols:int|None=None)

Context manager set max number of rows and cols to apply to any output within the context

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| nrows | int \| None | None | max number of rows to show; show all rows if `None` |
| ncols | int \| None | None | max number of columns to show; show all columns if `None` |

With no context manager, pandas objects are displayed with a maximum of 60 rows and 20 columns.

In [None]:
# 3 rows x 50 columns of random integers in [0, 100)
df = pd.DataFrame(np.random.randint(low=0, high=100, size=(3,50)))
# Displayed with the current pandas display options, outside of any context manager
display(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,46,34,5,88,80,1,0,10,41,57,...,50,67,61,74,36,6,43,80,32,57
1,64,3,13,75,82,38,85,66,11,82,...,81,18,79,15,31,66,79,42,51,55
2,34,39,36,65,90,66,56,2,29,11,...,87,66,63,15,21,2,47,64,16,88


Using the context manager, all rows and columns will be displayed

In [None]:
# No arguments: `nrows` and `ncols` both default to `None`
with pandas_nrows_ncols():
    display(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
0,46,34,5,88,80,1,0,10,41,57,76,58,7,80,97,89,90,18,12,60,62,97,63,66,79,26,1,39,13,5,74,3,17,61,98,96,20,64,68,72,50,67,61,74,36,6,43,80,32,57
1,64,3,13,75,82,38,85,66,11,82,10,67,44,38,41,38,29,77,24,44,15,78,42,84,77,95,25,14,98,10,68,90,27,45,61,83,14,73,70,23,81,18,79,15,31,66,79,42,51,55
2,34,39,36,65,90,66,56,2,29,11,22,45,92,26,5,39,20,84,70,38,33,22,88,93,25,77,89,21,35,37,86,19,62,74,5,4,80,55,84,24,87,66,63,15,21,2,47,64,16,88


It is also possible to specifically define the number of rows and columns to display

In [None]:
# Within this context, at most 2 rows and 4 columns are shown
with pandas_nrows_ncols(nrows=2, ncols=4):
    display(df)

Unnamed: 0,0,1,...,48,49
0,46,34,...,32,57
...,...,...,...,...,...
2,34,39,...,16,88


> **Technical background**:
> 
> the context manager uses pandas's [`options API`](https://pandas.pydata.org/docs/user_guide/options.html)

In [None]:
pd.options.display.max_rows, pd.options.display.max_columns

(60, 20)

In [None]:
pd.get_option('display.max_rows'), pd.get_option('display.max_columns')

(60, 20)

In [None]:
pd.describe_option('display.max_rows')

display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 60]


In [None]:
# Change the option, then reset it
pd.options.display.max_rows = 10
pd.reset_option('display.max_rows')
# Last expression of the cell: shows the value of the option after the reset
pd.options.display.max_rows

60

In [None]:
#| export
def df_all_cols_and_rows(
    f:Callable,   # function to apply the decorator to
)-> Callable:     # decorated function
    """decorator function forcing all rows and columns of `DataFrames` to be displayed in the wrapped function

    Deprecated: a `DeprecationWarning` is emitted each time the decorator is applied.
    Use the context manager `pandas_nrows_ncols` instead.

    The wrapped function returns whatever `f` returns. The pandas display options are
    restored after the call, also when `f` raises.
    """
    
    msg = 'This decorator is deprecated. Will be removed soon. Use context manager `pandas_nrows_ncols` instead.'
    # stacklevel=2: attribute the warning to the code applying the decorator
    warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
    
    @wraps(f)
    def wrapper(*args, **kwargs):
        # save current limits, then remove them (`None` means no limit)
        max_rows = pd.options.display.max_rows
        max_cols = pd.options.display.max_columns
        pd.options.display.max_rows = None
        pd.options.display.max_columns = None
        try:
            return f(*args, **kwargs)
        finally:
            # always restore the saved limits, even if `f` raised
            pd.options.display.max_rows = max_rows
            pd.options.display.max_columns = max_cols
    
    return wrapper

In [None]:
show_doc(df_all_cols_and_rows)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/ipython.py#L123){target="_blank" style="float:right; font-size:smaller"}

### df_all_cols_and_rows

>      df_all_cols_and_rows (f:Callable)

decorator function forcing all rows and columns of `DataFrames` to be displayed in the wrapped function

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| f | Callable | function to apply the decorator ti |
| **Returns** | **Callable** | **decorated function** |

Usage of the decorator

In [None]:
#| hide
@df_all_cols_and_rows
def show_df(df):
    display(df)

df = pd.DataFrame(np.random.randint(low=0, high=100, size=(3,50)))
show_df(df)



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
0,77,33,58,96,85,26,79,83,90,16,45,67,3,56,86,56,52,35,62,33,89,63,23,73,54,86,20,66,42,16,49,99,52,37,84,66,26,42,9,54,76,16,78,74,48,65,15,80,40,27
1,71,14,54,86,53,98,52,75,25,9,73,78,36,72,13,55,40,10,94,48,42,35,32,99,22,76,8,48,48,97,38,49,9,5,97,61,0,46,66,97,27,21,52,92,18,14,37,88,24,93
2,47,15,71,83,67,21,33,24,19,58,28,34,40,66,43,60,33,39,68,71,17,32,66,84,45,82,76,18,39,34,1,68,82,19,71,69,68,47,78,9,29,41,25,67,41,63,24,61,83,83


In [None]:
#| export
def display_full_df(
    df:pd.DataFrame|pd.Series,  # `DataFrame` or `Series` to display
):
    """Display all rows and all columns of a pandas `DataFrame` or `Series`

    Raises a `TypeError` when `df` is neither a `DataFrame` nor a `Series`.
    """
    # guard clause: reject anything that is not one of the two supported types
    if not (validate_type(df, pd.DataFrame, raise_error=False) or validate_type(df, pd.Series, raise_error=False)):
        raise TypeError(f"df must me a pandas `DataFrame` or `Series`, not a {type(df)}")

    # default arguments of the context manager: no limit on rows and columns
    with pandas_nrows_ncols():
        display(df)

In [None]:
show_doc(display_full_df)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/ipython.py#L145){target="_blank" style="float:right; font-size:smaller"}

### display_full_df

>      display_full_df
>                       (df:pandas.core.frame.DataFrame|pandas.core.series.Serie
>                       s)

Display a pandas `DataFrame` or `Series` showing all rows and columns

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| df | pd.DataFrame \| pd.Series | `DataFrame` or `Series` to display |

In [None]:
df = pd.DataFrame(np.random.randint(low=0, high=100, size=(3,50)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,16,97,72,65,52,91,47,91,90,5,...,12,81,37,18,46,81,97,13,41,96
1,80,32,74,26,2,97,78,26,68,10,...,33,41,97,88,66,1,38,96,78,59
2,42,98,3,86,93,25,34,16,76,27,...,48,28,16,58,61,15,13,50,14,99


In [None]:
display_full_df(df)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49
0,16,97,72,65,52,91,47,91,90,5,90,29,32,65,56,82,56,98,20,85,81,35,34,64,8,55,65,83,49,58,98,1,10,25,67,65,78,73,87,34,12,81,37,18,46,81,97,13,41,96
1,80,32,74,26,2,97,78,26,68,10,51,21,49,0,97,27,62,5,19,86,75,43,76,34,58,61,96,32,77,58,16,58,23,6,10,44,50,92,26,81,33,41,97,88,66,1,38,96,78,59
2,42,98,3,86,93,25,34,16,76,27,87,95,15,5,27,16,74,96,15,46,62,47,91,17,91,36,35,7,83,16,14,52,78,36,16,77,33,10,68,3,48,28,16,58,61,15,13,50,14,99


In [None]:
#| hide
display_full_df(df.loc[0, :].T)

0     16
1     97
2     72
3     65
4     52
5     91
6     47
7     91
8     90
9      5
10    90
11    29
12    32
13    65
14    56
15    82
16    56
17    98
18    20
19    85
20    81
21    35
22    34
23    64
24     8
25    55
26    65
27    83
28    49
29    58
30    98
31     1
32    10
33    25
34    67
35    65
36    78
37    73
38    87
39    34
40    12
41    81
42    37
43    18
44    46
45    81
46    97
47    13
48    41
49    96
Name: 0, dtype: int64

In [None]:
# `contains` must be found verbatim in the error message raised by `display_full_df`
# (including its 'must me' spelling), so keep both in sync
msg = 'should raise a TypeError'
contains = 'df must me a pandas `DataFrame` or `Series`'

# Passing a plain string instead of a DataFrame/Series is expected to fail
test_fail(display_full_df, args=['a string'], msg=msg, contains=contains)

In [None]:
#| hide
nbdev_export()