# `core`
> set of functions and classes used across this package and usable for other packages

In [None]:
#|default_exp core

In [None]:
#| export
from __future__ import annotations
from pathlib import Path
from typing import Any, List, Optional

import configparser
import numpy as np
import os
import sys
import warnings

# Try to load Google drive package for Google Colab
try: from google.colab import drive
except: pass

In [None]:
#| hide
from nbdev import show_doc, nbdev_export

In [None]:
#| export
# Retrieve the package root
# `__file__` is imported from the `ecutilities` package itself instead of relying on the
# `__file__` of the running module (presumably because it is not defined when this code is
# executed from the notebook - TODO confirm). Note that this import rebinds the module-level
# name `__file__` for all the code below.
from ecutilities import __file__
# CODE_ROOT: directory holding the package's code (first parent of the imported `__file__`)
CODE_ROOT = Path(__file__).parents[0]
# PACKAGE_ROOT: directory one level above CODE_ROOT (second parent of the imported `__file__`)
PACKAGE_ROOT = Path(__file__).parents[1]

In [None]:
#| hide
CODE_ROOT, PACKAGE_ROOT

(Path('/home/vtec/projects/ec-packages/ecutilities/ecutilities'),
 Path('/home/vtec/projects/ec-packages/ecutilities'))

# Validation tools

In [None]:
#| export
def is_type(
    obj:Any,                 # object whose type to validate
    obj_type:type,           # expected type for `obj`
    raise_error:bool=False,  # when True, raise a ValueError if `obj` is not of the right type
)-> bool:                    # True when `obj` is of the right type, False otherwise
    """Check that `obj` is an instance of `obj_type`.

    A `ValueError` is always raised when `obj_type` is not itself a type. When `obj` is not of
    the expected type, return `False`, or raise a `ValueError` if `raise_error` is `True`.
    """
    # Guard against a wrong second argument before doing the actual check
    if not isinstance(obj_type, type):
        raise ValueError(f"{obj_type} is not a type")

    matches = isinstance(obj, obj_type)
    if raise_error and not matches:
        raise ValueError(f"passed object is not of type {obj_type}")
    return matches

In [None]:
show_doc(is_type)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L29){target="_blank" style="float:right; font-size:smaller"}

### is_type

>      is_type (obj:Any, obj_type:type, raise_error:bool=False)

Validate that `obj` is of type `obj_type`. Raise error in the negative when `raise_error` is `True`

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| obj | Any |  | object whose type to validate |
| obj_type | type |  | expected type for `obj` |
| raise_error | bool | False | when True, raise a ValueError is `obj` is not of the right type |
| **Returns** | **bool** |  | **True when `obj` is of the right type, False otherwise** |

In [None]:
is_type(obj='this is a string', obj_type=str)

True

In [None]:
is_type(obj=np.ones(shape=(2,2)), obj_type=np.ndarray)

True

In [None]:
#| export
def validate_path(
    path:str|Path,           # path to validate
    path_type:str='file',    # type of the target path: `'file'`, `'dir'` or `'any'`
    raise_error:bool=False,  # when True, raise a ValueError if nothing of the expected type exists at `path`
)-> bool:                    # True when path is a valid path, False otherwise
    """Validate that `path` (a `Path` or `str`) points to an existing file, directory or either.

    `path_type` selects the check: `'file'` requires an existing file, `'dir'` an existing
    directory and `'any'` accepts anything that exists. Any other value never validates.
    When the check fails, return `False`, or raise a `ValueError` if `raise_error` is `True`.
    """
    path = Path(path)
    # One existence check per supported `path_type`
    checks = {'file': path.is_file, 'dir': path.is_dir, 'any': path.exists}
    check = checks.get(path_type)
    if check is not None and check():
        return True

    if raise_error:
        # Name what was actually expected, so the message is not misleading for `'dir'` and `'any'`
        labels = {'file': 'file', 'dir': 'directory', 'any': 'file or directory'}
        expected = labels.get(path_type, f"path of type {path_type!r}")
        raise ValueError(f"No {expected} at {path.absolute()}. Check the path")
    return False

In [None]:
show_doc(validate_path)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L42){target="_blank" style="float:right; font-size:smaller"}

### validate_path

>      validate_path (path:str|pathlib.Path, path_type:str='file',
>                     raise_error:bool=False)

Validate that path is a Path or str and points to a real file or directory

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| path | str \| Path |  | path to validate |
| path_type | str | file | type of the target path: `'file'`, `'dir'` or `'any'` |
| raise_error | bool | False | when True, raise a ValueError is path does not a file |
| **Returns** | **bool** |  | **True when path is a valid path, False otherwise** |

In [None]:
path_file = Path('../data/img/IMG_001_512px.jpg')
validate_path(path_file)

True

In [None]:
validate_path(path_file, path_type='any')

True

In [None]:
path_dir = Path('../data')
validate_path(path_dir, path_type='dir')

True

In [None]:
validate_path(path_dir, path_type='any')

True

In [None]:
path_error = Path('../data/img/IIIMG_001_512px.jpg')
validate_path(path_error)

False

In [None]:
#| export
def safe_path(
    path:str|Path, # path to validate
)-> Path:          # validated path returned as a pathlib.Path
    """Return `path` as a `Path` once it is confirmed to exist; raise a `ValueError` when it does not"""
    # `raise_error=True` makes the validation raise instead of returning False
    validate_path(path, path_type='any', raise_error=True)
    return Path(path) if isinstance(path, str) else path

In [None]:
show_doc(safe_path)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L59){target="_blank" style="float:right; font-size:smaller"}

### safe_path

>      safe_path (path:str|pathlib.Path)

Return a Path object when given a valid path as a string or a Path, raise error otherwise

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| path | str \| Path | path to validate |
| **Returns** | **Path** | **validated path returned as a pathlib.Path** |

# Access key files and directories

In [None]:
#| export
def get_config_value(section:str,                        # section in the configparser cfg file
                     key:str,                            # key in the selected section
                     path_to_config_file:Path|str=None   # path to the cfg file
                    )-> Any :                            # the value corresponding to section>key>value 
    """Returns the value corresponding to the key-value pair in the configuration file (configparser format)
    
    When no path_to_config_file is provided, the function will try to find a file named 
    `config-api-keys.cfg` in: the system's `home`, the parent directory of the current directory, 
    and the Google drive directory mounted to the Colab environment. The first match is used.

    Raises a `ValueError` when no configuration file is found or when the given path does not exist,
    and a `KeyError` when `section` or `key` is missing from the file.
    """
    if path_to_config_file is None:
        # try several possible file locations (directories only, the file name is appended below)
        fname = 'config-api-keys.cfg'
        possible_dirs = [
            Path.home(), 
            Path('..').resolve(),
            Path('/content/gdrive/MyDrive/private-across-accounts'),
        ]
        candidates = (d / fname for d in possible_dirs)
        path_to_config_file = next((p for p in candidates if p.is_file()), None)
        # only give up once every location was tried
        if path_to_config_file is None:
            searched = ', '.join(str(d) for d in possible_dirs)
            raise ValueError(f"No config file found in {searched}. Please provide a specific path")

    # keep the returned Path, so that a path passed as a str also works below
    path_to_config_file = safe_path(path_to_config_file)
    print(f"Using config file at {path_to_config_file.absolute()}")

    configuration = configparser.ConfigParser()
    configuration.read(path_to_config_file)
    return configuration[section][key]

In [None]:
show_doc(get_config_value)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L69){target="_blank" style="float:right; font-size:smaller"}

### get_config_value

>      get_config_value (section:str, key:str,
>                        path_to_config_file:pathlib.Path|str=None)

Returns the value corresponding to the key-value pair in the configuration file (configparser format)

When no path_to_config_file is provided, the function will try to find the file in: the system's `home`, 
the parent directory of the current directory, and the Google drive directory mounted to the Colab environment.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| section | str |  | section in the configparser cfg file |
| key | str |  | key in the selected section |
| path_to_config_file | Path \| str | None | path to the cfg file |
| **Returns** | **Any** |  | **the value corresponding to section>key>value** |

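By default (`path_to_config_file is None`), the function looks for a file named `config-api-keys.cfg` in the user's home directory, in the parent of the current directory, and in the `private-across-accounts` directory on Google Drive (when mounted on Colab). If the file is located elsewhere, a path to the file (`Path` or `str`) must be provided.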

The configuration file is expected to be in the format used by the standard module `configparser` [documentation](https://docs.python.org/3/library/configparser.html)

```ascii
    [DEFAULT]
    key = value

    [section_name]
    key = value

    [section_name]
    key = value
```

In [None]:
path2cfg = Path('../config-sample.cfg').resolve()
assert path2cfg.is_file(), f"{path2cfg} is not a file"

with open(path2cfg, 'r') as fp:
    print(fp.read())

[azure]
azure-api-key= dummy_api_key_for_azure

[kaggle]
kaggle_username = not_my_real_kaggle_name
kaggle_key = dummy_api_key_for_kaggle

[wandb]
api_key = dummy_api_key_for_wandb



In [None]:
value = get_config_value(section='azure', key='azure-api-key', path_to_config_file=path2cfg)
assert value == 'dummy_api_key_for_azure'

Using config file at /home/vtec/projects/ec-packages/ecutilities/config-sample.cfg


In [None]:
value = get_config_value(section='kaggle', key='kaggle_username', path_to_config_file=path2cfg)
assert value == 'not_my_real_kaggle_name'

Using config file at /home/vtec/projects/ec-packages/ecutilities/config-sample.cfg


In [None]:
value = get_config_value(section='wandb', key='api_key', path_to_config_file=path2cfg)
assert value == 'dummy_api_key_for_wandb'

Using config file at /home/vtec/projects/ec-packages/ecutilities/config-sample.cfg


In [None]:
#|eval  false
value = get_config_value(section='dummy', key='dummy-user-id')
assert value.startswith('dummy-userID-from')

Using config file at /home/vtec/config-api-keys.cfg


# Setup utilities

In [None]:
#| export
class CurrentMachine:
    """Callable class to represent info on the current machine. When called, instance return a dict all `attrs`:
    
    - `os`
    - `home` path
    - `is_local`, `is_colab`, `is_kaggle`
    - `p2config` path to the config file
    - `package_root` path to the root of the package root directory

    Only one instance is created per class: instantiating the class again returns the same object
    (and runs `__init__` on it again).
    """
    
    _instance = None
    _config_dir = '.ecutilities'
    _config_fname = 'ecutilities.cfg'

    def __new__(cls, *args, **kwargs):
        # Create instance if it does not exist yet
        if cls._instance is None:
            # `home` and `p2config` are deliberately not assigned here: they are properties of
            # the class and assigning them on `cls` would replace the property objects.
            cls.package_root = Path(__file__).parents[1]
            cls._instance = super().__new__(cls)
        return cls._instance
    
    def __init__(
        self, 
        mount_gdrive:bool=True  # True to mount Google Drive if running on Colab
        ):
        self.is_colab = 'google.colab' in sys.modules
        if self.is_colab and mount_gdrive:
            # `drive` comes from the optional `google.colab` import at the top of the module
            drive.mount('/content/gdrive')
            self.gdrive = Path('/content/gdrive/MyDrive')

        self.is_kaggle = 'kaggle_web_client' in sys.modules
        if self.is_kaggle:
            # `NotImplemented` is a constant, not an exception: raise the exception class instead
            raise NotImplementedError("ProjectFileSystem is not implemented for Kaggle yet")

        if not self.is_colab and not self.is_kaggle and not self.is_local:
            msg = """
                      Code does not seem to run on the cloud but computer is not registered as local
                      If you are running on a local computer, you must register it as local by running
                        `ProjectFileSystem().register_as_local()`
                      before you can use the ProjectFileSystem class.
                      """
            warnings.warn(msg, UserWarning)

    def __call__(self): 
        """Return a dict with the main attributes of the machine (`None` for any missing attribute)"""
        attrs = 'os home is_local is_colab is_kaggle p2config package_root'.split()
        return {k: getattr(self, k, None) for k in attrs}

    def read_config(self):
        """Read config from the configuration file if it exists and return an empty config if it does not

        The returned config always has an `Infra` section, even when the file does not define one.
        """
        cfg = configparser.ConfigParser()
        if self.p2config.is_file(): 
            cfg.read(self.p2config)
        # an existing file without the section would otherwise make `cfg['Infra']` fail
        if not cfg.has_section('Infra'):
            cfg.add_section('Infra')
        return cfg

    def _write_local_flag(self, registered:bool):
        """Store `registered` as `registered_as_local` in the configuration file and return the config"""
        cfg = self.read_config()
        self.p2config.parent.mkdir(parents=True, exist_ok=True)
        cfg['Infra']['registered_as_local'] = str(registered)
        with open(self.p2config, 'w') as fp:
            cfg.write(fp)
        return cfg
    
    def register_as_local(self):
        """Update the configuration file to register the machine as local machine"""
        return self._write_local_flag(True)
  
    def deregister_as_local(self):
        """Update the configuration file to deregister the machine from local machine status"""
        return self._write_local_flag(False)

    @property
    def home(self): 
        """Resolved path to the home directory of the current user"""
        return Path.home().resolve()
    
    @property
    def os(self): 
        """Platform identifier, as given by `sys.platform`"""
        return sys.platform

    @property
    def p2config(self): 
        """Path to the configuration file, located in a dedicated directory under `home`"""
        return self.home / self._config_dir / self._config_fname
           
    @property
    def is_local(self):
        """Return `True` if the current machine was registered as a local machine"""
        cfg = self.read_config()
        return cfg['Infra'].getboolean('registered_as_local', False)


In [None]:
show_doc(CurrentMachine)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L98){target="_blank" style="float:right; font-size:smaller"}

### CurrentMachine

>      CurrentMachine (*args, **kwargs)

Callable class to represent info on the current machine. When called, instance return a dict all `attrs`:

- `os`
- `home` path
- `is_local`, `is_colab`, `is_kaggle`
- `p2config` path to the config file
- `package_root` path to the root of the package root directory

In [None]:
machine = CurrentMachine()
machine()

{'os': 'linux',
 'home': Path('/home/vtec'),
 'is_local': True,
 'is_colab': False,
 'is_kaggle': False,
 'p2config': Path('/home/vtec/.ecutilities/ecutilities.cfg'),
 'package_root': Path('/home/vtec/projects/ec-packages/ecutilities')}

In [None]:
#| hide
machine.deregister_as_local();

In [None]:
machine.is_local, machine.is_colab, machine.is_kaggle

(False, False, False)

This machine is not registered as a local machine, but it is also not running in the cloud. We should register it as a local machine with `register_as_local`.

In [None]:
show_doc(CurrentMachine.register_as_local)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L157){target="_blank" style="float:right; font-size:smaller"}

### CurrentMachine.register_as_local

>      CurrentMachine.register_as_local ()

Update the configuration file to register the machine as local machine

Use this method to register the current machine as local machine. Only needs to be used once on a machine. Do not use on cloud VMs

In [None]:
machine.register_as_local()
machine.is_local, machine.is_colab, machine.is_kaggle

(True, False, False)

> **Technical Note**:
>
> The configuration file is located at a standard location, which varies depending on the OS:
> 
> - Windows:
>    - home is `C:\Users\username`
>    - application data in `C:\Users\username\AppData\Local\...` or `C:\Users\username\AppData\Roaming\...` (see [StackExchange](https://superuser.com/questions/21458/why-are-there-directories-called-local-locallow-and-roaming-under-users-user))
>    - application also can be loaded under a dedicated directory under `C:\Users\username` like `C:\Users\username\.conda\...`
>
> - Linux:
>     - home is `/home/username`
>     - application data in a file or a dedicated directory under `/home/username/`, such as:
>         - file in home directory, e.g. `.gitconfig`
>         - file in an application dedicated directory, e.g. `/home/username/.conda/...`
> 
> `ecutilities` places the configuration file in a dedicated directory in the home directory:
> - `C:\Users\username\.ecutilities\ecutilities.cfg`
> - `/home/username/.ecutilities/ecutilities.cfg`
> 
> 
> Retrieve the OS:
> ```python
> sys.platform
> ```
> ```shell
> win32           with Windows
> linux           with linux
> darwin          with macOs
> ```
> 
> Accessing the correct path depending on the OS:
> ```python
> Path().home().absolute()
> ```
> ```shell
> WindowsPath('C:/Users/username') with Windows
> Path('/home/username')           with linux
> ``` 
> 

In [None]:
#| export
class ProjectFileSystem(CurrentMachine):
    """Class representing the project file system and key subfolders (data, nbs, src)
    
    Set paths to key directories, according to whether the code is running locally or in the cloud.
    Give access to path to these key folders and information about the environment.
    """

    # own `_instance`, so that this class gets its own single instance, separate from the parent's
    _instance = None
    _config_dir = '.ecutilities'
    _config_fname = 'ecutilities.cfg'
    _shared_project_dir = None
    
    def create_project_file_system(
        self, 
        p2project_root,     # path to project root, where all subfolder will be located
        overwrite=False     # overwrite current folders if they exist when True (not implemented yet)
        ):
        """Create a standard project file system with the following structure:
        
        ```
            project_root
                |--- data   all data files
                |--- nbs    all notebooks for work and experiments
                |--- src    all scripts and code
        ```

        The project root is created when it does not exist yet. Existing directories are left as they are.
        """
        template = 'data nbs src'.split()
        # no existence check here: the root directory may have to be created
        path = Path(p2project_root)
        os.makedirs(path, exist_ok=True)
        for subdir in template:
            print(path/subdir)
            os.makedirs(path/subdir, exist_ok=True)
        print(f"Created project file system in {path}")

    @property
    def project_root(self):
        """Path to the root of the project, according to where the code is running"""
        #TODO: this code is not correct. It only works when installed in the same folder as the project.
        if self.is_local:
            return PACKAGE_ROOT
        elif self.is_colab:
            # NOTE(review): `_shared_project_dir` is None unless set elsewhere, and `gdrive` is only
            # set when the drive was mounted - confirm how both are expected to be provided
            return self.gdrive / self._shared_project_dir
        elif self.is_kaggle:
            # `NotImplemented` is a constant, not an exception: raise the exception class instead
            raise NotImplementedError("ProjectFileSystem is not implemented for Kaggle yet")
        else:
            raise ValueError('Not running locally, on Colab or on Kaggle')

    @property
    def data(self): 
        """Path to the `data` directory under the project root"""
        return self.project_root / 'data'

    @property
    def nbs(self): 
        """Path to the `nbs` directory under the project root"""
        return self.project_root / 'nbs'

In [None]:
pfs = ProjectFileSystem()
pfs()

{'os': 'linux',
 'home': Path('/home/vtec'),
 'is_local': True,
 'is_colab': False,
 'is_kaggle': False,
 'p2config': Path('/home/vtec/.ecutilities/ecutilities.cfg'),
 'package_root': Path('/home/vtec/projects/ec-packages/ecutilities')}

In [None]:
show_doc(ProjectFileSystem.create_project_file_system)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L204){target="_blank" style="float:right; font-size:smaller"}

### ProjectFileSystem.create_project_file_system

>      ProjectFileSystem.create_project_file_system (p2project_root,
>                                                    overwrite=False)

Create a standard project file system with the following structure:

```
    project_root
        |--- data   all data files
        |--- nbs    all notebooks for work and experiments
        |--- src    all scripts and code
```

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| p2project_root |  |  | path to project root, where all subfolder will be located |
| overwrite | bool | False | overwrite current folders if they exist when True (not implemented yet) |

In [None]:
pfs.create_project_file_system(Path('/home/vtec/projects/ec-packages/ecutilities'))

/home/vtec/projects/ec-packages/ecutilities/data
/home/vtec/projects/ec-packages/ecutilities/nbs
/home/vtec/projects/ec-packages/ecutilities/src
Created project file system in /home/vtec/projects/ec-packages/ecutilities


# File structure exploration

In [None]:
#| export
def files_in_tree(
    path: str|Path,               # path to the directory to scan  
    pattern: str|None = None      # pattern (glob style) to match in file name to filter the content
)-> List[Path]:                   # paths to the listed files, in the order of the printed indices
    """List files in directory and its direct subdirectories, print tree starting from parent directory
    
    Each printed file is followed by its index in the returned list. Files located deeper than the 
    direct subdirectories are not listed. Raises a `ValueError` when `path` is not a directory.
    """
    validate_path(path, path_type='dir', raise_error=True)
    # `path` may be a str: the code below needs a Path
    path = Path(path)
    # names for the header are taken from the resolved path, so that they are also available
    # for paths without any parent, such as `Path('.')`
    resolved = path.resolve()

    pattern = '*' if pattern is None else f"*{pattern}*"
    paths = []
    pad = ' ' * 2

    def _list_files(directory, prefix):
        """Print the matching files in `directory` with their index and add them to `paths`"""
        for f in [p for p in directory.glob(pattern) if p.is_file()]:
            print(f"{prefix}|--{f.name} ({len(paths)})")
            paths.append(f)

    print(f"{resolved.parent.name}")
    print(f"{pad}|--{resolved.name}")
    _list_files(path, f"{pad}|{pad*2}")
    for d in [p for p in path.iterdir() if p.is_dir()]:
        print(f"{pad}|{pad*2}|--{d.name}")
        _list_files(d, f"{pad}|{pad*2}|{pad*2}")
    return paths

In [None]:
show_doc(files_in_tree)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L243){target="_blank" style="float:right; font-size:smaller"}

### files_in_tree

>      files_in_tree (path:str|pathlib.Path, pattern:str|None=None)

List files in directory and its subdiretories, print tree starting from parent directory

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| path | str \| Path |  | path to the directory to scan |
| pattern | str \| None | None | pattern (glob style) to match in file name to filter the content |

In [None]:
p2dir = Path('').resolve()
print(p2dir, '\n')

files = files_in_tree(p2dir)
print(f"List of {len(files)} files when unfiltered")

/home/vtec/projects/ec-packages/ecutilities/nbs-dev 

ecutilities
  |--nbs-dev
  |    |--0_02_plotting.ipynb (0)
  |    |--2_01_image_utils.ipynb (1)
  |    |--1_01_eda_stats_utils.ipynb (2)
  |    |--0_01_ipython.ipynb (3)
  |    |--0_00_core.ipynb (4)
  |    |--.last_checked (5)
  |    |--sidebar.yml (6)
  |    |--1_02_ml.ipynb (7)
  |    |--index.ipynb (8)
  |    |--nbdev.yml (9)
  |    |--9_01_dev_utils.ipynb (10)
  |    |--styles.css (11)
  |    |--_quarto.yml (12)
  |    |--.ipynb_checkpoints
  |    |    |--0_02_plotting-checkpoint.ipynb (13)
  |    |    |--9_01_dev_utils-checkpoint.ipynb (14)
  |    |    |--0_01_ipython-checkpoint.ipynb (15)
  |    |    |--0_00_core-checkpoint.ipynb (16)
  |    |    |--1_01_eda_stats_utils-checkpoint.ipynb (17)
  |    |    |--index-checkpoint.ipynb (18)
  |    |    |--2_01_image_utils-checkpoint.ipynb (19)
  |    |    |--1_02_ml-checkpoint.ipynb (20)
List of 21 files when unfiltered


Use `pattern` to filter the paths to return (using `glob` syntax)

In [None]:
files = files_in_tree(p2dir, pattern='ipynb')
print(f"List of {len(files)} files when filtered")

ecutilities
  |--nbs-dev
  |    |--0_02_plotting.ipynb (0)
  |    |--2_01_image_utils.ipynb (1)
  |    |--1_01_eda_stats_utils.ipynb (2)
  |    |--0_01_ipython.ipynb (3)
  |    |--0_00_core.ipynb (4)
  |    |--1_02_ml.ipynb (5)
  |    |--index.ipynb (6)
  |    |--9_01_dev_utils.ipynb (7)
  |    |--.ipynb_checkpoints
  |    |    |--0_02_plotting-checkpoint.ipynb (8)
  |    |    |--9_01_dev_utils-checkpoint.ipynb (9)
  |    |    |--0_01_ipython-checkpoint.ipynb (10)
  |    |    |--0_00_core-checkpoint.ipynb (11)
  |    |    |--1_01_eda_stats_utils-checkpoint.ipynb (12)
  |    |    |--index-checkpoint.ipynb (13)
  |    |    |--2_01_image_utils-checkpoint.ipynb (14)
  |    |    |--1_02_ml-checkpoint.ipynb (15)
List of 16 files when filtered


In [None]:
#| export
def path_to_parent_dir(
    pattern:str,               # pattern to identify the parent directory
    path:str|Path|None = None, # optional path from where to seek for parent directory
)-> Path:                      # path of the parent directory
    """Climb directory tree up to a directory starting with the string `pattern`, and return its path
    
    Can pass a starting path to climb from. When no path is passed, start from the current directory.
    The starting path itself is the first candidate, then each of its parents, from closest to root.
    Raises a `ValueError` when the starting path does not exist or when no name matches `pattern`.
    """
    if path is None: path = Path()
    path = safe_path(path).absolute()
    # return the matching Path itself instead of rebuilding it from its names, which keeps
    # the anchor (root or drive) of the original path
    for candidate in (path, *path.parents):
        if candidate.name.startswith(pattern):
            return candidate
    raise ValueError(f"No directory with a name starting with '{pattern}' found in {path}")

In [None]:
show_doc(path_to_parent_dir)

---

[source](https://github.com/vtecftwy/ecutils/blob/master/ecutilities/core.py#L270){target="_blank" style="float:right; font-size:smaller"}

### path_to_parent_dir

>      path_to_parent_dir (pattern:str, path:str|pathlib.Path|None=None)

Climb directory tree up to a directory starting with the string `pattern`, and return its path

Can pass a starting path to climb from.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| pattern | str |  | pattern to identify the parent directory |
| path | str \| Path \| None | None | optional path from where to seek for parent directory |
| **Returns** | **Path** |  | **path of the parent directory** |

In [None]:
p2dir = path_to_parent_dir('nbs')
assert 'nbs-dev' in p2dir.parts and 'nbs' not in p2dir.parts
p2dir

Path('/home/vtec/projects/ec-packages/ecutilities/nbs-dev')

In [None]:
p2dir = path_to_parent_dir('nbs', Path('../nbs/sandbox.ipynb').resolve())
assert 'nbs' in p2dir.parts and 'nbs-dev' not in p2dir.parts
p2dir

Path('/home/vtec/projects/ec-packages/ecutilities/nbs')

In [None]:
p2project_root = path_to_parent_dir('ecutilities')
assert 'ecutilities' in p2project_root.parts and 'nbs' not in p2project_root.parts
p2project_root

Path('/home/vtec/projects/ec-packages/ecutilities')

In [None]:
#| hide
nbdev_export()