In [None]:
#|default_exp wandb

In [None]:
#|hide
import nbdev
from pathlib import Path
from __future__ import annotations
from fastcore.test import *
from nbdev.showdoc import *
from pdb import set_trace

# Environment setup: detect whether the notebook runs on Google Colab or locally,
# then define `ON_COLAB` and `p2data` (path to the CNN_Virus data directory).
# Todo: move the google code in utils module
try:
    # The import doubles as environment detection: it raises ModuleNotFoundError outside Colab
    from google.colab import drive
    ON_COLAB = True
    print('Running on colab')
    print('Installing wandb and project code')
    # NOTE(review): installs are unpinned apart from the git branch; `%pip` is the recommended magic
    # to target the kernel's environment - confirm before changing
    !pip install -U git+https://github.com/vtecftwy/metagenomics.git@refactor_cnn_virus
    !pip install -qqU wandb
    
    # Assumes shared gdrive dir accessible through shortcut `Metagenomics` under the root of gdrive.
    # NOTE(review): the path below spells the folder `Metagenonics`, not `Metagenomics` -
    # confirm which spelling matches the actual Drive shortcut
    drive.mount('/content/gdrive')
    p2drive = Path('/content/gdrive/MyDrive/Metagenonics')
    p2data =  p2drive / 'CNN_Virus_data'
    # Fail early if the shared drive or the data directory is not where expected
    assert p2drive.is_dir()
    assert p2data.is_dir()

except ModuleNotFoundError:
    # Local run: `p2drive` is not defined on this path, only `p2data`
    ON_COLAB = False
    print('Running locally')
    try:
        # Imported only to check that the project package is installed
        import metagentools
    except ModuleNotFoundError:
        raise ModuleNotFoundError('Cannot find package metagentools. Make sure you pip -e install it in your environment')
    # The data directory is resolved relative to the current working directory
    p2data = Path('../data/CNN_Virus_data').resolve()
    assert p2data.is_dir()




# Reload imported modules automatically before executing code, so edits to the package are picked up
%load_ext autoreload
%autoreload 2

Running locally


In [None]:
#|export
# Imports all dependencies
import numpy as np
import os
import tensorflow as tf
import wandb

from pathlib import Path

In [None]:
#|hide
# Show the installed TensorFlow version next to the version this notebook expects (2.8.2)
print(f"Tensorflow version: {tf.__version__} - Expected 2.8.2")

Tensorflow version: 2.8.2 - Expected 2.8.2


# `wandb`: WandB tracking

> Utility functions and classes to work with **WandB** for this metagenomics project

Once set up, **WandB** tracks datasets, models, training runs and evaluation runs across several experiments. The original documentation is [here](https://docs.wandb.ai/).

Key concepts we use in this package: 
- `run` (status, losses and other metadata logged during training or evaluation experiments);
- `artifact` (datasets, code (incl. notebooks), models, ...).

## Steps:

- log in to `wandb`: requires an API key, available at https://wandb.ai/authorize (you must be logged in to the WandB website)
- initialize a **`run`** with desired parameters and metadata
- perform operations to be tracked (e.g. train model, load dataset as artifact, ...)
- finish the **run**

The first step is to log in to WandB from the active notebook. To allow WandB to store the code used for the session, the name or path of the notebook must be passed as an argument.

In [None]:
#| export
def login_nb(
    nb_file: str|Path=None   # name of the notebook (str) or path to the notebook (Path)
    ):
    """Logs in to WandB from the current notebook. Registers current notebooks as the source of code"""

    # Validate nb_file
    if nb_file is None:
        raise TypeError('login requires the file name of the current nb to allow code tracking')   
    if isinstance(nb_file, str):
        if nb_file[-6:] != '.ipynb': nb_file = f"{nb_file}.ipynb"
        nb_file = Path.cwd()/nb_file
    elif not isinstance(nb_file, Path):
        raise TypeError('nb_file must me a `str` or a `Path`')
    
    if not nb_file.is_file():
        raise ValueError(f"{nb_file.name} is not a file, please correct the file name")

    # Registers notebook as WandB code
    os.environ['WANDB_NOTEBOOK_NAME'] = str(nb_file.absolute())
    print(f"Logging in from notebook: {os.environ['WANDB_NOTEBOOK_NAME']}")

    wandb.login(relogin=False)    

In [None]:
# Pass the notebook name without extension: `login_nb` appends `.ipynb` and looks for the file in the current working directory
login_nb('01_wandb')

Logging in from notebook: /home/vtec/projects/bio/metagentools/nbs-dev/01_wandb.ipynb


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


```python
login_nb('01_wandb')
```
Logs as:
- Logging in from notebook: `/home/vtec/projects/bio/metagentools/nbs-dev/01_wandb.ipynb`
- wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin

`login_nb` raises an error in the following cases:

- If `nb_file` is not passed, the function raises a `TypeError`

In [None]:
#| hide
# Expect `login_nb(nb_file=None)` to fail with an error mentioning 'requires the file name'
test_fail(login_nb, kwargs={'nb_file':None}, contains='requires the file name')

- If `nb_file` is not a string or a Path, the function raises a `TypeError`

In [None]:
#| hide
# Expect a failure for a non-str, non-Path argument.
# The expected text reproduces the message raised by `login_nb` verbatim, including its
# 'must me' spelling: keep both in sync if the message is ever corrected.
test_fail(login_nb, kwargs={'nb_file':999}, contains='nb_file must me a `str` or a `Path`')

- If `nb_file` does not point to an existing file, the function raises a `ValueError`

In [None]:
#| hide
# Expect a failure when the name does not match an existing notebook file
test_fail(login_nb, kwargs={'nb_file':'fake_file_name'}, contains='is not a file, please correct the file name')

In [None]:
#| export
class WandbRun():
    """Manages a run with WandB and all registered actions performed while the run is active. Close run with .finish()

    The underlying WandB run object is available as `self.run`. When the instance is created
    with `testing=True`, no run is created on WandB: `self.run` is `None` and `.finish()` does nothing.
    """
    
    def __init__(
        self,
        entity: str='', # (required) the user or organization under which the run will be logged, e.g. `metagenomics_sh`
        project: str='', # (required) the name of the WandB project under which the run will be logged 
        run_name: str='', # (required) unique name for the run,
        job_type: str='', # (required) e.g.: `load_datasets`, `train_exp`, ... 
        notes: str='', # (optional) any text description or additional information to store with the run 
        testing: bool=False # (optional) If True, will not create a run on WandB. Use for local testing
        ) -> None:
        """Validates metadata inputs and initializes the wandb run, unless testing is set to True

        Raises `ValueError` if `entity`, `project`, `run_name` or `job_type` is an empty string,
        and `TypeError` if any of these or `notes` is not a string.
        """
        
        # Validate inputs. The fields are listed explicitly (in signature order) rather than read
        # from `locals()`, so that loop variables or future locals are never validated by accident.
        required = {'entity': entity, 'project': project, 'run_name': run_name, 'job_type': job_type}

        # First pass: every required field must be provided
        for k, v in required.items():
            if v == '': raise ValueError(f"{k} may not be an empty string. Please provide a value")

        # Second pass: every metadata field, including the optional notes, must be a string
        for k, v in {**required, 'notes': notes}.items():
            if not isinstance(v, str): raise TypeError(f"{k} must be a string, not a {type(v)}")

        self.entity = entity
        self.project = project
        self.run_name = run_name
        self.job_type = job_type
        self.notes = notes

        # Always define `run`, so that `finish()` is safe to call in testing mode as well
        self.run = None
        if not testing:
            self.run = wandb.init(
                entity=entity, 
                project=project, 
                name=run_name, 
                job_type=job_type, 
                notes=notes, 
                save_code=True
            )


    def finish(self):
        """End the run. Does nothing when no WandB run was created (`testing=True`)"""
        if self.run is not None:
            self.run.finish()

A `WandbRun` is created with a range of metadata: `entity`, `project`, `run_name` (passed to WandB as the run `name`), `job_type` and some optional `notes`.

In [None]:
# Example metadata reused by the `WandbRun` examples and tests further down
entity = 'metagenomics_sh'
project = 'reproduce_cnn_virus'
run_name = 'nbdev'
job_type = "code_testing"
# Placeholder text: notes are optional free text stored with the run
notes = '???'

In [None]:
#| hide
# wandb_run = WandbRun(
#     entity=entity, 
#     project=project, 
#     run_name=run_name, 
#     job_type=job_type, 
#     notes=notes
#     )

```python
wandb_run = WandbRun(
    entity=entity, 
    project=project, 
    run_name=run_name, 
    job_type=job_type, 
    notes=notes
    )
```
Will print the following log, including links to WandB project pages and documentation:
- Waiting for wandb.init()...
- Tracking run with wandb version 0.13.5
- Run data is saved locally in /home/vtec/projects/bio/metagentools/nbs-dev/wandb/run-20221123_150346-1aa0q1n4
- Syncing run [nbdev](https://wandb.ai/metagenomics_sh/reproduce_cnn_virus/runs/1aa0q1n4?workspace=user-vtecftyw) to [Weights & Biases](https://wandb.ai/metagenomics_sh/reproduce_cnn_virus?workspace=user-vtecftyw) ([docs](https://docs.wandb.ai/guides))

#### Close a run

In [None]:
#| hide
# wandb_run.finish()

```python
wandb_run.finish()
```
Will print the following log, including links to WandB project pages and documentation:
- Waiting for W&B process to finish... (success).
- Synced nbdev-2: https://wandb.ai/metagenomics_sh/reproduce_cnn_virus/runs/1aa0q1n4
- Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 2 other file(s)
- Find logs at: ./wandb/run-20221123_150346-1aa0q1n4/logs

- If one of `entity`, `project`, `run_name` or `job_type` is not passed, the function raises a `ValueError`

In [None]:
#| hide
# Supply the required fields one at a time, in signature order:
# each call must fail on the first required field that is still missing.
required_fields = {'entity': entity, 'project': project, 'run_name': run_name, 'job_type': job_type}
supplied = {}
for field, value in required_fields.items():
    test_fail(WandbRun, kwargs=dict(supplied), contains=f'{field} may not be an empty string')
    supplied[field] = value

- If one of `entity`, `project`, `run_name`, `job_type` or `notes` is not a string, the function raises a `TypeError`

In [None]:
#| hide
# Start from a fully valid set of arguments (testing mode, so no run is created on WandB),
# then replace one metadata field at a time with a non-string value.
valid_kwargs = {
    'entity': entity,
    'project': project,
    'run_name': run_name,
    'job_type': job_type,
    'notes': notes,
    'testing': True,
}
for field in ['entity', 'project', 'run_name', 'job_type', 'notes']:
    test_fail(WandbRun, kwargs={**valid_kwargs, field: 9}, contains=f'{field} must be a string')

In [None]:
#| hide
# Export the cells marked with the export directive to the module named by `default_exp` (here: `wandb`)
nbdev.nbdev_export()