From e6db12401601c3c61d862b043412bf383a3bcd23 Mon Sep 17 00:00:00 2001
From: Zewen Kelvin Tuong <kt16@sanger.ac.uk>
Date: Tue, 8 Nov 2022 20:13:57 +0000
Subject: [PATCH] fix api documentation styling (#214)

* fix api

* Update _core.py

* try this

* oops

* which package is causing issue?

* update setup actions

* Update tests.yml

* Update tests.yml

* Update tests.yml

* Update tests.yml

* Update tests.yml

* Update tests.yml

Co-authored-by: Kelvin <26215587+zktuong@users.noreply.github.com>
---
 .github/workflows/badge.yml                   |  6 +-
 .github/workflows/formatting.yaml             |  8 +--
 .github/workflows/publish_pypi.yml            |  4 +-
 .../singularity_container-install.yml         |  7 ++-
 .github/workflows/tests.yml                   | 57 +++++++++++--------
 dandelion/tools/_tools.py                     | 27 +++++----
 dandelion/utilities/_core.py                  | 40 ++++++++-----
 dandelion/utilities/_io.py                    | 21 ++++---
 dandelion/utilities/_utilities.py             |  6 +-
 9 files changed, 104 insertions(+), 72 deletions(-)

diff --git a/.github/workflows/badge.yml b/.github/workflows/badge.yml
index 26e6cd0b6..b8b2a93ae 100644
--- a/.github/workflows/badge.yml
+++ b/.github/workflows/badge.yml
@@ -17,9 +17,9 @@ jobs:
         shell: bash -l {0}
     if: github.ref == 'refs/heads/master'
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
         with:
           python-version: '3.7'
       - id: version
@@ -27,7 +27,7 @@ jobs:
         run: |
           pip install setuptools wheel setuptools_scm numpy
           pip install git+https://www.github.com/zktuong/dandelion.git
-          echo "##[set-output name=version;]$(python dandelion/logging/_badge.py)"
+          echo "version=$(python dandelion/logging/_badge.py)" >> $GITHUB_OUTPUT
       - name: version badge
         uses: RubbaBoy/BYOB@v1.2.1
         with:
diff --git a/.github/workflows/formatting.yaml b/.github/workflows/formatting.yaml
index 47b6ccb51..d9685dac2 100644
--- a/.github/workflows/formatting.yaml
+++ b/.github/workflows/formatting.yaml
@@ -9,10 +9,10 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Setup Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
 
       - name: Install black
         run: python -m pip install black
@@ -26,10 +26,10 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Setup Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v3
 
       - name: Install interrogate
         run: python -m pip install interrogate
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
index 6ba4ff69e..7d64c9167 100644
--- a/.github/workflows/publish_pypi.yml
+++ b/.github/workflows/publish_pypi.yml
@@ -13,9 +13,9 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v3
       with:
         python-version: '3.7'
     - name: Install dependencies
diff --git a/.github/workflows/singularity_container-install.yml b/.github/workflows/singularity_container-install.yml
index 605a81a1f..16554600a 100644
--- a/.github/workflows/singularity_container-install.yml
+++ b/.github/workflows/singularity_container-install.yml
@@ -23,9 +23,9 @@ jobs:
       - id: checkfiles
         run: |
           if [[ ${{ steps.files.outputs.added_modified }} == *.yml* || ${{ steps.files.outputs.added_modified }} == *.py* || ${{ steps.files.outputs.added_modified }} == *test* || ${{ steps.files.outputs.added_modified }} == *requirements* ]] ; then
-            echo "::set-output name=keepgoing::true"
+            echo "keepgoing=true" >> $GITHUB_OUTPUT
           else
-            echo "::set-output name=keepgoing::false"
+            echo "keepgoing=false" >> $GITHUB_OUTPUT
           fi
 
   container:
@@ -70,7 +70,8 @@ jobs:
 
       - name: Extract repository location
         shell: bash
-        run: echo "##[set-output name=location;]$(echo "  - git+https://www.github.com/${GITHUB_REPOSITORY}@${GITHUB_REF#refs/heads/}")"
+        run: |
+          echo "location=$(echo "  - git+https://www.github.com/${GITHUB_REPOSITORY}@${GITHUB_REF#refs/heads/}")" >> $GITHUB_OUTPUT
         id: extract_location
 
       - name: Build Container
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2686a630c..ebc870eec 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,6 +7,18 @@ on:
   push:
     branches:
       - "master"
+  schedule:
+    # every Sunday at 0:00
+    #        ┌───────────── minute (0 - 59)
+    #        │ ┌───────────── hour (0 - 23)
+    #        │ │ ┌───────────── day of the month (1 - 31)
+    #        │ │ │ ┌───────────── month (1 - 12)
+    #        │ │ │ │ ┌───────────── day of the week (0 - 6)
+    #        │ │ │ │ │                                   
+    #        │ │ │ │ │
+    #        │ │ │ │ │
+    #        * * * * *
+    - cron: '0 0 * * 0'
 
 env:
   cache-version: 'cache-v1'
@@ -27,8 +39,9 @@ jobs:
         run: |
           if [[ ${{ steps.files.outputs.added_modified }} == *.yml* || ${{ steps.files.outputs.added_modified }} == *.py* || ${{ steps.files.outputs.added_modified }} == *test* || ${{ steps.files.outputs.added_modified }} == *requirements* ]]; then
             echo "::set-output name=keepgoing::true"
+            echo "keepgoing=true" >> $GITHUB_OUTPUT
           else
-            echo "::set-output name=keepgoing::false"
+            echo "keepgoing=false" >> $GITHUB_OUTPUT
           fi
 
   tests:
@@ -61,8 +74,8 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-node@v2
+    - uses: actions/checkout@v3
+    - uses: actions/setup-node@v3
 
     - name: Install ubuntu system dependencies
       if: matrix.config.os == 'ubuntu-latest'
@@ -76,7 +89,7 @@ jobs:
         brew install cairo pkg-config autoconf automake libtool
 
     - name: Cache conda
-      uses: actions/cache@v1
+      uses: actions/cache@v3
       env:
         # Increase this value to reset cache if etc/example-environment.yml has not changed
         CACHE_NUMBER: 0
@@ -122,25 +135,20 @@ jobs:
       name: Check R version
       run: |
         R --version > VERSION
-        echo "##[set-output name=version;]$(head -1 VERSION | awk '{print $3}')"
-        echo "##[set-output name=mainbiocversion;]$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')"
-        echo "##[set-output name=subbiocversion;]$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $2}')"
-        echo "##[set-output name=biocversion;]$(Rscript -e 'cat(as.character(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')"
+        echo "version=$(head -1 VERSION | awk '{print $3}')" >> $GITHUB_OUTPUT
+        echo "mainbiocversion=$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" >> $GITHUB_OUTPUT
+        echo "subbiocversion=$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $2}')" >> $GITHUB_OUTPUT
+        echo "biocversion=$(Rscript -e 'cat(as.character(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" >> $GITHUB_OUTPUT
       shell: bash -l {0}
 
     - name: Set up R
       uses: r-lib/actions/setup-r@v2
       with:
         r-version: ${{ steps.R.outputs.version}}
-
-    - name: Setup r-lib/remotes
-      run: |
-        install.packages('remotes')
-      shell: Rscript {0}
-
+    
     - name: Cache ubuntu R packages
       if: "!contains(github.event.head_commit.message, '/nocache') && matrix.config.os == 'ubuntu-latest'"
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         path: /home/runner/work/_temp/Library
         key: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.R.outputs.mainbiocversion}}_${{ steps.R.outputs.subbiocversion}}-r-${{ steps.R.outputs.version}}-${{ hashFiles('.github/depends.Rds') }}
@@ -148,12 +156,18 @@ jobs:
 
     - name: Cache macOS R packages
       if: "!contains(github.event.head_commit.message, '/nocache') && matrix.config.os != 'ubuntu-latest'"
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         path: ${{ env.R_LIBS_USER }}
         key: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.R.outputs.mainbiocversion}}_${{ steps.R.outputs.subbiocversion}}-r-${{ steps.R.outputs.version}}-${{ hashFiles('.github/depends.Rds') }}
         restore-keys: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.R.outputs.mainbiocversion}}_${{ steps.R.outputs.subbiocversion}}-r-${{ steps.R.outputs.version}}-
 
+    - name: Setup r-lib/remotes
+      run: |
+        install.packages(c('remotes', 'optparse', 'RCurl', 'XML','matrixStats'))
+      shell: Rscript {0}
+
+    
     - name: Install Dandelion
       run: |
         python -m pip install git+https://github.com/emdann/milopy.git palantir
@@ -162,25 +176,18 @@ jobs:
     - name: Install ubuntu R dependencies
       if: matrix.config.os == 'ubuntu-latest'
       run: |
-        install.packages(c('RCurl','XML'))
         remotes::install_cran("BiocManager")
-        BiocManager::install(version = ${{ steps.R.outputs.biocversion}}, ask = FALSE)
-        BiocManager::install(c('GenomeInfoDb', 'Rsamtools'))
         BiocManager::install(c('Biostrings', 'GenomicAlignments', 'IRanges'))
-        install.packages(c('shazam', 'alakazam', 'tigger', 'airr', 'optparse'))
+        install.packages(c('shazam', 'alakazam', 'tigger', 'optparse'))
       shell: Rscript {0}
 
     - name: Install macOS R dependencies
       if: matrix.config.os != 'ubuntu-latest'
       run: |
         options(install.packages.compile.from.source = "never")
-        install.packages(c('RCurl','XML'))
         remotes::install_cran("BiocManager")
-        BiocManager::install(version = ${{ steps.R.outputs.biocversion}}, ask = FALSE)
-        BiocManager::install(c('GenomeInfoDb', 'Rsamtools'))
         BiocManager::install(c('Biostrings', 'GenomicAlignments', 'IRanges'))
-        install.packages('matrixStats')
-        install.packages(c('shazam', 'alakazam', 'tigger', 'airr', 'optparse'))
+        install.packages(c('shazam', 'alakazam', 'tigger', 'optparse'))
       shell: Rscript {0}
 
     - name: Test if R dependencies are installed properly
diff --git a/dandelion/tools/_tools.py b/dandelion/tools/_tools.py
index 73eb8b6b7..0f1b82187 100644
--- a/dandelion/tools/_tools.py
+++ b/dandelion/tools/_tools.py
@@ -2,7 +2,7 @@
 # @Author: Kelvin
 # @Date:   2020-05-13 23:22:18
 # @Last Modified by:   Kelvin
-# @Last Modified time: 2022-10-27 10:20:14
+# @Last Modified time: 2022-11-08 14:30:34
 """tools module."""
 import math
 import os
@@ -73,7 +73,8 @@ def find_clones(
 
     Returns
     -------
-    `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized.
+    Dandelion
+        `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized.
     """
     start = logg.info("Finding clonotypes")
     pd.set_option("mode.chained_assignment", None)
@@ -706,8 +707,9 @@ def transfer(
         list of column names will overwrite that specific column(s).
 
     Returns
-    ----------
-    `AnnData` object with updated `.obs`, `.obsm` and '.obsp' slots with data from `Dandelion` object.
+    -------
+    AnnData
+        `AnnData` object with updated `.obs`, `.obsm` and '.obsp' slots with data from `Dandelion` object.
     """
     start = logg.info("Transferring network")
     # always overwrite with whatever columns are in dandelion's metadata:
@@ -963,7 +965,8 @@ def define_clones(
 
     Returns
     -------
-    `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized.
+    Dandelion
+        `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized.
     """
     start = logg.info("Finding clones")
     if ncpu is None:
@@ -1387,7 +1390,8 @@ def clone_size(
 
     Returns
     -------
-    `Dandelion` object with clone size columns annotated in `.metadata` slot.
+    Dandelion
+        `Dandelion` object with clone size columns annotated in `.metadata` slot.
     """
     start = logg.info("Quantifying clone sizes")
 
@@ -1609,7 +1613,8 @@ def clone_overlap(
         whether to print progress
     Returns
     -------
-    a `pandas DataFrame`.
+    Union[AnnData, pd.DataFrame]
+        Either `AnnData` or a `pandas.DataFrame`.
     """
     start = logg.info("Finding clones")
     if isinstance(self, Dandelion):
@@ -1755,13 +1760,14 @@ def productive_ratio(
     groupby: str,
     groups: Optional[List] = None,
     locus: Literal["TRB", "TRA", "TRD", "TRG", "IGH", "IGK", "IGL"] = "TRB",
-):
+) -> None:
     """
     Compute the cell-level productive/non-productive contig ratio.
 
     Only the contig with the highest umi count in a cell will be used for this
     tabulation.
 
+    Updates the `AnnData` object in place with `.uns['productive_ratio']`; nothing is returned.
     Parameters
     ----------
     adata : AnnData
@@ -1774,9 +1780,6 @@ def productive_ratio(
         Optional list of categories to return.
     locus : Literal["TRB", "TRA", "TRD", "TRG", "IGH", "IGK", "IGL"], optional
         One of the accepted locuses to perform the tabulation
-    Returns
-    -------
-        `AnnData` with `.uns['productive_ratio']`.
     """
     start = logg.info("Tabulating productive ratio")
     vdjx = vdj[(vdj.data.cell_id.isin(adata.obs_names))].copy()
@@ -1853,7 +1856,7 @@ def vj_usage_pca(
     ],
     verbose=False,
     **kwargs,
-):
+) -> AnnData:
     """
     Extract productive V/J gene usage from single cell data and compute PCA.
 
diff --git a/dandelion/utilities/_core.py b/dandelion/utilities/_core.py
index 2fb148539..e76721f5a 100644
--- a/dandelion/utilities/_core.py
+++ b/dandelion/utilities/_core.py
@@ -2,7 +2,7 @@
 # @Author: Kelvin
 # @Date:   2021-02-11 12:22:40
 # @Last Modified by:   Kelvin
-# @Last Modified time: 2022-10-27 10:22:30
+# @Last Modified time: 2022-11-08 15:24:59
 """core module."""
 import bz2
 import copy
@@ -2219,19 +2219,28 @@ def update_metadata(
         Column name of clone id. None defaults to 'clone_id'.
     retrieve_mode: str
         One of:
-            `split and unique only` returns the retrieval splitted into two columns,
-                i.e. one for VDJ and one for VJ chains, separated by '|' for unique elements.
-            `merge and unique only` returns the retrieval merged into one column,
-                separated by '|' for unique elements.
-            `split and merge` returns the retrieval splitted into two columns,
-                i.e. one for VDJ and one for VJ chains, separated by '|' for every elements.
-            `split` returns the retrieval splitted into separate columns for each contig.
-            `merge` returns the retrieval merged into one columns for each contig,
-                separated by '|' for unique elements.
-            'split and sum' returns the retrieval sumed in the VDJ and VJ columns (separately).
-            'split and average' returns the retrieval averaged in the VDJ and VJ columns (separately).
-            'sum' returns the retrieval sumed into one column for all contigs.
-            'average' returns the retrieval averaged into one column for all contigs.
+            `split and unique only`
+                returns the retrieval split into two columns,
+                i.e. one for VDJ and one for VJ chains, separated by `|` for unique elements.
+            `merge and unique only`
+                returns the retrieval merged into one column,
+                separated by `|` for unique elements.
+            `split and merge`
+                returns the retrieval split into two columns,
+                i.e. one for VDJ and one for VJ chains, separated by `|` for every element.
+            `split`
+                returns the retrieval split into separate columns for each contig.
+            `merge`
+                returns the retrieval merged into one column for each contig,
+                separated by `|` for unique elements.
+            `split and sum`
+                returns the retrieval summed in the VDJ and VJ columns (separately).
+            `split and average`
+                returns the retrieval averaged in the VDJ and VJ columns (separately).
+            `sum`
+                returns the retrieval summed into one column for all contigs.
+            `average`
+                returns the retrieval averaged into one column for all contigs.
     collapse_alleles : bool
         Returns the V(D)J genes with allelic calls if False.
     reinitialize : bool
@@ -2239,7 +2248,8 @@ def update_metadata(
         Useful when updating older versions of `dandelion` to newer version.
     Returns
     -------
-    `Dandelion` object with `.metadata` slot initialized.
+    Dandelion
+        `Dandelion` object with `.metadata` slot initialized.
     """
 
     if clone_key is None:
diff --git a/dandelion/utilities/_io.py b/dandelion/utilities/_io.py
index c2ce6f032..aaac13fd0 100644
--- a/dandelion/utilities/_io.py
+++ b/dandelion/utilities/_io.py
@@ -2,7 +2,7 @@
 # @Author: kt16
 # @Date:   2020-05-12 14:01:32
 # @Last Modified by:   Kelvin
-# @Last Modified time: 2022-10-27 10:22:34
+# @Last Modified time: 2022-11-08 14:27:16
 """io module."""
 import bz2
 import gzip
@@ -173,7 +173,8 @@ def read_h5(filename: str = "dandelion_data.h5") -> Dandelion:
 
     Returns
     -------
-    `Dandelion` object.
+    Dandelion
+        `Dandelion` object.
     """
     try:
         data = pd.read_hdf(filename, "data")
@@ -266,7 +267,8 @@ def read_h5ddl(filename: str = "dandelion_data.h5ddl") -> Dandelion:
 
     Returns
     -------
-    `Dandelion` object.
+    Dandelion
+        `Dandelion` object.
     """
     try:
         data = pd.read_hdf(filename, "data")
@@ -359,7 +361,8 @@ def read_10x_airr(file: str) -> Dandelion:
 
     Returns
     -------
-    `Dandelion` object of pandas data frame.
+    Dandelion
+        `Dandelion` object of pandas data frame.
 
     """
     dat = load_data(file)
@@ -413,7 +416,8 @@ def to_scirpy(data: Dandelion, transfer: bool = False, **kwargs) -> AnnData:
 
     Returns
     -------
-    `AnnData` object in the format initialized by `scirpy`.
+    AnnData
+        `AnnData` object in the format initialized by `scirpy`.
 
     """
     try:
@@ -448,7 +452,8 @@ def from_scirpy(adata: AnnData) -> Dandelion:
 
     Returns
     -------
-    `Dandelion` object.
+    Dandelion
+        `Dandelion` object.
 
     """
     try:
@@ -487,6 +492,7 @@ def concat(
 
     Returns
     -------
+    Dandelion
         `Dandelion` object
     """
     arrays = list(arrays)
@@ -570,7 +576,8 @@ def read_10x_vdj(
         whether or not to print which files are read/found. Default is False.
     Returns
     -------
-    `Dandelion` or pandas `DataFrame` object.
+    Union[Dandelion, pd.DataFrame]
+        `Dandelion` or pandas `DataFrame` object.
 
     """
     if filename_prefix is None:
diff --git a/dandelion/utilities/_utilities.py b/dandelion/utilities/_utilities.py
index 2e281be77..f5e70eb17 100644
--- a/dandelion/utilities/_utilities.py
+++ b/dandelion/utilities/_utilities.py
@@ -2,7 +2,7 @@
 # @Author: kt16
 # @Date:   2020-05-12 14:01:32
 # @Last Modified by:   Kelvin
-# @Last Modified time: 2022-10-24 22:21:57
+# @Last Modified time: 2022-11-08 14:28:06
 """utilities module."""
 import os
 import re
@@ -73,6 +73,7 @@ def dict_from_table(meta: pd.DataFrame, columns: Tuple[str, str]) -> Dict:
 
     Returns
     -------
+    Dict
         dictionary
     """
     if (isinstance(meta, pd.DataFrame)) & (columns is not None):
@@ -99,6 +100,7 @@ def clean_nan_dict(d: Dict) -> Dict:
 
     Returns
     -------
+    Dict
         dictionary with no NAs.
     """
     return {k: v for k, v in d.items() if v is not np.nan}
@@ -115,6 +117,7 @@ def flatten(l: Sequence) -> Sequence:
 
     Returns
     -------
+    Sequence
         a flattened list.
     """
     for el in l:
@@ -149,6 +152,7 @@ def bh(pvalues: np.array) -> np.array:
             array of p-values to correct
     Returns
     -------
+    np.array
         np.array of corrected p-values
     """
     n = int(pvalues.shape[0])