From e6db12401601c3c61d862b043412bf383a3bcd23 Mon Sep 17 00:00:00 2001 From: Zewen Kelvin Tuong Date: Tue, 8 Nov 2022 20:13:57 +0000 Subject: [PATCH] fix api documentation styling (#214) * fix api * Update _core.py * try this * oops * which package is causing issue? * update setup actions * Update tests.yml * Update tests.yml * Update tests.yml * Update tests.yml * Update tests.yml * Update tests.yml Co-authored-by: Kelvin <26215587+zktuong@users.noreply.github.com> --- .github/workflows/badge.yml | 6 +- .github/workflows/formatting.yaml | 8 +-- .github/workflows/publish_pypi.yml | 4 +- .../singularity_container-install.yml | 7 ++- .github/workflows/tests.yml | 57 +++++++++++-------- dandelion/tools/_tools.py | 27 +++++---- dandelion/utilities/_core.py | 40 ++++++++----- dandelion/utilities/_io.py | 21 ++++--- dandelion/utilities/_utilities.py | 6 +- 9 files changed, 104 insertions(+), 72 deletions(-) diff --git a/.github/workflows/badge.yml b/.github/workflows/badge.yml index 26e6cd0b6..b8b2a93ae 100644 --- a/.github/workflows/badge.yml +++ b/.github/workflows/badge.yml @@ -17,9 +17,9 @@ jobs: shell: bash -l {0} if: github.ref == 'refs/heads/master' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: '3.7' - id: version @@ -27,7 +27,7 @@ jobs: run: | pip install setuptools wheel setuptools_scm numpy pip install git+https://www.github.com/zktuong/dandelion.git - echo "##[set-output name=version;]$(python dandelion/logging/_badge.py)" + echo "version=$(python dandelion/logging/_badge.py)" >> $GITHUB_OUTPUT - name: version badge uses: RubbaBoy/BYOB@v1.2.1 with: diff --git a/.github/workflows/formatting.yaml b/.github/workflows/formatting.yaml index 47b6ccb51..d9685dac2 100644 --- a/.github/workflows/formatting.yaml +++ b/.github/workflows/formatting.yaml @@ -9,10 +9,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 - name: Install black run: python -m pip install black @@ -26,10 +26,10 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 - name: Install interrogate run: python -m pip install interrogate diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 6ba4ff69e..7d64c9167 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -13,9 +13,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: '3.7' - name: Install dependencies diff --git a/.github/workflows/singularity_container-install.yml b/.github/workflows/singularity_container-install.yml index 605a81a1f..16554600a 100644 --- a/.github/workflows/singularity_container-install.yml +++ b/.github/workflows/singularity_container-install.yml @@ -23,9 +23,9 @@ jobs: - id: checkfiles run: | if [[ ${{ steps.files.outputs.added_modified }} == *.yml* || ${{ steps.files.outputs.added_modified }} == *.py* || ${{ steps.files.outputs.added_modified }} == *test* || ${{ steps.files.outputs.added_modified }} == *requirements* ]] ; then - echo "::set-output name=keepgoing::true" + echo "keepgoing=true" >> $GITHUB_OUTPUT else - echo "::set-output name=keepgoing::false" + echo "keepgoing=false" >> $GITHUB_OUTPUT fi container: @@ -70,7 +70,8 @@ jobs: - name: Extract repository location shell: bash - run: echo "##[set-output name=location;]$(echo " - git+https://www.github.com/${GITHUB_REPOSITORY}@${GITHUB_REF#refs/heads/}")" + run: | + echo "location=$(echo " - git+https://www.github.com/${GITHUB_REPOSITORY}@${GITHUB_REF#refs/heads/}")" >> $GITHUB_OUTPUT id: extract_location - name: Build Container diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2686a630c..ebc870eec 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,6 +7,18 @@ on: push: branches: - "master" + schedule: + # every Sunday at 0:00 + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12) + # │ │ │ │ ┌───────────── day of the week (0 - 6) + # │ │ │ │ │ + # │ │ │ │ │ + # │ │ │ │ │ + # * * * * * + - cron: '0 0 * * 0' env: cache-version: 'cache-v1' @@ -27,8 +39,9 @@ jobs: run: | if [[ ${{ steps.files.outputs.added_modified }} == *.yml* || ${{ steps.files.outputs.added_modified }} == *.py* || ${{ steps.files.outputs.added_modified }} == *test* || ${{ steps.files.outputs.added_modified }} == *requirements* ]]; then echo "::set-output name=keepgoing::true" + echo "keepgoing=true" >> $GITHUB_OUTPUT else - echo "::set-output name=keepgoing::false" + echo "keepgoing=false" >> $GITHUB_OUTPUT fi tests: @@ -61,8 +74,8 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 - name: Install ubuntu system dependencies if: matrix.config.os == 'ubuntu-latest' @@ -76,7 +89,7 @@ jobs: brew install cairo pkg-config autoconf automake libtool - name: Cache conda - uses: actions/cache@v1 + uses: actions/cache@v3 env: # Increase this value to reset cache if etc/example-environment.yml has not changed CACHE_NUMBER: 0 @@ -122,25 +135,20 @@ jobs: name: Check R version run: | R --version > VERSION - echo "##[set-output name=version;]$(head -1 VERSION | awk '{print $3}')" - echo "##[set-output name=mainbiocversion;]$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" - echo "##[set-output name=subbiocversion;]$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $2}')" - echo "##[set-output name=biocversion;]$(Rscript -e 'cat(as.character(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" + echo "version=$(head -1 VERSION | awk '{print $3}')" >> $GITHUB_OUTPUT + echo "mainbiocversion=$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" >> $GITHUB_OUTPUT + echo "subbiocversion=$(Rscript -e 'cat(unlist(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $2}')" >> $GITHUB_OUTPUT + echo "biocversion=$(Rscript -e 'cat(as.character(tools:::.BioC_version_associated_with_R_version()))' | awk '{print $1}')" >> $GITHUB_OUTPUT shell: bash -l {0} - name: Set up R uses: r-lib/actions/setup-r@v2 with: r-version: ${{ steps.R.outputs.version}} - - - name: Setup r-lib/remotes - run: | - install.packages('remotes') - shell: Rscript {0} - + - name: Cache ubuntu R packages if: "!contains(github.event.head_commit.message, '/nocache') && matrix.config.os == 'ubuntu-latest'" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /home/runner/work/_temp/Library key: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.R.outputs.mainbiocversion}}_${{ steps.R.outputs.subbiocversion}}-r-${{ steps.R.outputs.version}}-${{ hashFiles('.github/depends.Rds') }} @@ -148,12 +156,18 @@ jobs: - name: Cache macOS R packages if: "!contains(github.event.head_commit.message, '/nocache') && matrix.config.os != 'ubuntu-latest'" - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ${{ env.R_LIBS_USER }} key: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.R.outputs.mainbiocversion}}_${{ steps.R.outputs.subbiocversion}}-r-${{ steps.R.outputs.version}}-${{ hashFiles('.github/depends.Rds') }} restore-keys: ${{ env.cache-version }}-${{ matrix.config.os }}-biocversion-RELEASE_${{ steps.R.outputs.mainbiocversion}}_${{ steps.R.outputs.subbiocversion}}-r-${{ steps.R.outputs.version}}- + - name: Setup r-lib/remotes + run: | + install.packages(c('remotes', 'optparse', 'RCurl', 'XML','matrixStats')) + shell: Rscript {0} + + - name: Install Dandelion run: | python -m pip install git+https://github.com/emdann/milopy.git palantir @@ -162,25 +176,18 @@ jobs: - name: Install ubuntu R dependencies if: matrix.config.os == 'ubuntu-latest' run: | - install.packages(c('RCurl','XML')) remotes::install_cran("BiocManager") - BiocManager::install(version = ${{ steps.R.outputs.biocversion}}, ask = FALSE) - BiocManager::install(c('GenomeInfoDb', 'Rsamtools')) BiocManager::install(c('Biostrings', 'GenomicAlignments', 'IRanges')) - install.packages(c('shazam', 'alakazam', 'tigger', 'airr', 'optparse')) + install.packages(c('shazam', 'alakazam', 'tigger', 'optparse')) shell: Rscript {0} - name: Install macOS R dependencies if: matrix.config.os != 'ubuntu-latest' run: | options(install.packages.compile.from.source = "never") - install.packages(c('RCurl','XML')) remotes::install_cran("BiocManager") - BiocManager::install(version = ${{ steps.R.outputs.biocversion}}, ask = FALSE) - BiocManager::install(c('GenomeInfoDb', 'Rsamtools')) BiocManager::install(c('Biostrings', 'GenomicAlignments', 'IRanges')) - install.packages('matrixStats') - install.packages(c('shazam', 'alakazam', 'tigger', 'airr', 'optparse')) + install.packages(c('shazam', 'alakazam', 'tigger', 'optparse')) shell: Rscript {0} - name: Test if R dependencies are installed properly diff --git a/dandelion/tools/_tools.py b/dandelion/tools/_tools.py index 73eb8b6b7..0f1b82187 100644 --- a/dandelion/tools/_tools.py +++ b/dandelion/tools/_tools.py @@ -2,7 +2,7 @@ # @Author: Kelvin # @Date: 2020-05-13 23:22:18 # @Last Modified by: Kelvin -# @Last Modified time: 2022-10-27 10:20:14 +# @Last Modified time: 2022-11-08 14:30:34 """tools module.""" import math import os @@ -73,7 +73,8 @@ def find_clones( Returns ------- - `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized. + Dandelion + `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized. """ start = logg.info("Finding clonotypes") pd.set_option("mode.chained_assignment", None) @@ -706,8 +707,9 @@ def transfer( list of column names will overwrite that specific column(s). Returns - ---------- - `AnnData` object with updated `.obs`, `.obsm` and '.obsp' slots with data from `Dandelion` object. + ------- + AnnData + `AnnData` object with updated `.obs`, `.obsm` and '.obsp' slots with data from `Dandelion` object. """ start = logg.info("Transferring network") # always overwrite with whatever columns are in dandelion's metadata: @@ -963,7 +965,8 @@ def define_clones( Returns ------- - `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized. + Dandelion + `Dandelion` object with clone_id annotated in `.data` slot and `.metadata` initialized. """ start = logg.info("Finding clones") if ncpu is None: @@ -1387,7 +1390,8 @@ def clone_size( Returns ------- - `Dandelion` object with clone size columns annotated in `.metadata` slot. + Dandelion + `Dandelion` object with clone size columns annotated in `.metadata` slot. """ start = logg.info("Quantifying clone sizes") @@ -1609,7 +1613,8 @@ def clone_overlap( whether to print progress Returns ------- - a `pandas DataFrame`. + Union[AnnData, pd.DataFrame]: + Either `AnnData` or a `pandas.DataFrame`. """ start = logg.info("Finding clones") if isinstance(self, Dandelion): @@ -1755,13 +1760,14 @@ def productive_ratio( groupby: str, groups: Optional[List] = None, locus: Literal["TRB", "TRA", "TRD", "TRG", "IGH", "IGK", "IGL"] = "TRB", -): +) -> None: """ Compute the cell-level productive/non-productive contig ratio. Only the contig with the highest umi count in a cell will be used for this tabulation. + Returns inplace `AnnData` with `.uns['productive_ratio']`. Parameters ---------- adata : AnnData @@ -1774,9 +1780,6 @@ def productive_ratio( Optional list of categories to return. locus : Literal["TRB", "TRA", "TRD", "TRG", "IGH", "IGK", "IGL"], optional One of the accepted locuses to perform the tabulation - Returns - ------- - `AnnData` with `.uns['productive_ratio']`. """ start = logg.info("Tabulating productive ratio") vdjx = vdj[(vdj.data.cell_id.isin(adata.obs_names))].copy() @@ -1853,7 +1856,7 @@ def vj_usage_pca( ], verbose=False, **kwargs, -): +) -> AnnData: """ Extract productive V/J gene usage from single cell data and compute PCA. diff --git a/dandelion/utilities/_core.py b/dandelion/utilities/_core.py index 2fb148539..e76721f5a 100644 --- a/dandelion/utilities/_core.py +++ b/dandelion/utilities/_core.py @@ -2,7 +2,7 @@ # @Author: Kelvin # @Date: 2021-02-11 12:22:40 # @Last Modified by: Kelvin -# @Last Modified time: 2022-10-27 10:22:30 +# @Last Modified time: 2022-11-08 15:24:59 """core module.""" import bz2 import copy @@ -2219,19 +2219,28 @@ def update_metadata( Column name of clone id. None defaults to 'clone_id'. retrieve_mode: str One of: - `split and unique only` returns the retrieval splitted into two columns, - i.e. one for VDJ and one for VJ chains, separated by '|' for unique elements. - `merge and unique only` returns the retrieval merged into one column, - separated by '|' for unique elements. - `split and merge` returns the retrieval splitted into two columns, - i.e. one for VDJ and one for VJ chains, separated by '|' for every elements. - `split` returns the retrieval splitted into separate columns for each contig. - `merge` returns the retrieval merged into one columns for each contig, - separated by '|' for unique elements. - 'split and sum' returns the retrieval sumed in the VDJ and VJ columns (separately). - 'split and average' returns the retrieval averaged in the VDJ and VJ columns (separately). - 'sum' returns the retrieval sumed into one column for all contigs. - 'average' returns the retrieval averaged into one column for all contigs. + `split and unique only` + returns the retrieval splitted into two columns, + i.e. one for VDJ and one for VJ chains, separated by `|` for unique elements. + `merge and unique only` + returns the retrieval merged into one column, + separated by `|` for unique elements. + `split and merge` + returns the retrieval splitted into two columns, + i.e. one for VDJ and one for VJ chains, separated by `|` for every elements. + `split` + returns the retrieval splitted into separate columns for each contig. + `merge` + returns the retrieval merged into one columns for each contig, + separated by `|` for unique elements. + `split and sum` + returns the retrieval sumed in the VDJ and VJ columns (separately). + `split and average` + returns the retrieval averaged in the VDJ and VJ columns (separately). + `sum` + returns the retrieval sumed into one column for all contigs. + `average` + returns the retrieval averaged into one column for all contigs. collapse_alleles : bool Returns the V(D)J genes with allelic calls if False. reinitialize : bool @@ -2239,7 +2248,8 @@ def update_metadata( Useful when updating older versions of `dandelion` to newer version. Returns ------- - `Dandelion` object with `.metadata` slot initialized. + Dandelion + `Dandelion` object with `.metadata` slot initialized. """ if clone_key is None: diff --git a/dandelion/utilities/_io.py b/dandelion/utilities/_io.py index c2ce6f032..aaac13fd0 100644 --- a/dandelion/utilities/_io.py +++ b/dandelion/utilities/_io.py @@ -2,7 +2,7 @@ # @Author: kt16 # @Date: 2020-05-12 14:01:32 # @Last Modified by: Kelvin -# @Last Modified time: 2022-10-27 10:22:34 +# @Last Modified time: 2022-11-08 14:27:16 """io module.""" import bz2 import gzip @@ -173,7 +173,8 @@ def read_h5(filename: str = "dandelion_data.h5") -> Dandelion: Returns ------- - `Dandelion` object. + Dandelion + `Dandelion` object. """ try: data = pd.read_hdf(filename, "data") @@ -266,7 +267,8 @@ def read_h5ddl(filename: str = "dandelion_data.h5ddl") -> Dandelion: Returns ------- - `Dandelion` object. + Dandelion + `Dandelion` object. """ try: data = pd.read_hdf(filename, "data") @@ -359,7 +361,8 @@ def read_10x_airr(file: str) -> Dandelion: Returns ------- - `Dandelion` object of pandas data frame. + Dandelion + `Dandelion` object of pandas data frame. """ dat = load_data(file) @@ -413,7 +416,8 @@ def to_scirpy(data: Dandelion, transfer: bool = False, **kwargs) -> AnnData: Returns ------- - `AnnData` object in the format initialized by `scirpy`. + AnnData + `AnnData` object in the format initialized by `scirpy`. """ try: @@ -448,7 +452,8 @@ def from_scirpy(adata: AnnData) -> Dandelion: Returns ------- - `Dandelion` object. + Dandelion + `Dandelion` object. """ try: @@ -487,6 +492,7 @@ def concat( Returns ------- + Dandelion `Dandelion` object """ arrays = list(arrays) @@ -570,7 +576,8 @@ def read_10x_vdj( whether or not to print which files are read/found. Default is False. Returns ------- - `Dandelion` or pandas `DataFrame` object. + Union[Dandelion, pd.DataFrame] + `Dandelion` or pandas `DataFrame` object. """ if filename_prefix is None: diff --git a/dandelion/utilities/_utilities.py b/dandelion/utilities/_utilities.py index 2e281be77..f5e70eb17 100644 --- a/dandelion/utilities/_utilities.py +++ b/dandelion/utilities/_utilities.py @@ -2,7 +2,7 @@ # @Author: kt16 # @Date: 2020-05-12 14:01:32 # @Last Modified by: Kelvin -# @Last Modified time: 2022-10-24 22:21:57 +# @Last Modified time: 2022-11-08 14:28:06 """utilities module.""" import os import re @@ -73,6 +73,7 @@ def dict_from_table(meta: pd.DataFrame, columns: Tuple[str, str]) -> Dict: Returns ------- + Dict dictionary """ if (isinstance(meta, pd.DataFrame)) & (columns is not None): @@ -99,6 +100,7 @@ def clean_nan_dict(d: Dict) -> Dict: Returns ------- + Dict dictionary with no NAs. """ return {k: v for k, v in d.items() if v is not np.nan} @@ -115,6 +117,7 @@ def flatten(l: Sequence) -> Sequence: Returns ------- + Sequence a flattened list. """ for el in l: @@ -149,6 +152,7 @@ def bh(pvalues: np.array) -> np.array: array of p-values to correct Returns ------- + np.array np.array of corrected p-values """ n = int(pvalues.shape[0])