Skip to content

Commit

Permalink
Merge f723f10 into bf4fca7
Browse files Browse the repository at this point in the history
  • Loading branch information
vinisalazar committed Dec 16, 2020
2 parents bf4fca7 + f723f10 commit 9079d09
Show file tree
Hide file tree
Showing 32 changed files with 304 additions and 90 deletions.
18 changes: 16 additions & 2 deletions CHANGELOG.md
Expand Up @@ -4,8 +4,22 @@
* Fix documentation issues [ ]
* Add subpackage docstrings [ ]
* Create methods for Sample and Project
* .describe []
* .write_paths_to_file, .copy_files_to_dir(), .link_files_to_dir()
* .describe [ ]
* .write_paths_to_file, .copy_files_to_dir(), .link_files_to_dir() [ ]
* Add logger calls when saving to JSON and uploading to ProvStore [ ]

### v0.1.20
* Debug graphical DOT output [x]
* Add Muscle PresetProgram [x]
* Add MAFFT PresetProgram [x]
* Add Kallisto PresetProgram [x]
* Add extra_flags attribute to PresetProgram [x]
* Refactor EnvProv class as Environment [x]
* Allow creation of users and envs to be optional [x]
* Create environments only when there's an associated activity [x]
* Create Sample dunder enter and dunder exit methods [x]
* Fix wasDerivedFrom bug when sample and file have the same name [x]
* Fix extra Environments bug [x]

### v0.1.19
* Debug API endpoint (#23) [x]
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Expand Up @@ -126,4 +126,5 @@ Before you submit a pull request, if your pull request includes code changes che
* If the pull request adds functionality:
* Put your new functionality into a function or class with a docstring;
* Make a new test for said functionality.
* Add a one-line description of the contribution to the CHANGELOG.md file under the current development version.
* Make sure your code changes were formatted with the Black styling tool.
4 changes: 2 additions & 2 deletions README.md
@@ -1,7 +1,7 @@
### BioProv - W3C-PROV provenance documents for bioinformatics


Package | [![License](https://img.shields.io/github/license/vinisalazar/bioprov)](https://img.shields.io/github/license/vinisalazar/bioprov) | [![PyPI Version](https://img.shields.io/pypi/v/bioprov)](https://pypi.org/project/bioprov/) | [![Requirements Status](https://requires.io/github/vinisalazar/BioProv/requirements.svg?branch=master)](https://requires.io/github/vinisalazar/BioProv/requirements/?branch=master)
Package | [![License](https://img.shields.io/github/license/vinisalazar/bioprov)](https://github.com/vinisalazar/BioProv/blob/master/LICENSE) | [![PyPI Version](https://img.shields.io/pypi/v/bioprov)](https://pypi.org/project/bioprov/) | [![Requirements Status](https://requires.io/github/vinisalazar/BioProv/requirements.svg?branch=master)](https://requires.io/github/vinisalazar/BioProv/requirements/?branch=master)
---------------|--|--|--
Tests | [![Build Status](https://travis-ci.org/vinisalazar/BioProv.svg?branch=master)](https://travis-ci.org/vinisalazar/BioProv) | [![tests](https://github.com/vinisalazar/bioprov/workflows/tests/badge.svg?branch=master)](https://github.com/vinisalazar/bioprov/actions?query=workflow%3Atests) | [![Coverage Status](https://coveralls.io/repos/github/vinisalazar/BioProv/badge.svg?branch=master&service=github)](https://coveralls.io/github/vinisalazar/BioProv?branch=master&service=github)
Code | [![Code style](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) | [![lint](https://github.com/vinisalazar/BioProv/workflows/lint/badge.svg?branch=master)](https://github.com/vinisalazar/BioProv/actions?query=workflow%3Alint)
Expand Down Expand Up @@ -95,7 +95,7 @@ You can import data into BioProv using Pandas objects.

### Installation

```
```sh
# Install from pip
$ pip install bioprov

Expand Down
4 changes: 2 additions & 2 deletions bioprov/__init__.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


"""
Expand All @@ -11,7 +11,7 @@
Inherits objects from the src/ package.
"""

from .src.config import config, EnvProv, BioProvDB
from .src.config import config, Environment, BioProvDB
from .src.files import File, SeqFile, Directory
from .src.main import (
Program,
Expand Down
2 changes: 1 addition & 1 deletion bioprov/bioprov
Expand Up @@ -3,7 +3,7 @@ __author__ = "Vini Salazar"
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"

"""
Executable that goes in $PATH. Code for the command-line is on the bioprov.py module.
Expand Down
2 changes: 1 addition & 1 deletion bioprov/bioprov.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"

"""
BioProv command-line application. This module holds the main executable.
Expand Down
2 changes: 1 addition & 1 deletion bioprov/data/__init__.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


"""
Expand Down
15 changes: 13 additions & 2 deletions bioprov/programs/__init__.py
Expand Up @@ -2,7 +2,18 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


from .programs import prodigal, diamond, blastn, blastp, prokka, kaiju, kaiju2table
from .programs import (
prodigal,
diamond,
blastn,
blastp,
muscle,
mafft,
prokka,
kallisto_quant,
kaiju,
kaiju2table,
)
105 changes: 93 additions & 12 deletions bioprov/programs/programs.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


"""
Expand Down Expand Up @@ -42,20 +42,17 @@ def diamond(blast_type, sample, db, query_tag="query", outformat=6, extra_flags=
sample=sample,
input_files={"--query": query_tag},
output_files={"--out": ("_dmnd_hits", "_dmnd_hits.tsv")},
extra_flags=extra_flags,
)

if extra_flags is not None:
params = [Parameter(key=command) for command in extra_flags]
for param in params:
_diamond.add_parameter(param)

return _diamond


def prodigal(sample=None, input_tag="assembly"):
def prodigal(sample=None, input_tag="assembly", extra_flags=None):
"""
:param sample: Instance of BioProv.Sample.
:param str input_tag: Tag of the input file in the sample (defaults to "assembly").
:param list extra_flags: A list of extra parameters to pass to Prodigal.
:return: Instance of PresetProgram containing Prodigal.
"""
_prodigal = PresetProgram(
Expand All @@ -68,12 +65,13 @@ def prodigal(sample=None, input_tag="assembly"):
"-s": ("scores", "_scores.cds"),
},
preffix_tag=input_tag,
extra_flags=extra_flags,
)

return _prodigal


def _create_blast_preset(blast_type, sample, db, query_tag, outformat):
def _create_blast_preset(blast_type, sample, db, query_tag, outformat, extra_flags):
"""
:param str blast_type: What BLAST program to build (e.g. 'blastn');
:return: Instance of PresetProgram for the chosen blast program type.
Expand All @@ -93,43 +91,126 @@ def _create_blast_preset(blast_type, sample, db, query_tag, outformat):
sample=sample,
input_files={"-query": query_tag},
output_files={"-out": (f"{blast_type}_hits", f"_{blast_type}_hits.txt")},
extra_flags=extra_flags,
)

return _blast_program


def blastn(sample=None, db=None, query_tag="query", outformat=6):
def blastn(sample=None, db=None, query_tag="query", outformat=6, extra_flags=None):
"""
:param Sample sample: Instance of BioProv.Sample.
:param str db: A string pointing to the reference database directory and title.
:param str query_tag: A tag for the query file.
:param int outformat: The output format to gather from blastn.
:param list extra_flags: A list of extra parameters to pass to BLASTN.
:return: Instance of PresetProgram for BLASTN.
:rtype: BioProv.PresetProgram.
:raises AssertionError: Path to the reference database does not exist.
"""

_blastn = _create_blast_preset("blastn", sample, db, query_tag, outformat)
_blastn = _create_blast_preset(
"blastn", sample, db, query_tag, outformat, extra_flags
)

return _blastn


def blastp(sample, db, query_tag="query", outformat=6):
def blastp(sample, db, query_tag="query", outformat=6, extra_flags=None):
"""
:param Sample sample: Instance of BioProv.Sample.
:param str db: A string pointing to the reference database directory and title.
:param str query_tag: A tag for the query file.
:param int outformat: The output format to gather from blastp.
:param list extra_flags: A list of extra parameters to pass to BLASTP.
:return: Instance of PresetProgram for BLASTP.
:rtype: BioProv.PresetProgram.
:raises AssertionError: Path to the reference database does not exist.
"""

_blastp = _create_blast_preset("blastp", sample, db, query_tag, outformat)
_blastp = _create_blast_preset(
"blastp", sample, db, query_tag, outformat, extra_flags
)

return _blastp


def muscle(sample, input_tag="input", msf=False, extra_flags=None):
    """
    Build a PresetProgram for the Muscle aligner.

    :param Sample sample: Instance of BioProv.Sample.
    :param str input_tag: A tag for the input multi-fasta file (bound to '-in').
    :param bool msf: When truthy, a '-msf' parameter is added to the program.
    :param list extra_flags: A list of extra parameters to pass to Muscle.
    :return: Instance of PresetProgram for Muscle.
    :rtype: BioProv.PresetProgram.
    """
    inputs = {"-in": input_tag}
    outputs = {"-out": ("_muscle_hits", "_muscle_hits.afa")}

    program = PresetProgram(
        name="muscle",
        sample=sample,
        input_files=inputs,
        output_files=outputs,
        extra_flags=extra_flags,
    )

    # Nothing more to configure unless msf output was requested.
    if not msf:
        return program

    program.add_parameter(Parameter(key="-msf"))
    return program


def mafft(sample, input_tag="input", extra_flags=None):
    """
    Build a PresetProgram for the MAFFT aligner.

    The input file is registered under an empty flag and the output under
    '>' (presumably so the alignment is redirected from stdout; confirm
    against how PresetProgram renders its command).

    :param Sample sample: Instance of BioProv.Sample.
    :param str input_tag: A tag for the input fasta file; also used as preffix_tag.
    :param list extra_flags: A list of extra parameters to pass to MAFFT.
    :return: Instance of PresetProgram containing MAFFT.
    :rtype: BioProv.PresetProgram.
    """
    inputs = {"": input_tag}
    outputs = {">": ("aligned", "_aligned.afa")}

    return PresetProgram(
        name="mafft",
        sample=sample,
        input_files=inputs,
        output_files=outputs,
        preffix_tag=input_tag,
        extra_flags=extra_flags,
    )


def kallisto_quant(sample, index, output_dir="./", extra_flags=None):
    """
    Build a PresetProgram that runs kallisto's alignment and quantification.

    Every file registered in ``sample.files`` is appended to the command as
    a parameter whose key is the file's path.

    :param Sample sample: Instance of BioProv.Sample.
    :param str index: A path to a kallisto index file.
    :param str output_dir: A path to kallisto's output directory.
    :param list extra_flags: A list of extra parameters to pass to kallisto
                             (e.g. --single or --plaintext).
    :return: Instance of PresetProgram containing kallisto.
    :rtype: BioProv.PresetProgram.
    """
    # 'quant' subcommand followed by its two fixed options.
    base_params = (
        Parameter(key="quant"),
        Parameter(key="--index", value=index),
        Parameter(key="--output-dir", value=output_dir),
    )

    program = PresetProgram(
        name="kallisto",
        params=base_params,
        sample=sample,
        extra_flags=extra_flags,
    )

    # Build all read parameters first, then attach them in order.
    reads = [
        Parameter(key=str(sample_file.path)) for sample_file in sample.files.values()
    ]
    for read_param in reads:
        program.add_parameter(read_param)

    return program


def prokka_():
"""
:return: Instance of PresetProgram containing Prokka.
Expand Down
2 changes: 1 addition & 1 deletion bioprov/src/__init__.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


"""
Expand Down
8 changes: 4 additions & 4 deletions bioprov/src/config.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


"""
Expand Down Expand Up @@ -35,7 +35,7 @@ def __init__(self, db_path=None, threads=0):
"""
# This duplication is to order the keys in the __dict__ attribute.
self.user = None
self.env = EnvProv()
self.env = Environment()
self.user = self.env.user
if not threads:
threads = int(os.cpu_count() / 2)
Expand Down Expand Up @@ -248,7 +248,7 @@ def _get_confirm():
print("Canceled operation.")


class EnvProv:
class Environment:
"""
Class containing provenance information about the current environment.
"""
Expand All @@ -265,7 +265,7 @@ def __init__(self):
self.update()

def __repr__(self):
return f"Environment_{self.env_hash}"
return self.env_hash

def update(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion bioprov/src/files.py
Expand Up @@ -2,7 +2,7 @@
__license__ = "MIT"
__maintainer__ = "Vini Salazar"
__url__ = "https://github.com/vinisalazar/bioprov"
__version__ = "0.1.19"
__version__ = "0.1.20"


"""
Expand Down

0 comments on commit 9079d09

Please sign in to comment.