Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/improve speed and limit memory (#11) #100

Closed
wants to merge 45 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
2f879e5
Feature/improve speed and limit memory (#11)
sambenfredj Apr 12, 2023
bf0c2ce
:lipstick: linting (#12)
gessulat Apr 17, 2023
3ad792c
Fix bugs (#17)
gessulat Apr 20, 2023
29f5549
fix test model: remove subset_max_train from percolator model (#18)
sambenfredj May 5, 2023
ae7f880
Fix test brew: (#20)
sambenfredj May 9, 2023
4e7235b
fix test datasets: (#19)
sambenfredj May 11, 2023
3e7dda9
Fix test confidence (#22)
sambenfredj May 11, 2023
c5d158a
Fix cli tests: (#28)
sambenfredj May 15, 2023
1d2fdf0
Fix system tests: (#29)
sambenfredj May 16, 2023
6e08b70
Fix parser pin test: (#30)
sambenfredj May 17, 2023
d16cedc
Add tests: (#31)
sambenfredj May 22, 2023
40f8394
Fix writer tests: (#32)
sambenfredj May 22, 2023
531d4ae
fix error no psms found during training : if no psms passed the fdr v…
sambenfredj May 31, 2023
84c427b
Introduce new executable and bug fixes
sambenfredj Aug 4, 2023
b85d176
✨ force ci re-run
gessulat Feb 16, 2024
58e8481
Merge branch 'develop' into 'main'
Feb 22, 2024
74f91f1
💄 lint mokapot
gessulat Feb 22, 2024
2985a7f
💄 lints tests
gessulat Feb 22, 2024
12ebe26
💄 fixes format with ruff
gessulat Feb 22, 2024
49608e1
💄 fixes format with ruff
gessulat Feb 22, 2024
6ccc88e
Merge branch 'main' of gitlab:msaid/inferys/mokapot into main
gessulat Feb 22, 2024
0b4fdc5
💄 make ruff and black happy together
gessulat Feb 22, 2024
f595804
Feature/improve speed and limit memory (#11)
sambenfredj Apr 12, 2023
46fbf6b
:lipstick: linting (#12)
gessulat Apr 17, 2023
ee95fbd
Fix bugs (#17)
gessulat Apr 20, 2023
f3d50c8
fix test model: remove subset_max_train from percolator model (#18)
sambenfredj May 5, 2023
4293410
Fix test brew: (#20)
sambenfredj May 9, 2023
623b7d8
fix test datasets: (#19)
sambenfredj May 11, 2023
8f417dd
Fix test confidence (#22)
sambenfredj May 11, 2023
2e1723e
Fix cli tests: (#28)
sambenfredj May 15, 2023
6355834
Fix system tests: (#29)
sambenfredj May 16, 2023
296fb73
Fix parser pin test: (#30)
sambenfredj May 17, 2023
096b07f
Add tests: (#31)
sambenfredj May 22, 2023
d497fcc
Fix writer tests: (#32)
sambenfredj May 22, 2023
d241adb
fix error no psms found during training : if no psms passed the fdr v…
sambenfredj May 31, 2023
41ed445
Introduce new executable and bug fixes
sambenfredj Aug 4, 2023
ac43547
✨ force ci re-run
gessulat Feb 16, 2024
4a9872f
💄 lint mokapot
gessulat Feb 22, 2024
346a0c0
💄 lints tests
gessulat Feb 22, 2024
f543166
💄 fixes format with ruff
gessulat Feb 22, 2024
0742dc2
💄 fixes format with ruff
gessulat Feb 22, 2024
a2602df
💄 make ruff and black happy together
gessulat Feb 22, 2024
f12a43d
✨ removed deprecated error ignore
gessulat Feb 27, 2024
0fd515b
Merge branch 'main' into 'feature/sync'
Feb 27, 2024
6726dea
Merge branch 'feature/sync' into 'main'
Feb 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ name: tests

on:
push:
branches: [ main ]
branches:
- main
- develop
pull_request:
branches: [ main ]
branches:
- main
- develop

jobs:
build:
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,5 @@ venv.bak/
# idea
.idea/

tests/integration_tests/run*

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,4 @@ interpreter and affords greater flexibility:

Check out our [documentation](https://mokapot.readthedocs.io) for more details
and examples of mokapot in action.

5 changes: 3 additions & 2 deletions mokapot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Initialize the mokapot package."""

try:
from importlib.metadata import version, PackageNotFoundError

Expand All @@ -15,11 +16,11 @@
except DistributionNotFound:
pass

from .dataset import LinearPsmDataset
from .dataset import LinearPsmDataset, OnDiskPsmDataset, read_file
from .model import Model, PercolatorModel, save_model, load_model
from .brew import brew
from .parsers.pin import read_pin, read_percolator
from .parsers.pepxml import read_pepxml
from .parsers.fasta import read_fasta, make_decoys, digest
from .writers import to_flashlfq, to_txt
from .confidence import LinearConfidence, plot_qvalues
from .confidence import LinearConfidence, plot_qvalues, assign_confidence
132 changes: 132 additions & 0 deletions mokapot/aggregatePsmsToPeptides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import os
import sys
import argparse
import logging
import warnings
from pathlib import Path

from . import __version__
from .confidence import LinearConfidence
from .dataset import OnDiskPsmDataset
from .utils import get_unique_peptides_from_psms, merge_sort


def main():
    """Run the PSM-to-peptide aggregation command-line tool.

    Parses the command-line arguments, configures logging, combines the
    target and decoy PSM files into one iterable, writes the unique
    peptides to an intermediate ``peptides.csv`` file (relative to the
    current working directory) and passes that file to
    ``LinearConfidence`` at the ``"peptide"`` level.

    Command-line options
    --------------------
    --targets_psms : str, required
        Path to the target PSM file.
    --decoys_psms : str, required
        Path to the decoy PSM file.
    --test_fdr : float, default 0.01
        Forwarded to ``LinearConfidence`` as ``eval_fdr``.
    --keep_decoys : flag
        When set, a header is also written to the decoy output file and
        ``decoys=True`` is forwarded to ``LinearConfidence``.
    --dest_dir : str, optional
        Directory for the output files; created (including missing
        parents) when it does not exist. Without it, output files are
        placed relative to the current working directory.
    --verbosity : {0, 1, 2, 3}, default 2
        Maps to the ERROR, WARNING, INFO and DEBUG logging levels.

    Output files are named after the input files, with the last
    extension replaced by ``.peptides``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--targets_psms", type=str, required=True)
    parser.add_argument("--decoys_psms", type=str, required=True)
    parser.add_argument("--test_fdr", type=float, default=0.01)
    parser.add_argument(
        "--keep_decoys",
        action="store_true",
        default=False,
    )
    parser.add_argument("--dest_dir", type=str)
    parser.add_argument(
        "--verbosity", type=int, choices=[0, 1, 2, 3], default=2
    )

    args = parser.parse_args()

    verbosity_dict = {
        0: logging.ERROR,
        1: logging.WARNING,
        2: logging.INFO,
        3: logging.DEBUG,
    }
    log_level = verbosity_dict[args.verbosity]

    # Python warnings are only shown when running at DEBUG verbosity.
    if log_level != logging.DEBUG:
        warnings.filterwarnings("ignore")
    logging.basicConfig(
        format=("[{levelname}] {message}"),
        style="{",
        level=log_level,
    )

    logging.info("mokapot version %s", str(__version__))
    logging.info("Command issued:")
    logging.info("%s", " ".join(sys.argv))
    logging.info("")

    # Only the target and peptide column names are set here; every other
    # dataset attribute is left as None.
    psms = OnDiskPsmDataset(
        target_column="Label",
        peptide_column="peptide",
        filename=None,
        columns=None,
        spectrum_columns=None,
        protein_column=None,
        group_column=None,
        feature_columns=None,
        metadata_columns=None,
        filename_column=None,
        scan_column=None,
        specId_column=None,
        calcmass_column=None,
        expmass_column=None,
        rt_column=None,
        charge_column=None,
        spectra_dataframe=None,
    )

    # NOTE(review): merge_sort presumably yields rows from both files
    # ordered by the "score" column -- confirm against mokapot.utils.
    iterable = merge_sort(
        paths=[args.decoys_psms, args.targets_psms],
        target_column=psms.target_column,
        col_score="score",
    )
    sep = "\t"
    metadata_columns = ["PSMId", "Label", "peptide", "score", "proteinIds"]
    output_columns = [
        "PSMId",
        "peptide",
        "score",
        "q-value",
        "posterior_error_prob",
        "proteinIds",
    ]

    # Write the header of the intermediate peptide file first.
    # NOTE(review): get_unique_peptides_from_psms is presumably expected
    # to append the peptide rows to this same file -- confirm.
    peptides_path = "peptides.csv"
    with open(peptides_path, "w") as f_peptide:
        f_peptide.write(f"{sep.join(metadata_columns)}\n")
    unique_peptides = get_unique_peptides_from_psms(
        iterable=iterable,
        # Index of "peptide" within metadata_columns.
        peptide_col_index=2,
        out_peptides=peptides_path,
        sep=sep,
    )
    logging.info("\t- Found %i unique peptides.", unique_peptides)

    # Output names: input base name with its last extension replaced by
    # ".peptides".
    out_targets, out_decoys = [
        os.path.split(in_path)[-1].rsplit(".", 1)[0] + ".peptides"
        for in_path in [args.targets_psms, args.decoys_psms]
    ]
    if args.dest_dir is not None:
        # parents=True so that a nested destination such as "out/run1"
        # works even when "out" does not exist yet; without it mkdir
        # raises FileNotFoundError.
        Path(args.dest_dir).mkdir(parents=True, exist_ok=True)
        out_targets, out_decoys = [
            os.path.join(args.dest_dir, out_path)
            for out_path in [out_targets, out_decoys]
        ]

    # Create the output files with their header rows; the decoy file is
    # only created when decoys are kept.
    with open(out_targets, "w") as fp:
        fp.write(f"{sep.join(output_columns)}\n")
    if args.keep_decoys:
        with open(out_decoys, "w") as fp:
            fp.write(f"{sep.join(output_columns)}\n")

    # Constructed for its side effects; the instance itself is not used.
    LinearConfidence(
        psms=psms,
        levels=["peptide"],
        level_paths=[peptides_path],
        out_paths=[[out_targets, out_decoys]],
        decoys=args.keep_decoys,
        eval_fdr=args.test_fdr,
        sep=sep,
    )


if __name__ == "__main__":
    # Script entry point: RuntimeError and ValueError raised by main()
    # are both logged and reported with exit status 250.
    try:
        main()
    except (RuntimeError, ValueError) as err:
        logging.error(f"[Error] {err}")
        sys.exit(250)  # input failure

Check warning on line 132 in mokapot/aggregatePsmsToPeptides.py

View check run for this annotation

Codecov / codecov/patch

mokapot/aggregatePsmsToPeptides.py#L127-L132

Added lines #L127 - L132 were not covered by tests
Loading
Loading