diff --git a/pyani/anim.py b/pyani/anim.py index ad2e9989..1dc4cfb3 100644 --- a/pyani/anim.py +++ b/pyani/anim.py @@ -105,7 +105,7 @@ def get_version(nucmer_exe=pyani_config.NUCMER_DEFAULT): # Generate list of Job objects, one per NUCmer run def generate_nucmer_jobs( filenames, - outdir=".", + outdir=Path("."), nucmer_exe=pyani_config.NUCMER_DEFAULT, filter_exe=pyani_config.FILTER_DEFAULT, maxmatch=False, diff --git a/pyani/pyani_files.py b/pyani/pyani_files.py index 32d063ee..a48c9e1c 100644 --- a/pyani/pyani_files.py +++ b/pyani/pyani_files.py @@ -74,10 +74,9 @@ def get_fasta_paths(dirname, extlist=None): """ extlist = extlist or [".fna", ".fa", ".fasta", ".fas"] return [ - os.path.join(dirname, fname) - for fname in os.listdir(dirname) - if os.path.isfile(os.path.join(dirname, fname)) - and os.path.splitext(fname)[-1] in extlist + dirname / fname + for fname in dirname.iterdir() + if (dirname / fname).is_file() and fname.suffix in extlist ] @@ -92,8 +91,8 @@ def get_fasta_and_hash_paths(dirname="."): infiles = get_fasta_paths(dirname) outfiles = [] for infile in infiles: - hashfile = os.path.splitext(infile)[0] + ".md5" - if not os.path.isfile(hashfile): + hashfile = infile.with_suffix(".md5") + if not hashfile.is_file(): raise IOError("Hashfile %s does not exist" % hashfile) outfiles.append((infile, hashfile)) return outfiles @@ -106,8 +105,8 @@ def get_input_files(dirname, *ext): :param dirname: str, path to input directory :param *ext: list of arguments describing permitted file extensions """ - filelist = [f for f in os.listdir(dirname) if os.path.splitext(f)[-1] in ext] - return [os.path.join(dirname, f) for f in filelist] + filelist = [fname for fname in dirname.iterdir() if fname.suffix in ext] + return [dirname / fname for fname in filelist] # Get lengths of input sequences @@ -124,9 +123,7 @@ def get_sequence_lengths(fastafilenames): """ tot_lengths = {} for fname in fastafilenames: - tot_lengths[os.path.splitext(os.path.split(fname)[-1])[0]] = sum( - [len(s) for s in SeqIO.parse(fname, "fasta")] - ) + tot_lengths[fname.stem] = sum([len(s) for s in SeqIO.parse(fname, "fasta")]) return tot_lengths @@ -195,7 +192,5 @@ def collect_existing_output(dirpath, program, args): suffix = ".filter" elif program == "blastn": suffix = ".blast_tab" - existingfiles = [ - fname for fname in os.listdir(dirpath) if os.path.splitext(fname)[-1] == suffix - ] + existingfiles = [fname for fname in dirpath.iterdir() if fname.suffix == suffix] return existingfiles diff --git a/tests/test_anim.py b/tests/test_anim.py index 37ddfeb5..dc5fb8c3 100644 --- a/tests/test_anim.py +++ b/tests/test_anim.py @@ -1,8 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""test_anim.py - -Test anim.py module. +"""Test anim.py module. These tests are intended to be run from the repository root using: @@ -50,6 +48,8 @@ import os import unittest +from pathlib import Path + import pandas as pd import pytest @@ -68,6 +68,7 @@ class TestNUCmerCmdline(unittest.TestCase): def setUp(self): """Set parameters for tests.""" # Basic NUCmer and delta-filter command-line targets + testdir = Path("tests") self.ntgt = " ".join( [ "nucmer --mum -p", @@ -89,82 +90,82 @@ def setUp(self): "tests/test_output/anim/nucmer_output/file1_vs_file2.filter", ] ) - self.files = ["file1", "file2", "file3", "file4"] + self.files = [Path("file1"), Path("file2"), Path("file3"), Path("file4")] self.ncmdlist = [ - "nucmer --mum -p ./nucmer_output/file1_vs_file2 file1 file2", - "nucmer --mum -p ./nucmer_output/file1_vs_file3 file1 file3", - "nucmer --mum -p ./nucmer_output/file1_vs_file4 file1 file4", - "nucmer --mum -p ./nucmer_output/file2_vs_file3 file2 file3", - "nucmer --mum -p ./nucmer_output/file2_vs_file4 file2 file4", - "nucmer --mum -p ./nucmer_output/file3_vs_file4 file3 file4", + "nucmer --mum -p nucmer_output/file1_vs_file2 file1 file2", + "nucmer --mum -p nucmer_output/file1_vs_file3 file1 file3", + "nucmer --mum -p nucmer_output/file1_vs_file4 file1 file4", + "nucmer --mum -p nucmer_output/file2_vs_file3 file2 file3", + "nucmer --mum -p nucmer_output/file2_vs_file4 file2 file4", + "nucmer --mum -p nucmer_output/file3_vs_file4 file3 file4", ] self.fcmdlist = [ " ".join( [ "delta_filter_wrapper.py delta-filter -1", - "./nucmer_output/file1_vs_file2.delta", - "./nucmer_output/file1_vs_file2.filter", + "nucmer_output/file1_vs_file2.delta", + "nucmer_output/file1_vs_file2.filter", ] ), " ".join( [ "delta_filter_wrapper.py delta-filter -1", - "./nucmer_output/file1_vs_file3.delta", - "./nucmer_output/file1_vs_file3.filter", + "nucmer_output/file1_vs_file3.delta", + "nucmer_output/file1_vs_file3.filter", ] ), " ".join( [ "delta_filter_wrapper.py delta-filter -1", - "./nucmer_output/file1_vs_file4.delta", - "./nucmer_output/file1_vs_file4.filter", + "nucmer_output/file1_vs_file4.delta", + "nucmer_output/file1_vs_file4.filter", ] ), " ".join( [ "delta_filter_wrapper.py delta-filter -1", - "./nucmer_output/file2_vs_file3.delta", - "./nucmer_output/file2_vs_file3.filter", + "nucmer_output/file2_vs_file3.delta", + "nucmer_output/file2_vs_file3.filter", ] ), " ".join( [ "delta_filter_wrapper.py delta-filter -1", - "./nucmer_output/file2_vs_file4.delta", - "./nucmer_output/file2_vs_file4.filter", + "nucmer_output/file2_vs_file4.delta", + "nucmer_output/file2_vs_file4.filter", ] ), " ".join( [ "delta_filter_wrapper.py delta-filter -1", - "./nucmer_output/file3_vs_file4.delta", - "./nucmer_output/file3_vs_file4.filter", + "nucmer_output/file3_vs_file4.delta", + "nucmer_output/file3_vs_file4.filter", ] ), ] - self.outdir = os.path.join("tests", "test_output", "anim") - self.indir = os.path.join("tests", "test_input", "anim") + self.outdir = testdir / "test_output" / "anim" + self.indir = testdir / "test_input" / "anim" def test_single_cmd_generation(self): - """generate single abstract NUCmer/delta-filter command-line. + """Generate single abstract NUCmer/delta-filter command-line. Tests that a single NUCmer/delta-filter command-line pair is produced correctly """ cmds = anim.construct_nucmer_cmdline( - "file1.fna", "file2.fna", outdir=self.outdir + Path("file1.fna"), Path("file2.fna"), outdir=self.outdir ) self.assertEqual(cmds, (self.ntgt, self.ftgt)) def test_maxmatch_cmd_generation(self): - """generate NUCmer command line with maxmatch.""" + """Generate NUCmer command line with maxmatch.""" ncmd, _ = anim.construct_nucmer_cmdline( - "file1.fna", "file2.fna", outdir=self.outdir, maxmatch=True + Path("file1.fna"), Path("file2.fna"), outdir=self.outdir, maxmatch=True ) self.assertEqual(ncmd, self.ntgtmax) def test_multi_cmd_generation(self): - """generate multiple abstract NUCmer/delta-filter command-lines. + """Generate multiple abstract NUCmer/delta-filter command-lines. Tests that all the input files are correctly-paired """ @@ -172,7 +173,7 @@ def test_multi_cmd_generation(self): self.assertEqual(cmds, (self.ncmdlist, self.fcmdlist)) def test_nucmer_job_generation(self): - """generate dependency tree of NUCmer/delta-filter jobs. + """Generate dependency tree of NUCmer/delta-filter jobs. Tests that the correct dependency graph and naming scheme is produced. """ @@ -188,12 +189,13 @@ def test_nucmer_job_generation(self): class TestDeltafileProcessing(unittest.TestCase): - """Class defining tests for .delta/.filter file parsing""" + """Class defining tests for .delta/.filter file parsing.""" def setUp(self): """Set parameters for tests.""" - self.indir = os.path.join("tests", "test_input", "anim") - self.seqdir = os.path.join("tests", "test_input", "sequences") + testdir = Path("tests") + self.indir = testdir / "test_input" / "anim" + self.seqdir = testdir / "test_input" / "sequences" self.deltafile = os.path.join(self.indir, "test.delta") self.deltadir = os.path.join(self.indir, "deltadir") self.df_pid = pd.DataFrame( @@ -208,12 +210,12 @@ def setUp(self): ) def test_deltafile_import(self): - """parses NUCmer .delta/.filter file.""" + """Parse test NUCmer .delta/.filter file.""" result = anim.parse_delta(self.deltafile) self.assertEqual(result, (4074001, 2191)) def test_process_deltadir(self): - """processes directory of .delta files into ANIResults.""" + """Process test directory of .delta files into ANIResults.""" seqfiles = pyani_files.get_fasta_files(self.seqdir) orglengths = pyani_files.get_sequence_lengths(seqfiles) result = anim.process_deltadir(self.deltadir, orglengths) diff --git a/tests/test_subcmd_02_index.py b/tests/test_subcmd_02_index.py index e8b34f88..3d9b6e74 100644 --- a/tests/test_subcmd_02_index.py +++ b/tests/test_subcmd_02_index.py @@ -1,9 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -"""test_subcmd_02_index.py - -Test index subcommand for pyani +"""Test index subcommand for pyani. The test suite is intended to be run from the repository root using: @@ -62,6 +60,7 @@ import unittest from argparse import Namespace +from pathlib import Path from pyani.scripts import subcommands @@ -71,11 +70,12 @@ class TestIndexSubcommand(unittest.TestCase): def setUp(self): """Configure parameters for tests.""" - self.indir = os.path.join("tests", "test_input", "subcmd_index") - self.outdir = os.path.join("tests", "test_output", "subcmd_index") - self.tgtdir = os.path.join("tests", "test_targets", "subcmd_index") + testdir = Path("tests") + self.indir = testdir / "test_input" / "subcmd_index" + self.outdir = testdir / "test_output" / "subcmd_index" + self.tgtdir = testdir / "test_targets" / "subcmd_index" # Remove output directory before copying input data across - if os.path.exists(self.outdir): + if self.outdir.exists(): shutil.rmtree(self.outdir) shutil.copytree(self.indir, self.outdir) @@ -86,14 +86,12 @@ def setUp(self): # Command line namespaces self.argsdict = { "index": Namespace( - indir=self.outdir, - labelfname=os.path.join("labels.txt"), - classfname=os.path.join("classes.txt"), + indir=self.outdir, labelfname="labels.txt", classfname="classes.txt" ) } def test_index(self): - """test indexing of downloaded files""" + """Test indexing of downloaded files.""" # Create index subcommands.subcmd_index(self.argsdict["index"], self.logger)