diff --git a/Makefile b/Makefile index 90678b94..629ff1f0 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ walkthrough: clean_walkthrough pyani report --runs -o C_blochmannia_ANIm/ --formats html excel stdout pyani report --run_results 1 --formats html excel stdout -o C_blochmannia_ANIm/ pyani report --run_matrices 1 --formats html excel stdout -o C_blochmannia_ANIm/ - pyani plot --formats png pdf --method seaborn -o C_blochmannia_ANIm --run_ids 1 + pyani plot --formats png pdf --method seaborn -o C_blochmannia_ANIm --run_id 1 # pyani anib C_blochmannia C_blochmannia_ANIb \ # --name "C. blochmannia run 2" \ # --labels C_blochmannia/labels.txt --classes C_blochmannia/classes.txt diff --git a/pyani/pyani_graphics/sns/__init__.py b/pyani/pyani_graphics/sns/__init__.py index c8bcb284..fdcddea5 100644 --- a/pyani/pyani_graphics/sns/__init__.py +++ b/pyani/pyani_graphics/sns/__init__.py @@ -37,6 +37,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. """Code to implement Seaborn graphics output for ANI analyses.""" +import warnings import matplotlib # pylint: disable=C0411 import pandas as pd @@ -106,17 +107,37 @@ def get_clustermap(dfr, params, title=None, annot=True): :param title: str, plot title :param annot: Boolean, add text for cell values? """ - fig = sns.clustermap( - dfr, - cmap=params.cmap, - vmin=params.vmin, - vmax=params.vmax, - col_colors=params.colorbar, - row_colors=params.colorbar, - figsize=(params.figsize, params.figsize), - linewidths=params.linewidths, - annot=annot, - ) + + # If we do not catch warnings here, then we often get the following warning: + # ClusterWarning: scipy.cluster: The symmetric non-negative hollow + # observation matrix looks suspiciously like an uncondensed distance matrix + # The usual solution would be to convert the array with + # scipy.spatial.distance.squareform(), but this requires that all values in + # the main diagonal are zero, which is not the case for ANI. + # As we know this is a (1-distance) matrix, we could just set the diagonal + # to zero and fudge it, but this is not a good solution. Instead, we suppress + # the warning in a context manager for this function call only, because we + # know the warning is not relevant. + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=( + "scipy.cluster: The symmetric non-negative " + "hollow observation matrix looks suspiciously like an " + "uncondensed distance matrix" + ), + ) + fig = sns.clustermap( + dfr, + cmap=params.cmap, + vmin=params.vmin, + vmax=params.vmax, + col_colors=params.colorbar, + row_colors=params.colorbar, + figsize=(params.figsize, params.figsize), + linewidths=params.linewidths, + annot=annot, + ) # add labels for each of the input genomes add_labels(fig, params) diff --git a/pyani/scripts/average_nucleotide_identity.py b/pyani/scripts/average_nucleotide_identity.py index 10fc7767..0fc0dcbe 100755 --- a/pyani/scripts/average_nucleotide_identity.py +++ b/pyani/scripts/average_nucleotide_identity.py @@ -812,7 +812,6 @@ def draw(args: Namespace, filestems: List[str], gformat: str) -> None: infilename = fullstem.with_suffix(".tab") dfm = pd.read_csv(infilename, index_col=0, sep="\t") logger.info("Writing heatmap to %s", outfilename) - print(args.labels, args.classes) params = pyani_graphics.Params( params_mpl(dfm)[filestem], pyani_tools.get_labels(args.labels, logger=logger), diff --git a/pyani/scripts/parsers/plot_parser.py b/pyani/scripts/parsers/plot_parser.py index 0aac8031..b35f8e01 100644 --- a/pyani/scripts/parsers/plot_parser.py +++ b/pyani/scripts/parsers/plot_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2016-2019 -# (c) University of Strathclyde 2019-2020 +# (c) University of Strathclyde 2019-2022 # Author: Leighton Pritchard # # Contact: @@ -17,7 +17,7 @@ # The MIT License # # Copyright (c) 2016-2019 The James Hutton Institute -# Copyright (c) 2019-2020 University of Strathclyde +# Copyright (c) 2019-2022 University of Strathclyde # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/pyani/scripts/parsers/report_parser.py b/pyani/scripts/parsers/report_parser.py index 22767a79..c1ca01e1 100644 --- a/pyani/scripts/parsers/report_parser.py +++ b/pyani/scripts/parsers/report_parser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2016-2019 -# (c) University of Strathclyde 2019-2020 +# (c) University of Strathclyde 2019-2022 # Author: Leighton Pritchard # # Contact: @@ -17,7 +17,7 @@ # The MIT License # # Copyright (c) 2016-2019 The James Hutton Institute -# Copyright (c) 2019-2020 University of Strathclyde +# Copyright (c) 2019-2022 University of Strathclyde # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/pyani/scripts/subcommands/subcmd_plot.py b/pyani/scripts/subcommands/subcmd_plot.py index d3682d1d..4e3f1ab0 100644 --- a/pyani/scripts/subcommands/subcmd_plot.py +++ b/pyani/scripts/subcommands/subcmd_plot.py @@ -194,6 +194,9 @@ def write_distribution( title=f"matrix_{matdata.name}_run{run_id}", ) + # Be tidy with matplotlib caches + plt.close("all") + def write_heatmap( run_id: int, diff --git a/pyani/scripts/subcommands/subcmd_report.py b/pyani/scripts/subcommands/subcmd_report.py index 82423893..453b29cc 100644 --- a/pyani/scripts/subcommands/subcmd_report.py +++ b/pyani/scripts/subcommands/subcmd_report.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2017-2019 -# (c) University of Strathclyde 2019-2020 +# (c) University of Strathclyde 2019-2022 # Author: Leighton Pritchard # # Contact: @@ -18,7 +18,7 @@ # The MIT License # # Copyright (c) 2017-2019 The James Hutton Institute -# Copyright (c) 2019-2020 University of Strathclyde +# Copyright (c) 2019-2022 University of Strathclyde # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -40,6 +40,7 @@ """Provides the report subcommand for pyani.""" import logging +import warnings from argparse import Namespace from typing import List, NamedTuple @@ -154,12 +155,7 @@ def subcmd_report(args: Namespace) -> int: "genome label", "genome class", ] - report( - args, - session, - formats, - ReportParams("runs_genomes", statement, headers), - ) + report(args, session, formats, ReportParams("runs_genomes", statement, headers)) # Report table of all runs in which a genome is involved if args.show_genomes_runs: @@ -196,12 +192,7 @@ def subcmd_report(args: Namespace) -> int: "method", "date run", ] - report( - args, - session, - formats, - ReportParams("genomes_runs", statement, headers), - ) + report(args, session, formats, ReportParams("genomes_runs", statement, headers)) # Report table of comparison results for the indicated runs if args.run_results: @@ -234,6 +225,7 @@ def subcmd_report(args: Namespace) -> int: .filter(Run.run_id == run_id) .statement ) + logger.debug("Results query: %s", statement) headers = [ "Comparison ID", "Query ID", @@ -300,12 +292,7 @@ def subcmd_report(args: Namespace) -> int: return 0 -def report( - args: Namespace, - session, - formats: List[str], - params: ReportParams, -) -> None: +def report(args: Namespace, session, formats: List[str], params: ReportParams) -> None: """Write tabular report of pyani runs from database. :param args: Namespace of command-line arguments @@ -319,7 +306,47 @@ def report( logger.debug( "Writing table of pyani %s from the database to %s.*", params.name, outfname ) - data = pd.read_sql(params.statement, session.bind) + + # With newer versions of SQLAlchemy, Pandas may throw a warning due to the composition + # of our statement including a Cartesian product, even though this is what we want: + # SAWarning: SELECT statement has a cartesian product between FROM element(s) + # "runs" and FROM element "genome_query". Apply join condition(s) between each + # element to resolve. + # We could use SQLAlchemy's true() function to force the join condition, but this has + # to be done from within Pandas, and is an issue for them to fix. + # We suppress the warning, instead. + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=( + "SELECT statement has a cartesian product between FROM " + 'element\\(s\\) "runs" and FROM element "genome_query"' + ), + ) + warnings.filterwarnings( + "ignore", + message=( + "SELECT statement has a cartesian product between FROM " + 'element\\(s\\) "runs" and FROM element "comparisons"' + ), + ) + warnings.filterwarnings( + "ignore", + message=( + "SELECT statement has a cartesian product between FROM " + 'element\\(s\\) "runs" and FROM element "genome_subject"' + ), + ) + warnings.filterwarnings( + "ignore", + message=( + "SELECT statement has a cartesian product between FROM " + 'element\\(s\\) "genome_query", "genome_subject", "comparisons" ' + 'and FROM element "runs"' + ), + ) + data = pd.read_sql(params.statement, session.bind) + data.columns = params.headers pyani_report.write_dbtable(data, outfname, formats) diff --git a/requirements.txt b/requirements.txt index 15d97e59..62841ab0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,5 +10,5 @@ Pillow PyQt5 scipy seaborn -sqlalchemy==1.3.10 +sqlalchemy tqdm diff --git a/tests/test_anib.py b/tests/test_anib.py index 2f8d2380..f522a107 100644 --- a/tests/test_anib.py +++ b/tests/test_anib.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2017-2019 -# (c) University of Strathclyde 2019-2020 +# (c) University of Strathclyde 2019-2022 # Author: Leighton Pritchard # # Contact: leighton.pritchard@strath.ac.uk @@ -17,7 +17,7 @@ # The MIT License # # Copyright (c) 2017-2019 The James Hutton Institute -# Copyright (c) 2019-2020 University of Strathclyde +# Copyright (c) 2019-2022 University of Strathclyde # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -49,7 +49,6 @@ import pandas as pd import pytest # noqa: F401 # pylint: disable=unused-import import unittest -import unittest.mock as mock from pandas.testing import assert_frame_equal @@ -367,8 +366,8 @@ def test_parse_legacy_blastdir(anib_output_dir): anib_output_dir.legacyblastdir, orglengths, fraglengths, mode="ANIblastall" ) assert_frame_equal( - result.percentage_identity.sort_index(1).sort_index(), - anib_output_dir.legacyblastresult.sort_index(1).sort_index(), + result.percentage_identity.sort_index(axis=1).sort_index(), + anib_output_dir.legacyblastresult.sort_index(axis=1).sort_index(), ) @@ -380,8 +379,8 @@ def test_parse_blastdir(anib_output_dir): anib_output_dir.blastdir, orglengths, fraglengths, mode="ANIb" ) assert_frame_equal( - result.percentage_identity.sort_index(1).sort_index(), - anib_output_dir.blastresult.sort_index(1).sort_index(), + result.percentage_identity.sort_index(axis=1).sort_index(), + anib_output_dir.blastresult.sort_index(axis=1).sort_index(), ) diff --git a/tests/test_anim.py b/tests/test_anim.py index d3c40621..baf5b5b9 100644 --- a/tests/test_anim.py +++ b/tests/test_anim.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2017-2019 -# (c) University of Strathclyde 2019-2021 +# (c) University of Strathclyde 2019-2022 # Author: Leighton Pritchard # # Contact: @@ -18,7 +18,7 @@ # The MIT License # # Copyright (c) 2017-2019 The James Hutton Institute -# Copyright (c) 2019-2021 University of Strathclyde +# Copyright (c) 2019-2022 University of Strathclyde # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -43,7 +43,7 @@ pytest -v """ -import sys + from pathlib import Path from typing import List, NamedTuple, Tuple @@ -51,7 +51,7 @@ import pytest import unittest -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from pyani import anim, pyani_files @@ -208,8 +208,8 @@ def test_deltadir_parsing(delta_output_dir): orglengths = pyani_files.get_sequence_lengths(seqfiles) result = anim.process_deltadir(delta_output_dir.deltadir, orglengths) assert_frame_equal( - result.percentage_identity.sort_index(1).sort_index(), - delta_output_dir.deltaresult.sort_index(1).sort_index(), + result.percentage_identity.sort_index(axis=1).sort_index(), + delta_output_dir.deltaresult.sort_index(axis=1).sort_index(), ) @@ -251,17 +251,14 @@ def test_mummer_single(tmp_path, path_file_two): path_file_two[0], path_file_two[1], outdir=tmp_path ) dir_nucmer = tmp_path / "nucmer_output" + outprefix = ( + dir_nucmer + / str(path_file_two[0].stem) + / str(path_file_two[0].stem + "_vs_" + path_file_two[1].stem) + ) expected = ( - ( - "nucmer --mum -p " - f"{dir_nucmer / str(path_file_two[0].stem) / str(path_file_two[0].stem + '_vs_' + path_file_two[1].stem)} " - f"{path_file_two[0]} {path_file_two[1]}" - ), - ( - "delta_filter_wrapper.py delta-filter -1 " - f"{dir_nucmer / str(path_file_two[0].stem ) / str(path_file_two[0].stem + '_vs_' + path_file_two[1].stem + '.delta')} " - f"{dir_nucmer / str(path_file_two[0].stem ) / str(path_file_two[0].stem + '_vs_' + path_file_two[1].stem + '.filter')}" - ), + f"nucmer --mum -p {outprefix} {path_file_two[0]} {path_file_two[1]}", + f"delta_filter_wrapper.py delta-filter -1 {outprefix}.delta {outprefix}.filter", ) assert cmds == expected diff --git a/tests/test_subcmd_05_report.py b/tests/test_subcmd_05_report.py index 49bf0419..d30096ce 100644 --- a/tests/test_subcmd_05_report.py +++ b/tests/test_subcmd_05_report.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2017-2019 -# (c) The University of Strathclude 2019 +# (c) The University of Strathclude 2019-2022 # Author: Leighton Pritchard # # Contact: @@ -19,7 +19,7 @@ # The MIT License # # Copyright (c) 2017-2018 The James Hutton Institute -# (c) The University of Strathclude 2019 +# (c) The University of Strathclude 2019-2022 # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal diff --git a/tests/test_tetra.py b/tests/test_tetra.py index 752dcd47..13507c9d 100644 --- a/tests/test_tetra.py +++ b/tests/test_tetra.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # (c) The James Hutton Institute 2017-2019 -# (c) The University of Strathclude 2019-2020 +# (c) The University of Strathclude 2019-2022 # Author: Leighton Pritchard # # Contact: @@ -19,7 +19,7 @@ # The MIT License # # Copyright (c) 2017-2019 The James Hutton Institute -# (c) The University of Strathclude 2019-2020 +# (c) The University of Strathclude 2019-2022 # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -46,13 +46,10 @@ """ import json -import unittest - -from pathlib import Path import pandas as pd -from pandas.util.testing import assert_frame_equal +from pandas.testing import assert_frame_equal from pyani.tetra import ( calculate_correlations,