Skip to content

Commit

Permalink
Merge pull request #375 from widdowquinn/issue_373
Browse files Browse the repository at this point in the history
Issue 373 - preliminary merge with multiple fixes to annoyances (warnings/small errors) prior to real fix of #373.
  • Loading branch information
widdowquinn committed Jan 20, 2022
2 parents 8a59453 + 7f76767 commit c748d55
Show file tree
Hide file tree
Showing 12 changed files with 113 additions and 70 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ walkthrough: clean_walkthrough
pyani report --runs -o C_blochmannia_ANIm/ --formats html excel stdout
pyani report --run_results 1 --formats html excel stdout -o C_blochmannia_ANIm/
pyani report --run_matrices 1 --formats html excel stdout -o C_blochmannia_ANIm/
pyani plot --formats png pdf --method seaborn -o C_blochmannia_ANIm --run_ids 1
pyani plot --formats png pdf --method seaborn -o C_blochmannia_ANIm --run_id 1
# pyani anib C_blochmannia C_blochmannia_ANIb \
# --name "C. blochmannia run 2" \
# --labels C_blochmannia/labels.txt --classes C_blochmannia/classes.txt
Expand Down
43 changes: 32 additions & 11 deletions pyani/pyani_graphics/sns/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""Code to implement Seaborn graphics output for ANI analyses."""
import warnings

import matplotlib # pylint: disable=C0411
import pandas as pd
Expand Down Expand Up @@ -106,17 +107,37 @@ def get_clustermap(dfr, params, title=None, annot=True):
:param title: str, plot title
:param annot: Boolean, add text for cell values?
"""
fig = sns.clustermap(
dfr,
cmap=params.cmap,
vmin=params.vmin,
vmax=params.vmax,
col_colors=params.colorbar,
row_colors=params.colorbar,
figsize=(params.figsize, params.figsize),
linewidths=params.linewidths,
annot=annot,
)

# If we do not catch warnings here, we often get the following warning:
#   ClusterWarning: scipy.cluster: The symmetric non-negative hollow
#   observation matrix looks suspiciously like an uncondensed distance matrix
# The usual solution would be to convert the array with
# scipy.spatial.distance.squareform(), but this requires that all values on
# the main diagonal are zero, which is not the case for ANI.
# As we know this is a (1 - distance) matrix, we could just set the diagonal
# to zero and fudge it, but this is not a good solution. Instead, we suppress
# the warning in a context manager, for this function call only, because we
# know the warning is not relevant here.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
"scipy.cluster: The symmetric non-negative "
"hollow observation matrix looks suspiciously like an "
"uncondensed distance matrix"
),
)
fig = sns.clustermap(
dfr,
cmap=params.cmap,
vmin=params.vmin,
vmax=params.vmax,
col_colors=params.colorbar,
row_colors=params.colorbar,
figsize=(params.figsize, params.figsize),
linewidths=params.linewidths,
annot=annot,
)

# add labels for each of the input genomes
add_labels(fig, params)
Expand Down
1 change: 0 additions & 1 deletion pyani/scripts/average_nucleotide_identity.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,6 @@ def draw(args: Namespace, filestems: List[str], gformat: str) -> None:
infilename = fullstem.with_suffix(".tab")
dfm = pd.read_csv(infilename, index_col=0, sep="\t")
logger.info("Writing heatmap to %s", outfilename)
print(args.labels, args.classes)
params = pyani_graphics.Params(
params_mpl(dfm)[filestem],
pyani_tools.get_labels(args.labels, logger=logger),
Expand Down
4 changes: 2 additions & 2 deletions pyani/scripts/parsers/plot_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2016-2019
# (c) University of Strathclyde 2019-2020
# (c) University of Strathclyde 2019-2022
# Author: Leighton Pritchard
#
# Contact:
Expand All @@ -17,7 +17,7 @@
# The MIT License
#
# Copyright (c) 2016-2019 The James Hutton Institute
# Copyright (c) 2019-2020 University of Strathclyde
# Copyright (c) 2019-2022 University of Strathclyde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
4 changes: 2 additions & 2 deletions pyani/scripts/parsers/report_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2016-2019
# (c) University of Strathclyde 2019-2020
# (c) University of Strathclyde 2019-2022
# Author: Leighton Pritchard
#
# Contact:
Expand All @@ -17,7 +17,7 @@
# The MIT License
#
# Copyright (c) 2016-2019 The James Hutton Institute
# Copyright (c) 2019-2020 University of Strathclyde
# Copyright (c) 2019-2022 University of Strathclyde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down
3 changes: 3 additions & 0 deletions pyani/scripts/subcommands/subcmd_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ def write_distribution(
title=f"matrix_{matdata.name}_run{run_id}",
)

# Be tidy with matplotlib caches
plt.close("all")


def write_heatmap(
run_id: int,
Expand Down
69 changes: 48 additions & 21 deletions pyani/scripts/subcommands/subcmd_report.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2017-2019
# (c) University of Strathclyde 2019-2020
# (c) University of Strathclyde 2019-2022
# Author: Leighton Pritchard
#
# Contact:
Expand All @@ -18,7 +18,7 @@
# The MIT License
#
# Copyright (c) 2017-2019 The James Hutton Institute
# Copyright (c) 2019-2020 University of Strathclyde
# Copyright (c) 2019-2022 University of Strathclyde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -40,6 +40,7 @@
"""Provides the report subcommand for pyani."""

import logging
import warnings

from argparse import Namespace
from typing import List, NamedTuple
Expand Down Expand Up @@ -154,12 +155,7 @@ def subcmd_report(args: Namespace) -> int:
"genome label",
"genome class",
]
report(
args,
session,
formats,
ReportParams("runs_genomes", statement, headers),
)
report(args, session, formats, ReportParams("runs_genomes", statement, headers))

# Report table of all runs in which a genome is involved
if args.show_genomes_runs:
Expand Down Expand Up @@ -196,12 +192,7 @@ def subcmd_report(args: Namespace) -> int:
"method",
"date run",
]
report(
args,
session,
formats,
ReportParams("genomes_runs", statement, headers),
)
report(args, session, formats, ReportParams("genomes_runs", statement, headers))

# Report table of comparison results for the indicated runs
if args.run_results:
Expand Down Expand Up @@ -234,6 +225,7 @@ def subcmd_report(args: Namespace) -> int:
.filter(Run.run_id == run_id)
.statement
)
logger.debug("Results query: %s", statement)
headers = [
"Comparison ID",
"Query ID",
Expand Down Expand Up @@ -300,12 +292,7 @@ def subcmd_report(args: Namespace) -> int:
return 0


def report(
args: Namespace,
session,
formats: List[str],
params: ReportParams,
) -> None:
def report(args: Namespace, session, formats: List[str], params: ReportParams) -> None:
"""Write tabular report of pyani runs from database.
:param args: Namespace of command-line arguments
Expand All @@ -319,7 +306,47 @@ def report(
logger.debug(
"Writing table of pyani %s from the database to %s.*", params.name, outfname
)
data = pd.read_sql(params.statement, session.bind)

# With newer versions of SQLAlchemy, Pandas may emit a warning because our statement
# includes a Cartesian product, even though this is what we want:
#   SAWarning: SELECT statement has a cartesian product between FROM element(s)
#   "runs" and FROM element "genome_query". Apply join condition(s) between each
#   element to resolve.
# We could use SQLAlchemy's true() function to force the join condition, but this has
# to be done from within Pandas, and is an issue for the Pandas developers to fix.
# Instead, we suppress the warning for this call only.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=(
"SELECT statement has a cartesian product between FROM "
'element\\(s\\) "runs" and FROM element "genome_query"'
),
)
warnings.filterwarnings(
"ignore",
message=(
"SELECT statement has a cartesian product between FROM "
'element\\(s\\) "runs" and FROM element "comparisons"'
),
)
warnings.filterwarnings(
"ignore",
message=(
"SELECT statement has a cartesian product between FROM "
'element\\(s\\) "runs" and FROM element "genome_subject"'
),
)
warnings.filterwarnings(
"ignore",
message=(
"SELECT statement has a cartesian product between FROM "
'element\\(s\\) "genome_query", "genome_subject", "comparisons" '
'and FROM element "runs"'
),
)
data = pd.read_sql(params.statement, session.bind)

data.columns = params.headers
pyani_report.write_dbtable(data, outfname, formats)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ Pillow
PyQt5
scipy
seaborn
sqlalchemy==1.3.10
sqlalchemy
tqdm
13 changes: 6 additions & 7 deletions tests/test_anib.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2017-2019
# (c) University of Strathclyde 2019-2020
# (c) University of Strathclyde 2019-2022
# Author: Leighton Pritchard
#
# Contact: leighton.pritchard@strath.ac.uk
Expand All @@ -17,7 +17,7 @@
# The MIT License
#
# Copyright (c) 2017-2019 The James Hutton Institute
# Copyright (c) 2019-2020 University of Strathclyde
# Copyright (c) 2019-2022 University of Strathclyde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -49,7 +49,6 @@
import pandas as pd
import pytest # noqa: F401 # pylint: disable=unused-import
import unittest
import unittest.mock as mock

from pandas.testing import assert_frame_equal

Expand Down Expand Up @@ -367,8 +366,8 @@ def test_parse_legacy_blastdir(anib_output_dir):
anib_output_dir.legacyblastdir, orglengths, fraglengths, mode="ANIblastall"
)
assert_frame_equal(
result.percentage_identity.sort_index(1).sort_index(),
anib_output_dir.legacyblastresult.sort_index(1).sort_index(),
result.percentage_identity.sort_index(axis=1).sort_index(),
anib_output_dir.legacyblastresult.sort_index(axis=1).sort_index(),
)


Expand All @@ -380,8 +379,8 @@ def test_parse_blastdir(anib_output_dir):
anib_output_dir.blastdir, orglengths, fraglengths, mode="ANIb"
)
assert_frame_equal(
result.percentage_identity.sort_index(1).sort_index(),
anib_output_dir.blastresult.sort_index(1).sort_index(),
result.percentage_identity.sort_index(axis=1).sort_index(),
anib_output_dir.blastresult.sort_index(axis=1).sort_index(),
)


Expand Down
29 changes: 13 additions & 16 deletions tests/test_anim.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2017-2019
# (c) University of Strathclyde 2019-2021
# (c) University of Strathclyde 2019-2022
# Author: Leighton Pritchard
#
# Contact:
Expand All @@ -18,7 +18,7 @@
# The MIT License
#
# Copyright (c) 2017-2019 The James Hutton Institute
# Copyright (c) 2019-2021 University of Strathclyde
# Copyright (c) 2019-2022 University of Strathclyde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -43,15 +43,15 @@
pytest -v
"""
import sys

from pathlib import Path
from typing import List, NamedTuple, Tuple

import pandas as pd
import pytest
import unittest

from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal

from pyani import anim, pyani_files

Expand Down Expand Up @@ -208,8 +208,8 @@ def test_deltadir_parsing(delta_output_dir):
orglengths = pyani_files.get_sequence_lengths(seqfiles)
result = anim.process_deltadir(delta_output_dir.deltadir, orglengths)
assert_frame_equal(
result.percentage_identity.sort_index(1).sort_index(),
delta_output_dir.deltaresult.sort_index(1).sort_index(),
result.percentage_identity.sort_index(axis=1).sort_index(),
delta_output_dir.deltaresult.sort_index(axis=1).sort_index(),
)


Expand Down Expand Up @@ -251,17 +251,14 @@ def test_mummer_single(tmp_path, path_file_two):
path_file_two[0], path_file_two[1], outdir=tmp_path
)
dir_nucmer = tmp_path / "nucmer_output"
outprefix = (
dir_nucmer
/ str(path_file_two[0].stem)
/ str(path_file_two[0].stem + "_vs_" + path_file_two[1].stem)
)
expected = (
(
"nucmer --mum -p "
f"{dir_nucmer / str(path_file_two[0].stem) / str(path_file_two[0].stem + '_vs_' + path_file_two[1].stem)} "
f"{path_file_two[0]} {path_file_two[1]}"
),
(
"delta_filter_wrapper.py delta-filter -1 "
f"{dir_nucmer / str(path_file_two[0].stem ) / str(path_file_two[0].stem + '_vs_' + path_file_two[1].stem + '.delta')} "
f"{dir_nucmer / str(path_file_two[0].stem ) / str(path_file_two[0].stem + '_vs_' + path_file_two[1].stem + '.filter')}"
),
f"nucmer --mum -p {outprefix} {path_file_two[0]} {path_file_two[1]}",
f"delta_filter_wrapper.py delta-filter -1 {outprefix}.delta {outprefix}.filter",
)
assert cmds == expected

Expand Down
4 changes: 2 additions & 2 deletions tests/test_subcmd_05_report.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# (c) The James Hutton Institute 2017-2019
# (c) The University of Strathclude 2019
# (c) The University of Strathclyde 2019-2022
# Author: Leighton Pritchard
#
# Contact:
Expand All @@ -19,7 +19,7 @@
# The MIT License
#
# Copyright (c) 2017-2018 The James Hutton Institute
# (c) The University of Strathclude 2019
# (c) The University of Strathclyde 2019-2022
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down

0 comments on commit c748d55

Please sign in to comment.