Skip to content

Commit

Permalink
Merge branch 'release/0.15.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
simonvh committed Oct 7, 2020
2 parents 94f8a2c + b1e9dfc commit 8e88282
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 210 deletions.
2 changes: 1 addition & 1 deletion data/templates/table.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ table[data-sortable].sortable-theme-slick th[data-sorted="true"][data-sorted-dir
{% block before_cellstyle %}{% endblock before_cellstyle %}
{% block cellstyle %}
{%- for s in cellstyle %}
#T_{{uuid}}{{s.selector}} {
{%- for selector in s.selectors -%}{%- if not loop.first -%},{%- endif -%}#T_{{uuid}}{{selector}}{%- endfor -%} {
{% for p,val in s.props %}
{{p}}: {{val}};
{% endfor %}
Expand Down
9 changes: 5 additions & 4 deletions gimmemotifs/maelstrom.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
sns.set_style("white")

from gimmemotifs.config import MotifConfig, DIRECT_NAME, INDIRECT_NAME
from gimmemotifs.moap import moap, Moap, scan_to_table
from gimmemotifs.moap import moap, Moap
from gimmemotifs.scanner import scan_regionfile_to_table
from gimmemotifs.rank import rankagg
from gimmemotifs.motif import read_motifs
from gimmemotifs.report import maelstrom_html_report
Expand Down Expand Up @@ -339,7 +340,7 @@ def run_maelstrom(
count_table = os.path.join(outdir, "motif.count.txt.gz")
if not os.path.exists(count_table):
logger.info("motif scanning (counts)")
counts = scan_to_table(
counts = scan_regionfile_to_table(
infile,
genome,
"count",
Expand All @@ -357,7 +358,7 @@ def run_maelstrom(
score_table = os.path.join(outdir, "motif.score.txt.gz")
if not os.path.exists(score_table):
logger.info("motif scanning (scores)")
scores = scan_to_table(
scores = scan_regionfile_to_table(
infile,
genome,
"score",
Expand All @@ -377,7 +378,6 @@ def run_maelstrom(

if filter_redundant:
logger.info("Selecting non-redundant motifs")

fa = FeatureAgglomeration(
distance_threshold=filter_cutoff,
n_clusters=None,
Expand All @@ -393,6 +393,7 @@ def run_maelstrom(
.drop_duplicates(subset=["label"], keep="last")["motif"]
.values
)

nr_motif = (
X_cluster.sort_values("var")
.drop_duplicates(subset=["label"], keep="last")[["label", "motif"]]
Expand Down
105 changes: 4 additions & 101 deletions gimmemotifs/moap.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,106 +44,15 @@ def warn(*args, **kwargs):

from gimmemotifs import __version__
from gimmemotifs.motif import read_motifs
from gimmemotifs.scanner import Scanner
from gimmemotifs.scanner import scan_regionfile_to_table
from gimmemotifs.config import MotifConfig
from gimmemotifs.utils import pfmfile_location, as_fasta
from gimmemotifs.utils import pfmfile_location

import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

logger = logging.getLogger("gimme.maelstrom")
FPR = 0.01


def scan_to_table(
    input_table, genome, scoring, pfmfile=None, ncpus=None, zscore=True, gc=True
):
    """Scan the regions listed in a table with a set of motifs.

    Parameters
    ----------
    input_table : str
        Filename of input table. A name ending in "feather" is read as a
        feather file (regions taken from the first column); anything else
        is read as a delimited text table with the regions in the first
        column and lines starting with "#" treated as comments.
    genome : str
        Genome name. Can be either the name of a FASTA-formatted file or a
        genomepy genome name.
    scoring : str
        "count" produces a count table; any other value produces a score
        table.
    pfmfile : str, optional
        Specify a PFM file for scanning. When omitted, the "motif_db" entry
        of the default configuration is looked up in the motif directory.
    ncpus : int, optional
        If defined this specifies the number of cores to use.
    zscore : bool, optional
        Forwarded to the scanner's best_score() call (score tables only).
        The log message labels the result "z-score" when true and
        "logodds" otherwise.
    gc : bool, optional
        Forwarded to the scanner's background setup and to best_score().
        The log message adds "GC%" when both zscore and gc are true.

    Returns
    -------
    table : pandas.DataFrame
        DataFrame with motif ids as column names and regions as index. Values
        are either counts or scores depending on the 'scoring' parameter.

    Raises
    ------
    ValueError
        If no pfmfile is given and the configuration has no default
        motif database.
    """
    config = MotifConfig()

    # Fall back to the configured default motif database.
    if pfmfile is None:
        default_db = config.get_default_params().get("motif_db", None)
        if default_db is None:
            raise ValueError("no pfmfile given and no default database specified")
        pfmfile = os.path.join(config.get_motif_dir(), default_db)

    logger.info("reading table")
    if input_table.endswith("feather"):
        idx = pd.read_feather(input_table).iloc[:, 0].values
    else:
        idx = pd.read_table(input_table, index_col=0, comment="#").index
    regions = list(idx)

    # The background sequence size is the median region length, estimated
    # from a random sample of at most 1000 regions.
    sample = regions
    if len(regions) >= 1000:
        sample = np.random.choice(regions, size=1000, replace=False)
    seq_lengths = [len(seq) for seq in as_fasta(sample, genome=genome).seqs]
    size = int(np.median(seq_lengths))

    scanner = Scanner(ncpus=ncpus)
    scanner.set_motifs(pfmfile)
    scanner.set_genome(genome)
    scanner.set_background(genome=genome, gc=gc, size=size)

    if scoring == "count":
        logger.info("setting threshold")
        scanner.set_threshold(fpr=FPR)
        logger.info("creating count table")
        rows = list(scanner.count(regions))
    else:
        scanner.set_threshold(threshold=0.0)
        if not zscore:
            detail = " (logodds)"
        elif gc:
            detail = " (z-score, GC%)"
        else:
            detail = " (z-score)"
        logger.info("creating score table" + detail)
        rows = list(scanner.best_score(regions, zscore=zscore, gc=gc))
    logger.info("done")

    motif_names = [motif.id for motif in read_motifs(pfmfile)]
    logger.info("creating dataframe")
    return pd.DataFrame(rows, index=idx, columns=motif_names)


class Moap(object):
Expand Down Expand Up @@ -778,18 +687,12 @@ def moap(
sys.stderr.write("can't read motifs from {}".format(pfmfile))
raise

# initialize scanner
s = Scanner(ncpus=ncpus)
s.set_motifs(pfmfile)
s.set_genome(genome)
s.set_background(genome=genome)

# scan for motifs
motif_names = [m.id for m in read_motifs(pfmfile)]
scores = []
if method == "classic" or scoring == "count":
logger.info("motif scanning (scores)")
scores = scan_to_table(
scores = scan_regionfile_to_table(
inputfile,
genome,
"count",
Expand All @@ -800,7 +703,7 @@ def moap(
)
else:
logger.info("motif scanning (scores)")
scores = scan_to_table(
scores = scan_regionfile_to_table(
inputfile,
genome,
"score",
Expand Down
79 changes: 47 additions & 32 deletions gimmemotifs/motif.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,13 @@ def pwm_scan_all(self, fa, cutoff=0.9, nreport=50, scan_rc=True):
return matches

def plot_logo(
self, kind="information", fname=None, title=True, ylabel=True, add_left=0
self,
kind="information",
fname=None,
title=True,
ylabel=True,
add_left=0,
ax=None,
):
"""Plot motif logo
Expand All @@ -520,49 +526,58 @@ def plot_logo(
total = sum(self.pfm[0]) / 4
pfm = [[total] * 4] * add_left + self.pfm
matrix = pd.DataFrame(pfm, columns=["A", "C", "G", "T"])

if kind == "ensembl":
self.plot_ensembl_logo(fname=fname, title=title)
return
elif kind == "information":
matrix = lm.transform_matrix(
matrix, from_type="counts", to_type="information"
)
logo = lm.Logo(
matrix,
figsize=(fig_width * matrix.shape[0], fig_height),
show_spines=False,
vpad=0.02,
)

logo_params = {
"information": {
"df": lm.transform_matrix(
matrix, from_type="counts", to_type="information"
),
"figsize": (fig_width * matrix.shape[0], fig_height),
"show_spines": False,
"vpad": 0.02,
},
"frequency": {
"df": lm.transform_matrix(
matrix, from_type="counts", to_type="probability"
),
"figsize": (fig_width * matrix.shape[0], fig_height),
"show_spines": False,
"vpad": 0.02,
"font_name": "DejaVu Sans Mono",
},
"energy": {
"df": lm.transform_matrix(
lm.transform_matrix(matrix, from_type="counts", to_type="weight"),
center_values=True,
),
"figsize": (fig_width * matrix.shape[0], fig_height * 2),
"fade_below": 0.7,
"shade_below": 0.3,
"flip_below": False,
"show_spines": False,
},
}

if ax is not None:
logo_params[kind]["ax"] = ax
del logo_params[kind]["figsize"]

logo = lm.Logo(**logo_params[kind])

if kind == "information":
if ylabel:
logo.ax.set_ylabel("Bits", fontsize=16)
logo.ax.set_ylim(0, 2)
logo.ax.set_yticks([0, 0.5, 1, 1.5, 2], minor=False)
elif kind == "frequency":
matrix = lm.transform_matrix(
matrix, from_type="counts", to_type="probability"
)
logo = lm.Logo(
matrix,
font_name="DejaVu Sans Mono",
figsize=(fig_width * matrix.shape[0], fig_height),
show_spines=False,
vpad=0.02,
)
if ylabel:
logo.ax.set_ylabel("Frequency", fontsize=16)
logo.ax.set_ylim(0, 1)

elif kind == "energy":
matrix = lm.transform_matrix(matrix, from_type="counts", to_type="weight")
matrix = lm.transform_matrix(matrix, center_values=True)
logo = lm.Logo(
matrix,
fade_below=0.7,
shade_below=0.3,
flip_below=False,
show_spines=False,
figsize=(fig_width * matrix.shape[0], fig_height * 2),
)
if ylabel:
logo.ax.set_ylabel(r"$\Delta \Delta G$/RT", labelpad=-1, fontsize=16)
else:
Expand Down

0 comments on commit 8e88282

Please sign in to comment.