# New report layout with templates

In [2]:
import pandas as pd
import numpy as np
import panel as pn
import altair as alt
from pathlib import Path
# Import plotting functions from plotting
from plotting import bracken_raw, contig_quality, kaiju_raw, kaiju_megahit, cat_megahit, bowtie2_alignment_plot
from utils import parse_bowtielog, parse_fastp_report

# Load the Panel extensions used by this notebook: Tabulator for the table widgets ...
pn.extension("tabulator")
# ... and Vega for the Altair charts; this call also sets the default sizing mode and the "fast" template.
pn.extension("vega", sizing_mode="stretch_width", template="fast")
# Class-level assignment: sets the default theme on the Tabulator widget class itself.
pn.widgets.Tabulator.theme = 'modern'

# Samples: every non-hidden sub-directory of the results folder is treated as one sample.
sample_folder = Path("../virusclassification_nextflow/results/")
# iterdir() yields entries in arbitrary order, so sort them; otherwise the
# sample picked by samples[0] can differ between runs and machines.
samples = sorted(
    x for x in sample_folder.iterdir() if x.is_dir() and not x.stem.startswith(".")
)
if not samples:
    # Fail with an explicit message instead of a bare IndexError on samples[0].
    raise FileNotFoundError(f"No sample directories found in {sample_folder}")
sample = samples[0]

# Forwarded as `number=` to the Bracken bar chart
# (presumably the number of taxa to show -- TODO confirm against the plotting helper).
number = 10


# Locate the cleaned Bracken and Kaiju raw-read CSVs anywhere below the sample
# directory; the first match of each recursive glob is used.
cleaned_bracken_report = [*sample.rglob("*bracken_raw.csv")][0]
cleaned_kaiju_report = [*sample.rglob("*kaiju_raw.csv")][0]

# Two Bracken bar charts from the same report: a virus-only chart limited by
# `number`, and a domain-level chart that is not restricted to viruses.
bracken_bar_plot = bracken_raw.bar_chart_bracken_raw(
    cleaned_bracken_report,
    number=number,
    virus_only=True,
)
bracken_domain_bar_plot = bracken_raw.bar_chart_bracken_raw(
    cleaned_bracken_report,
    level="domain",
    virus_only=False,
)

# Kaiju bar chart, made interactive.
kaiju_raw_plot = (
    kaiju_raw.bar_chart_kaiju_raw(file=cleaned_kaiju_report)
    .interactive()
)

# Both Bracken charts side by side, each keeping its own colour scale.
species_and_domain_bracken = alt.hconcat(
    bracken_bar_plot, bracken_domain_bar_plot
).resolve_scale(color="independent")

# Wrap the Kaiju chart in a Vega pane; `name` is the label it gets inside pn.Tabs.
kaiju_raw_plot_pane = pn.pane.Vega(
    kaiju_raw_plot,
    name="Kaiju",
    sizing_mode="stretch_both",
)


# Merged CAT/Kaiju contig annotations: first matching CSV below the sample directory.
cat_kaiju_csv = [*sample.rglob("*cat_kaiju_merged.csv")][0]

# Keep only the columns shown in the report, in display order.
cat_kaiju_df = pd.read_csv(cat_kaiju_csv).loc[
    :,
    ["name", "taxon_id", "length", "last_level_kaiju", "last_level_cat", "sequence"],
]

# Paginated contig table (15 rows per page); the `sequence` column gets its
# own editor configuration.
cat_kaiju_table = pn.widgets.Tabulator(
    cat_kaiju_df,
    name="Contig Table",
    layout="fit_columns",
    show_index=False,
    pagination="local",
    page_size=15,
    editors={"sequence": dict(type="editable", value=False)},
)


def subheader(text: str, bg_color: str = "#04c273", height: int = 60):
    """Build a coloured Markdown banner used as a section sub-header.

    Parameters
    ----------
    text : str
        Markdown source rendered inside the banner.
    bg_color : str, default "#04c273"
        Forwarded to the pane as ``background``.
    height : int, default 60
        Forwarded to the pane as ``height``.

    Returns
    -------
    The ``pn.pane.Markdown`` object created with a fixed margin of 10,
    white text and 10px padding.
    """
    text_style = {"color": "white", "padding": "10px"}
    pane_options = dict(
        background=bg_color,
        height=height,
        margin=10,
        style=text_style,
    )
    return pn.pane.Markdown(text, **pane_options)


# Header: banner shown at the top of the report.
# `sample` is a Path, so interpolating it directly would print the whole
# relative path ("../virusclassification_nextflow/results/<sample>");
# use the directory name instead so the banner shows just the sample.
header = pn.pane.Markdown(
    f"""
    # Report for {sample.name}
    """,
    background="#039458",
    height=150, 
    margin=10, 
    style={
        "color": "white", 
        "padding": "0px 0px 200px 0px",
        "text-align": "center",
        "font-size": "20px",
    }
)



# Tabs for the raw classification results: the Kaiju chart and the contig table.
raw_classification_tab = pn.Tabs(
    kaiju_raw_plot_pane,
    cat_kaiju_table,
)

# Subheaders
# The help text names the `sequence` column, which is the column the contig
# table actually shows (there is no column called "FASTA").
# Plain string: there are no placeholders, so no f-prefix is needed.
subheader_raw_classification = subheader(
    """
    ## Raw classification
    #### Double click the <b> sequence column </b> and copy (CTRL-C) to clipboard to get full sequence
    """, 
    height=90
)


# Sections
# Each section stacks its sub-header banner on top of its tab group.
raw_section = pn.Column(subheader_raw_classification, raw_classification_tab)



# --- Create the report --- #
# Top-level navigation: one tab per report section, with the tab bar on the left.
all_tabs = pn.Tabs(
    ("Raw classification", raw_section),
    tabs_location="left",
)

# Full page: header banner, a divider, then the section tabs.
report = pn.Column(
    header,
    pn.layout.Divider(),
    all_tabs,
)

# Write the report as an HTML file. `sample` is a Path, so use its name for
# the page title; interpolating the Path itself would put the whole relative
# path ("../virusclassification_nextflow/results/<sample>") in the title.
report.save("panel-report.html", title=f"Report {sample.name}")

In [4]:
import pandas as pd
import numpy as np
import panel as pn
import altair as alt
from pathlib import Path
# Import plotting functions from plotting
from plotting import bracken_raw, contig_quality, kaiju_raw, kaiju_megahit, cat_megahit, bowtie2_alignment_plot
from utils import parse_bowtielog, parse_fastp_report

# Samples: every non-hidden sub-directory of the results folder is treated as one sample.
sample_folder = Path("../virusclassification_nextflow/results/")
# iterdir() yields entries in arbitrary order, so sort them; otherwise the
# sample picked by samples[0] can differ between runs and machines.
samples = sorted(
    x for x in sample_folder.iterdir() if x.is_dir() and not x.stem.startswith(".")
)
if not samples:
    # Fail with an explicit message instead of a bare IndexError on samples[0].
    raise FileNotFoundError(f"No sample directories found in {sample_folder}")
sample = samples[0]

# Forwarded as `number=` to the Bracken bar chart
# (presumably the number of taxa to show -- TODO confirm against the plotting helper).
number = 10


# Locate the cleaned Bracken and Kaiju raw-read CSVs anywhere below the sample
# directory; the first match of each recursive glob is used.
cleaned_bracken_report = [*sample.rglob("*bracken_raw.csv")][0]
cleaned_kaiju_report = [*sample.rglob("*kaiju_raw.csv")][0]

# Two Bracken bar charts from the same report: a virus-only chart limited by
# `number`, and a domain-level chart that is not restricted to viruses.
bracken_bar_plot = bracken_raw.bar_chart_bracken_raw(
    cleaned_bracken_report,
    number=number,
    virus_only=True,
)
bracken_domain_bar_plot = bracken_raw.bar_chart_bracken_raw(
    cleaned_bracken_report,
    level="domain",
    virus_only=False,
)

# Kaiju bar chart (left non-interactive in this cell).
kaiju_raw_plot = kaiju_raw.bar_chart_kaiju_raw(
    file=cleaned_kaiju_report,
)

# Both Bracken charts side by side, each keeping its own colour scale.
species_and_domain_bracken = alt.hconcat(
    bracken_bar_plot, bracken_domain_bar_plot
).resolve_scale(color="independent")


In [10]:
# Kaiju raw-read table.
# Load the report in this cell: the stand-alone cell that defines
# `cleaned_kaiju_report_df` appears later in the notebook, so on a fresh
# kernel (Restart & Run All) this cell would otherwise fail with a NameError.
cleaned_kaiju_report_df = pd.read_csv(cleaned_kaiju_report)
cleaned_kaiju_report_table = pn.widgets.Tabulator(
    cleaned_kaiju_report_df, 
    layout='fit_columns',
    show_index=False,
    name="Kaiju Table"
)

In [7]:
# Read the cleaned Kaiju raw-read report (CSV) into a DataFrame.
cleaned_kaiju_report_df = pd.read_csv(cleaned_kaiju_report)

In [9]:
# Display the Kaiju report; the rendered output below is truncated to the first and last rows.
cleaned_kaiju_report_df

Unnamed: 0,taxon_id,taxonomy,percent,reads,taxon_name
0,10335,Alphaherpesvirinae,0.635000,5334,Human alphaherpesvirus 3
1,10335,Herviviricetes,0.635000,5334,Human alphaherpesvirus 3
2,10335,Herpesviridae,0.635000,5334,Human alphaherpesvirus 3
3,10335,Varicellovirus,0.635000,5334,Human alphaherpesvirus 3
4,10335,Human alphaherpesvirus 3,0.635000,5334,Human alphaherpesvirus 3
...,...,...,...,...,...
263,140052,Orthoretrovirinae,0.000119,1,Betaretrovirus
264,140052,Retroviridae,0.000119,1,Betaretrovirus
265,140052,Ortervirales,0.000119,1,Betaretrovirus
266,140052,Revtraviricetes,0.000119,1,Betaretrovirus


In [22]:
# Merged CAT/Kaiju contig annotations: first matching CSV below the sample directory.
cat_kaiju_csv = [*sample.rglob("*cat_kaiju_merged.csv")][0]

# Keep only the columns shown in the report, in display order.
cat_kaiju_df = pd.read_csv(cat_kaiju_csv).loc[
    :,
    ["name", "taxon_id", "length", "last_level_kaiju", "last_level_cat", "sequence"],
]

# Contig table without pagination; the `sequence` column gets its own
# editor configuration.
cat_kaiju_table = pn.widgets.Tabulator(
    cat_kaiju_df,
    name="Contig Table",
    layout="fit_columns",
    show_index=False,
    editors={"sequence": dict(type="editable", value=False)},
)

In [21]:
# Display the contig annotation table; the rendered output below is truncated to the first and last rows.
cat_kaiju_df

Unnamed: 0,name,taxon_id,length,last_level_kaiju,last_level_cat,sequence
0,k79_144,10335.0,5711,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,CCTACGCCCGCAGTAACTCCTCAACCAAGAGGGGCTGAGTTTCATA...
1,k79_124,10335.0,2072,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,CTTGCGGCACAAATGTGTCCGCTTTCTCTAACCAAGGTAATCTGCG...
2,k79_128,10335.0,1712,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,AACATTTATCCCAACTGATTACATTTCATACGCGAATAAACGACAC...
3,k79_108,10335.0,1696,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,CGCACGTCATCGACCATCGGCAGAGGTGTTGCTTAACCACTCTGTT...
4,k79_93,10335.0,1652,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,TATATACAGTCTCAAGCTTGATGGTTATGGCAGTATAAACCGTGGG...
...,...,...,...,...,...,...
142,k79_50,10335.0,247,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,CATCAAACACACAGACGCGTAAAATTGCGCGAGTTCCCAAGGTATC...
143,k79_21,10335.0,246,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,GCTCACCTCCTTATGCGACGAATACAGACGCGTCCATCCGGCTAAC...
144,k79_36,10335.0,246,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,TAATGCTCTTACATGCCGCAGTAAATCCCATTTTATATTAATACAG...
145,k79_30,10335.0,242,Human alphaherpesvirus 3,Human alphaherpesvirus 3: 1.00,GATCAATCGCAAGTCCAACTCTTTGGGGCAAGGTTCCAGTATATTT...


In [31]:
# fastp HTML report: first *.html found inside a directory whose name ends in "fastp".
fastp_report = [*sample.rglob("*fastp/*.html")][0]

# Parse the HTML report into a DataFrame with the project helper.
fastp_df = parse_fastp_report.parse_fastp(fastp_report)

# Summary table widget for the report.
fastp_table = pn.widgets.Tabulator(
    fastp_df,
    name="FastP Summary",
    show_index=False,
    layout="fit_columns",
)

In [30]:
# Display the parsed fastp summary (description/value pairs, as shown in the output below).
fastp_df

Unnamed: 0,description,value
0,fastp version:,0.23.2 (https://github.com/OpenGene/fastp)
1,sequencing:,paired end (76 cycles + 76 cycles)
2,mean length before filtering:,"74bp, 74bp"
3,mean length after filtering:,"74bp, 74bp"
4,duplication rate:,3.924355%
5,Insert size peak:,75
6,total reads:,18.200000 K
7,total bases:,1.355811 M
8,Q20 bases:,1.300614 M (95.928857%)
9,Q30 bases:,1.187558 M (87.590232%)
