In [1]:
import gosling as gos

## Convert GFF3 to CSV

In [None]:
import gffpandas.gffpandas as gffpd
import pandas as pd

# GFF3 annotation to convert. The CSV is written as "<gff_filename>.csv",
# which is the file the gene tracks load.
gff_filename = 'MZ781228.gff3'

# Read the annotation and spread its attributes into columns; the `gene` and
# `gbkey` columns used below come out of this step.
annotation = gffpd.read_gff3(gff_filename)
attr_to_columns = annotation.attributes_to_columns()

# `gene_prefix` is the key the gene tracks colour by: the first three
# characters of the gene name, or the row's `gbkey` when it has no gene name.
# Vectorised replacement for the earlier row-wise `apply`, whose inner
# `else ''` branch could never execute (a value that is not NA is always notna).
attr_to_columns['gene_prefix'] = (
    attr_to_columns['gene'].str[:3].fillna(attr_to_columns['gbkey'])
)

attr_to_columns.to_csv(f'{gff_filename}.csv')

## Import Dataset

In [2]:
# Gene annotation table (the CSV written by the GFF3 conversion cell).
# `seq_id` is declared as the chromosome column and `start`/`end` as the
# genomic coordinate columns.
data = gos.csv(
    url="./MZ781228.gff3.csv",
    genomicFields=["start", "end"],
    chromosomeField="seq_id",
    chromosomePrefix="MZ781228.",
)

# Table behind the sequence tracks (read there via Base/Start/End/Count fields).
data_base = gos.csv(url="./MZ781228.fasta.csv")

# Tables behind the GC-content and GC-skew tracks.
data_gc = gos.csv(url="./MZ781228_gc_content.csv")
data_gc_skew = gos.csv(url="./MZ781228_gc_skew.csv")

# Coverage table: tab-separated, with column names supplied explicitly.
data_coverage = gos.csv(
    url="./MZ781228_sort_out.12F.bam.tsv",
    headerNames=["id", "position", "depth"],
    separator="\t",
)

In [3]:
# Common ancestor of the gene rectangles and their labels: annotation rows
# whose `gbkey` is one of the four listed feature kinds, with a shared tooltip.
track_base = (
    gos.Track(data)
    .encode(tooltip=["start:G", "end:G", "strand:N", "gene:N"])
    .transform_filter("gbkey", oneOf=["tRNA", "rRNA", "CDS", "D-loop"])
)

# One rectangle per annotated feature, spanning start..end, coloured by
# `gene_prefix` and outlined with a thin black stroke.
track_gene_base = track_base.mark_rect().encode(
    color=gos.Color(
        "gene_prefix:N",
        domain=["nad", "cox", "atp", "trn", "rrn", "D-loop"],
        range=["#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e"],
    ),
    x=gos.X(
        "start:G",
        # Use the same 0-15235 interval as every view-level xDomain in this
        # notebook; the previous 0-15000 disagreed with them.
        domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
        axis="none",
    ),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
)

# Per-strand gene tracks. Each one is built by excluding the opposite strand's
# symbol rather than by selecting its own.
track_gene_posstrand = (
    track_gene_base
    .transform_filter_not("strand", oneOf=["-"])
    .properties(title="Positive Strand")
)

track_gene_negstrand = (
    track_gene_base
    .transform_filter_not("strand", oneOf=["+"])
    .properties(title="Negative Strand")
)

# Gene-name text drawn over the gene rectangles.
# `track_base` already restricts rows to the tRNA/rRNA/CDS/D-loop features, so
# that filter is not applied a second time here.
label_gene_base = track_base.mark_text(dy=15).encode(
    x=gos.X("start:G", axis="top"),
    xe="end:G",
    text="gene:N",
    size=gos.value(15),
).visibility_lt(
    # Per-mark visibility rule: text width compared against the feature's
    # on-screen extent |xe-x|, with a 10 px transition band.
    measure="width",
    threshold="|xe-x|",
    transitionPadding=10,
    target="mark",
)

# Per-strand labels, split the same way as the gene rectangles (by excluding
# the opposite strand's symbol).
label_gene_posstrand = label_gene_base.transform_filter_not("strand", oneOf=["-"])
label_gene_negstrand = label_gene_base.transform_filter_not("strand", oneOf=["+"])

# Quantitative tracks. Only x, colour and tooltip are encoded here; the layout
# cells further down attach the y channel and the track size they need.
track_gc = (
    gos.Track(data_gc)
    .mark_area()
    .encode(
        x=gos.X("position:G"),
        color=gos.value("steelBlue"),
        tooltip=["position:G", "gc_content:Q"],
    )
)

track_gc_skew = (
    gos.Track(data_gc_skew)
    .mark_line()
    .encode(
        x=gos.X("position:G"),
        tooltip=["position:G", "gc_skew:Q"],
    )
)

# The coverage track is the one that carries the bottom x axis.
track_coverage = (
    gos.Track(data_coverage)
    .mark_area()
    .encode(
        x=gos.X("position:G", axis="bottom"),
        tooltip=["position:G", "depth:Q"],
    )
)

# Gene rectangles with their labels overlaid, one overlay per strand.
# Both overlays are sized and laid out for the circular figure.
track_gene_posstrand_label = gos.overlay(track_gene_posstrand, label_gene_posstrand).properties(
    layout="circular",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    width=500,
    height=10,
)

track_gene_negstrand_label = gos.overlay(track_gene_negstrand, label_gene_negstrand).properties(
    layout="circular",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    width=500,
    height=10,
)

## Circos

In [4]:
# Circular figure: both gene overlays, then GC content, GC skew and coverage.

# NOTE(review): these two resized gene tracks are not the objects passed to the
# stack below; the stack consumes the `*_label` overlays built earlier.
track_gene_posstrand = track_gene_posstrand.properties(height=10, width=500)
track_gene_negstrand = track_gene_negstrand.properties(height=10, width=500)

# Attach the y channel and the circular track size to each quantitative track.
track_gc = track_gc.properties(width=500, height=10, y=gos.Y("gc_content:Q"))
track_gc_skew = track_gc_skew.properties(
    width=500,
    height=10,
    y=gos.Y("gc_skew:Q", domain=(-1, 1)),
)
track_coverage = track_coverage.properties(width=500, height=10, y=gos.Y("depth:Q"))

vis = gos.stack(
    track_gene_posstrand_label,
    track_gene_negstrand_label,
    track_gc,
    track_gc_skew,
    track_coverage,
).properties(
    title="Basic Marks: Rect",
    subtitle="Tutorial Examples",
    layout="circular",
    centerRadius=0.3,
    xDomain=gos.GenomicDomain(interval=[0, 15235]),
    spacing=0,
    static=True,
)

# Render inline, then export a standalone HTML copy.
vis.display()
vis.save("circular_layout.html")

In [5]:
# Linear figure: the same five tracks as the circular figure, resized for a
# 1000 px wide linear view.
track_gene_posstrand = track_gene_posstrand.properties(width=1000, height=25)
track_gene_negstrand = track_gene_negstrand.properties(width=1000, height=25)
track_gc = track_gc.properties(y=gos.Y("gc_content:Q", domain=(0, 50), flip=False), width=1000, height=50)
track_gc_skew = track_gc_skew.properties(y=gos.Y("gc_skew:Q", domain=(-0.7, 0.1)), width=1000, height=50)
track_coverage = track_coverage.properties(y=gos.Y("depth:Q", flip=False), width=1000, height=50)

# Rebuild the gene + label overlays for this layout. The `*_label` overlays
# created earlier are fixed to layout="circular" at 500x10, so stacking them
# here meant the 1000x25 resize above never reached the figure. New names are
# used so the circular overlays stay intact for the circular cell.
track_gene_posstrand_linear = gos.overlay(
    track_gene_posstrand, label_gene_posstrand
).properties(
    width=1000,
    height=25,
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    layout="linear",
)

track_gene_negstrand_linear = gos.overlay(
    track_gene_negstrand, label_gene_negstrand
).properties(
    width=1000,
    height=25,
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    layout="linear",
)

# The sequence tracks that used to be built in this cell were never added to
# this stack; the next cell constructs them itself, so they are not repeated.
vis = gos.stack(
    track_gene_posstrand_linear,
    track_gene_negstrand_linear,
    track_gc,
    track_gc_skew,
    track_coverage,
).properties(
    title="Basic Marks: Rect",
    subtitle="Tutorial Examples",
    layout="linear",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    spacing=0,
    static=False,
)

# Render inline, then export a standalone HTML copy.
vis.display()
vis.save("linear_layout.html")

In [6]:
# Hybrid linear figure: GC content, GC skew, a sequence overlay and coverage.

# Sequence bars: one bar per Start..End span, coloured by base.
track_base_2 = (
    gos.Track(data_base)
    .mark_bar()
    .encode(
        x=gos.X("Start:G"),
        xe=gos.X("End:G"),
        y=gos.Y("Count:Q", axis="none"),
        color=gos.Color("Base:N", domain=["A", "T", "G", "C"], legend=True),
    )
)

# Base letters over the bars, subject to a per-mark width rule and a per-track
# zoom-level rule; rows whose Count is 0 are excluded.
track_base_text_2 = (
    gos.Track(data_base)
    .mark_text()
    .encode(
        x=gos.X("Start:G"),
        xe=gos.X("End:G"),
        text="Base:N",
        color=gos.value("white"),
        size=gos.value(24),
    )
    .visibility_lt(measure="width", threshold="|xe-x|", transitionPadding=30, target="mark")
    .visibility_lt(measure="zoomLevel", threshold=10, target="track")
    .transform_filter_not("Count", oneOf=[0])
)

track_gene_base_label = gos.overlay(track_base_2, track_base_text_2).properties(
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    width=1000,
    height=25,
)

# NOTE(review): the two gene tracks are resized here but are not part of the
# stack assembled below.
track_gene_posstrand = track_gene_posstrand.properties(height=25, width=1000)
track_gene_negstrand = track_gene_negstrand.properties(height=25, width=1000)

# y channel and linear sizing for the quantitative tracks.
track_gc = track_gc.properties(
    width=1000,
    height=50,
    y=gos.Y("gc_content:Q", domain=(0, 50), flip=False),
)
track_gc_skew = track_gc_skew.properties(
    width=1000,
    height=50,
    y=gos.Y("gc_skew:Q", domain=(-0.7, 0.1)),
)
track_coverage = track_coverage.properties(
    width=1000,
    height=50,
    y=gos.Y("depth:Q", flip=False),
)

vis = gos.stack(
    track_gc,
    track_gc_skew,
    track_gene_base_label,
    track_coverage,
).properties(
    title="Genomic Properties + Sequence Alignment",
    subtitle="Hybrid Layout",
    layout="linear",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    static=False,
    spacing=0,
)

# Render inline, then export a standalone HTML copy.
vis.display()
vis.save("linear_layout_2.html")

In [None]:
# Draft of the circular figure built directly from the data sources, without
# the shared base tracks.
# NOTE(review): this cell rebinds `track_gc`, `track_coverage` and `vis`, and
# saves to the same "circular_layout.html" as the main circular cell.

# Forward-strand genes (rows whose strand is "-" are excluded).
track = gos.Track(data).mark_rect().encode(
    color=gos.Color("gene_prefix:N",
            domain=["nad", "cox", "atp", "trn", "rrn", "D-loop"],
            range=["#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e"],
        ),
    # 0-15235 matches the view-level xDomain below (was 0-15000).
    x=gos.X("start:G", domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]), axis="none"),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
).transform_filter_not(
    "strand",
    oneOf=["-"]
).properties(width=500, height=10)

# Reverse-strand genes (rows whose strand is "+" are excluded).
track_reverse = gos.Track(data).mark_rect().encode(
    # Colour by `gene_prefix`, like the forward track: the domain lists
    # gene-prefix values, which the previously used `gbkey` field does not hold
    # apart from "D-loop".
    color=gos.Color("gene_prefix:N",
            domain=["nad", "cox", "atp", "trn", "rrn", "D-loop"],
            range=["#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e"],
        ),
    x=gos.X("start:G", domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]), axis="none"),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
).transform_filter_not(
    "strand",
    oneOf=["+"]
).properties(width=500, height=10)

# GC content as bars with a flipped y axis.
# NOTE(review): the colour channel gives no `type` and a three-value domain;
# confirm this validates against the Gosling schema.
track_gc = gos.Track(data_gc).mark_bar().encode(
    x=gos.X("position:G"),
    y=gos.Y("gc_content:Q", domain=(0, 50), flip=True),
    color=gos.Color(
        field="gc_content",
        domain=(10, 20, 30),
        range=["#1E88E5", "#7CB342", "#D81B60"]
    ),
    tooltip=["position:G", "gc_content:Q"],
).properties(width=500, height=10)

# Coverage as a line with a flipped y axis; carries the bottom x axis.
track_coverage = gos.Track(data_coverage).mark_line().encode(
    x=gos.X("position:G", axis="bottom"),
    y=gos.Y("depth:Q", flip=True),
    tooltip=["position:G", "depth:Q"]
).properties(width= 500, height=10)

vis = gos.stack(track, track_reverse, track_gc, track_coverage).properties(
    title="Basic Marks: Rect",
    subtitle="Tutorial Examples",
    layout="circular",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    spacing=0,
    static=True,
    centerRadius=0.3,
)

vis.display()
vis.save("circular_layout.html")

### Circos Test 2

In [None]:
# Second circular draft: gene rectangles with name labels overlaid, plus GC
# content and coverage.
# NOTE(review): this cell rebinds `track`, `track_reverse`, `track_gc`,
# `track_coverage` and `vis`.

# Shared tooltip and title for the gene rectangles and their labels.
base = gos.Track(data).encode(
    tooltip=["start:G", "end:G", "strand:N", "gene:N"]
).properties(
    title="Genes | MZ781228"
)

# Gene rectangles; no strand filter is applied to this track.
track = base.mark_rect().encode(
    color=gos.Color("gene_prefix:N",
            domain=["nad", "cox", "atp", "trn", "rrn", "D-loop"],
            range=["#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e"],
        ),
    # 0-15235 matches the view-level xDomain below (was 0-15000).
    x=gos.X("start:G", domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]), axis="none"),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
)

# Reverse-strand rectangles (rows whose strand is "+" are excluded).
# NOTE(review): this track is built but is not part of the stack below.
track_reverse = gos.Track(data).mark_rect().encode(
    # Colour by `gene_prefix`, like `track`: the domain lists gene-prefix
    # values, which the previously used `gbkey` field does not hold apart from
    # "D-loop".
    color=gos.Color("gene_prefix:N",
            domain=["nad", "cox", "atp", "trn", "rrn", "D-loop"],
            range=["#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e"],
        ),
    x=gos.X("start:G", domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]), axis="none"),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
).transform_filter_not(
    "strand",
    oneOf=["+"]
).properties(width=500, height=10)

# Gene-name labels. `base` carries no filter, so the feature-kind filter is
# applied here as well as on `track`.
gene_label = base.mark_text(dy=15).encode(
    x=gos.X("start:G", axis="top"),
    xe="end:G",
    text="gene:N",
    size=gos.value(15)
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
).visibility_lt(
    measure="width",
    threshold="|xe-x|",
    transitionPadding=10,
    target="mark",
)

# GC content as bars with a flipped y axis.
# NOTE(review): the colour channel gives no `type` and a three-value domain;
# confirm this validates against the Gosling schema.
track_gc = gos.Track(data_gc).mark_bar().encode(
    x=gos.X("position:G"),
    y=gos.Y("gc_content:Q", domain=(0, 50), flip=True),
    color=gos.Color(
        field="gc_content",
        domain=(10, 20, 30),
        range=["#1E88E5", "#7CB342", "#D81B60"]
    ),
    tooltip=["position:G", "gc_content:Q"],
).properties(width=500, height=10)

# Coverage as a line with a flipped y axis; carries the bottom x axis.
track_coverage = gos.Track(data_coverage).mark_line().encode(
    x=gos.X("position:G", axis="bottom"),
    y=gos.Y("depth:Q", flip=True),
    tooltip=["position:G", "depth:Q"]
).properties(width= 500, height=10)

# Rectangles and labels combined into one circular overlay.
track_gene = gos.overlay(
    track, gene_label
).properties(
    width=500,
    height=10,
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    layout="circular",
)

vis = gos.stack(track_gene, track_gc, track_coverage).properties(
    title="Basic Marks: Rect",
    subtitle="Tutorial Examples",
    layout="circular",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    spacing=0,
    static=True,
    centerRadius=0.3,
)

# Displayed only; this draft is not exported to HTML.
vis.display()
# vis.save("circular_layout.html")

## Linear Alignment

In [None]:
# Linear draft: per-strand gene rectangles coloured by feature kind, a
# sequence overlay, GC content and coverage.
# NOTE(review): this cell saves to the same "linear_layout.html" as the main
# linear cell.

# Forward-strand features (rows whose strand is "-" are excluded).
track_2 = gos.Track(data).mark_rect().encode(
    color=gos.Color("gbkey:N",
            domain=["CDS", "tRNA", "rRNA", "D-loop"],
            range=["#ffff00", "#000080", "#ff4500", "#000000"],
        ),
    # 0-15235 matches the view-level xDomain below (was 0-15000).
    x=gos.X("start:G", domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]), axis="none"),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
).transform_filter_not(
    "strand",
    oneOf=["-"]
).properties(width=1000, height=25)

# Reverse-strand features (rows whose strand is "+" are excluded).
track_reverse_2 = gos.Track(data).mark_rect().encode(
    color=gos.Color("gbkey:N",
            domain=["CDS", "tRNA", "rRNA", "D-loop"],
            range=["#ffff00", "#000080", "#ff4500", "#000000"],
        ),
    x=gos.X("start:G", domain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]), axis="none"),
    xe="end:G",
    stroke=gos.value("#000000"),
    strokeWidth=gos.value(0.5),
).transform_filter(
    "gbkey",
    oneOf=["tRNA", "rRNA", "CDS", "D-loop"]
).transform_filter_not(
    "strand",
    oneOf=["+"]
).properties(width=1000, height=25)

# Sequence bars: one bar per Start..End span, coloured by base.
track_base_2 = gos.Track(data_base).mark_bar().encode(
    x=gos.X("Start:G"),
    xe=gos.X("End:G"),
    y=gos.Y("Count:Q", axis="none"),
    color=gos.Color("Base:N", domain=["A", "T", "G", "C"], legend=True)
).properties(width=1000, height=50)

# Base letters over the bars, subject to a per-mark width rule and a per-track
# zoom-level rule; rows whose Count is 0 are excluded.
track_base_text_2 = gos.Track(data_base).mark_text().encode(
    x=gos.X("Start:G"),
    xe=gos.X("End:G"),
    size=gos.value(24),
    color=gos.value("white"),
    text="Base:N",
).visibility_lt(
    measure="width", threshold="|xe-x|", transitionPadding=30, target="mark"
).visibility_lt(
    measure="zoomLevel", threshold=10, target="track"
).transform_filter_not("Count", oneOf=[0])

# GC content as bars.
# NOTE(review): the colour channel gives no `type` and a three-value domain;
# confirm this validates against the Gosling schema.
track_gc_2 = gos.Track(data_gc).mark_bar().encode(
    x=gos.X("position:G"),
    y=gos.Y("gc_content:Q", domain=(0, 50)),
    color=gos.Color(
        field="gc_content",
        domain=(10, 20, 30),
        range=["#1E88E5", "#7CB342", "#D81B60"]
    ),
    tooltip=["position:G", "gc_content:Q"],
).properties(width=1000, height=50)

# Coverage as a line; carries the bottom x axis.
track_coverage_2 = gos.Track(data_coverage).mark_line().encode(
    x=gos.X("position:G", axis="bottom"),
    y=gos.Y("depth:Q"),
    tooltip=["position:G", "depth:Q"]
).properties(width= 1000, height=50)

# Sequence overlay; also displayed and saved on its own by the following cell.
# NOTE(review): its width (725) differs from the 1000 px of the other tracks
# stacked with it below.
vis_base_text = gos.overlay(track_base_2, track_base_text_2).properties(
    title="Multi-Scale Sequence Plot",
    xDomain=gos.GenomicDomain(interval=[0, 15235]),
    width=725,
    height=100
)

# No `layout` is given, so this stack uses Gosling's default (linear) layout;
# the circular-only `centerRadius` setting was dropped as it has no effect here.
vis_2 = gos.stack(track_2, track_reverse_2, vis_base_text, track_gc_2, track_coverage_2).properties(
    title="Basic Marks: Rect",
    subtitle="Tutorial Examples",
    xDomain=gos.GenomicDomain(chromosome="chr1", interval=[0, 15235]),
    spacing=0,
)

vis_2.display()
vis_2.save("linear_layout.html")

In [None]:
# Show the sequence overlay on its own and export it as standalone HTML.
# Relies on `vis_base_text` from the linear-alignment cell having been run.
vis_base_text.display()
vis_base_text.save("linear_layout_base.html")