In [1]:
library(circlize)
library(rtracklayer)
library(dplyr)
library(GenomicRanges)
library(ComplexHeatmap) ## legend
library(stringr)

circlize version 0.4.16
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinformatics 2014.

This message can be suppressed by:
  suppressPackageStartupMessages(library(circlize))


Loading required package: GenomicRanges

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, r

## Read the hap2 gene, TE, and tRNA GFF3 files

In [2]:
# Load the hap2 annotations and turn them into six-column, BED-like tables
# (chrom, chromStart, chromEnd, strand, score, name).
# Shell pre-processing noted by the author for the input files:
#   grep -v "^#" <annotation>.gff > tmp     (keep feature rows only)
#   sed -i '' 's/_RagTag//g' *.gff          (strip the "_RagTag" suffix)
gene_gff3 <- import("hap2_gemoma_final_annotation.gff")
te_gff3 <- import("hap2.filteredRepeats.gff")
trna_gff3 <- import("hap2tRNA.gff3")

# Keep gene features, every repeat type except "Simple_repeat", and tRNAs.
genes <- gene_gff3[gene_gff3$type == "gene"]
tes <- te_gff3[te_gff3$type != "Simple_repeat"]
trna <- trna_gff3[trna_gff3$type == "tRNA"]

# Coordinates and strand are copied as-is (no 0-based BED conversion is
# applied); score and name are filled with the "." placeholder.
gene_bed0 <- as.data.frame(genes) %>%
  select(chrom = seqnames, chromStart = start, chromEnd = end, strand) %>%
  mutate(score = ".", name = ".")
te_bed0 <- as.data.frame(tes) %>%
  select(chrom = seqnames, chromStart = start, chromEnd = end, strand) %>%
  mutate(score = ".", name = ".")
trna_bed0 <- as.data.frame(trna) %>%
  select(chrom = seqnames, chromStart = start, chromEnd = end, strand) %>%
  mutate(score = ".", name = ".")

# ncRNA annotation table (file has no header; columns are named here).
ncRNA_bed0 <- read.table("hap2inferal.bed", header = FALSE, stringsAsFactors = FALSE)
names(ncRNA_bed0) <- c("chrom", "chromStart", "chromEnd", "strand", "score", "name")

# Drop everything located on the unplaced pseudo-chromosome "chrUn".
hap2gene_bed <- filter(gene_bed0, chrom != "chrUn")
hap2te_bed <- filter(te_bed0, chrom != "chrUn")
hap2trna_bed <- filter(trna_bed0, chrom != "chrUn")

# ncRNA subsets: all records, names containing "sno", names containing "MIR".
hap2ncRNA_bed <- filter(ncRNA_bed0, chrom != "chrUn")
hap2sno_bed <- filter(ncRNA_bed0, chrom != "chrUn", str_detect(name, "sno"))
hap2MIR_bed <- filter(ncRNA_bed0, chrom != "chrUn", str_detect(name, "MIR"))

# Tag chromosome names with the haplotype suffix ".2".
hap2gene_bed[["chrom"]] <- paste0(hap2gene_bed[["chrom"]], ".2")
hap2te_bed[["chrom"]] <- paste0(hap2te_bed[["chrom"]], ".2")
hap2trna_bed[["chrom"]] <- paste0(hap2trna_bed[["chrom"]], ".2")
hap2sno_bed[["chrom"]] <- paste0(hap2sno_bed[["chrom"]], ".2")
hap2MIR_bed[["chrom"]] <- paste0(hap2MIR_bed[["chrom"]], ".2")


## Read the hap1 gene, TE, and tRNA GFF3 files

In [3]:
# Load the hap1 annotations and turn them into six-column, BED-like tables
# (chrom, chromStart, chromEnd, strand, score, name).
# Shell pre-processing noted by the author for the input files:
#   grep -v "^#" hap1_gemoma_final_annotation.gff > tmp   (feature rows only)
#   sed -i '' 's/_RagTag//g' *.gff                        (strip "_RagTag")
gene_gff3 <- import("hap1_gemoma_final_annotation.gff")
te_gff3 <- import("hap1.filteredRepeats.gff")
trna_gff3 <- import("hap1tRNA.gff3")

# Keep gene features, every repeat type except "Simple_repeat", and tRNAs.
genes <- gene_gff3[gene_gff3$type == "gene"]
tes <- te_gff3[te_gff3$type != "Simple_repeat"]
trna <- trna_gff3[trna_gff3$type == "tRNA"]

# Coordinates and strand are copied as-is (no 0-based BED conversion is
# applied); score and name are filled with the "." placeholder.
gene_bed0 <- as.data.frame(genes) %>%
  select(chrom = seqnames, chromStart = start, chromEnd = end, strand) %>%
  mutate(score = ".", name = ".")
te_bed0 <- as.data.frame(tes) %>%
  select(chrom = seqnames, chromStart = start, chromEnd = end, strand) %>%
  mutate(score = ".", name = ".")
trna_bed0 <- as.data.frame(trna) %>%
  select(chrom = seqnames, chromStart = start, chromEnd = end, strand) %>%
  mutate(score = ".", name = ".")

# ncRNA annotation table (file has no header; columns are named here).
ncRNA_bed0 <- read.table("hap1inferal.bed", header = FALSE, stringsAsFactors = FALSE)
names(ncRNA_bed0) <- c("chrom", "chromStart", "chromEnd", "strand", "score", "name")

# Drop everything located on the unplaced pseudo-chromosome "chrUn".
hap1gene_bed <- filter(gene_bed0, chrom != "chrUn")
hap1te_bed <- filter(te_bed0, chrom != "chrUn")
hap1trna_bed <- filter(trna_bed0, chrom != "chrUn")

# ncRNA subsets: all records, names containing "sno", names containing "MIR".
# `hap1ncRNA_bed` mirrors the `hap2ncRNA_bed` naming used for hap2.
hap1ncRNA_bed <- filter(ncRNA_bed0, chrom != "chrUn")
# Alias kept for the inspection cells further down that use `ncRNA_bed`;
# note that it contains hap1 records only.
ncRNA_bed <- hap1ncRNA_bed
hap1sno_bed <- filter(ncRNA_bed0, chrom != "chrUn", str_detect(name, "sno"))
hap1MIR_bed <- filter(ncRNA_bed0, chrom != "chrUn", str_detect(name, "MIR"))

# Tag chromosome names with the haplotype suffix ".1".
hap1gene_bed[["chrom"]] <- paste0(hap1gene_bed[["chrom"]], ".1")
hap1te_bed[["chrom"]] <- paste0(hap1te_bed[["chrom"]], ".1")
hap1trna_bed[["chrom"]] <- paste0(hap1trna_bed[["chrom"]], ".1")
hap1sno_bed[["chrom"]] <- paste0(hap1sno_bed[["chrom"]], ".1")
hap1MIR_bed[["chrom"]] <- paste0(hap1MIR_bed[["chrom"]], ".1")


## Merge the hap1 and hap2 BED tables

In [4]:
# Stack the hap1 rows on top of the hap2 rows for every feature class.
gene_bed <- do.call(rbind, list(hap1gene_bed, hap2gene_bed))
te_bed <- do.call(rbind, list(hap1te_bed, hap2te_bed))
trna_bed <- do.call(rbind, list(hap1trna_bed, hap2trna_bed))
sno_bed <- do.call(rbind, list(hap1sno_bed, hap2sno_bed))
MIR_bed <- do.call(rbind, list(hap1MIR_bed, hap2MIR_bed))

## Sort the BED tables by chromosome name

In [5]:
# Reorder the rows of each merged table by its `chrom` column
# (order() on the chromosome names, so e.g. chr1.1 precedes chr1.2).
gene_bed_sorted <- gene_bed[order(gene_bed[["chrom"]]), ]
te_bed_sorted <- te_bed[order(te_bed[["chrom"]]), ]
trna_bed_sorted <- trna_bed[order(trna_bed[["chrom"]]), ]
sno_bed_sorted <- sno_bed[order(sno_bed[["chrom"]]), ]
MIR_bed_sorted <- MIR_bed[order(MIR_bed[["chrom"]]), ]

In [6]:
# Build the ideogram (chromosome extent) table used to initialise the plot.
# The input files were produced outside R, e.g.:
## bioawk -c fastx '{print $name"\t"1"\t"length($seq)}' hap1.fasta|sed "s/_RagTag//" > hap1.ideogram
custom_cytoband1 <- setNames(
  read.table("hap1.ideogram", header = FALSE, stringsAsFactors = FALSE),
  c("chrom", "start", "end")
)
custom_cytoband2 <- setNames(
  read.table("hap2.ideogram", header = FALSE, stringsAsFactors = FALSE),
  c("chrom", "start", "end")
)

# Tag chromosome names with their haplotype suffix.
custom_cytoband1[["chrom"]] <- paste0(custom_cytoband1[["chrom"]], ".1")
custom_cytoband2[["chrom"]] <- paste0(custom_cytoband2[["chrom"]], ".2")

# hap1 rows first, then hap2 rows.
custom_cytoband <- rbind(custom_cytoband1, custom_cytoband2)

# Same table ordered by chromosome name, so the two haplotypes of each
# chromosome sit next to each other; display it.
custom_cytoband_sorted <- custom_cytoband[order(custom_cytoband[["chrom"]]), ]
custom_cytoband_sorted

Unnamed: 0_level_0,chrom,start,end
Unnamed: 0_level_1,<chr>,<int>,<int>
1,chr1.1,1,29230096
10,chr1.2,1,30055869
2,chr2.1,1,32432764
11,chr2.2,1,34263903
3,chr3.1,1,39685618
12,chr3.2,1,35932244
4,chr4.1,1,29536771
13,chr4.2,1,35105441
5,chr5.1,1,50348529
14,chr5.2,1,49492547


## read synteny blocks

In [7]:
# Synteny blocks are stored as paired tables: row i of *bed1 and row i of
# *bed2 are passed together to circos.genomicLink() in the plotting cells.
block_cols <- c("chrom", "start", "end")

# hap2 blocks (files already free of chrUn, judging by their names).
hap2bed1 <- setNames(
  read.table("nochrUn_hap2block_bed1", header = FALSE, stringsAsFactors = FALSE),
  block_cols
)
hap2bed2 <- setNames(
  read.table("nochrUn_hap2block_bed2", header = FALSE, stringsAsFactors = FALSE),
  block_cols
)
hap2bed1[["chrom"]] <- paste0(hap2bed1[["chrom"]], ".2")
hap2bed2[["chrom"]] <- paste0(hap2bed2[["chrom"]], ".2")

# hap1 blocks.
hap1bed1 <- setNames(
  read.table("hap1_block_bed1", header = FALSE, stringsAsFactors = FALSE),
  block_cols
)
hap1bed2 <- setNames(
  read.table("hap1_block_bed2", header = FALSE, stringsAsFactors = FALSE),
  block_cols
)
hap1bed1[["chrom"]] <- paste0(hap1bed1[["chrom"]], ".1")
hap1bed2[["chrom"]] <- paste0(hap1bed2[["chrom"]], ".1")

# Stack hap1 on top of hap2, keeping the two sides row-aligned.
bed1 <- rbind(hap1bed1, hap2bed1)
bed2 <- rbind(hap1bed2, hap2bed2)


## Generate colours for the synteny links; the two haplotypes of a chromosome (e.g. chr1.1 and chr1.2) share the same colour

In [8]:
# Assign one colour per chromosome, shared between the two haplotypes, so
# that links starting on e.g. chr1.1 and chr1.2 are drawn in the same colour.
#
# Colours are keyed by the chromosome name with the ".1"/".2" haplotype
# suffix removed. Pairing therefore does not depend on both block files
# listing their chromosomes in the same order (or in the same number), which
# the previous position-based pairing silently required.
strip_hap_suffix <- function(x) sub("\\.[12]$", "", x)

# Step 1: chromosome names per haplotype, in order of first appearance
unique_names1 <- unique(hap1bed1$chrom)
unique_names2 <- unique(hap2bed1$chrom)

# One rainbow colour per base chromosome: hap1 order first, followed by any
# chromosome that only occurs in hap2.
base_names <- unique(strip_hap_suffix(c(unique_names1, unique_names2)))
base_colors <- setNames(rainbow(length(base_names)), base_names)

colors1 <- unname(base_colors[strip_hap_suffix(unique_names1)])
colors2 <- unname(base_colors[strip_hap_suffix(unique_names2)])

# Dictionaries mapping the suffixed chromosome names to colours
name_color_dict1 <- setNames(colors1, unique_names1)
name_color_dict2 <- setNames(colors2, unique_names2)
name_color_dict <- c(name_color_dict1, name_color_dict2)

# Step 2: colour every link by the chromosome of its bed1 side
bed1$color <- name_color_dict[bed1$chrom]
color_list <- bed1$color  # one colour per row of bed1/bed2
head(bed1)

Unnamed: 0_level_0,chrom,start,end,color
Unnamed: 0_level_1,<chr>,<int>,<int>,<chr>
1,chr1.1,16366076,16823477,#FF0000
2,chr1.1,8568169,9235979,#FF0000
3,chr1.1,10914359,10937523,#FF0000
4,chr1.1,23423181,25355157,#FF0000
5,chr1.1,6942371,7606901,#FF0000
6,chr1.1,4539909,4726481,#FF0000


In [None]:
# Circos figure built from the chromosome-sorted ideogram table
# (sort.chr is left at its default). Written to merged_circos_sort_chrom.png.
#
# The plotting code runs inside tryCatch(..., finally = ...) so that the PNG
# device is closed and the circos state is reset even when a track fails;
# otherwise a failed run leaves the device open and later plots go to it.
png("merged_circos_sort_chrom.png", width = 1000, height = 1000)
tryCatch({
  # Discard any state left behind by an earlier (possibly failed) circos plot;
  # circos.par() must be set before the layout is initialised.
  circos.clear()
  circos.par(start.degree = 90, gap.after = 0.5, track.margin = c(0, 0))

  # Initialise sectors from the custom ideogram without drawing default tracks
  circos.initializeWithIdeogram(custom_cytoband_sorted, plotType = NULL)

  # Outer track: axis with a tick every 5 Mb (labels in Mb) and sector name
  circos.track(ylim = c(0, 1), track.height = 0.05, panel.fun = function(x, y) {
    breaks <- seq(0, 1e9, by = 5e6)
    circos.genomicAxis(major.at = breaks, labels = paste0(breaks / 1e6, ""),
                       labels.cex = 1)
    circos.text(CELL_META$xcenter, CELL_META$ycenter, CELL_META$sector.index,
                cex = 1.5)
  })

  # Gene density
  circos.genomicDensity(gene_bed_sorted, col = "blue", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # TE density
  circos.genomicDensity(te_bed_sorted, col = "red", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # tRNA density
  circos.genomicDensity(trna_bed_sorted, col = "yellowgreen", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # rRNA density (disabled; no rRNA_bed table is built in this notebook)
  # circos.genomicDensity(rRNA_bed, col = "yellow2", track.height = 0.1, window.size = 1e6,
  #                      bg.col = "whitesmoke", bg.lwd = 0.5)

  # snoRNA density (ncRNA records whose name contains "sno")
  circos.genomicDensity(sno_bed_sorted, col = "tan2", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # miRNA density (ncRNA records whose name contains "MIR")
  circos.genomicDensity(MIR_bed_sorted, col = "darkorange", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # Synteny links: row i of bed1 is linked to row i of bed2
  circos.genomicLink(bed1, bed2, col = color_list, border = NA)

  # text(0, 0, "Hap2", cex = 2)
}, finally = {
  circos.clear()
  # Close the PNG device
  dev.off()
})

In [16]:
# Circos figure with sectors in the row order of `custom_cytoband`
# (sort.chr = FALSE). Written to merged_circos_nosort_chrom.png.
#
# The plotting code runs inside tryCatch(..., finally = ...) so that the PNG
# device is closed and the circos state is reset even when a track fails;
# otherwise a failed run leaves the device open and later plots go to it.
png("merged_circos_nosort_chrom.png", width = 1000, height = 1000)
tryCatch({
  # Discard any state left behind by an earlier (possibly failed) circos plot;
  # circos.par() must be set before the layout is initialised.
  circos.clear()
  circos.par(start.degree = 90, gap.after = 0.5, track.margin = c(0, 0))

  # Initialise sectors from the custom ideogram without drawing default tracks
  circos.initializeWithIdeogram(custom_cytoband, plotType = NULL, sort.chr = FALSE)

  # Outer track: axis with a tick every 5 Mb (labels in Mb) and sector name
  circos.track(ylim = c(0, 1), track.height = 0.05, panel.fun = function(x, y) {
    breaks <- seq(0, 1e9, by = 5e6)
    circos.genomicAxis(major.at = breaks, labels = paste0(breaks / 1e6, ""),
                       labels.cex = 1)
    circos.text(CELL_META$xcenter, CELL_META$ycenter, CELL_META$sector.index,
                cex = 1.5)
  })

  # Gene density
  circos.genomicDensity(gene_bed_sorted, col = "blue", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # TE density
  circos.genomicDensity(te_bed_sorted, col = "red", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # tRNA density
  circos.genomicDensity(trna_bed_sorted, col = "yellowgreen", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # rRNA density (disabled; no rRNA_bed table is built in this notebook)
  # circos.genomicDensity(rRNA_bed, col = "yellow2", track.height = 0.1, window.size = 1e6,
  #                      bg.col = "whitesmoke", bg.lwd = 0.5)

  # snoRNA density (ncRNA records whose name contains "sno")
  circos.genomicDensity(sno_bed_sorted, col = "tan2", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 0.3)

  # miRNA density (ncRNA records whose name contains "MIR")
  # NOTE(review): bg.lwd is 1 here but 0.3 on every other track and in the
  # sorted figure - confirm whether the thicker border is intentional.
  circos.genomicDensity(MIR_bed_sorted, col = "darkorange", track.height = 0.1,
                        window.size = 1e6, bg.lwd = 1)

  # Synteny links: row i of bed1 is linked to row i of bed2
  circos.genomicLink(bed1, bed2, col = color_list, border = NA)

  # text(0, 0, "Hap2", cex = 2)
}, finally = {
  circos.clear()
  # Close the PNG device
  dev.off()
})

In [11]:
# Interactive lookup: help page for circos.genomicAxis
?circos.genomicAxis()

In [35]:
# Interactive lookup: help page for circos.initializeWithIdeogram
?circos.initializeWithIdeogram()

In [27]:
# Show the circos.par option table (printed below)
circos.par

 Option                  Value                
 -----------------------:-----------------------
 start.degree            90                   
 gap.degree              1                    
 gap.after               1                    
 track.margin            0, 0.0126088028123338
 unit.circle.segments    500                  
 cell.padding            0.02, 0, 0.02, 0     
 track.height            0.2                  
 circle.margin           0, 0, 0, 0           
 canvas.xlim             -1, 1                
 canvas.ylim             -1, 1                
 major.by.degree         10                   
 clock.wise              TRUE                 
 xaxis.clock.wise        TRUE                 
 message                 TRUE                 
 help                    TRUE                 

In [42]:
# Interactive lookup: help page for circos.trackPlotRegion
?circos.trackPlotRegion()

In [34]:
# Interactive lookup: help page for circos.track
?circos.track()

In [43]:
# Query the current value of the base-graphics "lwd" parameter
par("lwd")

In [12]:
# Sanity check: rows x columns of the merged (hap1 + hap2) gene table
dim(gene_bed)

In [13]:
# Rows x columns of the merged (hap1 + hap2) TE table
dim(te_bed)

In [14]:
# Rows x columns of the merged (hap1 + hap2) tRNA table
dim(trna_bed)

In [15]:
# Rows x columns of ncRNA_bed; this table is built in the hap1 cell only,
# so it covers hap1 ncRNA records, not both haplotypes
dim(ncRNA_bed)

In [16]:
# Count how many records carry each ncRNA name.
# ncRNA_bed is built in the hap1 cell only, so these are hap1 counts.
name_counts <- table(ncRNA_bed$name)
print(name_counts)


        5_8S_rRNA       5_ureB_sRNA           5S_rRNA            enod40 
               72                 1               390                 1 
         Histone3        Intron_gpI       Intron_gpII              IsrR 
                9                 4                18                12 
LSU_rRNA_bacteria  LSU_rRNA_eukarya           mir-156           mir-160 
               14                79                 3                 4 
          mir-166           mir-172           mir-283           mir-393 
               11                 7                 2                 3 
          mir-395           mir-399            MIR159          MIR162_2 
                5                 5                 9                 2 
           MIR168          MIR169_2          MIR169_5          MIR171_1 
                1                27                23                15 
         MIR171_2           MIR2275            MIR319            MIR390 
                1                 5               