In [16]:
import cogent3
from cogent3 import get_app
import matplotlib.pyplot as plt
import libs
import paths

In [17]:
# --- Shared cogent3 apps used by the pipelines in the cells below ---

# Loader for aligned DNA sequences.
loader = get_app("load_aligned", moltype="dna")

# CDS-specific steps.
trim_stops = get_app("trim_stop_codons")
omit_degs_cds = get_app(
    "omit_degenerates",
    moltype="dna",
    motif_length=3,  # filter in units of 3 nucleotides (codons)
)
rename_cds = libs.renamer_cds_aligned()
ffold = get_app("take_codon_positions", fourfold_degenerate=True)

# Non-coding (intron) steps. Two identical filter instances are created because
# each one is composed into a different pipeline later on
# (noncds_process and noncds_process_ntaligned).
omit_degs_noncds1 = get_app("omit_degenerates", moltype="dna")
omit_degs_noncds2 = get_app("omit_degenerates", moltype="dna")
rename_noncds = libs.renamer_noncds_aligned()

# Joins the per-file alignments into a single alignment.
concat = get_app("concat", moltype="dna")

# CDS

In [18]:
# Read-only data store over the FASTA files in the CDS folder.
folder_in = paths.DATA_APES114 + "cds/"
in_dstore = cogent3.open_data_store(folder_in, suffix="fa", mode="r")

# Per-file pipeline: load -> trim stop codons -> drop degenerate codons ->
# rename sequences -> keep only fourfold-degenerate codon positions.
ffold_process = loader + trim_stops + omit_degs_cds + rename_cds + ffold

# Run the pipeline over every member of the data store and keep only the
# truthy results, i.e. valid alignments (NotCompleted results are dropped).
results_ffold = [
    res
    for res in ffold_process.as_completed(in_dstore[:], parallel=True)
    if res
]

# Concatenate the per-file alignments and label the result with its origin.
ffold_alns = concat(results_ffold)
ffold_alns.source = "cds"
ffold_alns

   0%|          |00:00<?

0,1
,0
Chimpanzee,TGGAATGAGTGAAACTAATACACACAAGAGATATCCGAATAAATATTACTTCCGATTAAT
Gorilla,.............................A..............................
Human,.............................A..............................


In [19]:
# Human vs Chimpanzee dotplot over positions 20000 up to (not including) 30000
# of the concatenated CDS fourfold-degenerate alignment.
dp_ffold = ffold_alns[20000:30000].dotplot(
    name1="Human",
    name2="Chimpanzee",
    title="ffold",
)
dp_ffold.show()

# CDS codon aligned

In [5]:
# omit_degs_cds and ffold were already composed into ffold_process in the CDS
# cell; detach them so they can be composed into a new pipeline below.
# NOTE(review): presumably cogent3 refuses to re-compose an app that is still
# connected -- confirm for the installed version. After this, ffold_process
# should be treated as no longer usable.
omit_degs_cds.disconnect()
ffold.disconnect()

# Read-only data store over the codon-aligned CDS FASTA files.
folder_in = paths.DATA_APES114 + "cds/codon_aligned/"
in_dstore = cogent3.open_data_store(folder_in, suffix="fa", mode="r")

# Per-file pipeline: load -> drop degenerate codons -> keep only
# fourfold-degenerate codon positions (no stop trimming or renaming here).
ffold_codonalignedprocess = loader + omit_degs_cds + ffold

# Keep only the truthy results, i.e. valid alignments (no NotCompleted).
results_ffold = [
    res
    for res in ffold_codonalignedprocess.as_completed(in_dstore[:], parallel=True)
    if res
]

# Concatenate the per-file alignments and label the result with its origin.
ffold_alns_cnaligned = concat(results_ffold)
ffold_alns_cnaligned.source = "cds_codonaligned"
ffold_alns_cnaligned

   0%|          |00:00<?

0,1
,0
Chimpanzee,CGACCATAAGCGGGCGCCTCGGCAGGTGGCCAGTAGCTGCTACGGCGGCGGGTCGGTGGG
Gorilla,..........................................................A.
Human,..........................................................C.


In [12]:
# Human vs Chimpanzee dotplot over positions 20000 up to (not including) 30000
# of the concatenated codon-aligned fourfold-degenerate alignment.
# Note: this rebinds dp_ffold, replacing the plot object from the CDS cell.
dp_ffold = ffold_alns_cnaligned[20000:30000].dotplot(
    name1="Human",
    name2="Chimpanzee",
    title="ffold codon aligned",
)
dp_ffold.show()

# Introns

In [7]:
# Read-only data store over the intron FASTA files.
folder_in = paths.DATA_APES114 + "introns/"
in_dstore = cogent3.open_data_store(folder_in, suffix="fa", mode="r")

# Per-file pipeline for non-coding data: load -> drop degenerate positions ->
# rename sequences. All remaining intron positions are kept.
noncds_process = loader + omit_degs_noncds1 + rename_noncds

# Keep only the truthy results, i.e. valid alignments (no NotCompleted).
results_allpos = [
    res
    for res in noncds_process.as_completed(in_dstore[:], parallel=True)
    if res
]

# Concatenate the per-file alignments and label the result with its origin.
allpos_introns = concat(results_allpos)
allpos_introns.source = "introns"

allpos_introns

   0%|          |00:00<?

0,1
,0
Chimpanzee,AGACACGAAACCTCCCGGGTGGCTTACAGACGCTGCCAGCATCGCCGCCGCCAGGTGAGT
Gorilla,.........G..................................................
Human,.........G..................................................


In [20]:
# Human vs Chimpanzee dotplot over positions 20000 up to (not including) 30000
# of the concatenated intron alignment.
dp_allpos = allpos_introns[20000:30000].dotplot(
    name1="Human",
    name2="Chimpanzee",
    title="intron",
)
dp_allpos.show()

# Introns nucleotide aligned

In [10]:
# Read-only data store over the nucleotide-aligned intron FASTA files.
folder_in = paths.DATA_APES114 + "introns/nt_aligned/"
in_dstore = cogent3.open_data_store(folder_in, suffix="fa", mode="r")

# Per-file pipeline for the nt-aligned files: load -> drop degenerate
# positions (no renaming step here). Uses the second filter instance because
# omit_degs_noncds1 is already part of noncds_process.
noncds_process_ntaligned = loader + omit_degs_noncds2

# Keep only the truthy results, i.e. valid alignments (no NotCompleted).
results_allpos = [
    res
    for res in noncds_process_ntaligned.as_completed(in_dstore[:], parallel=True)
    if res
]

# Concatenate the per-file alignments and label the result with its origin.
allpos_introns_ntaligned = concat(results_allpos)
allpos_introns_ntaligned.source = "introns_ntaligned"
allpos_introns_ntaligned

   0%|          |00:00<?

0,1
,0
Chimpanzee,CTTACCTAAGAAGAAAACAGCTTTAAATAAATTTTGAAAAATCTGGGCCCTCCCTATCAC
Gorilla,............................................................
Human,............................................................


In [21]:
# Human vs Chimpanzee dotplot over positions 20000 up to (not including) 30000
# of the concatenated nucleotide-aligned intron alignment.
dp_allpos = allpos_introns_ntaligned[20000:30000].dotplot(
    name1="Human",
    name2="Chimpanzee",
    title="intron nucleotide aligned",
)
dp_allpos.show()

# Introns AR

In [23]:
# Read-only data store over the intronsAR FASTA files.
folder_in = paths.DATA_APES114 + "intronsAR/"
in_dstore = cogent3.open_data_store(folder_in, suffix="fa", mode="r")

# Reuses noncds_process (load -> drop degenerate positions -> rename) that was
# composed in the "Introns" cell, so that cell must have been run first.
# Keep only the truthy results, i.e. valid alignments (no NotCompleted).
results_allpos = [
    res
    for res in noncds_process.as_completed(in_dstore[:], parallel=True)
    if res
]

# Concatenate the per-file alignments and label the result with its origin.
allpos_intronsAR = concat(results_allpos)
allpos_intronsAR.source = "intronsAR"
allpos_intronsAR

   0%|          |00:00<?

0,1
,0
Chimpanzee,TTTGTTGTTGGTTTTGAAACAGTCTCGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCACGA
Gorilla,.................................................T..........
Human,............................................................


In [25]:
# Human vs Chimpanzee dotplot over positions 20000 up to (not including) 30000
# of the concatenated intronsAR alignment.
# Title corrected: it had been copy-pasted as "intron nucleotide aligned",
# which mislabelled this figure (the data here is intronsAR, not nt-aligned).
dp_allpos = allpos_intronsAR[20000:30000].dotplot(
    name1="Human",
    name2="Chimpanzee",
    title="intron AR",
)
dp_allpos.show()

# Introns AR nucleotide aligned

In [28]:
# Read-only data store over the nucleotide-aligned intronsAR FASTA files.
folder_in = paths.DATA_APES114 + "intronsAR/nt_aligned/"
in_dstore = cogent3.open_data_store(folder_in, suffix="fa", mode="r")

# Reuses noncds_process_ntaligned (load -> drop degenerate positions) that was
# composed in the "Introns nucleotide aligned" cell, so that cell must have
# been run first.
# Keep only the truthy results, i.e. valid alignments (no NotCompleted).
results_allpos = [
    res
    for res in noncds_process_ntaligned.as_completed(in_dstore[:], parallel=True)
    if res
]

# Concatenate the per-file alignments and label the result with its origin.
# Label corrected: it had been copy-pasted as "introns_ntaligned", which made
# this alignment indistinguishable by `source` from allpos_introns_ntaligned.
allpos_intronsAR_ntaligned = concat(results_allpos)
allpos_intronsAR_ntaligned.source = "intronsAR_ntaligned"
allpos_intronsAR_ntaligned

   0%|          |00:00<?

0,1
,0
Chimpanzee,CAAAACCTCTTTCCACTCTCCAGCCCTGTGGCCTCGGAAGGGCTACTCCATCTCTCTGAA
Gorilla,............................................................
Human,...........................................C................


In [29]:
# Human vs Chimpanzee dotplot over positions 20000 up to (not including) 30000
# of the concatenated nucleotide-aligned intronsAR alignment.
# Title corrected: it had been copy-pasted as "intron nucleotide aligned",
# the same title as the plain-intron nt-aligned figure, so the two plots
# could not be told apart.
dp_allpos = allpos_intronsAR_ntaligned[20000:30000].dotplot(
    name1="Human",
    name2="Chimpanzee",
    title="intron AR nucleotide aligned",
)
dp_allpos.show()