In [1]:
import cogent3
from cogent3 import get_app
from phylim.apps import phylim
import libs
import paths

In [2]:
# Location of the intron alignments, relative to the project's data path.
folder_in = paths.DATA_APES114 + 'introns/'
# Open that folder in read mode as a data store of members ending in "fa".
in_dstore = cogent3.open_data_store(folder_in, mode="r", suffix="fa")

# Show the record counts (completed / not completed / logs) of the store.
print(in_dstore.describe)

Directory datastore
record type      number
-----------------------
completed            54
not_completed         0
logs                  0
-----------------------


In [3]:
# Per-alignment pipeline: load each member as an aligned DNA data set, apply
# the "omit_degenerates" app, then rename the sequences.
# NOTE(review): no codon alignment or stop-codon removal happens in this cell.
loader = get_app("load_aligned", moltype="dna")
omit_degs = get_app("omit_degenerates", moltype="dna")
rename = libs.renamer_aligned()
app = loader + omit_degs + rename

# Author's note: by using renamer_aligned, sequences with paralogs are thrown
# away. Falsy results are filtered out so only usable alignments are kept.
results_allpos = list(filter(None, app.as_completed(in_dstore[:], parallel=True)))

# Join the retained alignments into a single alignment and label its source.
concat = get_app("concat", moltype="dna")
alpos_alns = concat(results_allpos)
alpos_alns.source = "introns_alignments"
alpos_alns

   0%|          |00:00<?

0,1
,0
Chimpanzee,AGACACGAAACCTCCCGGGTGGCTTACAGACGCTGCCAGCATCGCCGCCGCCAGGTGAGT
Human,.........G..................................................
Gorilla,.........G..................................................


In [4]:
# Fit the "GN" model to the concatenated alignment to obtain its distances.
# Author's note: time_het="max" sets a 36-parameter substitution model.
# NOTE(review): the variable is named "intergenic" but the input here is the
# concatenated intron alignment; the name is kept in case later cells use it.
sm = get_app(
    "model",
    "GN",
    time_het="max",
    optimise_motif_probs=True,
    show_progress=True,
)
result_intergenic = sm(alpos_alns)

# Run the phylim app on the fitted model and collect the lengths as ENS.
checker = get_app("phylim")
checked = checker(result_intergenic)
lengthstree = result_intergenic.lf.get_lengths_as_ens()

# Report identifiability, the fitted likelihood function, and the distances.
print("Is model identifiable? ", checked.is_identifiable, "\n", sep="")
print("Model results : \n", result_intergenic.lf, "\n", sep="")
print("Distances: ", lengthstree, sep="")

   0%|          |00:00<?

   0%|          |00:00<?

Is model identifiable? True

Model results : 
GN
log-likelihood = -729217.0499
number of free parameters = 39
edge          parent    length     A>C     A>G     A>T     C>A     C>G
----------------------------------------------------------------------
Human         root        0.01    1.17    4.56    0.58    0.95    1.40
Gorilla       root        0.01    1.01    4.39    0.70    1.17    1.60
Chimpanzee    root        0.01    0.87    3.52    0.58    0.78    1.58
----------------------------------------------------------------------

continued: 
 C>T     G>A     G>C     G>T     T>A     T>C
--------------------------------------------
6.76    6.45    1.58    1.23    0.53    4.05
6.07    6.03    1.49    1.21    0.60    4.29
5.53    5.47    1.40    0.97    0.50    3.15
--------------------------------------------

   A       C       G       T
----------------------------
0.25    0.25    0.26    0.25
----------------------------

Distances: {'Human': np.float64(0.006103595508793721), 'Gorilla