In [1]:
import os
from Bio import Entrez
import json

Entrez.email = "VIMVer@univ-amu.fr"

Vazy1_dir:str = "./../../../../testymolo/media/tables_csv/"
Vazy2_dir:str = "./../../../../testymolo/media/data/transfert/"
data_dir:str = "./../../../../testymolo/media/data/"



# Tables

### Methods

In [4]:
### caution when not all rows same nbr of col
def custom_csv_parser_to_list(infilepath:str):
    with open(infilepath ,'r') as handle:

        rows:list = []

        for line in handle.readlines():
            line = line.strip(';') # removing terminal ';' 
            values:list = [] # re-initialize value list
            val:str = "" # current parsing value
            inquote:bool = False # re-initialize in quote : False
            quote_char:str = '' # ' or "
            for c in line:
                if(c == ','): # encounter comma
                    if(not inquote): ## and not in quote
                        val = val.strip("'")
                        val = val.strip('"')
                        if(len(val) > 0):
                            values.append(val)
                        else:
                            values.append(None)
                        val = ""
                        continue
                elif(c == '"' or c == "'"): # encounter quote
                    if(inquote): # already in quote 
                        if(quote_char == c):  # encounter ending quote mark
                            inquote = False
                        else: # encounter the other quote mark 
                            pass
                    else: # encounter starting quote mark
                        inquote = True
                        quote_char = c
                elif((c == ' ' or c == '\n') and not inquote):  # get rid of whitespaces
                    continue
                val += c
            if(len(val)>0):
                val = val.strip("'")
                val = val.strip('"')
                values.append(val)

            if(len(values) > 0):
                rows.append(values)
                #print(values)  
                
        #print("nbr of col", set([len(row) for row in rows]))
        #print("nbr of rows", len(rows))

        return rows
    
def csv_to_json(data:list, table_headers):
    return [ { table_headers[i]:item[i] for i in range(len(item)) } for item in data ]

In [5]:
def fetch_phylogeny(accession):
    
    def get_tax_data(taxid):
        try:
            search = Entrez.efetch(id=taxid, db='taxonomy', retmode='xml')
            return Entrez.read(search)
        except:
            return []

    def extract_TaxId(taxon):
        return taxon['TaxId']

    def extract_Name(taxon):
        return taxon['ScientificName']

    def extract_phylo(taxon):
        return taxon['Lineage']
    
    for result in get_tax_data(accession):
        taxon = {
            'id':extract_TaxId(result),
            'name':extract_Name(result),
            'phylogeny':extract_phylo(result)
            }
        #taxon_json = json.dumps(taxon)
        #print(taxon_json)
    
        return taxon

In [6]:
def parse_table(table_name:str, table_headers:tuple) -> list:
    return csv_to_json(custom_csv_parser_to_list(os.path.join(Vazy1_dir, table_name+'.csv')), table_headers)

### Variables

In [7]:
Vazy2_Organism_headers:tuple = ("id", "name", "abr", "group", "phylogeny", "complete")
Vazy2_Organism_json:list = []

VAZY_2:dict = {}
Vazy2_Organism_json:list = []

In [8]:
VAZY_1:dict = {
    'Organism': parse_table('Organism', ("Tax_id", "Name", "Categorie", "Classe", "Ordre", "Fam", "SsFam", "Genre", "Note_org")),
    'CAZy_DB': parse_table('CAZy_DB', ("DB_ac", "Protein", "DB_nom", "Organism", "abr", "Tax_id", "EC", "_3D_status", "Length", "Sequence", "DB_note", "Created", "Modified", "PP_status", "Lib_sort")),
    'CAZy_GB_GP': parse_table('CAZy_GB_GP', ("DB_ac", "GP_ac", "GB_ac", "GP_gi", "GP_gene", "GP_genomic", "GP_begin", "GP_end", "GP_note")),
    'CAZy_PDB': parse_table('CAZy_PDB', ("DB_ac", "PDB_id", "PDB_chain", "PDB_begin", "PDB_end", "PDB_note", "PDB_bornModo")),
    'CAZy_PP': parse_table('CAZy_PP', ("DB_ac", "PP_ac", "PP_gi", "PP_gene", "PP_begin", "PP_end", "PP_note", "PP_diff")),
    'CAZy_SP': parse_table('CAZy_SP', ("DB_ac", "SP_ac", "SP_id", "SP_gene", "SP_begin", "SP_end", "SP_note")),
    'CAZyModO': parse_table('CAZyModO', ("URL_family", "URL_title", "URL_body", "URL_relative", "URL_note", "Modified")),
    'Cz_EC' : parse_table('Cz_EC', ("DB_ac", "EC")),
    'EC_num': parse_table('EC_num', ("EC", "EC_name", "EC_othername", "EC_reaction", "EC_comment")),
    'ModO_Composition': parse_table('ModO_Composition', ("DB_ac", "Mod_num", "Family", "Subf")),
    'ModO_CrossRefs': parse_table('ModO_CrossRefs', ("Family", "Link", "URL_base", "URL_string", "Ref_note")),
    'ModO_Families': parse_table('ModO_Families', ("Family", "Family_Name", "Family_Activity", "Family_Taxo", "Clan", "Family_note", "Family_Private_note", "Fold", "ModoS_uniqfct", "ModoS_Activity", "ModoS_Description", "ModoS_Fold", "ModoS_note", "ModoS_Private_note", "web_descript", "web_status")),
    'ModO_Limits': parse_table('ModO_Limits', ("DB_ac", "Mod_num", "Mod_begin", "Mod_end") ),
    'Motifs':parse_table('Motifs', ("motifs_ac", "motifs_name", "motifs_activity", "motifs_classification", "motifs_form", "motifs_note")),
    'Prot_Infos':parse_table('Prot_Infos', ("DB_ac", "INFO_indice", "INFO_ac", "INFO_Nature", "INFO", "INFO_pmid")),
    'Prot_MOTIF':parse_table('Prot_MOTIF', ("DB_ac", "MOTIF_indice", "MOTIF_ac", "MOTIF_name", "MOTIF_begin", "MOTIF_end", "MOTIF", "MOTIF_activity", "MOTIF_class", "MOTIF_note", "MOTIF_ref", "MOTIF_pmid")),
    'Prot_MUT': parse_table('Prot_MUT', ("DB_ac", "MUT_indice", "MUT_ac", "MUT", "MUT_note", "MUT_ref", "MUT_pmid")),
    'Prot_REG': parse_table('Prot_REG', ("DB_ac", "REG_indice", "REG_ac", "REG_name", "REG_begin", "REG_end", "REG_note", "REG_ref", "REG_pmid")),
    'Prot_RI': parse_table('Prot_RI',("DB_ac", "RI_indice", "RI_ac", "RI_name", "RI_begin", "RI_end", "RI_no_limit", "RI_note", "RI_ref", "RI_pmid")),
}

print(VAZY_1['Organism'][0])


{'Tax_id': '11020', 'Name': 'Barmah Forest virus', 'Categorie': 'Viruses', 'Classe': 'ssRNA positive-strand viruses, no DNA stage', 'Ordre': 'nd', 'Fam': 'Togaviridae', 'SsFam': 'nd', 'Genre': 'Alphavirus', 'Note_org': 'BFV complex'}


### Resolve

In [19]:
""" Fetch NCBI's taxonomy for each organism in VAZY_1 \\
1m 47.1s
"""
for organism in VAZY_1['Organism']:
    taxon:dict = fetch_phylogeny(organism['Tax_id'])
    if taxon:
        if "Nidovirales" in taxon['phylogeny']:
            if not organism['Tax_id'] in [Vazy2_item['id'] for Vazy2_item in Vazy2_Organism_json]:
                Vazy2_Organism_json.append(taxon)

# Vazy1

### Variables

In [20]:
#import json
Nidovirales_taxid:list = []
Vazy2_json:list = []
with open("./../../../../testymolo/media/data/Organism.temp.json") as handle:
    Vazy2_json:list = json.load(handle)
Nidovirales_taxid = [ item['id'] for item in Vazy2_json]
Nidovirales_taxid.sort()
print(Nidovirales_taxid)

#TaxId:list = [ int(Org['Tax_id']) for Org in VAZY_1['Organism'] ]
#TaxId.sort()
#print(TaxId)

[11120, 11128, 11137, 11138, 11149, 28295, 31631, 227984, 233262, 694007, 694008, 1335626, 2697049]


### Methods

In [21]:

def Associate__ModO_Composition__ModO_Limits(DB_ac:str, Mod_num:str):
    """ Associate ModO_Composition and ModO_Limits """
    for item in VAZY_1["ModO_Composition"]:
        if item['DB_ac'] == DB_ac and item['Mod_num'] == Mod_num:
            if item['Subf'] is not '':
                return item['Family']+'-'+item['Subf']
            return item['Family']
    return 'None'

  if item['Subf'] is not '':


In [22]:

def filter__subseqs__by__DB_ac(DB_ac:str) -> list:
    """ Collect every limits from each protein in CAZy_DB"""
    result:list = []
    for item in VAZY_1["ModO_Limits"]:
        if item['DB_ac'] == DB_ac :
            record:dict = {**item, 'ModO':Associate__ModO_Composition__ModO_Limits(DB_ac, item['Mod_num'])}
            result.append(record)
    return result

In [23]:

def filter__CAZy_DB__by__taxid(taxid:int) -> list:
    """ Collect every item in CAZy_BD from the same organism. """
    result:list=[]
    for item in VAZY_1['CAZy_DB']:
        if str(item['Tax_id']) == str(taxid):
            record:dict = {**item, 'ModO_Limits':filter__subseqs__by__DB_ac(item['DB_ac'])}
            result.append(record)
    #print(result)
    return result

### Resolve

In [24]:
trace:dict = {}
""" Faire la trace de VAZi-1 en fct de Tax_id
"""
for organism in VAZY_1['Organism']:

    taxid:str = organism['Tax_id']

    #print(taxid)
    #print(Nidovirales_taxid)

    for nido_taxid in Nidovirales_taxid:
        if taxid == str(nido_taxid):

            trace[str(taxid)] = { **organism, 'CAZy_DB':filter__CAZy_DB__by__taxid(taxid)}
            print(taxid, len(trace[str(taxid)]))

        #break

    #break


11120 10
11128 10
11137 10
11138 10
11149 10
28295 10
31631 10


In [25]:
""" >> "trace.json" 
"""
with open(os.path.join(Vazy2_dir,'trace.json'), 'w') as outfile:
    outfile.write(json.dumps(trace))

### save

In [32]:
trace:dict = {}
with open(os.path.join(Vazy2_dir,'trace.json')) as file:
    trace = json.load(file)

# Modulo

### Variables

In [26]:
""" ONLY F type module (functional) """

Moduli_f:list = []
# modO family
# modO size

with open(os.path.join(data_dir, "Organism.temp.json")) as handle:
    VAZY_2['Organism'] = json.load(handle)
with open(os.path.join(data_dir, "Protein.temp.json")) as handle:
    VAZY_2['Protein'] = json.load(handle)


### method

In [59]:
def exists__in__VAZ_2(DB_ac:str) -> bool:
    for item in VAZY_2['Protein']:
        if str(item['data_ac']) == DB_ac:
            return True
    return False



### Resolve

In [27]:
for Tax_id, organism in trace.items():
    for protein in organism['CAZy_DB']:
        for limits in protein['ModO_Limits']:
            modO:str = limits['ModO']
            if modO not in Moduli_f:
                if modO.startswith('F'):
                    Moduli_f.append(modO)


In [28]:
print('Moduli_f', len(Moduli_f), sorted(Moduli_f))

Moduli_f 38 ['F184', 'F185-A', 'F185-B', 'F186', 'F187', 'F188', 'F189-A', 'F190-A', 'F191', 'F192-A', 'F193', 'F194', 'F195', 'F196', 'F197', 'F198', 'F199', 'F200', 'F201', 'F202', 'F203', 'F204', 'F205', 'F206', 'F207', 'F208', 'F233', 'F240', 'F245', 'F246', 'F247', 'F248', 'F249', 'F250', 'F251', 'F252', 'F40', 'F41']


### save

In [29]:
""" >> "Moduli_f.txt" 
"""
with open(os.path.join(Vazy2_dir,'Moduli_f.txt'), 'w') as outfile:
    outfile.write(json.dumps(sorted(Moduli_f)))

In [30]:
Moduli_f:list = []
with open(os.path.join(Vazy2_dir,'Moduli_f.txt')) as file:
    Moduli_f = json.load(file)

### resolve

In [60]:
trace_modO:dict = {}
for Tax_id, organism in trace.items():
    for protein in organism['CAZy_DB']:
        for limits in protein['ModO_Limits']:
            modO:str = limits['ModO']
            if modO in Moduli_f:
                if modO not in trace_modO:
                    trace_modO[modO] = []
                existing = exists__in__VAZ_2(limits['DB_ac'])
                trace_modO[modO].append((limits['DB_ac'], limits['Mod_num'], limits['Mod_begin'], limits['Mod_end'], existing))

### save

In [61]:
""" >> "trace_modO.json" 
"""
with open(os.path.join(Vazy2_dir,'trace_modO.json'), 'w') as outfile:
    outfile.write(json.dumps(trace_modO))

# Get profile

### variable

In [1]:
import os
import json
from Bio import SeqIO
from Bio.Align.Applications import MuscleCommandline
from subprocess import Popen

## variables
modO:str = "F189-A"
subseqs:list = []
VAZY_2:dict = {}

## dir path"
data_dir:str = "./../../../../testymolo/media/data/"
Vazy2_dir:str = os.path.join(data_dir, "transfert/")

## read data files
""" >> "trace_modO.json"  """
trace_modO:list = []
with open(os.path.join(Vazy2_dir,'trace_modO.json')) as file:
    trace_modO = json.load(file)
with open(os.path.join(data_dir, "Organism.temp.json")) as handle:
    VAZY_2['Organism'] = json.load(handle)
with open(os.path.join(data_dir, "Protein.temp.json")) as handle:
    VAZY_2['Protein'] = json.load(handle)
with open(os.path.join(data_dir, "Subseq.temp.json")) as handle:
    VAZY_2['subseq'] = json.load(handle)


### method

In [2]:
def get_organism(DB_ac:str) -> str:
    for item in VAZY_2['Protein']:
        if DB_ac == str(item['data_ac']):
            return str(item['organism'])
    return ''

def get_origin(DB_ac:str) -> dict:
    for item in VAZY_2['Protein']:
        if DB_ac == str(item['data_ac']) and not item['derivedFromPP']:
            return {'header':item['header'], 'sequence':item['sequence']}
    return None

def get_fasta(*sseq) -> dict:
    origin = get_origin(sseq[0])
    if origin:
        start:int = int(sseq[2])
        end:int = int(sseq[3])
        header:str = f">{origin['header']}:[{start}-{end}]"
        sequence:str = origin['sequence'][start-1:end-1]
        return {'header':header, 'sequence':sequence} 
    return None

In [8]:
### into fasta
def into_fasta(dataset):
    with open("./infile.fasta",'w') as handle:
        for fasta in subseqs:
            handle.write(fasta['header']+'\n')
            handle.write(fasta['sequence']+'\n')


In [9]:
### align
def align_muscle() -> str:
    filepath = "./infile.fasta"
    out_filepath = "./outfile_muscle.fasta"
    command:str = str(MuscleCommandline(cmd='muscle', input=filepath, out=out_filepath))
    print(command+" -quiet" )  # adding "-quiet" option ... doen't work in v3.8 ?! (but does in v5.)
    Popen(command.split(' '))  # command must be a list of words
    return out_filepath


In [10]:
def parse_alignment(filepath:str) -> list:
    msa:list = []
    with open(filepath) as handle:
        for record in SeqIO.parse(handle, format='fasta'):
            msa.append({'header': record.description, 'sequence':str(record.seq)})
    return msa

### resolve

In [3]:
for sseq in trace_modO[modO]:
    fasta = get_fasta(*sseq)
    if fasta is not None:
        subseqs.append(fasta)
print(*subseqs, sep='\n')

{'header': '>ORF1ab_polyprotein__(@473):[3929-4868]', 'sequence': 'SVAGASDFDKNYLNRVRGSSEARLIPLASGCDPDVVKRAFDVCNKESAGMFQNLKRNCARFQELRDTEDGNLEYLDSYFVVKQTTPSNYEHEKSCYEDLKSEVTADHDFFVFNKNIYNISRQRLTKYTMMDFCYALRHFDPKDCEVLKEILVTYGCIEDYHPKWFEENKDWYDPIENSKYYVMLAKMGPIVRRALLNAIEFGNLMVEKGYVGVITLDNQDLNGKFYDFGDFQKTAPGAGVPVFDTYYSYMMPIIAMTDALAPERYFEYDVHKGYKSYDLLKYDYTEEKQELFQKYFKYWDQEYHPNCRDCSDDRCLIHCANFNILFSTLIPQTSFGNLCRKVFVDGVPFIATCGYHSKELGVIMNQDNTMSFSKMGLSQLMQFVGDPALLVGTSNNLVDLRTSCFSVCALTSGITHQTVKPGHFNKDFYDFAEKAGMFKEGSSIPLKHFFYPQTGNAAINDYDYYRYNRPTMFDICQLLFCLEVTSKYFECYEGGCIPASQVVVNNLDKSAGYPFNKFGKARLYYEMSLEEQDQLFEITKKNVLPTITQMNLKYAISAKNRARTVAGVSILSTMTNRQFHQKILKSIVNTRNASVVIGTTKFYGGWDNMLRNLIQGVEDPILMGWDYPKCDRAMPNLLRIAASLVLARKHTNCCSWSERIYRLYNECAQVLSETVLATGGIYVKPGGTSSGDATTAYANSVFNIIQATSANVARLLSVITRDIVYDNIKSLQYELYQQVYRRVNFDPAFVEKFYSYLCKNFSLMILSDDGVVCYNNTLAKQGLVADISGFREVLYYQNNVFMADSKCWVEPDLEKGPHEFCSQHTMLVEVDGEPKYLPYPDPSRILGACVFVDDVDKTEPVAVMERYIALAIDAYPLVHHENEEYKKVFFVLLAYIRKLYQELSQNMLMDYSFVMDIDKGSKFWEQEFYENMY

In [13]:
into_fasta(subseqs)
aligned_by_muscle = parse_alignment(align_muscle())
print(aligned_by_muscle[0])

muscle -in ./infile.fasta -out ./outfile_muscle.fasta -quiet
{'header': 'ORF1ab_polyprotein__(@451):[3999-4928]', 'sequence': '------SFTVDQSYLFKRVRGSS-AARLEPC-NGTDPDHVSRAFDIYNKDVACIGKFLKTNCSRFR--------NLDKHDAYYIVKRCTKTVMDHEQVCYNDLKDSGAVAEHDFFTYK-EGRCEFGNVARRNLTKYTMMDLCYAIRNFDEKNCEVLKEILVTVGACTEEFFEN-----KDWFDPVENEAIHEVYAKLGPIVANAMLKCVAFCDAIVEKGYIGVITLDNQDLNGNFYDFGDFVKTAPGFGCACVTSYYSYMMPLMGMTSCLESENFVKSDIYGSDYKQYDLLAYDFTEHKEYLFQKYFKYWDRTYHPNCSDCTSDECIIHCANFNTLFSMTIPMTAFGPLVRKVHIDGVPVVVTAGYHFKQLGIVWNLDVKLDTMKLSMTDLLRFVTDPTLLVASSPALLDQRTVCFSIAALSTGITYQTVKPGHFNKDFYDFITERGFFEEGSELTLKHFFFAQGGEAAMTDFNYYRYNRVTVLDICQAQFVYKIVGKYFECYDGGCINAREVVVTNYDKSAGYPLNKFGKARLYYETLSYEEQDALFALTKRNVLPTMTQMNLKYAISGKARARTVGGVSLLSTMTTRQYHQKHLKSIAATRNATVVIGSTKFYGGWDNMLKNLMRDVDNGCLMGWDYPKCDRALPNMIRMASAMILGSKHVGCCTHNDRFYRLSNELAQVLTEVVHCTGGFYFKPGGTTSGDGTTAYANSAFNIFQAVSANVNKLLGVDSNACNNVTVKSIQRKIYDNCYRSSSIDEEFVVEYFSYLRKHFSMMILSDDGVVCYNKDYADLGYVADINAFKATLYYQNNVFMSTSKCWVEPDLSVGPHEFCSQHTLQIVGPDGDYYLPYPDPSRILSAGVFVDDIVKTDNV


MUSCLE v3.8.1551 by Robert C. Edgar

http://www.drive5.com/muscle
This software is donated to the public domain.
Please cite: Edgar, R.C. Nucleic Acids Res 32(5), 1792-97.

infile 6 seqs, lengths min 926, max 939, avg 929
00:00:00    16 MB(-2%)  Iter   1  100.00%  K-mer dist pass 1
00:00:00    16 MB(-2%)  Iter   1  100.00%  K-mer dist pass 2


00:00:00    23 MB(-2%)  Iter   1  100.00%  Align node       
00:00:00    23 MB(-2%)  Iter   1  100.00%  Root alignment
00:00:00    23 MB(-2%)  Iter   2  100.00%  Refine tree   
00:00:00    23 MB(-2%)  Iter   2  100.00%  Root alignment
00:00:00    23 MB(-2%)  Iter   2  100.00%  Root alignment
00:00:00    23 MB(-2%)  Iter   3  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter   4  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter   5  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter   6  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter   7  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter   8  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter   9  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter  10  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter  11  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter  12  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter  13  100.00%  Refine biparts
00:00:00    23 MB(-2%)  Iter  13  100.00%  Refine biparts
00:00:00   

In [9]:
## load Organism
Organisms:list = []
with open(os.path.join(data_dir, "Organism.temp.json")) as handle:
    Organisms:list = json.load(handle)
    
trace:any
with open(os.path.join(Vazy2_dir, "trace_modO.json")) as handle:
    trace = json.load(handle)

In [13]:
DB_ac = [item[0] for item in trace["F189-A"]]
print(DB_ac)

['473', '494', '465', '483', '451', '459', '375']


In [14]:
def Fetch_taxid_from_DBac(param):
    for item in VAZY_1['CAZy_DB']: #['Tax_id']:
        if str(item['DB_ac']) == str(param):
            return item['Tax_id']
    return None    

In [19]:
TaxId = [str(Fetch_taxid_from_DBac(k)) for k in DB_ac]
print(TaxId)

['11120', '11128', '11137', '11138', '11149', '28295', '31631']


In [27]:
Phylogeny:list = []
for org in Organisms:
    if str(org['id']) in TaxId:
        phylogeny:str = org['phylogeny']
        phylogeny = phylogeny.split(';')
        phylogeny = list([phylo.strip() for phylo in phylogeny])
        Phylogeny.append(phylogeny)
print(*Phylogeny, sep='\n')

['Viruses', 'Riboviria', 'Orthornavirae', 'Pisuviricota', 'Pisoniviricetes', 'Nidovirales', 'Cornidovirineae', 'Coronaviridae', 'Orthocoronavirinae', 'Gammacoronavirus', 'Igacovirus', 'Avian coronavirus']
['Viruses', 'Riboviria', 'Orthornavirae', 'Pisuviricota', 'Pisoniviricetes', 'Nidovirales', 'Cornidovirineae', 'Coronaviridae', 'Orthocoronavirinae', 'Betacoronavirus', 'Embecovirus', 'Betacoronavirus 1']
['Viruses', 'Riboviria', 'Orthornavirae', 'Pisuviricota', 'Pisoniviricetes', 'Nidovirales', 'Cornidovirineae', 'Coronaviridae', 'Orthocoronavirinae', 'Alphacoronavirus', 'Duvinacovirus']
['Viruses', 'Riboviria', 'Orthornavirae', 'Pisuviricota', 'Pisoniviricetes', 'Nidovirales', 'Cornidovirineae', 'Coronaviridae', 'Orthocoronavirinae', 'Betacoronavirus', 'Embecovirus', 'Murine coronavirus']
['Viruses', 'Riboviria', 'Orthornavirae', 'Pisuviricota', 'Pisoniviricetes', 'Nidovirales', 'Cornidovirineae', 'Coronaviridae', 'Orthocoronavirinae', 'Alphacoronavirus', 'Tegacovirus', 'Alphacorona

In [28]:
def AllSameStrings(strings:list) -> bool:
    if len(strings) > 1:
        item = strings[0]
        for w in strings[1:]:
            if item != w :
                return False
        return True
    else:
        return False

# LastCommonAncestor
def LastCommonAncestor(phylos:list) -> str:
    N = min([len(k) for k in phylos])
    if N > 0:
        lastCommonAncestor = ""
        for k in range(N):
            if AllSameStrings([phylo[k] for phylo in phylos]):
                lastCommonAncestor = phylos[0][k]
    return lastCommonAncestor

In [29]:
LastCommonAncestor(Phylogeny)

'Orthocoronavirinae'

# else