In [244]:
import pandas as pd
import numpy as np
import Levenshtein
class SeqInfo(object):
    """Holds information regarding the sequence.
    
    """
    def __init__(self, seq, 
                 protein,
                 accession,
                 name=None,
                 subtype=None,
                 host=None, 
                 date=None, 
                 erisk=None,
                 irisk=None,
                 risk_flag=None,
                 country=None):
        self.name = name
        self.protein=protein
        self.subtype=subtype        
        self.seq = seq
        self.accession = accession 
        self.host = host
        self.date = date
        self.erisk = erisk
        self.irisk = irisk
        self.risk_flag = risk_flag
        self.country = country
        
class MultipleSeqInfo(object):
    """Holds information regarding the sequences in the records.
    
    Args:
        records (list): list of records parsed from NCBI
        cov19_accessions (list): of accessions corresponding to cov19
    """
    def __init__(self,
                 dataframe,
                 accessionname,
                 proteinname,
                 risk_threshold=6.25):
        
        self.seq_infos = {}
        self.risk_threshold = risk_threshold
        for i in np.arange(dataframe.index.size):
            record=dataframe.iloc[i,:]
            seqinfo = SeqInfo(
                name=record.id,
                seq=record[proteinname], 
                protein=proteinname,
                accession=record[accessionname],
                subtype=record.subtype,
                erisk=record.predicted_emergence_score,
                irisk=record.predicted_impact_score,
                risk_flag = record.predicted_emergence_score > self.risk_threshold,
                host=None,
                date=None,
                country=None)
            #print(record.predicted_emergence_score > self.risk_threshold)
            self.seq_infos[seqinfo.accession] = seqinfo
        
    
    def compute_L_diatance_matrix(self):
        highriskseq = pd.DataFrame.from_dict({key:val.seq 
                                              for (key,val) in self.seq_infos.items() 
                                              if val.risk_flag},orient='index',columns=['seq'])
        num=highriskseq.index.size
        d=np.zeros([num,num])
        for i in np.arange(num):
            for j in np.arange(num):
                if i > j:
                    d[i,j] = Levenshtein.distance(highriskseq.seq.values[i],
                                                  highriskseq.seq.values[j])
        ds=pd.DataFrame(d)        
        ds=(ds+ds.transpose())
        ds.columns=highriskseq.index.values
        self.highriskdistancematrix=ds.copy()
        return ds
        
    
    def accessions_to_subtype(self, accessions):
        """Create a dictionary mapping the accession to the host.
        """
        
        subtypes = []
        
        for accession in accessions:
            seqinfo = self.seq_infos[accession]
            subtypes.append(seqinfo.subtype)
            
        return subtypes

    def accessions_to_host(self, accessions):
        """Create a dictionary mapping the accession to the host.
        """
        
        hosts = []
        
        for accession in accessions:
            seqinfo = self.seq_infos[accession]
            hosts.append(seqinfo.host)
            
        return hosts
    

In [245]:
N=10000
df=pd.read_csv('./combined_results.csv',index_col=0)
df1=df[['subtype','predicted_impact_score', 'predicted_emergence_score', 'ha', 'na']]
df1=df1.sort_values('predicted_emergence_score',ascending=False).head(N)
df1=df1[df1.predicted_emergence_score>6.2]
df1.subtype.value_counts()

H1N1    190
H3N2     94
H7N9      1
Name: subtype, dtype: int64

In [312]:
ALLinfoHA=MultipleSeqInfo(df.reset_index(),'ha_accession','ha',risk_threshold=6.27)

In [313]:
ds=ALLinfoHA.compute_L_diatance_matrix()

In [314]:
from sklearn.cluster import DBSCAN
cls = DBSCAN(metric='precomputed').fit(ds)
cls.labels_
pd.set_option('display.max_rows', None)
df1.assign(cls=cls.labels_)[['subtype','cls']]

ValueError: Length of values (80) does not match length of index (285)

In [315]:
dss=(1/ds).replace(np.inf,0)

In [316]:
from sklearn.cluster import SpectralClustering
cls = SpectralClustering(n_clusters=10,affinity='precomputed').fit(dss)
pd.set_option('display.max_rows', None)
df1.assign(cls=cls.labels_)[['subtype','cls']]

ValueError: Length of values (80) does not match length of index (285)

In [317]:
def write_fasta(seqs, fasta_file, wrap=80):
    """Write sequences to a fasta file.

    Parameters
    ----------
    seqs : dict[seq_id] -> seq
        Sequences indexed by sequence id.
    fasta_file : str
        Path to write the sequences to.
    wrap: int
        Number of AA/NT before the line is wrapped.
    """
    with open(fasta_file, 'w') as f:
        for gid, gseq in seqs.items():
            f.write('>{}\n'.format(gid))
            for i in range(0, len(gseq), wrap):
                f.write('{}\n'.format(gseq[i:i + wrap])) 

In [318]:
jsn=df1.ha.to_json()
import json    # or `import simplejson as json` if on Python < 2.6
obj = json.loads(jsn) 

In [319]:
write_fasta(obj,'ha.fasta')

In [320]:
from Bio.Phylo import TreeConstruction
from Bio import Phylo
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio.Align import MultipleSeqAlignment
from Bio import Entrez
from Bio import SeqIO
#from Bio.Alphabet import generic_dna

In [321]:
def load_dm(file_, upper_diag=True):
    """Load the distance matrix. 
    
    Also, do some preprocessing. 
    """
    
    df = pd.read_csv(file_)
    #df.set_index('Unnamed: 0', inplace=True)
    #assert np.all(df.columns == df.index)
    
    # drop duplicate columns after reading csv
    #df = df.loc[:, ~df.columns.str.replace("(\.\d+)$", "").duplicated()]
    
    if upper_diag:
        df = df + df.T
    return df

def save_tree(tree, file_name, save_type='xml'):
    """Saved the created phylogenetic tree."""
    
    if save_type == 'pickle':
        graph = Phylo.to_networkx(tree)
        save_pickled(graph, file_name)
    elif save_type == 'xml':
        Phylo.write(tree, file_name, 'phyloxml')
    else:
        raise ValueError('Not a correct save type.')
        
def pandas_dm_to_biopython_dm(dm):
    """Convert the pandas distance matrix to the biopython distance matrix.
    
    Returns:
        biopython distance matrix
    """
    
    accessions = dm.columns
    bio_dm = []
    for i, accession in enumerate(accessions):
        bio_dm.append(list(dm.iloc[i, :i+1].values))
        
    bio_dm = TreeConstruction._DistanceMatrix(
        list(dm.columns), 
        bio_dm)
    
    return bio_dm

def distance_matrix_to_phylo_tree(dm, outfile=None):
    """Create a phylogenetic tree from the distance matrix."""
    
    dm = pandas_dm_to_biopython_dm(dm)
    
    treeConstructor = TreeConstruction.DistanceTreeConstructor()
    tree = treeConstructor.nj(dm)
    
    if outfile is not None:
        save_tree(tree, outfile)

In [322]:
#ds.columns=[x.replace('/','_') for x in df1.index.values]
ds.to_csv('dm.csv',index=None)
ds.columns


Index(['EPI1766616', 'EPI1766712', 'EPI1766716', 'EPI1768609', 'EPI1769150',
       'EPI1773255', 'EPI1775888', 'EPI1775900', 'EPI1775916', 'EPI1775980',
       'EPI1777715', 'EPI1778580', 'EPI1778590', 'EPI1778720', 'EPI1778732',
       'EPI1779438', 'EPI1779454', 'EPI1779474', 'EPI1780098', 'EPI1780120',
       'EPI1780243', 'EPI1780419', 'EPI1780479', 'EPI1817876', 'EPI1818121',
       'EPI1818149', 'EPI1818373', 'EPI1830281', 'EPI1832641', 'EPI1832784',
       'EPI1832788', 'EPI1832802', 'EPI1832818', 'EPI1833072', 'EPI1833157',
       'EPI1907229', 'EPI1907263', 'EPI1907387', 'EPI1908086', 'EPI1908102',
       'EPI1908134', 'EPI1908176', 'EPI1908453', 'EPI1908513', 'EPI1908547',
       'EPI1908557', 'EPI1908559', 'EPI1908782', 'EPI1908789', 'EPI1908812',
       'EPI1909019', 'EPI1909021', 'EPI1909033', 'EPI1909037', 'EPI1910432',
       'EPI1910743', 'EPI1910821', 'EPI1910887', 'EPI1910943', 'EPI1911033',
       'EPI1911241', 'EPI1932076', 'EPI1975300', 'EPI1975316', 'EPI1975364',

In [323]:
CONSTRUCT_PHYLO=True
PHYLO_TREE_DIR='./'
OUTPUT_DIR='./'

if CONSTRUCT_PHYLO:
    ALL_dm_ldistance = load_dm(
        OUTPUT_DIR + 'dm.csv', 
        upper_diag=False)
#    ALL_dm_qdistance = load_dm(
#        OUTPUT_DIR + 'ALL_qdistance_dm.csv', 
#        upper_diag=True)
    
    distance_matrix_to_phylo_tree(
        ALL_dm_ldistance, PHYLO_TREE_DIR + 'ldistanceh1n1.xml')
    
#    distance_matrix_to_phylo_tree(
#        ALL_dm_qdistance, PHYLO_TREE_DIR + 'qdistance.xml')

In [324]:
from ete3 import Tree, TreeStyle
from ete3 import Phyloxml
from ete3 import AttrFace, faces, Tree, NodeStyle, TreeStyle

In [325]:
# Helper Functions

In [326]:
def load_pickled(file_name):
    with open(file_name, 'rb') as f:
        return pickle.load(f, encoding='latin')

In [327]:
## ETE3 Functions

In [328]:
def get_farthest_node(tree, sequence):
    return (tree&sequence).get_farthest_node()

def get_all_accessions_from_tree(tree):
    return [leaf_node.name for leaf_node in tree.get_leaves()]

def remove_certain_hosts_from_tree(tree, hosts):
    """Remove leaf nodes if the host of that leaf is in `hosts`"""
    
    tree = copy.deepcopy(tree)
    
    removed_accessions = []
    for leaf_node in tree.get_leaves():
        if leaf_node.host in hosts:
            leaf_node.detach()
            
    return tree

def set_midpoint_outgroup(tree):
    tree.set_outgroup(tree.get_midpoint_outgroup())

In [329]:
### Host To Name

In [330]:
def host_to_name(name, specific_game_name=False):
    """Map the host name to some standard name
    
    categories:
        mouse/rat
        bat
        cattle/cows/goat/calf/bovine/donkey/pigs/camel
        humans
        
    Args:
        name (str): name of the host
        specific_game_name (bool): if we want a specific game name
    """
    
    name = name.lower()
    orig_name = name
    def names_in_host(orig_name, sub_names):
        """Check if any of the sub names are in the orig name"""
        
        for sub_name in sub_names:
            if sub_name.lower() in orig_name:
                return True
            
        return False
    
    def names_is_host(orig_name, names):
        """Check if any of the sub names are in the orig name"""
        
        for name in names:
            if name.lower() == orig_name:
                return True
            
        return False
    
    bat_subnames = [
        'bat', 'Artibeus', 'Rhinolophus', 'Megaerops niphanae', 'Chiroptera', 
        'dupreanum', 'Pipistrellus', 'Rousettus leschenaultii', 'Pteronotus',
        'Hipposideros', 'Tylonycteris pachypus', 'Pteropus', 'Myotis', 'Epomophorus',
        'Tadarida', 'Cynopterus', 'Epomops', 'Nycteris', 'Scotophilus', 'Rhinilophus',
        'Eptesicus', 'Neoromicia', 'Rousettus', 'Rhinopoma', 'Macroglossus', 'Megaloglossus',
        'Dyacopterus', 'Aselliscus', 'Carollia perspicillata', 'Chaerephon plicata',
        'Vespertilio', 'Miniopterus', 'Triaenops', 'Glossophaginae', 'Hypsugo',
        'Ptenochirus', 'Ia io', 'Eumops', 'Nyctalus noctula', 'Nyctinomops', 
        'Dobsonia moluccensis', 'Mops condyluru', 'Eidolon helvum', 'Taphozous perforatus']
    
    cattle = [
        'cow', 'calf', 'Bovine', 'taurus', 'cattle', 
        'Bubalus', 'Bovidae', 'wisent', 'buffalo', 'yak']
    camels = ['dromedary', 'Camel']
    pigs = ['pig', 'porcine', 'swine', 'Sus scrofa']
    canines = ['canine', 'dog', 'canis'] # canines (i.e. wolves, foxes, dogs)
    birds = [
        'Asio clamator', 'Columba livia', 'Megascops', 'Ara ararauna', 
        'Pyroderus', 'Coragyps atratus', 'Pyroderus scutatus', 'Rupornis',
         'Pitangus sulphuratus']
    horses = ['equus', 'Equine', 'horse',]
    alpaca = ['Vicugna', 'alpaca', 'lama']
    antelopes = ['antelope', 'nyala', 'waterbuck', 'sitatunga']
    chimps = ['Pan troglodytes verus', 'Chlorocebus aethiops']
    
    game_subnames = [
        'civet', # cat 
        'cuniculus', # rabbit
        'tahr', 'goat', # goat
        'donkey', 'deer', 'giraffe', 'Erinaceus', 'Manis javanica',
        'Myodes glareolus', # mole
        'Nycticebus pygmaeus', 
        'Cavia_porcellus', # guiness pig 
    ]
    
    game_subnames = game_subnames + cattle + camels + pigs + canines + birds + horses\
        + alpaca + antelopes + chimps
    
    rat_subnames = [
        'Apodemus', 'mouse', 'Murine', 'rodent', 'mus',
        'Crocidura', 'Niviventer', 'Mastomys', 'Rattus']
    rat_names = ['rat']
    human_subnames = ['Homo sapiens', 'human']
    
    coronavirus_subnames = ['coronavirus']
    
    other_subnames = ['genomic DNA']
    
    if names_in_host(name, bat_subnames):
        name = 'bat'
        
    elif names_in_host(name, game_subnames):
        name = 'game'
    
    elif names_in_host(name, rat_subnames) or names_is_host(name, rat_names):
        name = 'rat'
    
    elif names_in_host(name, human_subnames):
        name = 'human'
        
    elif names_in_host(name, coronavirus_subnames):
        # TODO: may need to check this if the data changes
        name = 'human'
        
    elif names_in_host(name, other_subnames):
        name = 'other'
    else:
        raise ValueError('Not a name that has been considered: {}'.format(name))
        
    if specific_game_name and name == 'game':
#         print orig_name
        if names_in_host(orig_name, cattle):
            name = 'cattle'
        elif names_in_host(orig_name, camels):
            name = 'camel'
        elif names_in_host(orig_name, pigs):
            name = 'pig'
        elif names_in_host(orig_name, canines):
            name = 'canine'
        elif names_in_host(orig_name, birds):
            name = 'bird'
        elif names_in_host(orig_name, horses):
            name = 'horse'
        elif names_in_host(orig_name, chimps):
            name = 'chimp'
            
            
    # cattle + camels + pigs + canines + birds + horses + alpaca + antelopes + chimps
    # cattle, camel, pig, canine,  bird, horse, alpaca, antelope, chimp
    return name

In [331]:
def load_tree(filename, type_='phyloxml'):
    """Load saved phylogenetic tree.
    """
    
    if type_ == 'phyloxml':
        project = Phyloxml()
        project.build_from_file(filename)

        for tree in project.get_phylogeny():
            break

        t=tree
        
    elif type_ == 'newick':
        t = Tree(filename, format=1)
    else:
        raise ValueError('Not a correct type.')
        
    return t

In [332]:
PHYLO_DIR='./'

In [333]:
Phylo.convert(
    PHYLO_DIR + 'ldistanceh1n1.xml','phyloxml',
    PHYLO_DIR + 'ldistance.nhx','newick')

ltree = load_tree(
    PHYLO_DIR + 'ldistance.nhx',
    type_='newick')


In [334]:
# Labeling

In [335]:
def label_nodes(
    tree, 
    recordinfo):
    """Label the nodes of the tree.
    
    We label nodes on whether:
        it is covid19
    """
    
    tree = copy.deepcopy(tree)
   
    for node in tree:
        name = node.name      
        node.host = recordinfo.seq_infos[name].subtype
        print(node.name,node.host)
    return tree

In [336]:
labelled_tree=label_nodes(
    ltree, ALLinfoHA)

EPI1907263 H1N1
EPI2158738 H1N1
EPI1830281 H1N1
EPI1778720 H1N1
EPI1777715 H1N1
EPI1975300 H1N1
EPI1908176 H1N1
EPI1908559 H1N1
EPI1908557 H1N1
EPI1908547 H1N1
EPI1833072 H1N1
EPI1832802 H1N1
EPI1775980 H1N1
EPI1833157 H1N1
EPI1908812 H1N1
EPI1908102 H1N1
EPI1832788 H1N1
EPI1832641 H1N1
EPI1775888 H1N1
EPI1976602 H1N1
EPI1976249 H1N1
EPI1778580 H1N1
EPI1768609 H1N1
EPI1766616 H1N1
EPI1910821 H1N1
EPI1909021 H1N1
EPI1910887 H1N1
EPI1909019 H1N1
EPI1908782 H1N1
EPI1908789 H1N1
EPI1832784 H1N1
EPI2153378 H1N1
EPI1832818 H1N1
EPI1911033 H1N1
EPI2026200 H7N9
EPI1769192 H3N2
EPI1930925 H3N2
EPI2148218 H3N2
EPI2148192 H3N2
EPI1817170 H3N2
EPI1775924 H3N2
EPI1818137 H3N2
EPI1766723 H3N2
EPI1817876 H1N1
EPI1769150 H1N1
EPI1975820 H1N1
EPI1975364 H1N1
EPI1975316 H1N1
EPI1910743 H1N1
EPI2146879 H1N1
EPI1910432 H1N1
EPI1909033 H1N1
EPI1908134 H1N1
EPI1907387 H1N1
EPI1932076 H1N1
EPI1911241 H1N1
EPI1910943 H1N1
EPI1775916 H1N1
EPI1908086 H1N1
EPI1780479 H1N1
EPI1818121 H1N1
EPI1780419 H1N1
EPI17800

In [337]:
for node in labelled_tree:
    print(node.name,node.host)

EPI1907263 H1N1
EPI2158738 H1N1
EPI1830281 H1N1
EPI1778720 H1N1
EPI1777715 H1N1
EPI1975300 H1N1
EPI1908176 H1N1
EPI1908559 H1N1
EPI1908557 H1N1
EPI1908547 H1N1
EPI1833072 H1N1
EPI1832802 H1N1
EPI1775980 H1N1
EPI1833157 H1N1
EPI1908812 H1N1
EPI1908102 H1N1
EPI1832788 H1N1
EPI1832641 H1N1
EPI1775888 H1N1
EPI1976602 H1N1
EPI1976249 H1N1
EPI1778580 H1N1
EPI1768609 H1N1
EPI1766616 H1N1
EPI1910821 H1N1
EPI1909021 H1N1
EPI1910887 H1N1
EPI1909019 H1N1
EPI1908782 H1N1
EPI1908789 H1N1
EPI1832784 H1N1
EPI2153378 H1N1
EPI1832818 H1N1
EPI1911033 H1N1
EPI2026200 H7N9
EPI1769192 H3N2
EPI1930925 H3N2
EPI2148218 H3N2
EPI2148192 H3N2
EPI1817170 H3N2
EPI1775924 H3N2
EPI1818137 H3N2
EPI1766723 H3N2
EPI1817876 H1N1
EPI1769150 H1N1
EPI1975820 H1N1
EPI1975364 H1N1
EPI1975316 H1N1
EPI1910743 H1N1
EPI2146879 H1N1
EPI1910432 H1N1
EPI1909033 H1N1
EPI1908134 H1N1
EPI1907387 H1N1
EPI1932076 H1N1
EPI1911241 H1N1
EPI1910943 H1N1
EPI1775916 H1N1
EPI1908086 H1N1
EPI1780479 H1N1
EPI1818121 H1N1
EPI1780419 H1N1
EPI17800

In [338]:
import copy

In [339]:
def prune_nodes(t):
    # collapsed nodes are labeled, so you locate them and prune them
    for n in t.search_nodes(collapsed=True):
        for ch in n.get_children():
            ch.detach()
            
            
def mean(array):
    return sum(array)/float(len(array))

def cache_distances(tree):
    ''' precalculate distances of all nodes to the root''' 
    node2rootdist = {tree:0}
    for node in tree.iter_descendants('preorder'):
        node2rootdist[node] = node.dist + node2rootdist[node.up]
    return node2rootdist

def closest_node(node, node2tips, root_distance):
    """Find the closest node."""
    
    tips = []
    distances = []
    for tip in node2tips[node]:
        distances.append(root_distance[tip]-root_distance[node])
        tips.append(tip)
#     index = np.argmin([root_distance[tip]-root_distance[node] for tip in node2tips[node]])
    index = np.argmin(distances)
    return tips[index]

def collapse(tree, min_dist,AllrecordInfo):
    # cache the tip content of each node to reduce the number of times the tree is traversed
    
    tree = copy.deepcopy(tree)
    
    node2tips = tree.get_cached_content()
    root_distance = cache_distances(tree)

    for node in tree.get_descendants('preorder'):
        if not node.is_leaf():
            avg_distance_to_tips = mean([root_distance[tip]-root_distance[node]
                                         for tip in node2tips[node]])

            if avg_distance_to_tips < min_dist:
                # do whatever, ete support node annotation, deletion, labeling, etc.

                # rename
#                node.name += ' COLLAPSED avg_d:%g {%s}' %(avg_distance_to_tips,
#                                                 ','.join([tip.name for tip in node2tips[node]]))
                #node.name += '{%s}' %(list(node2tips[node])[-1].name)
                #node.name = 'avg_d:%g' %(avg_distance_to_tips)
                # label
            
                closest_name = closest_node(node, node2tips, root_distance).name
                node.host = AllrecordInfo.seq_infos[closest_name].subtype
                node.name = '%s (%g)' %(closest_name,avg_distance_to_tips)
                
            
                node.add_features(collapsed=True)

                # set drawing attribute so they look collapsed when displayed with tree.show()
                node.img_style['draw_descendants'] = False

    return tree
                # etc...

In [340]:
ltree_collapsed = collapse(
    labelled_tree, 
    min_dist=1, 
    AllrecordInfo=ALLinfoHA)


In [341]:
# Render Tree

In [342]:
# COLBAT='DarkRed'
# COLRAT='SteelBlue'
COLHUMAN='DarkGreen'
COLCOVID='DarkRed'
COLBAT='Red'
COLRAT='Blue'
COLCAMEL='Purple'
COLGAME='Red'
COLCATTLE='Yellow'
# COLHUMAN='Black'
FS=40
PW=20

In [343]:
ltree_collapsed

Tree node 'Inner78' (0x7f0b7140859)

In [344]:
labelled_tree

Tree node 'Inner78' (0x7f0a1a187b5)

In [353]:
def nodeAttribConstruct(color, node):
    N = AttrFace(
        "name", fsize=FS, 
        text_prefix=" ",penwidth=PW,ftype='Arial',
        fgcolor=color,fstyle='bold')
    faces.add_face_to_node(N, node, 1, position="branch-right")
    return N

def layout(node):
    if node.is_leaf():
        if  node.host == 'H1N1':
            N = nodeAttribConstruct(COLBAT,node)
        elif node.host == 'H3N2':
            N = nodeAttribConstruct(COLRAT,node)
        elif node.host == 'H7N9':
            N = nodeAttribConstruct(COLHUMAN,node)
        elif node.host == 'H1N2':
            N = nodeAttribConstruct(COLCATTLE,node)
        elif node.host == 'game':
            N = nodeAttribConstruct(COLGAME,node)
        elif node.host == 'camel':
            N = nodeAttribConstruct(COLCAMEL,node)
        else:
            N = nodeAttribConstruct(COLGAME,node)
            
            
def render_tree(tree, outfile):# all_seq_data, display_type='nearest_host'):
    """Render the tree inside the file to a circular 
    phylogenetic tree.
    
    NOTE: outfile should be in .pdf for best visuals
    Returns:
    """
    #tree = Tree(nwfile,format=1)

    ts = TreeStyle()
    ns = NodeStyle()
    ts.show_leaf_name = True
    #ts.rotation = 90
    ts.mode = "r"
    #ts.arc_start = -360 # 0 degrees = 3 o'clock
    #ts.arc_span = 360
    ts.scale=7500
    ts.show_scale=True
    # ts.show_branch_length=True
    #ts.min_leaf_separation=10
    #ts.optimal_scale_level='full'
    #ts.branch_vertical_margin=0
    
    ns.hz_line_width=2
    ns.vt_line_width=1
    #ts.layout_fn = layout
    ns["vt_line_width"] = 2
    ns["hz_line_width"] = 1
#     ns['fsize'] = 20
    for n in tree.traverse():
        n.set_style(ns)
     
    #all_accessions = all_seq_data['accessions'].values
    for n in tree:
        ts.layout_fn = layout

        
    tree.set_style(ns)
    #tree.set_style(ts)
    
    #t.show()
    tree.render(
        outfile, 
        dpi=100, 
        h=1000,
    tree_style=ts)
# def layout(node):
#     if node.is_leaf():
#         if node.host == 'bat':
#             N = AttrFace("name", fsize=FS, text_prefix=" ",penwidth=PW,ftype='Arial',fgcolor=COLBAT,fstyle='bold')
#             faces.add_face_to_node(N, node, 1, position="branch-right")
#         elif node.host == 'rat':
#             N = AttrFace("name", fsize=FS, text_prefix=" ",penwidth=PW,ftype='Arial',fgcolor=COLRAT,fstyle='bold')
#             faces.add_face_to_node(N, node, 1, position="branch-right")
#         elif node.host == 'human':
#             N = AttrFace("name", fsize=FS, text_prefix=" ",penwidth=PW,ftype='Arial',fgcolor=COLHUMAN)
#             faces.add_face_to_node(N, node, 1, position="branch-right")
#         else:
# #             print (node.host)
#             N = AttrFace("name", fsize=FS, text_prefix=" ",penwidth=PW,ftype='Arial')
#             faces.add_face_to_node(N, node, 1, position="branch-right")
           
            

In [354]:
render_tree(
    labelled_tree, './tmp.pdf')
#     SAVE_PHYLO_DIR + 'Lcollapsed_farthest_from_earliest_covid_as_outgroup.pdf',
#     SAVE_PHYLO_DIR + 'Lcollapsed_earliest_seq_as_outgroup.pdf',
#    all_seq_data)

In [306]:
render_tree(
    ltree_collapsed, './tmp.pdf')
#     SAVE_PHYLO_DIR + 'Lcollapsed_farthest_from_earliest_covid_as_outgroup.pdf',
#     SAVE_PHYLO_DIR + 'Lcollapsed_earliest_seq_as_outgroup.pdf',
#    all_seq_data)

In [26]:
#for (key,val) in ddict.items()