In [14]:
from pathlib import Path
import pandas as pd
import dendropy
from ete3 import Tree

# Input locations. All paths are relative, so they resolve against the
# directory the notebook kernel was started from.
base = Path("../data/online_version/")
csv_path = base / "20230330_3FTx.csv"
# Table whose "id_new", "species" and "identifier" columns are used further
# down to pick the leaves kept in each per-species tree.
df = pd.read_csv(csv_path)

# Source tree in Nexus format (presumably ExaBayes output, going by the
# directory name -- TODO confirm).
nexus_file = Path("../data/phylo_tree/exabays/20230419_3FTx.nexus")
# Newick counterpart: same directory and stem, only the suffix differs.
newick_file = nexus_file.with_suffix(".newick")

In [7]:
def nexus2newick(
    nexus_file: "str | Path",
    newick_file: "str | Path",
    support_annotation: str = "prob(percent)",
) -> None:
    """Convert a Nexus tree to Newick, storing node support as node labels.

    The tree is read with DendroPy. For every internal node, the value of the
    ``support_annotation`` metadata annotation is copied into ``node.label``
    before the tree is written, so that the value travels with the node as its
    label in the Newick output.

    Args:
        nexus_file: Path of the Nexus file to read (``str`` or ``Path``).
        newick_file: Path of the Newick file to write (``str`` or ``Path``).
            An existing file at this location is overwritten.
        support_annotation: Name of the node annotation holding the support
            value. Defaults to ``"prob(percent)"``.

    Returns:
        None. The result is the file written to ``newick_file``.
    """
    # str() keeps the call working for both plain strings and pathlib paths.
    nexus_tree = dendropy.Tree.get(path=str(nexus_file), schema="nexus")
    for node in nexus_tree.postorder_node_iter():
        if not node.is_internal():
            continue
        # get_value() returns None when the annotation is absent; such nodes
        # (possibly the root -- TODO confirm for this tree) keep their label.
        support = node.annotations.get_value(support_annotation)
        if support is not None:
            node.label = support

    nexus_tree.write(
        path=str(newick_file),
        schema="newick",
        # Leave out the leading rooting token so the file is plain Newick.
        suppress_rooting=True,
    )

In [27]:
species = ["Bungarus multicinctus", "Homo sapiens", "Naja naja", "Varanus komodoensis"]

# Upper bound on "id_new" for rows whose identifiers are looked up in the tree.
# NOTE(review): presumably the last entry that was part of the tree build --
# confirm where 1416 comes from.
MAX_TREE_ID = 1416

# CSV identifier -> leaf label as spelled in the Newick file. This one label
# keeps its space and is wrapped in single quotes in the tree file.
TREE_LABEL_OVERRIDES = {'SP|Q17RY6-2|Homo_sapiens': "'SP|Q17RY6-2|Homo sapiens'"}

# Create the Newick file only when it is missing, so a fresh
# "Restart & Run All" works without re-running the conversion every time.
if not newick_file.exists():
    nexus2newick(nexus_file=nexus_file, newick_file=newick_file)

for spe in species:
    tree_out = f"../data/phylo_tree/species/{spe.replace(' ', '_')}.newick"
    # Make sure the output directory exists before ete3 tries to write to it.
    Path(tree_out).parent.mkdir(parents=True, exist_ok=True)

    species_ids = df.loc[
        (df["id_new"] <= MAX_TREE_ID) & (df["species"] == spe), "identifier"
    ].to_list()
    species_ids = [TREE_LABEL_OVERRIDES.get(s, s) for s in species_ids]

    # prune() modifies the tree in place, so the full tree is re-read for
    # every species instead of being loaded once before the loop.
    t = Tree(str(newick_file))
    t.prune(species_ids)
    t.write(format=2, outfile=tree_out)