In [1]:
import os
from os.path import join

from Bio import Phylo
import logging
from datetime import datetime
import time
import subprocess

def check_inferred_tree(dir_rna, expected_files=50):
    """Check that a result directory holds the expected number of entries.

    Args:
        dir_rna: Path of the directory to inspect.
        expected_files: Number of directory entries that counts as complete.
            Defaults to 50, the threshold this check has always used
            (presumably 10 runs x 5 output files each -- TODO confirm).

    Returns:
        True when ``os.listdir(dir_rna)`` has exactly ``expected_files``
        entries; otherwise False, after logging a warning.
    """
    n_found = len(os.listdir(dir_rna))
    if n_found != expected_files:
        # Report the real counts: the old message said "10 trees" although
        # the comparison is against the number of directory entries.
        logging.warning(
            "%s contains %d entries; expected %d.",
            dir_rna, n_found, expected_files,
        )
        return False
    return True
        
def extract_highestloglh(dir_path):
    """Return the seed whose log file reports the highest final value.

    Every file in ``dir_path`` whose name starts with ``RAxML_log`` and ends
    with one of the two-digit seeds ``01`` .. ``10`` is read. The last
    whitespace-separated token on its last non-blank line is parsed as a
    float (presumably the final log-likelihood of that run -- TODO confirm
    against the RAxML log format).

    Args:
        dir_path: Directory containing the log files.

    Returns:
        A ``(seed, value)`` tuple for the largest value found, where ``seed``
        is the two-digit string suffix and ``value`` is a float.

    Raises:
        IndexError: If no matching, non-empty log file is found.
        ValueError: If the final token of a log file is not a valid float.
    """
    seeds = [f"{i:02d}" for i in range(1, 11)]
    lh = dict()

    for file in os.listdir(dir_path):
        if not file.startswith('RAxML_log'):
            continue
        # All seeds are distinct and two characters long, so a file name can
        # end with at most one of them.
        seed = next((s for s in seeds if file.endswith(s)), None)
        if seed is None:
            continue

        with open(os.path.join(dir_path, file), 'r') as f:
            scored_lines = [line for line in f if line.strip()]

        if not scored_lines:
            # An empty log carries no value; skip it instead of crashing on
            # an out-of-range index.
            logging.warning("%s is empty; seed %s skipped.",
                            os.path.join(dir_path, file), seed)
            continue

        lh[seed] = float(scored_lines[-1].split()[-1])

    print(lh)
    if not lh:
        # IndexError (not ValueError) is kept so that any existing handler
        # written against the previous behaviour keeps working.
        raise IndexError(f"No usable RAxML_log files found in {dir_path}")
    # max() returns the first maximal item, matching the previous
    # stable-sort-then-take-first behaviour on ties.
    return max(lh.items(), key=lambda item: item[1])

def extract_highestLH_2Trees_pseudo(dir_output, rna):
    """Collect the best (seed, value) pair for the two result sets of one RNA.

    The two directories examined are ``<dir_output>/raxml/<rna>`` and
    ``<dir_output>/raxmlP_wPseu/<rna>``.

    Args:
        dir_output: Root directory holding the ``raxml`` and ``raxmlP_wPseu``
            sub-directories.
        rna: Name of the per-RNA sub-directory inside each of them.

    Returns:
        A dict with keys ``'DNA'`` and ``'RNA considering pseudoknots'``, each
        mapped to the result of ``extract_highestloglh`` for the matching
        directory, or None when either directory fails
        ``check_inferred_tree``.
    """
    dna_dir = join(dir_output, 'raxml', rna)
    pseudo_dir = join(dir_output, 'raxmlP_wPseu', rna)

    # `and` short-circuits: the second directory is only checked (and only
    # warned about) when the first one passes.
    both_complete = check_inferred_tree(dna_dir) and check_inferred_tree(pseudo_dir)
    if not both_complete:
        return None

    best_by_label = {}
    best_by_label['DNA'] = extract_highestloglh(dna_dir)
    best_by_label['RNA considering pseudoknots'] = extract_highestloglh(pseudo_dir)
    return best_by_label
    
def extract_highestLH_2Trees_ipseudo(dir_output, rna):
    """Collect the best (seed, value) pair for the two result sets of one RNA.

    The two directories examined are ``<dir_output>/raxml/<rna>`` and
    ``<dir_output>/raxmlP_iPseu/<rna>``.

    Args:
        dir_output: Root directory holding the ``raxml`` and ``raxmlP_iPseu``
            sub-directories.
        rna: Name of the per-RNA sub-directory inside each of them.

    Returns:
        A dict with keys ``'DNA'`` and ``'RNA ignoring pseudoknots'``, each
        mapped to the result of ``extract_highestloglh`` for the matching
        directory, or None when either directory fails
        ``check_inferred_tree``.
    """
    dna_dir = join(dir_output, 'raxml', rna)
    ipseudo_dir = join(dir_output, 'raxmlP_iPseu', rna)

    # `and` short-circuits: the second directory is only checked (and only
    # warned about) when the first one passes.
    both_complete = check_inferred_tree(dna_dir) and check_inferred_tree(ipseudo_dir)
    if not both_complete:
        return None

    best_by_label = {}
    best_by_label['DNA'] = extract_highestloglh(dna_dir)
    best_by_label['RNA ignoring pseudoknots'] = extract_highestloglh(ipseudo_dir)
    return best_by_label

def combineTreeFiles(dir_combined, rna, group, tree1, tree2):
    """Concatenate two tree files into one combined file.

    The output is written to
    ``<dir_combined>/<rna>.<group>.highestLH.trees`` and contains the text of
    ``tree1`` followed by the text of ``tree2``.

    Args:
        dir_combined: Directory in which the combined file is created.
        rna: First component of the output file name.
        group: Second component of the output file name.
        tree1: Path of the file written first.
        tree2: Path of the file written second.

    Raises:
        OSError: If an input cannot be read or the output cannot be written
            (e.g. ``FileNotFoundError`` for a missing input).
    """
    combined_TreeFile = join(dir_combined, f"{rna}.{group}.highestLH.trees")

    # Read both inputs before opening the output, so that a missing or
    # unreadable input does not leave a truncated combined file behind.
    parts = []
    for tree_file in (tree1, tree2):
        with open(tree_file, 'r') as f:
            content = f.read()
        # Without a trailing newline the last line of this file would be
        # fused with the first line of the next one (assumes the trees are
        # stored one per line -- TODO confirm).
        if content and not content.endswith('\n'):
            content += '\n'
        parts.append(content)

    with open(combined_TreeFile, 'w') as f_combined:
        f_combined.writelines(parts)

In [None]:
# Project root. Every path below is derived from it, so the notebook can be
# pointed at another checkout by setting the RNAPHYLO_ROOT environment
# variable; the default is the location that was previously hard-coded.
DIR_ROOT = os.environ.get(
    'RNAPHYLO_ROOT',
    '/Users/u7875558/Documents/PhD/RNAPhylo/allModels_SEED',
)

# Label used as the per-model sub-directory name under the output folders.
MODEL = 'S6A'

# Output locations; the two per-model directories are created if missing.
DIR_OUTPUTS = join(DIR_ROOT, 'outputs')
DIR_WORKING = join(DIR_OUTPUTS, 'AU_Test', MODEL)
os.makedirs(DIR_WORKING, exist_ok=True)
DIR_COMBINE = join(DIR_OUTPUTS, "combinedTreeLH", MODEL)
os.makedirs(DIR_COMBINE, exist_ok=True)

# Input locations.
DIR_INPUTS = join(DIR_ROOT, 'nci_inputs')
DIR_FASTA = join(DIR_INPUTS, 'fasta_files')
DIR_SS = join(DIR_INPUTS, 'ss_files')

TREE_OUTPUT = join(DIR_OUTPUTS, 'combinedFiles')
# Path of one specific, timestamped log file.
LOG_FILE = join(DIR_ROOT, 'logs', '2025-03-24_11-55-45.log')