In [16]:
# Code that processes the output from the mol_opt code

import os
import yaml
import csv
import re

def process_yaml_to_csv(yaml_file, csv_file):
    """Convert a results YAML file into a two-column CSV.

    The YAML document is read as a mapping. Each key is written to the
    'Input smiles' column, and the first element of its value is written to
    the 'Docking score' column. A falsy value (e.g. an empty list or None)
    yields an empty score cell.

    Args:
        yaml_file: Path of the YAML file to read.
        csv_file: Path of the CSV file to write (truncated if it exists).
    """
    with open(yaml_file, 'r') as file:
        # safe_load returns None for an empty document; fall back to an empty
        # mapping so that only the header row is written instead of failing
        # on `.items()`.
        data = yaml.safe_load(file) or {}

    with open(csv_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Input smiles', 'Docking score'])

        for smiles, scores in data.items():
            # csv.writer renders None as an empty field.
            writer.writerow([smiles, scores[0] if scores else None])

# Regex that extracts (model, pdb_name) from a results filename ending in
# '_docking_0.yaml', e.g. 'results_mimosa_3ny8_docking_0.yaml' -> ('mimosa', '3ny8').
# The first group is greedy and the second is lazy, so a model name that
# contains underscores (e.g. 'smiles_vae_bo') is kept in one piece.
pattern = re.compile(r'results_(.+)_(.+?)_docking_0\.yaml$')
# Directory names (under 'main') whose 'results' folders should be processed
target_directories = ['screening', 'graph_ga', 'smiles_ga',
                      'smiles_vae', 'selfies_vae', 'moldqn',
                      'reinvent', 'mimosa', 'smiles_lstm_hc',
                      'dst']
# Ensure the evaluation_output directory exists; exist_ok avoids the
# check-then-create race and makes the cell safe to re-run.
output_dir = '../evaluation_output'
os.makedirs(output_dir, exist_ok=True)

# Walk everything under 'main' and convert matching result files to CSV
for current_dir, subdirs, _ in os.walk('main'):
    # Only directories named in the target list are of interest
    if os.path.basename(current_dir) not in target_directories:
        continue
    # ...and only when they directly contain a 'results' folder
    if 'results' not in subdirs:
        continue

    results_dir = os.path.join(current_dir, 'results')
    for entry in os.listdir(results_dir):
        if not entry.endswith('.yaml'):
            continue
        parsed = pattern.match(entry)
        if parsed is None:
            continue

        model, pdb_name = parsed.groups()
        # Skip files whose second captured group (pdb_name) is 'qed'
        if pdb_name.lower() == 'qed':
            continue

        yaml_path = os.path.join(results_dir, entry)
        csv_path = os.path.join(output_dir, f"{pdb_name}_docking_{model}.csv")
        process_yaml_to_csv(yaml_path, csv_path)
        print(f"Processed {yaml_path} into {csv_path}")


Processed main/mimosa/results/results_mimosa_3ny8_docking_0.yaml into ../evaluation_output/3ny8_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_3eml_docking_0.yaml into ../evaluation_output/3eml_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_4rlu_docking_0.yaml into ../evaluation_output/4rlu_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_5mo4_docking_0.yaml into ../evaluation_output/5mo4_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_3pbl_docking_0.yaml into ../evaluation_output/3pbl_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_7l11_docking_0.yaml into ../evaluation_output/7l11_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_4unn_docking_0.yaml into ../evaluation_output/4unn_docking_mimosa.csv
Processed main/mimosa/results/results_mimosa_1iep_docking_0.yaml into ../evaluation_output/1iep_docking_mimosa.csv
Processed main/smiles_vae/results/results_smiles_vae_bo_5mo4_docking_0.yaml into

In [5]:
import os
import yaml
import csv
import re
from tdc import Oracle
def generate_property_csv(sa_oracle, qed_oracle, logp_oracle, docking_csv_path, property_csv_path):
    """Write a property CSV for the molecules listed in a docking CSV.

    The first row of the docking CSV is treated as a header and skipped. For
    every remaining non-empty row, the first column is passed to each of the
    three oracles and the results are written next to it.

    Args:
        sa_oracle: Callable taking the first-column string; result goes to 'SA'.
        qed_oracle: Callable taking the first-column string; result goes to 'QED'.
        logp_oracle: Callable taking the first-column string; result goes to 'LogP'.
        docking_csv_path: Path of the CSV file to read.
        property_csv_path: Path of the CSV file to write (truncated if it exists).
    """
    with open(docking_csv_path, 'r') as docking_file, open(property_csv_path, 'w', newline='') as property_file:
        reader = csv.reader(docking_file)
        writer = csv.writer(property_file)
        writer.writerow(['Input smiles', 'SA', 'QED', 'LogP'])

        # Skip the header; the default keeps an empty input file from
        # raising StopIteration.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; there is no SMILES to score.
            if not row:
                continue
            smiles = row[0]
            sa = sa_oracle(smiles)
            qed = qed_oracle(smiles)
            logp = logp_oracle(smiles)
            writer.writerow([smiles, sa, qed, logp])

# Build the three property oracles (created in the order SA, QED, LogP)
sa_oracle, qed_oracle, logp_oracle = [Oracle(name=label) for label in ('SA', 'QED', 'LogP')]

# Folder holding the docking CSV files; property CSVs are written alongside them
docking_dir = '../evaluation_output'  # Replace with the actual directory if different

# Filenames of the form '<pdb_name>_docking_<model>.csv'
pattern = re.compile(r'(.+)_docking_(.+)\.csv$')

# Produce one property CSV per docking CSV found in the folder
for csv_name in os.listdir(docking_dir):
    parsed = pattern.match(csv_name)
    if not parsed:
        continue  # not a docking CSV

    pdb_name, model = parsed.groups()
    docking_csv_path = os.path.join(docking_dir, csv_name)
    property_csv_path = os.path.join(docking_dir, f"{pdb_name}_property_{model}.csv")
    generate_property_csv(sa_oracle, qed_oracle, logp_oracle, docking_csv_path, property_csv_path)
    print(f"Generated property CSV: {property_csv_path}")

Generated property CSV: ../evaluation_output/4rlu_property_moldqn.csv
Generated property CSV: ../evaluation_output/5mo4_property_screening.csv
Generated property CSV: ../evaluation_output/3ny8_property_screening.csv
Generated property CSV: ../evaluation_output/4unn_property_selfies_vae_bo.csv
Generated property CSV: ../evaluation_output/7l11_property_smiles_vae_bo.csv
Generated property CSV: ../evaluation_output/4rlu_property_graph_ga.csv
Generated property CSV: ../evaluation_output/1iep_property_graph_ga.csv
Generated property CSV: ../evaluation_output/4unn_property_dst.csv
Generated property CSV: ../evaluation_output/4unn_property_smiles_ga.csv
Generated property CSV: ../evaluation_output/3ny8_property_moldqn.csv
Generated property CSV: ../evaluation_output/4rlu_property_reinvent.csv
Generated property CSV: ../evaluation_output/1iep_property_smiles_vae_bo.csv
Generated property CSV: ../evaluation_output/4unn_property_screening.csv
Generated property CSV: ../evaluation_output/3eml_pro

In [4]:
# Scratch check: os.path.basename returns only the final path component.
os.path.basename('main/dog_ae/scripts/viz/data/imgs')

'imgs'