# Postprocessing HADDOCK Results

In [26]:
import os
import pandas as pd

## Turn off SettingWithCopyWarning
pd.set_option('mode.chained_assignment', None)

In [34]:
## Define paths to results directories
results_directory_path = "./example_results"
subdirectory_path = 'run1/structures/it1/water/'

## Get list of experiment directories
results_directories = [f for f in os.listdir(results_directory_path)]

## Create empty DataFrame for metrics
experiment_results_df = pd.DataFrame()

In [35]:
## Loop through experiment folders and grab best cluster metrics
for experiment in results_directories:
    experiment_path = f"{results_directory_path}/{experiment}/{subdirectory_path}"

    ## Get best cluster based on lowest van der Waals energy
    vdw_clusters = pd.read_csv(f'{experiment_path}cluster_ener.txt', delimiter=r"\s+").sort_values(by=['Evdw'], ascending = True)
    best_cluster = vdw_clusters.iat[0, 0]

    ## Get best PDB file from best cluster
    cluster_pdbs = pd.read_csv(f'{experiment_path}{best_cluster}_ener', delimiter=r"\s+").sort_values(by=['Evdw'], ascending = True)
    best_pdb = cluster_pdbs.iat[0, 0]

    ## Make Pandas row and append to results DataFrame
    best_cluster_row = vdw_clusters.iloc[[0]]
    best_cluster_row['experiment_name'] = experiment
    best_cluster_row['best_cluster'] = best_cluster.replace('file.nam_clust', '')
    best_cluster_row['best_pdb_path'] = f"{experiment_path}{best_pdb}"
    experiment_results_df = pd.concat([experiment_results_df, best_cluster_row], ignore_index=True)

In [41]:
## Make the output DataFrame prettier

## Subset columns to those of interest
# list(experiment_results_df.columns)
experiment_results_df = experiment_results_df[[
 'experiment_name',
 'best_cluster',
 'best_pdb_path',
 'Nstruc',
 'Einter',
 'sd',
 'Enb',
 'sd.1',
 'Evdw+0.1Eelec',
 'sd.2',
 'Evdw',
 'sd.3',
 'Eelec',
 'sd.4',
 'Eair',
 'sd.5'
]]

## Rename standard deviation columns
rename_sd_dict = {
    'sd': 'Einter_sd',
    'sd.1': 'Enb_sd',
    'sd.2': 'Evdw+0.1Eelec_sd',
    'sd.3': 'Evdw_sd',
    'sd.4': 'Eelec_sd',
    'sd.5': 'Eair_sd'
}
experiment_results_df = experiment_results_df.rename(columns=rename_sd_dict)

## Show DataFrame
experiment_results_df

Unnamed: 0,experiment_name,best_cluster,best_pdb_path,Nstruc,Einter,Einter_sd,Enb,Enb_sd,Evdw+0.1Eelec,Evdw+0.1Eelec_sd,Evdw,Evdw_sd,Eelec,Eelec_sd,Eair,Eair_sd
0,SARS-CoV-2-B.1.1.7-N__CCL7,6,./example_results/SARS-CoV-2-B.1.1.7-N__CCL7/r...,4,2471.72,188.72,-369.63,21.11,-105.72,6.64,-76.4,9.05,-293.23,28.69,2841.35,172.45
1,SARS-CoV-2-B.1.1.7-N__CXCL12beta,5,./example_results/SARS-CoV-2-B.1.1.7-N__CXCL12...,4,2475.97,83.07,-410.99,36.3,-104.95,7.53,-70.94,4.87,-340.05,32.72,2886.96,104.28
2,SARS-CoV-2-B.1.1.7-N__CCL3,4,./example_results/SARS-CoV-2-B.1.1.7-N__CCL3/r...,5,2574.92,136.34,-287.9,68.7,-97.67,18.76,-76.53,14.92,-211.37,59.69,2862.82,77.62
3,SARS-CoV-2-B.1.1.7-N__CCL5,1,./example_results/SARS-CoV-2-B.1.1.7-N__CCL5/r...,8,2564.72,186.78,-362.12,35.33,-115.48,11.49,-88.08,11.01,-274.04,33.67,2926.84,181.06


In [42]:
## Write out results DataFrame
experiment_results_df.to_csv("experiment_results.csv")