# Postprocessing Docking Results

In [21]:
import os, shutil
import pandas as pd

## Turn off SettingWithCopyWarning
pd.set_option('mode.chained_assignment', None)

## Parse AlphaFold2 Results

In [40]:
## Define paths to results directories
results_directory_path = "./AF23_complexes/complex_pdbs"
subdirectory_path = 'run1/structures/it1/water/'

## Get list of experiment directories
# results_directories = [f for f in os.listdir(results_directory_path)]
results_directories = [f for f in os.listdir(results_directory_path) if not f.startswith('._')]

In [41]:
## Create empty DataFrame for metrics
experiment_results_df = pd.DataFrame(columns=[
    'experiment_name', 
    'n_protein',
    'cytokine_protein',
    'best_pdb_path',
    'prodigy_deltaG_kcalpermol',
    'prodigy_dissociation_constant_M'
])


## Show DataFrame
experiment_results_df

Unnamed: 0,experiment_name,n_protein,cytokine_protein,best_pdb_path,prodigy_deltaG_kcalpermol,prodigy_dissociation_constant_M


## Generate PRODIGY Predictions

Note: You must have PRODIGY installed.
```sh
git clone https://github.com/haddocking/prodigy
pip install prodigy/.
```

In [47]:
## To run on remote SuperCloud (Linux)
local_base_path = "/home/gridsan/cford/seqer_shared"

for experiment_dir in results_directories:
    experiment_name = experiment_dir#.replace("._", "")
    ## Create full path to best PDB file
    best_pdb_path = f'{results_directory_path.replace(".","")}/{experiment_name}/relaxed_model_1_multimer_v3_pred_0.pdb'
    pdb_path = f'{local_base_path}{best_pdb_path}'
    # print(pdb_path)

    ## Run PRODIGY and parse stdout
    prodigy_output = os.popen(f'/home/gridsan/cford/.local/bin/prodigy {pdb_path}').read()
    prodigy_output_lines = prodigy_output.split('\n')
    if len(prodigy_output_lines) > 1:
        predicted_binding_affinity = float(prodigy_output_lines[-3].split(':')[1].replace(' ', ''))
        predicted_dissociation_constant = float(prodigy_output_lines[-2].split(':')[1].replace(' ', ''))
        # print(predicted_binding_affinity)

        ## Add values to DataFrame
        experiment_results_df.loc[len(experiment_results_df.index)] = [
            experiment_name,
            experiment_name.split('N_')[0] + 'N',
            experiment_name.split('N_')[1],
            best_pdb_path,
            predicted_binding_affinity,
            predicted_dissociation_constant] 

experiment_results_df

Traceback (most recent call last):
  File "/home/gridsan/cford/.local/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/gridsan/cford/.local/lib/python3.8/site-packages/prodigy/predict_IC.py", line 411, in main
    struct_path = check_path(cmd.structf)
  File "/home/gridsan/cford/.local/lib/python3.8/site-packages/prodigy/lib/utils.py", line 23, in check_path
    raise IOError("Could not read file: {0}".format(path))
OSError: Could not read file: /home/gridsan/cford/seqer_shared/AF23_complexes/complex_pdbs/SARS-CoV-2-P.1-N_IFNlambda1/relaxed_model_1_multimer_v3_pred_0.pdb
Traceback (most recent call last):
  File "/home/gridsan/cford/.local/bin/prodigy", line 8, in <module>
    sys.exit(main())
  File "/home/gridsan/cford/.local/lib/python3.8/site-packages/prodigy/predict_IC.py", line 411, in main
    struct_path = check_path(cmd.structf)
  File "/home/gridsan/cford/.local/lib/python3.8/site-packages/prodigy/lib/utils.py", line 23, in check_path
    raise IOError("Co

Unnamed: 0,experiment_name,n_protein,cytokine_protein,best_pdb_path,prodigy_deltaG_kcalpermol,prodigy_dissociation_constant_M
0,SARS-CoV-2-BQ.1-N_CXCL10,SARS-CoV-2-BQ.1-N,CXCL10,/AF23_complexes/complex_pdbs/SARS-CoV-2-BQ.1-N...,-10.8,1.200000e-08
1,SARS-CoV-2-BA.1.1-N_CCL22,SARS-CoV-2-BA.1.1-N,CCL22,/AF23_complexes/complex_pdbs/SARS-CoV-2-BA.1.1...,-10.4,2.200000e-08
2,SARS-CoV-2-XBB-N_CCL25,SARS-CoV-2-XBB-N,CCL25,/AF23_complexes/complex_pdbs/SARS-CoV-2-XBB-N_...,-12.7,4.800000e-10
3,SARS-CoV-2-WA1-N_CCL16,SARS-CoV-2-WA1-N,CCL16,/AF23_complexes/complex_pdbs/SARS-CoV-2-WA1-N_...,-11.2,6.500000e-09
4,SARS-CoV-2-BQ.1-N_IFNlambda1,SARS-CoV-2-BQ.1-N,IFNlambda1,/AF23_complexes/complex_pdbs/SARS-CoV-2-BQ.1-N...,-7.9,1.700000e-06
...,...,...,...,...,...,...
1134,SARS-CoV-N_CCL22,SARS-CoV-N,CCL22,/AF23_complexes/complex_pdbs/SARS-CoV-N_CCL22/...,-11.7,2.600000e-09
1135,SARS-CoV-2-XBB-N_XCL1,SARS-CoV-2-XBB-N,XCL1,/AF23_complexes/complex_pdbs/SARS-CoV-2-XBB-N_...,-9.5,1.000000e-07
1136,SARS-CoV-2-BA.1.1-N_CCL3L1,SARS-CoV-2-BA.1.1-N,CCL3L1,/AF23_complexes/complex_pdbs/SARS-CoV-2-BA.1.1...,-8.6,4.800000e-07
1137,SARS-CoV-2-B.1.1.529-N_CCL16,SARS-CoV-2-B.1.1.529-N,CCL16,/AF23_complexes/complex_pdbs/SARS-CoV-2-B.1.1....,-8.7,4.400000e-07


In [48]:
## Write out results DataFrame
experiment_results_df.to_csv("AF23_experiment_results.csv")

In [55]:
import shutil

## Grab Best PDBs
os.makedirs('./AF23_best_pdbs', exist_ok=True)

for index, row in experiment_results_df.iterrows():
    best_pdb_path = row['best_pdb_path']
    experiment_name = row['experiment_name']
    new_path = f'./AF23_best_pdbs/{experiment_name}'
    os.makedirs(new_path, exist_ok=True)
    shutil.copy(f'.{best_pdb_path}', new_path)

PermissionError: [Errno 13] Permission denied: './AF23_best_pdbs/SARS-CoV-2-BQ.1-N_CXCL10/relaxed_model_1_multimer_v3_pred_0.pdb'