
###### By Manasa Yadavalli 
###### Jan 2025

### Evaluation for CSKDE95 and CovPDB. 
#### Use ***posebusters*** conda env

In [1]:
import glob
import os 
from posebusters import PoseBusters
from pathlib import Path
import pandas as pd
from evaluate_cov import make_eval_df 
from evaluate_cov import score_model_eval
from evaluate_cov import confidence_model_eval

In [2]:
pose_bust_cols = ['file',
 'molecule',
 'mol_pred_loaded',
 'mol_true_loaded',
 'mol_cond_loaded',
 'sanitization',
 'inchi_convertible',
 'all_atoms_connected',
 'molecular_formula',
 'molecular_bonds',
 'double_bond_stereochemistry',
 'tetrahedral_chirality',
 'bond_lengths',
 'bond_angles',
 'internal_steric_clash',
 'aromatic_ring_flatness',
 'double_bond_flatness',
 'internal_energy',
 'protein-ligand_maximum_distance',
 'minimum_distance_to_protein',
 'minimum_distance_to_organic_cofactors',
 'minimum_distance_to_inorganic_cofactors',
 'minimum_distance_to_waters',
 'volume_overlap_with_protein',
 'volume_overlap_with_organic_cofactors',
 'volume_overlap_with_inorganic_cofactors',
 'volume_overlap_with_waters',
 'rmsd_≤_2å',
 'passes_valence_checks',
 'passes_kekulization',
 'inchi_crystal_valid',
 'inchi_docked_valid',
 'inchi_crystal',
 'inchi_docked',
 'inchi_overall',
 'inchi_version',
 'stereochemistry_preserved',
 'hydrogens',
 'net_charge',
 'protons',
 'stereo_sp3',
 'stereo_sp3_inverted',
 'stereo_type',
 'number_bonds',
 'shortest_bond_relative_length',
 'longest_bond_relative_length',
 'number_short_outlier_bonds',
 'number_long_outlier_bonds',
 'number_angles',
 'most_extreme_relative_angle',
 'number_outlier_angles',
 'number_noncov_pairs',
 'shortest_noncovalent_relative_distance',
 'number_clashes',
 'number_valid_bonds',
 'number_valid_angles',
 'number_valid_noncov_pairs',
 'number_aromatic_rings_checked',
 'number_aromatic_rings_pass',
 'aromatic_ring_maximum_distance_from_plane',
 'number_double_bonds_checked',
 'number_double_bonds_pass',
 'double_bond_maximum_distance_from_plane',
 'ensemble_avg_energy',
 'mol_pred_energy',
 'energy_ratio',
 'smallest_distance_protein',
 'num_pairwise_clashes_protein',
 'most_extreme_ligand_atom_id_protein',
 'most_extreme_protein_atom_id_protein',
 'most_extreme_ligand_element_protein',
 'most_extreme_protein_element_protein',
 'most_extreme_ligand_vdw_protein',
 'most_extreme_protein_vdw_protein',
 'most_extreme_sum_radii_protein',
 'most_extreme_distance_protein',
 'most_extreme_sum_radii_scaled_protein',
 'most_extreme_relative_distance_protein',
 'most_extreme_clash_protein',
 'smallest_distance_organic_cofactors',
 'not_too_far_away_organic_cofactors',
 'num_pairwise_clashes_organic_cofactors',
 'most_extreme_ligand_atom_id_organic_cofactors',
 'most_extreme_protein_atom_id_organic_cofactors',
 'most_extreme_ligand_element_organic_cofactors',
 'most_extreme_protein_element_organic_cofactors',
 'most_extreme_ligand_vdw_organic_cofactors',
 'most_extreme_protein_vdw_organic_cofactors',
 'most_extreme_sum_radii_organic_cofactors',
 'most_extreme_distance_organic_cofactors',
 'most_extreme_sum_radii_scaled_organic_cofactors',
 'most_extreme_relative_distance_organic_cofactors',
 'most_extreme_clash_organic_cofactors',
 'smallest_distance_inorganic_cofactors',
 'not_too_far_away_inorganic_cofactors',
 'num_pairwise_clashes_inorganic_cofactors',
 'most_extreme_ligand_atom_id_inorganic_cofactors',
 'most_extreme_protein_atom_id_inorganic_cofactors',
 'most_extreme_ligand_element_inorganic_cofactors',
 'most_extreme_protein_element_inorganic_cofactors',
 'most_extreme_ligand_vdw_inorganic_cofactors',
 'most_extreme_protein_vdw_inorganic_cofactors',
 'most_extreme_sum_radii_inorganic_cofactors',
 'most_extreme_distance_inorganic_cofactors',
 'most_extreme_sum_radii_scaled_inorganic_cofactors',
 'most_extreme_relative_distance_inorganic_cofactors',
 'most_extreme_clash_inorganic_cofactors',
 'smallest_distance_waters',
 'not_too_far_away_waters',
 'num_pairwise_clashes_waters',
 'most_extreme_ligand_atom_id_waters',
 'most_extreme_protein_atom_id_waters',
 'most_extreme_ligand_element_waters',
 'most_extreme_protein_element_waters',
 'most_extreme_ligand_vdw_waters',
 'most_extreme_protein_vdw_waters',
 'most_extreme_sum_radii_waters',
 'most_extreme_distance_waters',
 'most_extreme_sum_radii_scaled_waters',
 'most_extreme_relative_distance_waters',
 'most_extreme_clash_waters',
 'volume_overlap_protein',
 'volume_overlap_organic_cofactors',
 'volume_overlap_inorganic_cofactors',
 'volume_overlap_waters',
 'rmsd',
 'kabsch_rmsd',
 'centroid_distance']


In [None]:
# eval for working ligands cskde95
true_poses = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/working_pose_bust_cskde'
pred_poses = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/results/cskde95_inference'

all_pose_evals =  make_eval_df(true_poses, pred_poses, pose_bust_cols)

In [15]:
all_pose_evals['protein'] = all_pose_evals['molecule'].str.extract(r'^(.*?)_') 
all_pose_evals['rank'] = all_pose_evals['molecule'].str.extract(r'rank(\d+)').astype(int)
# sort within proteins based on rmsd of poses generated NOT by rank predicted
sorted_df = all_pose_evals.groupby('protein', group_keys=False).apply(lambda group: group.sort_values('rmsd'))
group_df = sorted_df.groupby('protein')

num_prots= len(group_df.groups)

  sorted_df = all_pose_evals.groupby('protein', group_keys=False).apply(lambda group: group.sort_values('rmsd'))


In [56]:
import numpy as np
rmsd_best = np.array([]) # top-1 pose rmsds
prots = np.array([])
for prot, row_index in group_df.groups.items(): 
    rmsd_best = np.append(rmsd_best, sorted_df.loc[row_index[0], 'rmsd'])
    prots = np.append(prots, prot)

# median Top-1 RMSD
median_top1 = round(np.mean(rmsd_best), 2)
std_top1 = round(np.std(rmsd_best), 2)
print('median top-1 rmsd:', median_top1, '\n', 'std top-1 rmsds:', std_top1)

# Top-1 RMSD 
count = np.sum(rmsd_best < 2)
top1_rmsd_perc = round(count/len(rmsd_best) * 100, 2)
print('top1 rmsd < 2A %',top1_rmsd_perc)

median top-1 rmsd: 7.17 
 std top-1 rmsds: 4.53
top1 rmsd < 2A % 18.18


In [67]:
prot_rmsd_best = dict(zip(prots, rmsd_best))
prot_min_rmsd = min(prot_rmsd_best, key=prot_rmsd_best.get)
prot_max_rmsd= max(prot_rmsd_best, key=prot_rmsd_best.get)
print('prot with min rmsd:', prot_min_rmsd, '\nrmsd:', round(prot_rmsd_best[prot_min_rmsd], 3))
print('prot with max rmsd:', prot_max_rmsd, '\nrmsd:', round(prot_rmsd_best[prot_max_rmsd], 3))

prot with min rmsd: 1nqc 
rmsd: 0.497
prot with max rmsd: 6vgy 
rmsd: 16.919


In [9]:
from rdkit import Chem
from posebusters.modules.rmsd import check_rmsd
true_lig = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/1nqc/1nqc_ligand.sdf'
pose = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/results/cskde95_inference/1nqc/rank1.sdf'

true_ligand_supplier = Chem.SDMolSupplier(true_lig)
predicted_pose_supplier = Chem.SDMolSupplier(pose)

mol_true = true_ligand_supplier[0]
mol_pred = predicted_pose_supplier[0]

In [10]:
rmsd_result = check_rmsd(mol_pred, mol_true, rmsd_threshold=2.0)

In [15]:
rmsd_result['results']['rmsd']

0.4972252727637645

In [5]:
prot = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95/Structures/CSKDE95/5orl/ac/system-pre.pdb'
# prot = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_processed.pdb'
true_lig = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_ligand.sdf'
pose = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/results/cskde95_inference/5orl/rank1.sdf'

buster = PoseBusters(config="redock")
buster_current = buster.bust(pose, true_lig, prot)

df_buster_reset = buster_current.reset_index() # changing file and molecule to columns instead of indexes

df_buster_reset

Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95/Structures/CSKDE95/5orl/ac/system-pre.pdb with error: Could not load molecule.


[10:22:40] UFFTYPER: Unrecognized charge state for atom: 11


Unnamed: 0,file,molecule,mol_pred_loaded,mol_true_loaded,mol_cond_loaded,sanitization,inchi_convertible,all_atoms_connected,molecular_formula,molecular_bonds,...,protein-ligand_maximum_distance,minimum_distance_to_protein,minimum_distance_to_organic_cofactors,minimum_distance_to_inorganic_cofactors,minimum_distance_to_waters,volume_overlap_with_protein,volume_overlap_with_organic_cofactors,volume_overlap_with_inorganic_cofactors,volume_overlap_with_waters,rmsd_≤_2å
0,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,mol_at_pos_0,True,True,False,True,True,True,True,True,...,,,,,,,,,,False


In [7]:
from rdkit import Chem
mol = Chem.MolFromPDBFile('system-pre.pdb', sanitize=True)
if mol is None:
    print("RDKit could not parse the file.")

OSError: Bad input file system-pre.pdb

In [14]:
true_poses = '/home/ymanasa/posebusters_eval/CSKDE95_datamol_af2'
pred_poses = '/home/ymanasa/posebusters_eval/cskde95_inference'
all_df = make_eval_df(true_poses, pred_poses, pose_bust_cols=pose_bust_cols)

5p9m
not all poses were predicted for 5p9m, moving to next
6yq2
not all poses were predicted for 6yq2, moving to next
6wp8
not all poses were predicted for 6wp8, moving to next


In [5]:
true_poses = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2'
pred_poses = '/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/results/cskde95_inference'
all_df = make_eval_df(true_poses, pred_poses, pose_bust_cols=pose_bust_cols)

4cdc
6j4p
not all poses were predicted for 6j4p, moving to next
4qps
not all poses were predicted for 4qps, moving to next
1hbj
4dmy


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.




1qdq
not all poses were predicted for 1qdq, moving to next
4luc


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4luc/4luc_processed.pdb with error: Could not load molecule.


Could not load molecul

4i9o


[13:10:06] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:06] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:07] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:07] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:07] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:08] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:08] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:09] UFFTYPER: Unrecognized charge state for atom: 14
[13:10:09] UFFTYPER: Unrecognized charge state for atom: 14


4mll
not all poses were predicted for 4mll, moving to next
6ary


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.


5vbm


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5vbm/5vbm_processed.pdb with error: Could not load molecule.
Could not load molecule from /home

6hmu


[13:10:28] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:29] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:29] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:30] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:31] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:32] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:33] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:34] UFFTYPER: Unrecognized charge state for atom: 17
[13:10:35] UFFTYPER: Unrecognized charge state for atom: 17


5p9m
5j9z
2xyp
6vgy
2xyg
5ac0
3sn8
not all poses were predicted for 3sn8, moving to next
1ghm


[13:12:09] UFFTYPER: Unrecognized charge state for atom: 5


3orz


[13:12:11] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:12] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:13] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:14] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:15] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:17] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:18] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:19] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:19] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:20] UFFTYPER: Unrecognized charge state for atom: 5


5maj


[13:12:20] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:21] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:22] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:23] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:23] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:24] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:25] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:26] UFFTYPER: Unrecognized charge state for atom: 5
[13:12:26] UFFTYPER: Unrecognized charge state for atom: 5
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_processed.pdb with error: Could not load molecule.


[13:12:27] UFFTYPER: Unrecognized charge state for atom: 11


5orl


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_processed.pdb with error: Could not load molecule.


[13:12:28] UFFTYPER: Unrecognized charge state for atom: 11
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_processed.pdb with error: Could not load molecule.


[13:12:28] UFFTYPER: Unrecognized charge state for atom: 11
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_processed.pdb with error: Could not load molecule.


[13:12:28] UFFTYPER: Unrecognized charge state for atom: 11
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5orl_processed.pdb with error: Could not load molecule.


[13:12:28] UFFTYPER: Unrecognized charge state for atom: 11
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5orl/5or

5dzj
5qir
6puh






5u17
























3hhi
not all poses were predicted for 3hhi, moving to next
3h0e






















5qh8
4cde
5xhr
not all poses were predicted for 5xhr, moving to next
2g8e
not all poses were predicted for 2g8e, moving to next
3q7z
not all poses were predicted for 3q7z, moving to next
3ovx
4an1


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.


3zmh


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/3zmh/3zmh_processed.pdb with error: Could not load molecule.
Could not load molecule from /home

5d11
6yq2
6iuo
not all poses were predicted for 6iuo, moving to next
4amz


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.


5l6p


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5l6p/5l6p_processed.pdb with error: Could not load molecule.
Could not load molecule from /home

1ewl
not all poses were predicted for 1ewl, moving to next
2z9w
6ezp
2q9m


[13:15:29] UFFTYPER: Unrecognized charge state for atom: 5


3m2z


[13:15:30] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:30] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:30] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:31] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:31] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:31] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:32] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:32] UFFTYPER: Unrecognized charge state for atom: 5
[13:15:33] UFFTYPER: Unrecognized charge state for atom: 5




5mjb






















1xrl




5v6v
6j4p
not all poses were predicted for 6j4p, moving to next
1hv7






3k7f
not all poses were predicted for 3k7f, moving to next
3t9t
not all poses were predicted for 3t9t, moving to next
6aff






















4bs5
4amy


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.


1ewm
not all poses were predicted for 1ewm, moving to next
4cl8


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/4cl8/4cl8_processed.pdb with error: Could not load molecule.
Could not load molecule from /home

6wp8


[13:17:28] ERROR: *Conversion failed*

[13:17:29] ERROR: *Conversion failed*

[13:17:30] ERROR: *Conversion failed*

[13:17:30] ERROR: *Conversion failed*

[13:17:30] ERROR: *Conversion failed*

[13:17:31] ERROR: *Conversion failed*

[13:17:32] ERROR: *Conversion failed*

[13:17:32] ERROR: *Conversion failed*

[13:17:32] ERROR: *Conversion failed*

[13:17:33] ERROR: *Conversion failed*

[13:17:33] ERROR: *Conversion failed*

[13:17:34] ERROR: *Conversion failed*



5ty2
not all poses were predicted for 5ty2, moving to next
5o3y
3oj8






3kwb






















5ac2


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5ac2/5ac2_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5ac2/5ac2_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5ac2/5ac2_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5ac2/5ac2_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5ac2/5ac2_processed.pdb with error: Could not load molecule.
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/5ac2/5ac2_processed.pdb with error: Could not load molecule.
Could not load molecule from /home

1zpb






















6c7z
1fj8
4wsk
6q6l
not all poses were predicted for 6q6l, moving to next
3mzd
not all poses were predicted for 3mzd, moving to next
5vnp
2op9


[13:19:54] UFFTYPER: Unrecognized atom type: B_ (22)


4lqm


[13:19:56] UFFTYPER: Unrecognized atom type: B_ (22)
[13:19:58] UFFTYPER: Unrecognized atom type: B_ (22)
[13:19:59] UFFTYPER: Unrecognized atom type: B_ (22)
[13:20:00] UFFTYPER: Unrecognized atom type: B_ (22)
[13:20:01] UFFTYPER: Unrecognized atom type: B_ (22)
[13:20:02] UFFTYPER: Unrecognized atom type: B_ (22)
[13:20:03] UFFTYPER: Unrecognized atom type: B_ (22)
[13:20:04] UFFTYPER: Unrecognized atom type: B_ (22)
[13:20:06] UFFTYPER: Unrecognized atom type: B_ (22)


5uxz
2wj1
3qsd
not all poses were predicted for 3qsd, moving to next
6hn2


Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/6hn2/6hn2_processed.pdb with error: Could not load molecule.
[13:20:47] UFFTYPER: Unrecognized charge state for atom: 13
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/6hn2/6hn2_processed.pdb with error: Could not load molecule.
[13:20:48] UFFTYPER: Unrecognized charge state for atom: 13
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/6hn2/6hn2_processed.pdb with error: Could not load molecule.
[13:20:48] UFFTYPER: Unrecognized charge state for atom: 13
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/6hn2/6hn2_processed.pdb with error: Could not load molecule.
[13:20:48] UFFTYPER: Unrecognized charge state for atom: 13
Could not load molecule from /home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/data/CSKDE95_datamol_af2/6hn2/6hn2_proces

1td2
5e1i
3svv
4cdf
2dw5
6eej
not all poses were predicted for 6eej, moving to next
5d6e
4jxg
not all poses were predicted for 4jxg, moving to next
3pdf
5rep
1nqc
4qgv


[13:23:16] UFFTYPER: Unrecognized charge state for atom: 5


3otu


[13:23:20] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:21] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:22] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:23] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:24] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:25] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:26] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:27] UFFTYPER: Unrecognized charge state for atom: 5
[13:23:28] UFFTYPER: Unrecognized charge state for atom: 5


6j7b
not all poses were predicted for 6j7b, moving to next
4x6j


In [6]:
all_df

Unnamed: 0,file,molecule,mol_pred_loaded,mol_true_loaded,mol_cond_loaded,sanitization,inchi_convertible,all_atoms_connected,molecular_formula,molecular_bonds,...,most_extreme_sum_radii_scaled_waters,most_extreme_relative_distance_waters,most_extreme_clash_waters,volume_overlap_protein,volume_overlap_organic_cofactors,volume_overlap_inorganic_cofactors,volume_overlap_waters,rmsd,kabsch_rmsd,centroid_distance
0,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4cdc_rank4,True,True,True,True,True,True,True,True,...,,,,,,,,,,
1,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4cdc_rank10,True,True,True,True,True,True,True,True,...,,,,,,,,,,
2,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4cdc_rank6,True,True,True,True,True,True,True,True,...,,,,,,,,,,
3,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4cdc_rank2,True,True,True,True,True,True,True,True,...,,,,,,,,,,
4,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4cdc_rank1,True,True,True,True,True,True,True,True,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
705,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4x6j_rank10,True,True,True,True,True,False,True,True,...,,,,,,,,,,
706,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4x6j_rank9,True,True,True,True,True,False,True,True,...,,,,,,,,,,
707,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4x6j_rank5,True,True,True,True,True,False,True,True,...,,,,,,,,,,
708,/home/ymanasa/turbo/ymanasa/opt/DiffDockL-Cov/...,4x6j_rank8,True,True,True,True,True,False,True,True,...,,,,,,,,,,


In [8]:
mean_rmsd_perc, best_gen_perc, metrics_df = score_model_eval(all_df)

  metrics_df = pd.concat([metrics_df, pd.DataFrame({'Protein': [prot],


In [11]:
metrics_df

Unnamed: 0,Protein,Best RMSD,Mean RMSD,Std Dev of RMSD,RMSD < 2 %
0,1fj8,,,,0.0
1,1ghm,,,,0.0
2,1hbj,,,,0.0
3,1hv7,,,,0.0
4,1nqc,,,,0.0
...,...,...,...,...,...
66,6hn2,,,,0.0
67,6puh,,,,0.0
68,6vgy,,,,0.0
69,6wp8,,,,0.0


In [9]:
mean_rmsd_perc

0.0

In [10]:
best_gen_perc

0.0

In [None]:
confidence_model_metrics = confidence_model_eval(all_df)

In [None]:
from scipy.stats import spearmanr

count_rank1 = 0 
rmsd_check = 2
num_gen = 10

all_df['protein'] = all_df['molecule'].str.extract(r'^(.*?)_') 
all_df['rank'] = all_df['molecule'].str.extract(r'rank(\d+)').astype(int)

sorted_df = all_df.groupby('protein', group_keys=False).apply(lambda group: group.sort_values('rank'))
group_df = sorted_df.groupby('protein')

num_prots= len(group_df.groups)

count_rank1 = 0
for prot, row_index in group_df.groups.items(): 
    rank1_rmsd = sorted_df.loc[row_index[0], 'rmsd']
    lowest_rmsd = (sorted_df.loc[row_index, 'rmsd']).min()
    count_rank1 += (rank1_rmsd == lowest_rmsd)

rank1_corrects = count_rank1/num_prots * 100 
# look at precision/recall and averages


1.8908935919761511
2.3215856519844356
5.547063965789069


  sorted_df = all_df.groupby('protein', group_keys=False).apply(lambda group: group.sort_values('rank'))
