This Jupyter notebook calculates the scores of the docked PDBs. You need to run the "run_colores.py" script to dock the PDBs before running this notebook. It also requires the "pdb_stats.csv" file produced by the "get_pdb_info.ipynb" notebook, which contains secondary-structure information for all the PDBs. The notebook can be run as soon as "run_colores.py" has started, so you may get the correct solution early. The second cell requires the results from "calculate_fsc.py". You can run this notebook and "calculate_fsc.py" iteratively while "run_colores.py" is still running.

In [113]:
import pandas as pd
from glob import glob
import os
# Show long result tables and long file names without truncation.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', 1000)


def _read_unnormalized_cc(pdb_path):
    """Return the score stored in a docked PDB file, or None if it is absent.

    The score is the last whitespace-separated token of the first line that
    contains the text "Unnormalized", converted to float and rounded to three
    decimals. None is returned when no such line exists, so the caller never
    reuses a value left over from a previously read file.
    """
    with open(pdb_path) as pdb_file:
        for line in pdb_file:
            if "Unnormalized" in line:
                return round(float(line.split()[-1]), 3)
    return None


# Collect every docked PDB from all result folders matching "00*colores_results".
pdbs = []
for d in glob("00*colores_results"):
    pdbs.extend(glob(f"{d}/*pdb"))

# One row per PDB: file name, containing folder, score.
results = []
skipped_pdbs = []
for pdb in pdbs:
    CC = _read_unnormalized_cc(pdb)
    if CC is None:
        # No "Unnormalized" line in this file; skip it rather than recording
        # the score of whichever file happened to be read before it.
        skipped_pdbs.append(pdb)
        continue
    results.append([os.path.basename(pdb), os.path.dirname(pdb), CC])

if skipped_pdbs:
    print(f"Skipped {len(skipped_pdbs)} PDB file(s) without an 'Unnormalized' line")

results_df = pd.DataFrame(results, columns=["name", "Folder", "CC"])

# Attach the per-PDB statistics (joined on the file name) and rank by score,
# using the "helix%" column from pdb_stats.csv as the tie-breaker, highest first.
pdb_stats = pd.read_csv('pdb_stats.csv')
results_merged_df = pd.merge(results_df, pdb_stats, on='name').sort_values(
    ["CC", "helix%"], ascending=False, ignore_index=True
)
results_merged_df.to_csv("colores_CC.csv", index=False, sep="\t")

# Number of distinct names once the last 17 characters are removed.
# NOTE(review): presumably this strips a fixed-length suffix added to each
# docked file name, so the count is per input structure - confirm.
len(set(results_merged_df["name"].str[:-17]))

21615

In [116]:
#Run "calculate_fsc.py" in a shell with EMAN2 activated before running the following cells

# Create the scores file if it does not exist yet, so the read below does not
# fail on a missing file. Opening in append mode never truncates existing
# content and does not depend on a POSIX shell providing `touch`.
if not os.path.isfile("resolution_scores.txt"):
    open("resolution_scores.txt", "a").close()

# The file is whitespace-separated with no header row. Its second column is
# named "Folder" here but is not loaded; the "Folder" column used further down
# comes from results_merged_df.
resolution_scores = pd.read_csv(
    'resolution_scores.txt',
    header=None,
    sep=r"\s+",  # raw string: "\s" is an invalid escape in a normal literal
    names=["name", "Folder", "Res_FSC05", "Res_FSC02"],
    usecols=["name", "Res_FSC05", "Res_FSC02"],
)

# Keep only PDBs that have resolution scores, sorted ascending by Res_FSC02
# and then Res_FSC05.
# NOTE(review): the join key is the file name alone; if the same file name can
# occur in more than one result folder, rows would be cross-matched - confirm
# that names are unique across folders.
results_merged_res_df = pd.merge(
    results_merged_df, resolution_scores, on='name', how="inner"
).sort_values(["Res_FSC02", "Res_FSC05"], ignore_index=True)

# Replace the bare file name with "<Folder>/<name>" and drop the now redundant
# "Folder" column before writing the table.
results_merged_res_df["name"] = results_merged_res_df["Folder"] + "/" + results_merged_res_df["name"]
results_merged_res_df = results_merged_res_df.drop(labels=["Folder"], axis=1)
results_merged_res_df.to_csv("colores_CC_wRes.csv", index=False, sep="\t")
len(results_merged_res_df)

64080