# Make SARS-CoV-2 N protein - Cytokine Complex Tasks
(Note: To be run from the `./inputs` folder using a Python kernel with PyMOL installed.)

In [None]:
!conda install -y -c conda-forge -c schrodinger pymol-bundle pandas

In [19]:
import os, shutil, sys
import pandas as pd
from itertools import product
from pymol import cmd
from random import sample

In [20]:
## List the structures available for docking (names without the ".pdb" extension)
def _list_pdb_stems(directory):
    """Return the sorted base names of the ``.pdb`` files found in ``directory``.

    Entries that do not end in ``.pdb`` are skipped, and only the trailing
    extension is removed, so a name that merely contains ".pdb" somewhere in
    the middle is left intact. Sorting makes the order independent of the
    order in which ``os.listdir`` happens to return the entries.
    """
    return sorted(
        os.path.splitext(entry)[0]
        for entry in os.listdir(directory)
        if entry.endswith('.pdb')
    )

## List N protein files
n_files = _list_pdb_stems('./N-Proteins/')

## TEMP SUBSET FOR TESTING
## NOTE: this overrides the directory listing above; remove it to process every N protein.
n_files = ['SARS-CoV-2-WA1-N', 'SARS-CoV-2-XBB-N']

## List Cytokine files
cytokine_files = _list_pdb_stems('./Cytokines/')

## TEMP SUBSET FOR TESTING
## NOTE: this overrides the directory listing above; remove it to process every cytokine.
cytokine_files = ['CXCL12alpha', 'CXCL12beta']

## Column name -> list of structure names; the key order defines the task-grid column order
files_dict = {
    'n_protein': n_files,
    'cytokine_protein': cytokine_files
}

print(files_dict)

{'n_protein': ['SARS-CoV-2-WA1-N', 'SARS-CoV-2-XBB-N'], 'cytokine_protein': ['CXCL12alpha', 'CXCL12beta']}


In [21]:
## Prepare Task DataFrame
## One row per (N protein, cytokine) pair: the Cartesian product of the lists in files_dict.
task_grid = pd.DataFrame(list(product(*files_dict.values())), columns=list(files_dict.keys()))

## Build the complex identifier "<n_protein>__<cytokine_protein>".
## `.str.replace(..., regex=False)` removes the literal text from inside each name.
## Plain `Series.replace` only substitutes cells whose ENTIRE value equals the pattern,
## so it could never shorten a name.
task_grid['complex_id'] = (
    task_grid['n_protein'].str.replace('SARS-CoV-2-N-', '', regex=False)
    + "__"
    + task_grid['cytokine_protein']
)
task_grid['n_pdb'] = task_grid['n_protein'] + ".pdb"
task_grid['cytokine_pdb'] = task_grid['cytokine_protein'] + ".pdb"

## Make Empty Columns (filled in later, one row at a time, by the setup loop)
task_grid['experiment_path'] = ''
task_grid['n_residues'] = ''
task_grid['cytokine_residues'] = ''

## Reorder Columns
task_grid = task_grid[[
    'complex_id',
    'experiment_path',
    'n_protein', 'n_pdb', 'n_residues',
    'cytokine_protein', 'cytokine_pdb', 'cytokine_residues'
    ]]

display(task_grid)

Unnamed: 0,complex_id,experiment_path,n_protein,n_pdb,n_residues,cytokine_protein,cytokine_pdb,cytokine_residues
0,SARS-CoV-2-WA1-N__CXCL12alpha,,SARS-CoV-2-WA1-N,SARS-CoV-2-WA1-N.pdb,,CXCL12alpha,CXCL12alpha.pdb,
1,SARS-CoV-2-WA1-N__CXCL12beta,,SARS-CoV-2-WA1-N,SARS-CoV-2-WA1-N.pdb,,CXCL12beta,CXCL12beta.pdb,
2,SARS-CoV-2-XBB-N__CXCL12alpha,,SARS-CoV-2-XBB-N,SARS-CoV-2-XBB-N.pdb,,CXCL12alpha,CXCL12alpha.pdb,
3,SARS-CoV-2-XBB-N__CXCL12beta,,SARS-CoV-2-XBB-N,SARS-CoV-2-XBB-N.pdb,,CXCL12beta,CXCL12beta.pdb,


In [22]:
## Define Helper Functions

## Find Random Surface Residues
def find_random_surface_residues(file, percentage = 0.25):
    """Return a sorted random subset of the surface residue numbers of a structure.

    Parameters
    ----------
    file : str
        Path of the structure file passed to ``cmd.load``.
    percentage : float
        Fraction of the detected surface residues to return. The sample size
        is ``int(n_surface_residues * percentage)``.

    Returns
    -------
    list of int
        The sampled residue numbers in ascending order.

    Raises
    ------
    ValueError
        If a sampled residue identifier cannot be converted with ``int()``, or
        if ``percentage`` asks for more residues than were found.

    Notes
    -----
    The PyMOL session is reinitialised before the function exits, whether it
    succeeds or fails, so anything else loaded in the session is discarded.
    """
    try:
        cmd.load(file)
        ## Finds atoms on the surface of a protein
        ## Logic from: https://pymolwiki.org/index.php/FindSurfaceResidues
        cutoff = 2.0  # atoms whose b value falls below this are dropped from the temporary copy
        tmpObj = cmd.get_unused_name("_tmp")
        cmd.create(tmpObj, "(all) and polymer", zoom=0)
        cmd.set("dot_solvent", 1, tmpObj)
        ## load_b=1: presumably stores each atom's computed area in its b value,
        ## as in the wiki recipe linked above; confirm against the PyMOL docs
        cmd.get_area(selection=tmpObj, load_b=1)
        cmd.remove(tmpObj + " and b < " + str(cutoff))
        cmd.select("exposed_atoms", "(all) in " + tmpObj)
        cmd.delete(tmpObj)
        ## Collect the distinct residue identifiers of the exposed atoms
        surface_residues = set()
        cmd.iterate('exposed_atoms', "surface_residues.add(resi)",
                    space={'surface_residues': surface_residues})
        ## random.sample() needs a sequence: passing a set raises TypeError on Python >= 3.11.
        ## Sorting first also gives a stable population order for seeded runs.
        population = sorted(surface_residues)
        k = int(len(population) * percentage)
        surface_residues_sample = sorted(int(x) for x in sample(population, k))
    finally:
        ## Reinitialize everything, also on failure, so the next call starts from a clean session
        cmd.reinitialize(what='everything')
    return surface_residues_sample

## Write AIR File
def write_air_file(active1, passive1, active2, passive2, segid1='A', segid2='B', output_file = "air.tbl"):
    """Write a restraint table pairing the active residues of two segments.

    For every active residue of segment 1 an ``assign`` statement is written
    whose partner list holds all active and passive residues of segment 2,
    joined by ``or``. The same is then done from segment 2 towards segment 1.
    Each statement is closed with the literal ``) 2.0 2.0 0.0``.

    Parameters
    ----------
    active1, passive1 : iterable
        Residue numbers of segment 1; each item is converted with ``int()``.
    active2, passive2 : iterable
        Residue numbers of segment 2; each item is converted with ``int()``.
    segid1, segid2 : str
        Segment identifiers written into the selections.
    output_file : str
        Path of the file to create or overwrite.

    Notes
    -----
    The second section is written with extra blank lines (after ``(`` and after
    ``or``) that the first section does not have. That layout is kept
    byte-for-byte; NOTE(review): confirm the consumer of this file is
    whitespace-tolerant before normalising it.
    """
    active1 = [int(x) for x in active1]
    passive1 = [int(x) for x in passive1]
    active2 = [int(x) for x in active2]
    passive2 = [int(x) for x in passive2]

    all1 = active1 + passive1
    all2 = active2 + passive2

    param_lines = []

    ## Segment 1 active residues -> any (active or passive) residue of segment 2
    for resi1 in active1:
        param_lines.append('assign (resi {:d} and segid {:s})'.format(resi1, segid1))
        param_lines.append('(')
        for position, resi2 in enumerate(all2, start=1):
            param_lines.append('       (resi {:d} and segid {:s})'.format(resi2, segid2))
            ## "or" goes between partners, never after the last one
            if position != len(all2):
                param_lines.append('        or')
        param_lines.append(') 2.0 2.0 0.0\n')

    ## Segment 2 active residues -> any (active or passive) residue of segment 1
    for resi2 in active2:
        param_lines.append('assign (resi {:d} and segid {:s})'.format(resi2, segid2))
        param_lines.append('(\n')
        for position, resi1 in enumerate(all1, start=1):
            param_lines.append('       (resi {:d} and segid {:s})'.format(resi1, segid1))
            if position != len(all1):
                param_lines.append('        or\n')
        param_lines.append(') 2.0 2.0 0.0\n')

    ## The context manager closes the file even if the write fails
    with open(output_file, "w") as f:
        f.write("\n".join(param_lines))

## Write run.param file
def write_run_params(ambig_tbl = None,
                     haddock_dir = "/root/haddock/haddock2.4-2021-01/",
                     n_comp = 2,
                     pdb_file_1 = "",
                     pdb_file_2 = "",
                     project_dir = "./",
                     prot_segid_1 = "A",
                     prot_segid_2 = "B",
                     run_number = 1,
                     output_file = "run.param"):
    """Write a ``KEY=value`` parameter file, one entry per line.

    Parameters
    ----------
    ambig_tbl : str or None
        When truthy, an ``AMBIG_TBL=`` entry is written as the first line;
        otherwise the entry is left out.
    haddock_dir, n_comp, pdb_file_1, pdb_file_2, project_dir,
    prot_segid_1, prot_segid_2, run_number
        Values written verbatim after ``HADDOCK_DIR=``, ``N_COMP=``,
        ``PDB_FILE1=``, ``PDB_FILE2=``, ``PROJECT_DIR=``, ``PROT_SEGID_1=``,
        ``PROT_SEGID_2=`` and ``RUN_NUMBER=`` respectively, in that order.
    output_file : str
        Path of the file to create or overwrite.

    Returns
    -------
    bool
        Always ``True`` once the file has been written.

    Notes
    -----
    Entries are joined with newlines, so the file has no trailing newline.
    """
    param_lines  = [
        f"HADDOCK_DIR={haddock_dir}",
        f"N_COMP={n_comp}",
        f"PDB_FILE1={pdb_file_1}",
        f"PDB_FILE2={pdb_file_2}",
        f"PROJECT_DIR={project_dir}",
        f"PROT_SEGID_1={prot_segid_1}",
        f"PROT_SEGID_2={prot_segid_2}",
        f"RUN_NUMBER={run_number}",
    ]
    if ambig_tbl:
        param_lines = [f"AMBIG_TBL={ambig_tbl}", *param_lines]
    ## The context manager closes the file even if the write fails
    with open(output_file, "w") as f:
        f.write("\n".join(param_lines))
    return True


In [23]:
## Setup Experiment Paths
## For every row of task_grid: create the experiment folder, copy the inputs and helper
## scripts into it, derive the restraint residues and write air.tbl / run.param.
root_path = '../cluster_tests/'

## The loop variable is called `task` (not `complex`) so the built-in `complex` type
## is not shadowed in the notebook namespace after this cell has run.
for index, task in task_grid.iterrows():
    print(f"Prepping: {task['complex_id']}...\n")
    experiment_path = root_path + task['complex_id']
    task_grid.at[index,'experiment_path'] = experiment_path

    ## Make Experiment Folder
    os.makedirs(experiment_path, exist_ok = True)

    ## Copy PDBs
    print("Copying PDB files...")
    shutil.copyfile("./N-Proteins/" + task['n_pdb'],
                    experiment_path + "/" + task['n_pdb'])
    shutil.copyfile("./Cytokines/" + task['cytokine_pdb'],
                    experiment_path + "/" + task['cytokine_pdb'])

    ## Copy ana_scripts folder
    ## dirs_exist_ok lets the cell be re-run: without it copytree raises
    ## FileExistsError as soon as the destination folder already exists.
    print("Copying ana_scripts folder...")
    shutil.copytree("../helper_scripts/ana_scripts/",
                    experiment_path + "/ana_scripts/",
                    dirs_exist_ok = True)

    ## Copy run.cns.patch file
    shutil.copyfile("../helper_scripts/run.cns.patch",
                    experiment_path + "/run.cns.patch")

    ## Copy run-docking.csh file
    shutil.copyfile("../helper_scripts/run-docking.csh",
                    experiment_path + "/run-docking.csh")

    ## Define Random Restraints
    ## percentage = 1.00 keeps every detected surface residue, so nothing is actually subsampled here.
    ## The residue lists are stored with a leading backtick (presumably to stop spreadsheet
    ## tools from reinterpreting the comma-separated numbers; confirm with the CSV consumer).
    print("Finding random N surface residues...")
    N_residues = find_random_surface_residues(file = f"./N-Proteins/{task['n_pdb']}", percentage = 1.00)
    task_grid.at[index,'n_residues'] = f"`{','.join(str(x) for x in N_residues)}"
    print("Finding random Cytokine surface residues...")
    cytokine_residues = find_random_surface_residues(file = f"./Cytokines/{task['cytokine_pdb']}", percentage = 1.00)
    task_grid.at[index,'cytokine_residues'] = f"`{','.join(str(x) for x in cytokine_residues)}"

    ## Write out AIR file (N protein is segment A, cytokine is segment B; no passive residues)
    print("Writing AIR file...")
    write_air_file(active1 = N_residues,
                   passive1 = [],
                   active2 = cytokine_residues,
                   passive2 = [],
                   segid1='A', segid2='B',
                   output_file = f"{experiment_path}/air.tbl")

    ## Make run.param file
    ## Paths inside run.param are relative ("./...") because every referenced file was
    ## placed in the experiment folder above.
    print("Writing run.param file...")
    write_run_params(ambig_tbl = "./air.tbl",
                    haddock_dir = "/root/haddock/haddock2.4-2021-01/",
                    n_comp = 2,
                    pdb_file_1 = f"./{task['n_pdb']}",
                    pdb_file_2 = f"./{task['cytokine_pdb']}",
                    project_dir = "./",
                    prot_segid_1 = "A",
                    prot_segid_2 = "B",
                    run_number = 1,
                    output_file = f"{experiment_path}/run.param")

    ## Copy cleanup Python script
    print("Copying cleanup.py script...")
    shutil.copyfile("../helper_scripts/cleanup.py",
                    experiment_path + "/cleanup.py")

    print(f"Finished Prepping: {task['complex_id']}.\n")

Prepping: SARS-CoV-2-WA1-N__CXCL12alpha...

Copying PDB files...
Copying ana_scripts folder...
Finding random N surface residues...
Finding random Cytokine surface residues...
Writing AIR file...
Writing run.param file...
Copying cleanup.py script...
Finished Prepping: SARS-CoV-2-WA1-N__CXCL12alpha.

Prepping: SARS-CoV-2-WA1-N__CXCL12beta...

Copying PDB files...
Copying ana_scripts folder...
Finding random N surface residues...
Finding random Cytokine surface residues...
Writing AIR file...
Writing run.param file...
Copying cleanup.py script...
Finished Prepping: SARS-CoV-2-WA1-N__CXCL12beta.

Prepping: SARS-CoV-2-XBB-N__CXCL12alpha...

Copying PDB files...
Copying ana_scripts folder...
Finding random N surface residues...
Finding random Cytokine surface residues...
Writing AIR file...
Writing run.param file...
Copying cleanup.py script...
Finished Prepping: SARS-CoV-2-XBB-N__CXCL12alpha.

Prepping: SARS-CoV-2-XBB-N__CXCL12beta...

Copying PDB files...
Copying ana_scripts folder...
Fi

In [14]:
## Create Analysis CSV
## os.path.join avoids the doubled separator that f'{root_path}/analyses.csv' produces
## when root_path already ends in "/"; the file written is the same.
task_grid.to_csv(os.path.join(root_path, 'analyses.csv'), index=False)