In [1]:
import pandas as pd
import prody as pry
import os
from glob import glob
import re

from modeller import *
from modeller.automodel import *

In [2]:
# Lookup table from three-letter residue names to one-letter codes
# for the 20 standard amino acids.
amino3to1dict = {
    'ALA': 'A',
    'CYS': 'C',
    'ASP': 'D',
    'GLU': 'E',
    'PHE': 'F',
    'GLY': 'G',
    'HIS': 'H',
    'ILE': 'I',
    'LYS': 'K',
    'LEU': 'L',
    'MET': 'M',
    'ASN': 'N',
    'PRO': 'P',
    'GLN': 'Q',
    'ARG': 'R',
    'SER': 'S',
    'THR': 'T',
    'VAL': 'V',
    'TRP': 'W',
    'TYR': 'Y',
}

In [3]:
def make_folder(folder_result = "results"):
    """Create `folder_result` (including missing parents) and return its path.

    Parameters
    ----------
    folder_result : str
        Directory to create. An already existing directory is left untouched.

    Returns
    -------
    str
        The same `folder_result` value that was passed in.

    Raises
    ------
    FileExistsError
        If the path exists but is not a directory.
    """
    # exist_ok=True removes the check-then-create race of a separate
    # os.path.exists() test followed by os.makedirs().
    os.makedirs(folder_result, exist_ok=True)

    return folder_result

In [4]:
def pdb_paths(pdbsdir="modelled_mutations/"):
    """Return the absolute paths of every ``*.pdb`` file directly inside `pdbsdir`.

    Parameters
    ----------
    pdbsdir : str
        Directory to search, with or without a trailing path separator.

    Returns
    -------
    list of str
        Absolute paths, in the (unspecified) order produced by ``glob``.
    """
    # os.path.join inserts the separator only when needed, so a directory
    # given without a trailing "/" no longer yields the pattern "dir*.pdb".
    pattern = os.path.join(pdbsdir, "*.pdb")
    return [os.path.abspath(match) for match in glob(pattern)]

In [5]:
def split_mutations(mut_str):
    """Split a mutation string into its three components.

    The first occurrence of the pattern
    ``<letter><digits plus optional letters><letter>`` is searched for in
    `mut_str`, case-insensitively (e.g. ``"D100A"`` or ``"d100aA"``).

    Parameters
    ----------
    mut_str : str
        Mutation string to parse.

    Returns
    -------
    list of str
        ``[wt_aa, aa_number, mut_aa]``, with the letter case of the input kept.

    Raises
    ------
    ValueError
        If `mut_str` contains no mutation pattern.
    """
    search_mut = re.search("([A-Z])([0-9]+[A-Z]*)([A-Z])",mut_str,flags=re.I)
    if search_mut is None:
        # Fail with the offending input instead of an AttributeError on None.
        raise ValueError("No mutation pattern found in %r" % (mut_str,))

    return list(search_mut.groups())

In [6]:
def aa2replace(m_splits):
    """Return the three-letter code of the wild-type residue of a mutation.

    Parameters
    ----------
    m_splits : sequence of str
        Output of split_mutations(); only the first element (the one-letter
        wild-type amino acid) is used.

    Returns
    -------
    str or None
        The key of amino3to1dict whose value equals the wild-type letter,
        or None when the letter is not in the table.
    """
    # split_mutations() matches case-insensitively, so the letter may arrive
    # in lower case while the table only holds upper-case codes.
    wt_letter = m_splits[0].upper()
    for three_letter, one_letter in amino3to1dict.items():
        if one_letter == wt_letter:
            return three_letter
    return None

## Basado en https://salilab.org/modeller/manual/node250.html

In [61]:
def do_mutate(pdbname,pdbwt,restype,pdbsdir,resnum=None,chain=None):
    """Substitute one residue of a PDB structure and write the result to disk.

    Reads the structure `pdbname` (looked up in `pdbsdir`), changes the
    selected residue to `restype`, rebuilds the coordinates of any new atoms
    from internal geometry only, and writes ``<pdbwt>.pdb``. It is a quick,
    primitive form of comparative modelling for substitution mutants; for more
    sophisticated modelling see http://salilab.org/modeller/wiki/Mutate%20model
    For insertions and deletions, follow the standard comparative modelling
    procedure instead.

    Parameters
    ----------
    pdbname : str
        Code of the input structure; also used as its alignment code.
    pdbwt : str
        Alignment code of the modified sequence and stem of the output file.
    restype : str
        Residue type passed to ``selection.mutate`` (e.g. ``'ALA'``).
    pdbsdir : str
        Directory in which Modeller looks for the input atom file.
    resnum : str, optional
        Residue number (may carry an insertion code) of the residue to
        change. When omitted, the module-level ``m_splits[1]`` is used, which
        is how this function originally obtained it.
    chain : str, optional
        Chain id of the residue to change. When omitted, the module-level
        ``chain_mutated`` is used.

    Returns
    -------
    None
        The only result is the file ``<pdbwt>.pdb``, written relative to the
        current working directory.
    """
    # Prefer explicit arguments; fall back to the notebook globals so that
    # existing four-argument calls keep working unchanged.
    if resnum is None:
        resnum = m_splits[1]
    if chain is None:
        chain = chain_mutated

    env = environ()
    env.io.atom_files_directory = [pdbsdir]

    # Read the topology library with non-hydrogen atoms only:
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    # To produce a structure with all hydrogens, use this line instead:
    #env.libs.topology.read(file='$(LIB)/top_allh.lib')
    # Read the CHARMM parameter library:
    env.libs.parameters.read(file='$(LIB)/par.lib')

    # Read the original PDB file and copy its sequence to the alignment array:
    code = pdbname
    aln = alignment(env)
    mdl = model(env, file=code)
    aln.append_model(mdl, atom_files=code, align_codes=code)
    # Remember the original chain names so they can be restored before writing
    template_chains = [c.name for c in mdl.chains]

    # Select the single residue to change, addressed as 'number:chain'
    # (the number is upper-cased because it may carry an insertion code):
    sel = selection(mdl.residues['%s:%s'% (resnum.upper(),chain)])

    # Change the selected residue to the requested residue type:
    sel.mutate(residue_type=restype)

    # Add the modified sequence to the alignment arrays (it is now the second
    # sequence in the alignment):
    aln.append_model(mdl, align_codes=pdbwt)

    # Generate molecular topology for the modified sequence:
    mdl.clear_topology()
    mdl.generate_topology(aln[pdbwt])

    # Transfer all the coordinates you can from the template structure
    # to the new model (this works even if the order of atoms in the input PDB
    # file is not standard):
    mdl.transfer_xyz(aln)

    # Build the remaining unknown coordinates:
    mdl.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
    # Restore the original chain names on the rebuilt model:
    for ct,cm in zip(template_chains,mdl.chains):
        cm.name = ct
    # Write the new model to a file:
    mdl.write(file=pdbwt+'.pdb')

# Run processing

In [8]:
# Load the two input tables.
ab_bind_original = pd.read_excel("PRO-25-393-s002.xlsx")
ab_bind_mCSM = pd.read_table("ab_bind_dataset.csv")

# Keep the rows of the HM models, which are not present in the mCSM dataframe.
is_hm_row = ab_bind_original["#PDB"].str.startswith("HM")
ab_bind_HMdata = ab_bind_original.loc[is_hm_row]

# Split "chain:mutation" once: the part before ':' becomes the new Chain
# column and the part after it replaces the Mutation column.
chain_and_mutation = ab_bind_HMdata['Mutation'].str.split(':')
ab_bind_HMdata = ab_bind_HMdata.assign(
    Chain=chain_and_mutation.str[0],
    Mutation=chain_and_mutation.str[1],
)


FileNotFoundError: [Errno 2] No such file or directory: 'PRO-25-393-s002.xlsx'

In [9]:
# Absolute paths of the PDB files found in the modelled-mutations directory.
pdbfiles = pdb_paths("modelled_mutations/")


In [10]:
# Ordenar la lista pdbfiles en orden secuencial para que coincida con el orden del dataframe
def extract_num(pdb):
    """Return the integer held in the third dot-separated field of a file name.

    Parameters
    ----------
    pdb : str
        Path to a file whose base name has at least three dot-separated
        fields, the third being an integer (e.g. ``a.b.12.pdb`` -> 12).

    Returns
    -------
    int
        The parsed number, used as the sort key for the PDB file list.
    """
    # os.path.basename honours the platform's path separator, unlike a
    # hard-coded split("/").
    return int(os.path.basename(pdb).split(".")[2])

# Sort in place, ascending by the number parsed from each file name, so the
# files follow the same order as the dataframe rows.
pdbfiles.sort(key=extract_num, reverse=False)

In [18]:
# Stack the HM rows under the mCSM rows. DataFrame.append was removed in
# pandas 2.0; pd.concat with the same sort flag produces the same frame
# (original indices kept, columns sorted).
ab_bind_mCSM_HM = pd.concat([ab_bind_mCSM, ab_bind_HMdata], sort=True)

In [20]:
# Save the combined table (index included) for the later cells to reload.
ab_bind_mCSM_HM.to_csv(path_or_buf="ab_bind_mCSM_HM.csv")

# modelando estructuras WT

In [11]:
# Reload the combined table (first CSV column is the index) and collect the
# absolute paths of the modelled PDB files.
ab_bind_mCSM_HM = pd.read_csv(filepath_or_buffer="../data/ab_bind_mCSM_HM.csv", index_col=0)
pdbfiles = pdb_paths("../data/modelled_mutations/")
# Ordenar la lista pdbfiles en orden secuencial para que coincida con el orden del dataframe
def extract_num(pdb):
    """Return the integer held in the third dot-separated field of a file name.

    Parameters
    ----------
    pdb : str
        Path to a file whose base name has at least three dot-separated
        fields, the third being an integer (e.g. ``a.b.12.pdb`` -> 12).

    Returns
    -------
    int
        The parsed number, used as the sort key for the PDB file list.
    """
    # os.path.basename honours the platform's path separator, unlike a
    # hard-coded split("/").
    return int(os.path.basename(pdb).split(".")[2])

# Sort in place, ascending by the number parsed from each file name, so the
# files follow the same order as the dataframe rows.
pdbfiles.sort(key=extract_num, reverse=False)

In [62]:
# Absolute path to the input structures, so it stays valid after the chdir below.
pdbs_dir= os.path.abspath("../data/modelled_mutations/")

# Files and dataframe rows are paired purely by position. zip() would silently
# drop the surplus of the longer one, so refuse to run on a length mismatch
# rather than model the wrong residue on the wrong structure.
if len(pdbfiles) != len(ab_bind_mCSM_HM):
    raise ValueError(
        "Number of PDB files (%d) does not match number of dataframe rows (%d)"
        % (len(pdbfiles), len(ab_bind_mCSM_HM))
    )

old_dir = os.getcwd()
try:
    contador = 0  # number of structures processed so far
    # Output files are written relative to the working directory.
    os.chdir(make_folder("wt_modells"))
    for pdb,tuples in zip(pdbfiles,ab_bind_mCSM_HM.itertuples()):

        # do_mutate() reads these two module-level names, so they must keep
        # exactly these names.
        chain_mutated = tuples.Chain
        m_splits = split_mutations(tuples.Mutation)
        # Input code (file name without ".pdb"), output name, and the
        # three-letter wild-type residue to put back.
        name = os.path.splitext(os.path.basename(pdb))[0]
        name_wt = name+".WT"
        reswt = aa2replace(m_splits)

        do_mutate(name,name_wt,reswt,pdbs_dir)
        contador +=1
finally:
    # Always return to the starting directory, even if a model fails.
    os.chdir(old_dir)

read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_68

read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_68

read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_68


getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              ato

              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_

read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_681_> topology.submodel read from topology file:        3
read_to_68

              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_

              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              atom indices         :     1     0     2     3
read_to_681_> topology.submodel read from topology file:        3

getf_______W> RTF restraint not found in the atoms list:
              residue type, indices:    13     1
              atom names           : N     -C    CA    CD
              ato