In [85]:
from pathlib import Path
import os
import shlex
import shutil
import subprocess
import pandas as pd


# Energy terms reported by FoldX ``Stability``, each paired with its 1-based
# position (``[name, position]``), in the order they are listed here.
names_rows_stability = [
    [term, position]
    for position, term in enumerate([
        'dg',  # totalEnergy
        'backbone_hbond',
        'sidechain_hbond',
        'van_der_waals',
        'electrostatics',
        'solvation_polar',
        'solvation_hydrophobic',
        'van_der_waals_clashes',
        'entropy_sidechain',
        'entropy_mainchain',
        'sloop_entropy',
        'mloop_entropy',
        'cis_bond',
        'torsional_clash',
        'backbone_clash',
        'helix_dipole',
        'water_bridge',
        'disulfide',
        'electrostatic_kon',
        'partial_covalent_bonds',
        'energy_ionisation',
        'entropy_complex',
        'number_of_residues',
        'interface_residues',
        'interface_residues_clashing',
        'interface_residues_vdw_clashing',
        'interface_residues_bb_clashing',
    ], start=1)
]

# Complex variant: two intraclash terms at positions 3 and 4, followed by the
# stability terms above with every position shifted by 4.
names_rows_stability_complex = [
    ['intraclashes_energy_1', 3],
    ['intraclashes_energy_2', 4],
]
names_rows_stability_complex.extend(
    [term, position + 4] for term, position in names_rows_stability
)

# Names only, in the same order as the rows above.
names_stability_complex = [row[0] for row in names_rows_stability_complex]

# Wild-type / mutant column names: every term gets a suffix except
# 'number_of_residues', which appears unsuffixed in both lists.
names_stability_complex_wt = [term + '_wt' for term in names_stability_complex[:-5]]
names_stability_complex_wt += [
    'number_of_residues',
    'interface_residues_wt',
    'interface_residues_clashing_wt',
    'interface_residues_vdw_clashing_wt',
    'interface_residues_bb_clashing_wt',
]

names_stability_complex_mut = [term + '_mut' for term in names_stability_complex[:-5]]
names_stability_complex_mut += [
    'number_of_residues',
    'interface_residues_mut',
    'interface_residues_clashing_mut',
    'interface_residues_vdw_clashing_mut',
    'interface_residues_bb_clashing_mut',
]


def _export_foldxpath(path_to_export):
    # export PATH=$PATH:/path/to/folder
    if str(path_to_export) not in os.environ["PATH"]:
        os.environ["PATH"] += ":" + str(path_to_export)
        print("foldx path exported")
    else:
        print("foldx bin folder already in PATH")


def _rotabase_symlink(rotabase_path):
    # rotabase symlink
    sym_rotabase = Path("rotabase.txt")
    if not sym_rotabase.is_symlink():
        sym_rotabase.symlink_to(rotabase_path)
        print("Symlink to rotabase.txt create on working dir")
    else:
        print("rotabase.txt symlink already exist on working dir")


def read_analyse_complex(output_file):
    """Read the single result row of a FoldX ``AnalyseComplex`` output file.

    The file is parsed as tab-separated text after skipping its first 8
    lines. Column names are lower-cased with spaces replaced by underscores,
    and the ``pdb``, ``group1`` and ``group2`` columns are discarded.

    Returns
    -------
    list
        Remaining values of the one data row, in column order.

    Raises
    ------
    AssertionError
        If the table does not contain exactly one row.
    """
    table = pd.read_csv(output_file, sep='\t', index_col=False, skiprows=8)
    # Normalise headers so they can be addressed as snake_case names.
    table.columns = [label.lower().replace(' ', '_') for label in table.columns]
    assert table.shape[0] == 1
    values = table.drop(columns=['pdb', 'group1', 'group2']).iloc[0]
    return values.tolist()


def convert_features_to_differences(df, keep_mut=False):
    """Convert `_wt` and `_mut` columns into `_wt` and `_change` columns.

    For every non-object column whose name contains `_mut` and that has a
    matching `_wt` column, a new `_change` column holding ``mut - wt`` is
    produced. For names containing `secondary_structure` the difference is
    collapsed to 1 (changed) / 0 (unchanged). All other columns are passed
    through untouched, and column order is preserved.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with wildtype (`_wt`) and mutant (`_mut`) feature columns.
    keep_mut : bool, default False
        If `False`, the `_mut` columns that were converted are dropped;
        if `True`, each one is kept immediately before its `_change` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; `df` itself is not modified.
    """
    column_list = []
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    for column_name, column in df.items():
        # Non-string labels cannot carry a suffix; pass them through unchanged.
        is_mut_feature = (
            isinstance(column_name, str) and
            '_mut' in column_name and
            column_name.replace('_mut', '_wt') in df.columns and
            column.dtype != object
        )
        if not is_mut_feature:
            column_list.append(column)
            continue
        if keep_mut:
            column_list.append(column)
        new_column = column - df[column_name.replace('_mut', '_wt')]
        if 'secondary_structure' in column_name:
            new_column = new_column.apply(lambda x: 1 if x else 0)
        new_column.name = column_name.replace('_mut', '_change')
        column_list.append(new_column)
    if not column_list:
        # pd.concat raises on an empty list; a frame without columns has
        # nothing to convert.
        return df.copy()
    new_df = pd.concat(column_list, axis=1)
    return new_df

# Default folder containing the ``foldx`` binary; used when FoldX() is created
# without an explicit ``foldx_dir``.
foldx_exe = "/mnt/d/Python_projects/AbPred/libs/foldx5Linux64/"
class FoldX:
    """Thin wrapper around the ``foldx`` command-line program.

    Every public method builds a ``foldx --command=...`` invocation and runs
    it in the current working directory, addressing the structure by its bare
    file name. Callers are therefore expected to ``os.chdir`` into the folder
    that holds the PDB file before calling a method.

    Instance state:
      * ``verbose`` - the flag given to the constructor. It is only stored;
        FoldX output always goes to the log file written by ``_run``.
      * ``pdbfile`` - file name of the structure most recently handed to a
        method, used to name that log file.
    """

    def __init__(self, foldx_dir=None, verbose=True):
        """Add the FoldX folder to ``PATH`` and initialise instance state.

        Parameters
        ----------
        foldx_dir : str or pathlib.Path, optional
            Folder containing the ``foldx`` binary. Defaults to the
            module-level ``foldx_exe``.
        verbose : bool, default True
            Stored on the instance as ``self.verbose``.
        """
        self._tempdir = Path(foldx_exe if foldx_dir is None else foldx_dir)
        _export_foldxpath(self._tempdir)
        self.verbose = verbose
        self.pdbfile = None

    def _run(self, cmd, **options):
        """Run a FoldX command line and redirect its stdout to a log file.

        The log is written to ``stdout_<name>.txt`` in the working directory,
        where ``<name>`` is ``self.pdbfile`` without its 4-character
        extension, or ``foldx`` when no structure has been set yet.

        Parameters
        ----------
        cmd : str
            Full command line, e.g. ``"foldx --command=RepairPDB --pdb=x.pdb"``.
            It is tokenised with ``shlex.split`` and run without a shell.
        **options
            Extra FoldX options, appended as ``--<key> <value>``. Option names
            listed by ``foldx --help`` include ``pdb``, ``pdb-dir``,
            ``output-dir``, ``output-file``, ``rotabaseLocation``,
            ``repair_Interface`` (ALL / ONLY / NONE), ``numberOfRuns``,
            ``temperature``, ``pH`` and ``ionStrength``. Names containing a
            dash must be passed via ``**{"output-dir": ...}``.

        Raises
        ------
        subprocess.CalledProcessError
            If the command exits with a non-zero status.
        """
        args = shlex.split(cmd)
        for key, value in options.items():
            args.extend(["--" + key, str(value)])
        # Drop the ".pdb" extension; fall back to a fixed name so methods that
        # run before any structure was registered do not crash on None.
        log_stem = self.pdbfile[:-4] if self.pdbfile else "foldx"
        # The context manager closes the log even when check_call raises.
        with open("stdout_{}.txt".format(log_stem), "w") as fout:
            subprocess.check_call(args, stdout=fout)

    def _copy_models(self, pdb_stem, label):
        """Copy ``WT_<stem>_1.pdb`` / ``<stem>_1.pdb`` to labelled local names.

        The copies are named ``<stem>-<label>-wt.pdb`` and
        ``<stem>-<label>-mut.pdb``; all paths are relative to the working
        directory.
        """
        shutil.copy('WT_{}_1.pdb'.format(pdb_stem),
                    '{}-{}-wt.pdb'.format(pdb_stem, label))
        shutil.copy('{}_1.pdb'.format(pdb_stem),
                    '{}-{}-mut.pdb'.format(pdb_stem, label))

    def repair_pdb(self, pdbfile):
        """Run FoldX ``RepairPDB`` on ``pdbfile`` (addressed by file name)."""

        pdb = Path(pdbfile).absolute()
        self.pdbfile = pdb.name
        command = "foldx --command=RepairPDB --pdb={}".format(shlex.quote(pdb.name))
        self._run(command)

    def analyse_complex(self, pdb_file, partners):
        """Run FoldX ``AnalyseComplex`` and return the parsed result row.

        Parameters
        ----------
        pdb_file : str or pathlib.Path
            Structure to analyse.
        partners : str
            The two chain groups separated by an underscore, e.g. ``"HL_A"``.

        Returns
        -------
        list
            Values read by ``read_analyse_complex`` from
            ``Interaction_<name>_AC.fxout`` next to the PDB file.
        """

        pdb = Path(pdb_file).absolute()
        self.pdbfile = pdb.name
        pdb_name = pdb.name[:-4]
        chain_groups = partners.split('_')
        partner1 = chain_groups[0]
        partner2 = chain_groups[1]

        command = ("foldx --command=AnalyseComplex --pdb={} ".format(shlex.quote(pdb.name)) +
                   "--analyseComplexChains={},{} ".format(partner1, partner2))

        self._run(command)
        output_file = pdb.parent.joinpath('Interaction_%s_AC.fxout' % pdb_name)

        result = read_analyse_complex(output_file)

        return result

    def point_mutations(self, pdb_file, partners, to_mutate, mutations):
        """Run FoldX ``Pssm``.

        Parameters
        ----------
        pdb_file:
            Structure to mutate.
        partners:
            The two chain groups separated by an underscore.
        to_mutate:
            Mutation specified in the following format:
            {mutation.residue_wt}{chain_id}{residue_id}
        mutations:
            Mutant residues
        """
        pdb = Path(pdb_file).absolute()
        self.pdbfile = pdb.name
        pdb_stem = pdb.name[:-4]
        pdb_mutation = pdb_stem + '_' + to_mutate + mutations
        chain_groups = partners.split('_')
        partner1 = chain_groups[0]
        partner2 = chain_groups[1]

        command = ("foldx --command=Pssm --pdb={} ".format(shlex.quote(pdb.name)) +
                   "--analyseComplexChains={},{} ".format(partner1, partner2) +
                   "--positions={}a ".format(to_mutate) + "--aminoacids={} ".format(mutations) +
                   '--output-file={}'.format(shlex.quote(pdb_mutation)))

        self._run(command)

        # Copy foldX result to maintain a local copy
        self._copy_models(pdb_stem, to_mutate + mutations)

    def build_model(self, pdb_file, foldx_mutation):
        """Run FoldX ``BuildModel`` for one mutation and keep labelled copies.

        Parameters
        ----------
        pdb_file:
            Structure to mutate.
        foldx_mutation:
            Mutation specified in the following format:
            {mutation.residue_wt}{chain_id}{residue_id}{mutation.residue_mut}
        """

        pdb = Path(pdb_file).absolute()
        self.pdbfile = pdb.name

        mutation_file = self._get_mutation_file(pdb_file, foldx_mutation)
        command = ("foldx --command=BuildModel --pdb={} ".format(shlex.quote(pdb.name)) +
                   "--mutant-file={}".format(shlex.quote(str(mutation_file))))

        self._run(command)

        # Copy foldX result to maintain a local copy
        self._copy_models(pdb.name[:-4], foldx_mutation)

    def _get_mutation_file(self, pdb_file, foldx_mutation):
        """Write the ``individual_list`` file for one mutation and return its path.

        The file is created in the working directory as
        ``individual_list_<pdb name>_<mutation>.txt`` and contains the
        mutation followed by ``;``.

        Parameters
        ----------
        pdb_file:
            Structure the mutation applies to (only its name is used).
        foldx_mutation:
            Mutation specified in the following format:
            {mutation.residue_wt}{chain_id}{residue_id}{mutation.residue_mut}
        """
        pdb = Path(pdb_file).absolute()

        mutation_file = Path('individual_list_{}_{}.txt'.format(pdb.name[:-4], foldx_mutation))
        mutation_file.write_text('{};\n'.format(foldx_mutation))

        return mutation_file



In [80]:
# Folder with the modelled structures, and every mutant model found in it.
PDBS_DIR = Path("out_models/")
pdbs_paths = [*PDBS_DIR.glob("*mut.pdb")]

In [72]:
subprocess.DEVNULL?

In [8]:
### form 1: start the FoldX jobs with Popen so they run in parallel, then wait
procs = []
pdb = Path("VRC01.pdb").absolute()
command = "foldx --command=RepairPDB --pdb={}".format(pdb.name)
for job_id in range(2):
    # The child inherits the descriptor, so the parent's handle can be closed
    # as soon as the process has started; `with` also closes it if Popen fails.
    with open("stdout_%d.txt" % job_id, 'w') as fout:
        procs.append(subprocess.Popen(shlex.split(command), stdout=fout))

for job_id, proc in enumerate(procs):
    # Popen does not raise on failure, so report non-zero exit codes here.
    if proc.wait() != 0:
        print("job %d exited with status %d" % (job_id, proc.returncode))

In [77]:
f.name

'VRC01.G64A.mut.pdb'

In [76]:
# form 2
import os
import concurrent.futures

def run(command, log_name="foldx"):
    """Run an external command, writing its stdout to ``stdout_<log_name>.txt``.

    Parameters
    ----------
    command : str
        Command line; tokenised with ``shlex.split`` and run without a shell.
    log_name : str, default "foldx"
        Label used in the log file name. It is passed in explicitly because a
        notebook global is not reliable inside worker processes.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status.
    """
    args = shlex.split(command)
    # `with` closes the log file even when check_call raises.
    with open("stdout_{}.txt".format(log_name), "w") as fout:
        subprocess.check_call(args, stdout=fout)


def repair_pdb(pdbfile):
    """Run FoldX ``RepairPDB`` on ``pdbfile``, logging to ``stdout_<file name>.txt``.

    The structure is addressed by its bare file name, so the working
    directory must be the folder that contains it.
    """
    pdb = Path(pdbfile).absolute()
    command = ("foldx --command=RepairPDB --pdb={}".format(pdb.name))
    run(command, log_name=pdb.name)


# Collect the first ten mutant models.
pdbs_paths = list(PDBS_DIR.glob("*mut.pdb"))[:10]

CWD = os.getcwd()
try:
    # repair_pdb addresses each structure by file name, so work inside PDBS_DIR.
    os.chdir(PDBS_DIR)
    # Fan the repairs out over a pool of three worker processes.
    with concurrent.futures.ProcessPoolExecutor(max_workers=3) as executor:
        future_to_file = {executor.submit(repair_pdb, path): path for path in pdbs_paths}

        for future in concurrent.futures.as_completed(future_to_file):
            f = future_to_file[future]
            # repair_pdb returns nothing, so only failures are worth reporting.
            if future.exception() is None:
                continue
            print('%r generated an exception: %s' % (f, future.exception()))
finally:
    # Restore the original working directory even if a job failed.
    os.chdir(CWD)

KeyboardInterrupt: 

In [82]:
foldx.

In [86]:
# form 2 with foldx class: same process pool, but each job calls FoldX.repair_pdb
pdbs_paths = list(PDBS_DIR.glob("*mut.pdb"))[:10]

CWD = os.getcwd()
try:
    # FoldX.repair_pdb addresses each structure by file name, so work inside PDBS_DIR.
    os.chdir(PDBS_DIR)
    foldx = FoldX()
    # Fan the repairs out over a pool of three worker processes.
    with concurrent.futures.ProcessPoolExecutor(max_workers=3) as executor:
        future_to_file = {executor.submit(foldx.repair_pdb, path): path for path in pdbs_paths}

        for future in concurrent.futures.as_completed(future_to_file):
            f = future_to_file[future]
            # repair_pdb returns nothing, so only failures are worth reporting.
            if future.exception() is None:
                continue
            print('%r generated an exception: %s' % (f, future.exception()))
finally:
    # Restore the original working directory even if a job failed.
    os.chdir(CWD)

foldx bin folder already in PATH


In [68]:
concurrent.futures.as_completed?

# Testing the FoldX class

In [2]:
# Create the wrapper; FoldX.__init__ calls _export_foldxpath, which prints
# whether the FoldX folder had to be added to PATH.
foldx = FoldX(verbose=True)


foldx path exported


In [3]:
# FoldX.repair_pdb names its parameter `pdbfile`; passing the path positionally
# avoids the TypeError that the `pdb_file=` keyword raises.
foldx.repair_pdb("VRC01.pdb")


In [3]:
# Repair every PDB in PDBS_DIR, restricting RepairPDB to the interface.
# NOTE(review): this cell looks stale. It uses names that no visible cell
# defines (`rotabase_symlink`, `ROTABASE`) and a FoldX API (`exe=` keyword,
# `.run()` method) that the FoldX class in this notebook does not provide.
# Confirm the intended API before re-running it.
# NOTE(review): glob() is lazy and PDBS_DIR is a relative path, so iterating
# after the chdir below presumably resolves against the new working
# directory. Verify, or materialise the list before changing directory.
pdbs_to_repair = PDBS_DIR.glob("*.pdb")
try:
    os.chdir(PDBS_DIR)
    # Create the symlink to rotabase.txt in the working directory.
    # NOTE(review): the helper defined in this notebook is `_rotabase_symlink`.
    rotabase_symlink(ROTABASE)
    # NOTE(review): the next expression builds a generator and discards it.
    (PDBS_DIR.glob("*.pdb"))
    for pdb in pdbs_to_repair:
        # FoldX command-line options for this structure, keyed by option name.
        options = {"command":"RepairPDB","repair_Interface":"ONLY","pdb":str(pdb.name)}
        
        FoldX(exe="foldx",verbose=True,**options).run()
finally:
    # NOTE(review): CWD is only assigned in other cells; on a fresh kernel this
    # line raises NameError unless one of them has run first.
    os.chdir(CWD)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/vfica/Documents/GitHub/AbPred/notebooks/pdbs_to_repair'

In [4]:
subprocess.Popen?