In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import pickle
import sys

# Parent of the current working directory; added to the import path so that the
# AbPred package can be imported from there.
ABPRED_DIR = Path.cwd().parent
# sys.path holds strings, so the membership test must use the string form.
# Testing the Path object itself never matches and would append a duplicate
# entry every time this cell is re-run.
if str(ABPRED_DIR) not in sys.path:
    sys.path.append(str(ABPRED_DIR))
    
from AbPred.interface_contacts import InterfaceContacts

In [2]:
# Input locations: the data directory under ABPRED_DIR and the folder
# (relative to the working directory) holding the PDB files to process.
DATA = ABPRED_DIR.joinpath("data")
PDBS_DIR = Path("pdbsWT_to_repair/")

# Input tables; the first CSV column of each file becomes the DataFrame index.
DF_features = pd.read_csv(DATA.joinpath("DF_wildtype_features.csv"), index_col=0)
ab_bind_mCSM_HM = pd.read_csv(DATA.joinpath("ab_bind_mCSM_HM.csv"), index_col=0)

In [13]:
# Helper used to sort structure names by their embedded number.
def extract_num(pdb):
    """Return, as an int, the third dot-separated field of ``pdb``.

    For example ``"1AK4.mut.6.H487A_Repair.clean"`` gives ``6``.
    """
    fields = pdb.split(".")
    return int(fields[2])

def sort_df(df, column_idx, key):
    '''Return ``df`` with its rows reordered by one column using a custom key.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to sort; it is not modified.
    column_idx : int
        Position (0-based) of the column whose values drive the ordering.
    key : callable
        Function applied to each value of that column; rows are ordered by
        the values it returns (ascending, ties keep their original order).

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` in sorted order, keeping their original index labels.
    '''
    # The .ix indexer is deprecated and was removed in pandas 1.0. .iloc makes
    # the intended positional access explicit and also works when the index is
    # not a plain 0..n-1 range (where .ix[order] would look up labels instead).
    values = df.iloc[:, column_idx].tolist()
    order = sorted(range(len(values)), key=lambda j: key(values[j]))
    return df.iloc[order]

# Move the index into a regular column, order the rows by the number embedded
# in each name, then restore that column as the index (needed for the later merge).
DF_features_index = DF_features.reset_index()
DF_features_index_sorted = sort_df(DF_features_index, 0, extract_num).set_index("index")

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if sys.path[0] == '':


In [3]:
# Paths of the repaired/cleaned PDB files found in PDBS_DIR.
repair_clean_pdbs = list(PDBS_DIR.glob("*Repair.clean.pdb"))
# Third dot-separated field of each file name, as an int.
ind_pdb_problem = [int(pdb_path.name.split(".")[2]) for pdb_path in repair_clean_pdbs]
# NOTE(review): these numbers are used as row POSITIONS in ab_bind_mCSM_HM --
# confirm that the number in the file name matches the row order of that table.
partner_pdb_problem = ab_bind_mCSM_HM["Partners(A_B)"].iloc[ind_pdb_problem]

# Reanálisis con Arpeggio de las estructuras problemáticas optimizadas con FoldX.

In [4]:
# Partner of pdbs with interface problem
# Each repaired PDB path is paired with the "Partners(A_B)" value selected for it
# in the previous cell, and processed with the InterfaceContacts helper.
for pdb,partner_pdbs in zip(repair_clean_pdbs,partner_pdb_problem):
    
    ic = InterfaceContacts(pdb)
    print("Running arpeggio")
    # presumably runs the external Arpeggio tool on this structure -- TODO confirm
    ic.run_arpeggio()
    # Restrict the results to the contacts between the given partners.
    ic.get_ic(partners=partner_pdbs)
    print("Finish getting interface contact\n")
    # Per the cell output, this step reports creating a .pml file per structure.
    ic.get_pymol_contacts()
    print("END")

Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file 1AK4.mut.6.H487A_Repair.clean.pml
Drew 324 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file 1BJ1.mut.21.H86A_Repair.clean.pml
Drew 770 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file 1BJ1.mut.22.H90A_Repair.clean.pml
Drew 746 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file 1BJ1.mut.26.K48A_Repair.clean.pml
Drew 768 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file 1BJ1.mut.31.Q87A_Repair.clean.pml
Drew 772 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file 1BJ1.mut.32.Q89A_Repair.clean.pml
Drew 763 total contacts.
END
Running arpeggio
Getting only interface 

Drew 656 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file HM_2NYY.mut.609.R903A_Repair.clean.pml
Drew 656 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file HM_2NYY.mut.610.S954A_Repair.clean.pml
Drew 636 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file HM_2NYY.mut.611.T923A_Repair.clean.pml
Drew 699 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file HM_2NYY.mut.612.K30R_Repair.clean.pml
Drew 680 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file HM_2NYY.mut.613.M34Q_Repair.clean.pml
Drew 701 total contacts.
END
Running arpeggio
Getting only interface contacts
Finish getting interface contact

Creating file HM_2NYY.mut.614.Y31D_Repair.clean.pml
Drew 618 tota

# Creando el dataframe final de estructuras mutadas (originales de AB-Bind y mCSM)

In [5]:
# For every "*.interface_contacts" file in PDBS_DIR, sum each contact column
# over all rows and remember the structure name the totals belong to.
interface_sifts = []
pdbs_names = []
for paths in PDBS_DIR.glob("*.interface_contacts"):
    # Drop the trailing ".interface_contacts" (19 characters) from the file name.
    name = paths.name[:-len(".interface_contacts")]
    contact_data = pd.read_table(paths)
    # Identifier columns are removed so that only the contact columns are summed.
    interface_sift_sum = contact_data.drop(
        columns=["atom_bgn", "atom_end", "interacting_entities"]
    ).sum()

    pdbs_names.append(name)
    interface_sifts.append(interface_sift_sum.values)

In [8]:
# Column labels come from the last contact table read in the loop cell above
# (contact_data is the variable left over from that loop).
col_names = contact_data.drop(
    columns=["atom_bgn", "atom_end", "interacting_entities"]
).columns
# One row of summed contact counts per structure, indexed by structure name.
DF_features_repaired = pd.DataFrame(
    interface_sifts,
    index=pdbs_names,
    columns=col_names,
)
DF_features_repaired.to_csv(DATA.joinpath("DF_wildtype_repared.csv"))
DF_features_repaired.head()

Unnamed: 0,clash,covalent,vdw_clash,vdw,proximal,hbond,weak_hbond,xbond,ionic,metal_complex,aromatic,hydrophobic,carbonyl,polar,weak_polar
1AK4.mut.6.H487A_Repair.clean,0,0,9,5,310,7,4,0,0,0,0,31,0,7,3
1BJ1.mut.21.H86A_Repair.clean,0,0,16,8,746,11,10,0,0,0,15,25,1,12,11
1BJ1.mut.22.H90A_Repair.clean,0,0,16,4,726,11,10,0,0,0,9,29,1,12,10
1BJ1.mut.26.K48A_Repair.clean,0,2,24,10,732,11,11,0,0,0,15,26,1,13,13
1BJ1.mut.31.Q87A_Repair.clean,0,0,21,10,741,11,10,0,0,0,17,30,1,12,13


In [19]:
# Load the pickled collection of PDB paths that had interface problems.
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# this with a pickle file that comes from a trusted source.

# The context manager closes the file handle as soon as loading finishes.
with open("pdbsWT_to_repair.pkl", "rb") as in_pickle:
    pdbs_with_problem = pickle.load(in_pickle)
# Keep the part after the last "/" and drop its final four characters
# (presumably the ".pdb" extension -- verify against the pickled paths).
pdb_names_problems = [names.split("/")[-1][:-4] for names in pdbs_with_problem]

In [21]:
# Keep only the rows whose index label is NOT one of the problem structures.
DF_drop_pdbs = DF_features_index_sorted.loc[
    ~DF_features_index_sorted.index.isin(pdb_names_problems)
]

In [36]:
# Stack the untouched rows and the repaired rows; the combined frame is
# re-sorted in a later cell to restore the original order.
DF_final_wildtype = pd.concat([DF_drop_pdbs, DF_features_repaired], axis=0)

In [37]:
# Turn the index (structure names) into a regular column so it can be sorted on.
DF_final_wildtype_resetindex = DF_final_wildtype.reset_index(drop=False)

In [38]:
# Order the combined rows by the number embedded in each structure name, then
# make the "index" column the index again.
DF_final_wildtype_resetindex_sorted = sort_df(
    DF_final_wildtype_resetindex, 0, extract_num
).set_index("index")

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if __name__ == '__main__':
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  if sys.path[0] == '':


In [40]:
# The ddG signs were kept as they are in the source table.
# The values are assigned positionally (.values discards the index), so this
# relies on both frames having the same number of rows in the same order.
# NOTE(review): confirm that ab_bind_mCSM_HM rows follow the same ordering.
DF_final_wildtype_resetindex_sorted["ddG(kcal/mol)"] = ab_bind_mCSM_HM["ddG(kcal/mol)"].values

In [42]:
# Save the final wild-type feature table (including the ddG column) to the data directory.
DF_final_wildtype_resetindex_sorted.to_csv(DATA.joinpath("DF_wildtype_features_final.csv"))

# Dataframe final

In [46]:
# Load the mutated-structure feature table; its first CSV column becomes the index.
DF_mutated_features_final = pd.read_csv(
    DATA.joinpath("DF_mutated_features_final.csv"),
    index_col=0,
)


In [47]:
# Stack the wild-type rows on top of the mutated rows to form the full dataset.
DF_sabpred_final = pd.concat(
    [DF_final_wildtype_resetindex_sorted, DF_mutated_features_final],
    axis=0,
)

In [51]:
# Write the combined wild-type + mutated table to the data directory.
DF_sabpred_final.to_csv(DATA.joinpath("DF_AbPred_final.csv"))