In [1]:
#!/usr/bin/env python3
import os

# Directory holding the per-structure "mapping" and "contacts" files that
# main() scans. Keep the trailing slash: file names are appended to this
# prefix when the files are opened.
PATH = "../data/Interfaces/"

In [2]:
def get_chain_lists(labeledChainsFile, mappedChains):
    """Register the histone and partner chains of every labeled structure.

    Reads the tab-separated file ``labeledChainsFile``; its first line is
    treated as a header and skipped. In every following row, column 0 is the
    PDB ID, column 1 a comma-separated list of histone chains and column 2 a
    comma-separated list of partner chains. Further columns are ignored and
    blank lines are skipped.

    Args:
        labeledChainsFile: Path of the labeled-chains file.
        mappedChains: Dict updated in place. For each row,
            ``mappedChains[pdbID]`` is (re)assigned to
            ``{'histone': {chain: ''}, 'partner': {chain: ''}}``. The empty
            strings are placeholders that map_chains() later replaces with
            the corresponding original chain IDs.

    Returns:
        None.

    Raises:
        IndexError: If a non-blank row has fewer than three columns.
    """
    with open(labeledChainsFile, 'r') as lfh:
        lfh.readline()  # skip the header row

        for line in lfh:
            # Drop the line terminator first; otherwise it ends up inside the
            # last chain ID whenever the partner column is the last column.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file

            fields = line.split('\t')
            pdbID = fields[0]
            histoneChains = fields[1].split(',')
            partnerChains = fields[2].split(',')

            mappedChains[pdbID] = {
                'histone': dict.fromkeys(histoneChains, ''),
                'partner': dict.fromkeys(partnerChains, ''),
            }

In [3]:
def map_chains(labeledChainsFile, mappingFiles, mappedChains):
    """Fill ``mappedChains`` with the original chain ID of each labeled chain.

    First calls get_chain_lists() to register every labeled chain with an
    empty placeholder, then reads each mapping file and stores, for every
    labeled chain it mentions, the chain ID from the file's first column.

    Each mapping file is tab-separated with a header line (skipped). Column 0
    is the "original" chain ID (the original author's note: "Alexander's
    files") and column 1 is the chain ID used in the labeled-chains file.
    Rows whose column 1 is neither a registered histone nor a registered
    partner chain are ignored, as are blank lines.

    Args:
        labeledChainsFile: Path passed through to get_chain_lists().
        mappingFiles: File names (relative to ``PATH``); the text before the
            first underscore is taken as the PDB ID.
        mappedChains: Dict updated in place:
            ``mappedChains[pdbID][kind][labeledChain] = originalChain`` with
            ``kind`` being ``'histone'`` or ``'partner'``.

    Returns:
        None.

    Raises:
        KeyError: If a mapping file with at least one data row belongs to a
            PDB ID that is not registered in ``mappedChains``.
        IndexError: If a non-blank row has fewer than two columns.
    """
    get_chain_lists(labeledChainsFile, mappedChains)

    for file in mappingFiles:
        pdbID = file.split('_', 1)[0]
        with open(os.path.join(PATH, file), 'r') as mfh:
            mfh.readline()  # skip the header row

            for line in mfh:
                # Strip the terminator: in a two-column file it would
                # otherwise stick to the labeled chain ID ('B\n') and the row
                # would silently never match.
                line = line.rstrip('\r\n')
                if not line.strip():
                    continue  # tolerate blank lines

                lineFields = line.split('\t', 2)
                chainOriginal = lineFields[0]  # ID in "Alexander's files"
                chainNew = lineFields[1]       # ID in the labeled_chains file

                chains = mappedChains[pdbID]
                if chainNew in chains['histone']:
                    chains['histone'][chainNew] = chainOriginal
                elif chainNew in chains['partner']:
                    chains['partner'][chainNew] = chainOriginal

In [4]:
class pdbFreq:
    """Counts how often each residue appears in histone/partner contacts.

    ``self.freq`` is a nested dict ``{pdbID: {chainPair: {residue: count}}}``
    where ``chainPair`` is the concatenation of two labeled chain IDs.
    It always starts with the placeholder entry
    ``{'pdb': {'chain': {'residue': {}}}}``, which printContent() prints
    first and which therefore acts as the header row of the output.
    """

    def __init__(self, interfaceFiles, mappedChains):
        """Read every contact file and accumulate the residue counts.

        Each file is tab-separated with a header line (skipped). Column 0 and
        column 4 hold the two chains in contact, as original chain IDs;
        column 2 / column 6 are counted as the residue of the first / second
        chain. Blank lines are skipped.

        Args:
            interfaceFiles: File names (relative to ``PATH``); the text
                before the first underscore is taken as the PDB ID.
            mappedChains: ``{pdbID: {'histone': {label: originalChain},
                'partner': {label: originalChain}}}``. It is only read.

        Raises:
            KeyError: If a file with at least one data row belongs to a PDB
                ID that is missing from ``mappedChains``.
            IndexError: If a non-blank row has too few columns.
        """
        # Placeholder entry; see the class docstring.
        self.freq = {'pdb': {'chain': {'residue': {}}}}

        # pdbID -> (histone reverse map, partner reverse map). Built once per
        # structure instead of re-listing keys()/values() for every line.
        labelMaps = {}

        for file in interfaceFiles:
            pdbID = file.split('_', 1)[0]

            with open(os.path.join(PATH, file), 'r') as ifh:
                ifh.readline()  # skip the header row
                for line in ifh:
                    # Strip the terminator so it cannot stick to column 6
                    # when that is the last column of the line.
                    line = line.rstrip('\r\n')
                    if not line.strip():
                        continue  # tolerate blank lines

                    # Only the first 8 columns are ever split out.
                    lineFields = line.split('\t', 7)

                    if pdbID not in labelMaps:
                        chains = mappedChains[pdbID]
                        labelMaps[pdbID] = (
                            self._firstLabelByChain(chains['histone']),
                            self._firstLabelByChain(chains['partner']),
                        )
                    histoneLabel, partnerLabel = labelMaps[pdbID]
                    self._countContact(pdbID, lineFields,
                                       histoneLabel, partnerLabel)

    @staticmethod
    def _firstLabelByChain(labelToChain):
        """Invert ``{label: originalChain}``, keeping the first label per chain."""
        chainToLabel = {}
        for label, originalChain in labelToChain.items():
            # setdefault keeps the earliest label, matching a
            # list(values()).index(...) lookup on the same dict.
            chainToLabel.setdefault(originalChain, label)
        return chainToLabel

    def _countContact(self, pdbID, lineFields, histoneLabel, partnerLabel):
        """Classify one contact row and count the residue(s) it contributes."""
        chain1 = lineFields[0]
        chain2 = lineFields[4]

        chain1IsHistone = chain1 in histoneLabel
        chain2IsHistone = chain2 in histoneLabel
        if chain1IsHistone and chain2IsHistone:
            return  # histone-histone contacts are not counted

        chain1IsPartner = chain1 in partnerLabel
        chain2IsPartner = chain2 in partnerLabel
        # Per the original author's note, 5gse needs special handling for
        # inter-nucleosomal interactions: the partner-side residue is counted
        # as well, under the reversed chain pair.
        interNucleosomal = (pdbID == "5gse")

        if chain1IsHistone and chain2IsPartner:
            histone = histoneLabel[chain1]
            partner = partnerLabel[chain2]
            self.addResidue(pdbID, histone + partner, lineFields[2])
            if interNucleosomal:
                self.addResidue(pdbID, partner + histone, lineFields[6])
        elif chain1IsPartner and chain2IsHistone:
            histone = histoneLabel[chain2]
            partner = partnerLabel[chain1]
            self.addResidue(pdbID, histone + partner, lineFields[6])
            if interNucleosomal:
                self.addResidue(pdbID, partner + histone, lineFields[2])
        elif (not interNucleosomal) and chain1IsPartner and chain2IsPartner:
            # NOTE(review): the pair is labeled chain2 + chain1 but the
            # counted residue is column 2 (the first chain's). In the other
            # branches the first label is the chain whose residue is counted.
            # Behavior kept as-is; confirm this is intended.
            self.addResidue(pdbID,
                            partnerLabel[chain2] + partnerLabel[chain1],
                            lineFields[2])

    def addResidue(self, pdb, ch, aa):
        """Increment the count of residue ``aa`` for chain pair ``ch`` of ``pdb``.

        Missing levels of ``self.freq`` are created on demand.
        """
        residueCounts = self.freq.setdefault(pdb, {}).setdefault(ch, {})
        residueCounts[aa] = residueCounts.get(aa, 0) + 1

    def printContent(self):
        """Print one ``pdb<TAB>chain<TAB>residue<TAB> count`` row per residue.

        The count is passed to print() as a separate argument, so it is
        preceded by print's default single-space separator. Rows follow the
        insertion order of ``self.freq``.
        """
        for pdb, chains in self.freq.items():
            for chain, residues in chains.items():
                for res, count in residues.items():
                    print(pdb+'\t'+chain+'\t'+res+'\t', count)

In [5]:
def main():
    """Run the analysis: find the input files, map chains, print the counts.

    Every entry of ``PATH`` whose name contains ``"mapping"`` is treated as a
    chain-mapping file; every other entry whose name contains ``"contacts"``
    is treated as a contact (interface) file. The chain mappings are built
    with map_chains() and the per-residue contact counts are then computed
    and printed by pdbFreq.
    """
    # os.listdir() returns names in arbitrary order; sorting makes the order
    # of the printed rows the same on every run and platform.
    folder = sorted(os.listdir(PATH))

    mappingFiles = []
    interfaceFiles = []
    for file in folder:
        if "mapping" in file:
            mappingFiles.append(file)
        elif "contacts" in file:
            interfaceFiles.append(file)

    labeledChainsFiles = "../data/labeled_chains.tsv"

    # Filled in place by map_chains(). Layout:
    #   {pdbID: {'histone' | 'partner': {labeledChain: originalChain}}}
    mappedChains = {}

    map_chains(labeledChainsFiles, mappingFiles, mappedChains)

    result = pdbFreq(interfaceFiles, mappedChains)
    result.printContent()

In [6]:
# Entry point: run the analysis only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

pdb	chain	residue	 {}
5e5a	GK	23	 1
5e5a	GK	24	 1
5e5a	GK	25	 2
5e5a	GK	54	 5
5e5a	GK	57	 27
5e5a	GK	58	 31
5e5a	GK	61	 8
5e5a	GK	62	 51
5e5a	GK	65	 23
5e5a	GK	66	 3
5e5a	GK	91	 12
5e5a	GK	93	 13
5e5a	GK	94	 7
5e5a	HK	42	 14
5e5a	HK	44	 1
5e5a	HK	45	 83
5e5a	HK	46	 36
5e5a	HK	47	 2
5e5a	HK	48	 13
5e5a	HK	49	 3
5e5a	HK	103	 10
5e5a	HK	104	 18
5e5a	HK	107	 36
5e5a	HK	111	 41
5e5a	HK	114	 5
5o9g	AW	81	 6
5o9g	BW	17	 4
5o9g	BW	18	 20
5o9g	BW	19	 31
5o9g	BW	20	 41
5o9g	BW	21	 9
5x0y	AO	81	 27
5x0y	AO	83	 6
5x0y	BO	15	 5
5x0y	BO	16	 25
5x0y	BO	17	 87
5x0y	BO	18	 71
5x0y	BO	19	 5
5x0y	BO	20	 18
6c0w	AK	79	 2
6c0w	AK	80	 51
6c0w	AK	81	 8
6c0w	AK	82	 29
6c0w	AK	83	 22
6c0w	BK	24	 15
6c0w	BK	75	 1
6c0w	BK	78	 4
6c0w	BK	80	 12
6c0w	DK	93	 10
6c0w	DK	123	 1
5gse	FK	27	 2
5gse	KF	62	 2
5gse	GP	117	 3
5gse	GP	118	 4
5gse	GP	119	 1
5gse	GP	120	 9
5gse	GP	121	 11
5gse	PG	81	 28
5gse	HN	109	 1
5gse	NH	89	 1
5hq2	HK	43	 1
5hq2	HK	44	 13
5hq2	HK	45	 14
5hq2	HK	46	 1
5hq2	HK	47	 5
5hq2	HK	48	 1
4r8p	AK	76