In [4]:
#!/usr/bin/env python 3
import os

PATH = "../data/Interfaces/"

In [5]:
def get_chain_lists(labeledChainsFile, mappedChains):
        with open(labeledChainsFile, 'r') as lfh:
            lfh.readline() #skips header
            
            for line in lfh:
                pdbID = line.split('\t', 1)[0]
                histoneChains = line.split('\t')[1].split(',')
                partnerChains = line.split('\t')[2].split(',')
                
                histDict = {}
                partnDict = {}
                for chain in histoneChains:
                    histDict[chain] = ''

                for chain in partnerChains:
                    partnDict[chain] = ''

                mappedChains[pdbID] = {'histone' : {}}
                mappedChains[pdbID] = {'partner' : {}}
                
                mappedChains[pdbID]['histone'] = histDict
                mappedChains[pdbID]['partner'] = partnDict

In [6]:
def map_chains(labeledChainsFile, mappingFiles, mappedChains):
    get_chain_lists(labeledChainsFile, mappedChains)
    
    for file in mappingFiles:
        pdbID = file.split('_', 1)[0]
        with open(PATH+file, 'r') as mfh:
            mfh.readline() #skips header  
            
            for line in mfh:
                lineFields = line.split('\t', 2)
                chainOriginal = lineFields[0] #Alexander's files
                chainNew = lineFields[1] #labeled_chains file
                
                if(chainNew in mappedChains[pdbID]['histone']):
                    mappedChains[pdbID]['histone'][chainNew] = chainOriginal
                elif(chainNew in mappedChains[pdbID]['partner']):
                    mappedChains[pdbID]['partner'][chainNew] = chainOriginal

In [7]:
class pdbFreq:
    def __init__(self, interfaceFiles, mappedChains):
        self.freq = {}
        self.freq['pdb'] = {}
        self.freq['pdb']['chain'] = {}
        self.freq['pdb']['chain']['residue'] = {}
            
        for file in interfaceFiles:
            pdbID = file.split('_', 1)[0]
            
            with open (PATH+file, 'r') as ifh:
                for line in ifh:
                    lineFields = line.split('\t', 7) #gets only the first 8 columns !!!
                    chain1 = lineFields[0]
                    chain2 = lineFields[4]
                
                    if((chain1 in mappedChains[pdbID]['histone'].values()) and (chain2 in mappedChains[pdbID]['partner'].values())):
                        res = lineFields[2]
                        self.addResidue(pdbID, chain1, res)
                    elif((chain1 in mappedChains[pdbID]['partner'].values()) and (chain2 in mappedChains[pdbID]['histone'].values())):
                        res = lineFields[6]
                        self.addResidue(pdbID, chain2, res)
                    
    def addResidue(self, pdb, ch, aa):
        if(pdb in self.freq):
            if(ch in self.freq[pdb]):
                if(aa in self.freq[pdb][ch]):
                    self.freq[pdb][ch][aa] += 1
                else:
                    self.freq[pdb][ch][aa] = 1
            else:
                self.freq[pdb][ch] = {aa : 1}
        else:
            self.freq[pdb] = {ch : {aa : 1}}
                              
    def printContent(self):
        for pdb in self.freq.keys():
            
            for(chain) in self.freq[pdb].keys():
                
                for(res) in self.freq[pdb][chain].keys():
                    print(pdb+'\t'+chain+'\t'+res+'\t', self.freq[pdb][chain][res])
        
    #def write(self):
        #with open (file, 'w') as fh:
         #   key in self.freq.keys():
            


In [8]:
def main():
    folder = os.listdir(PATH)
    
    mappingFiles = []
    interfaceFiles = []
    for file in folder:
        if("mapping" in file):
            mappingFiles.append(file)
        elif("contacts"in file):
            interfaceFiles.append(file)
    
    labeledChainsFiles = "../data/labeled_chains.tsv"

    mappedChains = {} 
    mappedChains['PDB'] = {}
    mappedChains['PDB']['type'] = {}
    mappedChains['PDB']['type']['chain'] = {}
    
    map_chains(labeledChainsFiles, mappingFiles, mappedChains)
    
    result = pdbFreq(interfaceFiles, mappedChains)
    result.printContent()
    #result.write()

In [9]:
if __name__ == "__main__":
    main()

pdb	chain	residue	 {}
5e5a	I	23	 1
5e5a	I	24	 1
5e5a	I	25	 2
5e5a	I	54	 5
5e5a	I	57	 27
5e5a	I	58	 31
5e5a	I	61	 8
5e5a	I	62	 51
5e5a	I	65	 23
5e5a	I	66	 3
5e5a	I	91	 12
5e5a	I	93	 13
5e5a	I	94	 7
5e5a	J	42	 14
5e5a	J	44	 1
5e5a	J	45	 83
5e5a	J	46	 36
5e5a	J	47	 2
5e5a	J	48	 13
5e5a	J	49	 3
5e5a	J	103	 10
5e5a	J	104	 18
5e5a	J	107	 36
5e5a	J	111	 41
5e5a	J	114	 5
5o9g	A	81	 6
5o9g	B	17	 4
5o9g	B	18	 20
5o9g	B	19	 31
5o9g	B	20	 41
5o9g	B	21	 9
5x0y	A	81	 27
5x0y	A	83	 6
5x0y	B	15	 5
5x0y	B	16	 25
5x0y	B	17	 87
5x0y	B	18	 71
5x0y	B	19	 5
5x0y	B	20	 18
6c0w	A	79	 2
6c0w	A	80	 51
6c0w	A	81	 8
6c0w	A	82	 29
6c0w	A	83	 22
6c0w	B	24	 15
6c0w	B	75	 1
6c0w	B	78	 4
6c0w	B	80	 12
6c0w	D	93	 10
6c0w	D	123	 1
5hq2	D	43	 1
5hq2	D	44	 13
5hq2	D	45	 14
5hq2	D	46	 1
5hq2	D	47	 5
5hq2	D	48	 1
4r8p	A	76	 15
4r8p	A	77	 34
4r8p	A	78	 17
4r8p	A	79	 8
4r8p	A	80	 11
4r8p	B	21	 4
4r8p	B	23	 23
4r8p	B	74	 10
4r8p	C	57	 5
4r8p	C	61	 26
4r8p	C	64	 11
4r8p	C	65	 8
4r8p	C	68	 30
4r8p	C	69	 4
4r8p	C	71	 1
4r8p	C	72	