In [4]:
#!/usr/bin/env python 3
import os

PATH = "../data/Interfaces/"

In [16]:
def get_chain_lists(labeledChainsFile, mappedChains):
        with open(labeledChainsFile, 'r') as lfh:
            lfh.readline() #skips header
            
            for line in lfh:
                pdbID = line.split('\t', 1)[0]
                histoneChains = line.split('\t')[1].split(',')
                partnerChains = line.split('\t')[2].split(',')
                
                histDict = {}
                partnDict = {}
                for chain in histoneChains:
                    histDict[chain] = ''

                for chain in partnerChains:
                    partnDict[chain] = ''

                mappedChains[pdbID] = {'histone' : {}}
                mappedChains[pdbID] = {'partner' : {}}
                
                mappedChains[pdbID]['histone'] = histDict
                mappedChains[pdbID]['partner'] = partnDict

In [17]:
def map_chains(labeledChainsFile, mappingFiles, mappedChains):
    get_chain_lists(labeledChainsFile, mappedChains)
    
    for file in mappingFiles:
        pdbID = file.split('_', 1)[0]
        with open(PATH+file, 'r') as mfh:
            mfh.readline() #skips header  
            
            for line in mfh:
                lineFields = line.split('\t', 2)
                chainOriginal = lineFields[0] #Alexander's files
                chainNew = lineFields[1] #labeled_chains file
                
                if(chainNew in mappedChains[pdbID]['histone']):
                    mappedChains[pdbID]['histone'][chainNew] = chainOriginal
                elif(chainNew in mappedChains[pdbID]['partner']):
                    mappedChains[pdbID]['partner'][chainNew] = chainOriginal

In [21]:
class pdbFreq:
    def __init__(self, interfaceFiles, mappedChains):
        self.freq = {}
        self.freq['pdb'] = {}
        self.freq['pdb']['chain'] = {}
        self.freq['pdb']['chain']['residue'] = {}
            
        for file in interfaceFiles:
            pdbID = file.split('_', 1)[0]
            
            with open (PATH+file, 'r') as ifh:
                for line in ifh:
                    lineFields = line.split('\t', 7) #gets only the first 8 columns !!!
                    chain1 = lineFields[0]
                    chain2 = lineFields[4]
                
                    if((chain1 in mappedChains[pdbID]['histone'].values()) and (chain2 in mappedChains[pdbID]['partner'].values())):
                        res = lineFields[2]
                        self.addResidue(pdbID, chain1, res)
                    elif((chain1 in mappedChains[pdbID]['partner'].values()) and (chain2 in mappedChains[pdbID]['histone'].values())):
                        res = lineFields[6]
                        self.addResidue(pdbID, chain2, res)
                    
    def addResidue(self, pdb, ch, aa):
        if(pdb in self.freq):
            if(ch in self.freq[pdb]):
                if(aa in self.freq[pdb][ch]):
                    self.freq[pdb][ch][aa] += 1
                else:
                    self.freq[pdb][ch][aa] = 1
            else:
                self.freq[pdb][ch] = {aa : 1}
        else:
            self.freq[pdb] = {ch : {aa : 1}}
                              
    def printContent(self):
        for pdb in self.freq.keys():
            print(pdb)
            
            for(chain) in self.freq[pdb].keys():
                print('\t'+chain)
                
                for(res) in self.freq[pdb][chain].keys():
                    print('\t'+'\t'+res+'\t', self.freq[pdb][chain][res])
        
    #def write(self):
        #with open (file, 'w') as fh:
         #   key in self.freq.keys():
            


In [22]:
def main():
    folder = os.listdir(PATH)
    
    mappingFiles = []
    interfaceFiles = []
    for file in folder:
        if("mapping" in file):
            mappingFiles.append(file)
        elif("contacts"in file):
            interfaceFiles.append(file)
    
    labeledChainsFiles = "../data/labeled_chains.tsv"

    mappedChains = {} 
    mappedChains['PDB'] = {}
    mappedChains['PDB']['type'] = {}
    mappedChains['PDB']['type']['chain'] = {}
    
    map_chains(labeledChainsFiles, mappingFiles, mappedChains)
    
    result = pdbFreq(interfaceFiles, mappedChains)
    result.printContent()
    #result.write()

In [23]:
if __name__ == "__main__":
    main()

pdb
	chain
		residue	 {}
5e5a
	I
		23	 1
		24	 1
		25	 2
		54	 5
		57	 27
		58	 31
		61	 8
		62	 51
		65	 23
		66	 3
		91	 12
		93	 13
		94	 7
	J
		42	 14
		44	 1
		45	 83
		46	 36
		47	 2
		48	 13
		49	 3
		103	 10
		104	 18
		107	 36
		111	 41
		114	 5
5o9g
	A
		81	 6
	B
		17	 4
		18	 20
		19	 31
		20	 41
		21	 9
5x0y
	A
		81	 27
		83	 6
	B
		15	 5
		16	 25
		17	 87
		18	 71
		19	 5
		20	 18
6c0w
	A
		79	 2
		80	 51
		81	 8
		82	 29
		83	 22
	B
		24	 15
		75	 1
		78	 4
		80	 12
	D
		93	 10
		123	 1
5hq2
	D
		43	 1
		44	 13
		45	 14
		46	 1
		47	 5
		48	 1
4r8p
	A
		76	 15
		77	 34
		78	 17
		79	 8
		80	 11
	B
		21	 4
		23	 23
		74	 10
	C
		57	 5
		61	 26
		64	 11
		65	 8
		68	 30
		69	 4
		71	 1
		72	 15
		88	 1
		89	 24
		90	 31
		91	 21
		92	 29
		93	 6
		118	 10
		119	 21
	D
		96	 2
		102	 37
		105	 6
		103	 23
		106	 15
	E
		76	 17
		77	 30
		78	 13
		79	 5
		80	 7
	F
		21	 7
	G
		57	 4
		61	 25
		64	 8
		65	 7
		68	 15
		69	 2
		71	 3
		72	 18
		89	 15
		90	 21
		91	 17
		92	 23