In [3]:
# @title Install dependencies
# Setup cell: imports the Colab helpers and nglview, and makes the socket2 binary executable.
from google.colab import output
from google.colab import files
import os
from pathlib import Path
import re
from google.colab import widgets
# The installs below are commented out.
# NOTE(review): `mkdssp` (called in the run cell) and `nglview` (imported below) must
# therefore already be present in the runtime - confirm how they are provisioned.
#!apt-get install dssp
#!pip install nglview --quiet
# Presumably required so the third-party nglview widget can render in Colab - TODO confirm.
output.enable_custom_widget_manager()
import nglview as nv

#download socket2
# NOTE(review): nothing in this cell actually downloads socket2; the binary is expected
# to already exist at /content/socket2/socket2 - confirm where it comes from.

# Give the current user execute permission on the socket2 binary.
!chmod u+x /content/socket2/socket2



In [4]:
# @title Functions to read files
# Palette indexed by a helix's position inside its coiled coil (used when helices are
# not coloured by register).
colors = [
    'red', 'green', 'blue', 'yellow', 'cyan',
    'magenta', 'gray', 'orange', 'purple', 'brown',
]
# One colour per heptad register letter, 'a' through 'g'.
heptad_colors = dict(zip(
    'abcdefg',
    ['red', 'orange', 'yellow', 'green', 'cyan', 'blue', 'violet'],
))

def get_rasmol_coils(file):
    """Collect coiled-coil, knob and register selections from a RasMol script.

    Only lines with at least three whitespace-separated tokens whose first token
    is ``define`` are inspected. The second token (the definition name) decides
    where the line goes:

    * name equal to ``coiled_coil`` once digits are removed: ``coils[name]`` is
      the list of all remaining tokens, each with leading/trailing commas removed.
    * name equal to ``knobs`` once every ``<digits><lowercase letter>`` run is
      removed: ``knobs[name]`` is the third token without its first and last
      character (presumably the enclosing parentheses).
    * name starting with ``register``: the third token, stripped of surrounding
      parentheses, is appended (comma-joined) to ``register[key]`` where ``key``
      is the last character of the part of the name between the first and second
      underscore. Selections that start with ``r`` are skipped, as are empty
      selections and names without a non-empty ``_<suffix>`` (the latter two
      used to raise ``IndexError``).

    Args:
        file: path of the RasMol script to read.

    Returns:
        tuple ``(coils, knobs, register)`` of dictionaries as described above.
    """
    digits = re.compile(r'[0-9]')
    knob_suffix = re.compile(r'[0-9]+[a-z]')

    coils = {}
    knobs = {}
    register = {}

    with open(file) as f:
        for line in f:
            split_line = line.split()
            if len(split_line) <= 2 or split_line[0] != 'define':
                continue
            name = split_line[1]

            if digits.sub('', name) == 'coiled_coil':
                coils[name] = [x.strip(',') for x in split_line[2:]]

            if knob_suffix.sub('', name) == 'knobs':
                knobs[name] = split_line[2][1:-1]

            if name.startswith('register'):
                reg_line = split_line[2].strip('(').strip(')')
                # Nothing to record for an empty selection such as "()"; a leading
                # 'r' is the original skip condition.
                if not reg_line or reg_line[0] == 'r':
                    continue
                name_parts = name.split('_')
                if len(name_parts) < 2 or not name_parts[1]:
                    # No "_<suffix>" to take the key from.
                    continue
                key = name_parts[1][-1]
                if key in register:
                    register[key] += ',' + reg_line
                else:
                    register[key] = reg_line

    return coils, knobs, register

class Coil:
    """Simple record holding the data of one helix of a coiled coil.

    The four constructor arguments are stored unchanged as attributes of the
    same name: ``sequence``, ``register``, ``resi`` and ``chain``.
    """

    def __init__(self, sequence, register, resi, chain):
        self.sequence, self.register = sequence, register
        self.resi, self.chain = resi, chain

def parse_socket(file:Path):
    """Parse a Socket2 text report into ``Coil`` objects grouped by coiled coil.

    The file is scanned line by line; each line is split on whitespace:

    * a line with at least seven tokens whose tokens 3 and 4 are ``coiled`` and
      ``coil`` starts a new coiled coil; token 6 is its integer id;
    * ``extent ...``: tokens from index 5 onwards are concatenated and split on
      ``:`` into the residue range and the chain;
    * ``sequence <seq>``: remembers the helix sequence;
    * ``register <reg>``: the register string is aligned under the sequence by
      its column offset after the 9-character ``register `` prefix, the
      matching slice of the sequence is taken, and a ``Coil`` is appended to
      the current coiled coil.

    Args:
        file: path of the Socket2 output to read.

    Returns:
        dict mapping coiled-coil id (int) to the list of ``Coil`` objects found
        for it, in file order.

    Raises:
        ValueError: a ``register`` line appears before a coiled-coil header,
            ``extent`` line and ``sequence`` line have all been seen.
        AssertionError: the aligned sequence slice and the register differ in
            length.
    """
    coiled_coil_dict={}
    coil_id_counter=seq_id=chain=sequence=None

    with open(file,'r') as f:
        for line in f:
            temp=line.split()

            # temp[6] is read below, so at least seven tokens are required.
            if len(temp)>6 and temp[3]=='coiled' and temp[4]=='coil':
                coil_id_counter=int(temp[6])
                continue
            if len(temp)<2:
                continue

            keyword=temp[0]
            if keyword=='extent':
                seq_id,chain=''.join(temp[5:]).split(':')
            elif keyword=='sequence':
                sequence=temp[1]
            elif keyword=='register':
                if None in (coil_id_counter,seq_id,chain,sequence):
                    raise ValueError('register line found before a coiled coil header, extent and sequence line: ' + line.strip())
                register=temp[1]
                # Column offset of the register under the sequence. Trailing
                # whitespace is stripped first so a final line without a
                # newline is aligned the same way as every other line.
                start=len(line[9:].rstrip())-len(register)

                # Slice into a new name so the full sequence is left intact.
                aligned=sequence[start:start+len(register)]
                assert len(aligned)==len(register), 'register does not fit inside the sequence: ' + line.strip()

                coiled_coil_dict.setdefault(coil_id_counter,[]).append(Coil(aligned,register,seq_id,chain))

    return coiled_coil_dict

In [11]:
# @title Upload a single PDB file. { display-mode: "form" }
#@markdown Packing Cutoff parameter in Ångstroms essentially represents the tightness of the knobs-into-holes interactions; the smaller it is, the more ideal the packing.
Packing_Cutoff = 7.1 # @param {type:"slider", min:5.5, max:8, step:0.1}

#@markdown Helix Extension is given to extend all alpha helical regions by X residues at each end. For example, the two DSSP helices either side of the kink are joined, giving a single helix.
Helix_Extension = 1 # @param [0,1,2] {type:"raw"}

# The upload is renamed to this fixed name; the later cells read the structure from it.
filename='test.pdb'
uploaded = files.upload()
uploaded_names = list(uploaded.keys())
# Exactly one structure is expected; say so instead of failing with a bare assertion.
assert len(uploaded_names)==1, 'Please upload exactly one PDB file (got ' + str(len(uploaded_names)) + ').'

os.rename(uploaded_names[0], filename)

Saving RP2_xrays_refine_106-coot-3.pdb to RP2_xrays_refine_106-coot-3.pdb


In [27]:
# @title Run DSSP and Socket2.

#check formatting for dssp, see https://github.com/PDB-REDO/dssp/issues/1
#also issue with HEADER, e.g. ISAMBARD or AF2

with open(filename) as f:
  data=f.read()
if data.find('CRYST1')==-1:
    with open(filename,'w') as f:
      f.write('CRYST1    1.000    1.000    1.000  90.00  90.00  90.00 P 1           1   \n')
      f.write(data)

filename_path=Path(filename)
rasmol=str(filename_path.with_suffix('.rasmol'))
longfile=str(filename_path.with_suffix('.long'))
shortfile=str(filename_path.with_suffix('.short'))

#run socket2 and make short files for AF2 structures
!mkdssp -i {filename} -o out.dssp
#remove extra chars - Socket can't read DSSP otherwise
!cat out.dssp | cut -c1-136 >> tmp.dssp
!socket_out="$(echo "$1"|cut -d"." -f1).short"
!echo "$socket_out"
!/content/socket2/socket2 -f {filename} -s tmp.dssp -c {Packing_Cutoff} -h {Helix_Extension} -r {rasmol} -o {longfile} -q > {shortfile}
!rm tmp.dssp
!rm out.dssp

coils,knobs,register=get_rasmol_coils(rasmol)
CC_dict=parse_socket(shortfile)

flat_knobs=','.join(knobs.values())
flat_knobs=flat_knobs.split(',')

flat_register=','.join(register.values())
flat_register=flat_register.split(',')

#show only knobs in coiled-coil regions
knobs_in_CCs=list(set(flat_knobs).intersection(flat_register))

print('We found ' + str(len(CC_dict))+' coiled-coils!')

if len(CC_dict)>0:
  print('See below for more information.')
  tb = widgets.TabBar([str(i) for i in range(len(CC_dict))])

  #dict keys don't start with 0, need to reindex
  for i,coiled_coil in enumerate(CC_dict.values()):
    with tb.output_to(i):
      for helix in coiled_coil:
        print(helix.register+ ' ' + helix.chain + ':' + helix.resi)
        print(helix.sequence)
        print('\n')

Unknown or untrusted program in REMARK 3, trying all parsers to see if there is a match

We found 14 coiled-coils!
See below for more information.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga A:1-19
ELIAEALFAIALALLAIAK


defgabcdefgabcdefga A:34-52
LAEKVYKEAEELYKKAKKK


defgabcdefgabcdefg A:60-77
LIAQALAAIALALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga A:2-20
ELIAEALFAIALALLAIAK


defgabcdefgabcdefgabcd A:59-80
AALALIAQALAAIALALAAIAL


defgabcdefgabcdefgabcde A:116-138
AAALALIAQALALIALALAAIAL


defgabcdefgabcdefga A:173-191
ALIAQALALIAIALAAIAL


defgabcdefgabcdefga A:230-248
ALIAQALALIAIALAAIAL


defgabcdefgabcdefga A:287-305
ALIAQALALIAIALAAIAL


defgabcdefgabcdefga A:344-362
LAEIAIALAEIAIALVEIA




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefgabcd A:58-79
AALALIAQALAAIALALAAIAL


abcdefgabcdefgabcdefga A:88-109
AYKLAEEVYKKAEKLYEEAKKK


defgabcdefgabcdefgabcd A:117-138
AALALIAQALALIALALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga A:115-133
ALIAQALALIALALAAIAL


abcdefgabcdefga A:152-166
VYKKAEELYKEAEKK


defgabcdefgabcdefg A:174-191
LIAQALALIAIALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga A:172-190
ALIAQALALIAIALAAIAL


abcdefgabcdefga A:209-223
VYEKAKKVAEEAEKK


defgabcdefgabcdefg A:231-248
LIAQALALIAIALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga A:229-247
ALIAQALALIAIALAAIAL


abcdefgabcdefga A:266-280
VYEEAKKIYEEAKKK


defgabcdefgabcdefg A:288-305
LIAQALALIAIALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga A:286-304
ALIAQALALIAIALAAIAL


abcdefgabcdefga A:323-337
EVKKEAEELAKEAEQ


defgabcdefgabcdefg A:345-362
AEIAIALAEIAIALVEIA




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefgabcde B:1-23
AEALELIAEALFAIALALLAIAK


defgabcdefgabcdefga B:34-52
LAEKVYKEAEELYKKAKKK


defgabcdefgabcdefgabcd B:60-81
AALALIAQALAAIALALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefgabcd B:2-23
EALELIAEALFAIALALLAIAK


defgabcdefgabcdefgabcde B:59-81
AAALALIAQALAAIALALAAIAL


defgabcdefgabcdefgabcde B:116-138
AAALALIAQALALIALALAAIAL


defgabcdefgabcdefga B:173-191
ALIAQALALIAIALAAIAL


defgabcdefgabcdefga B:230-248
ALIAQALALIAIALAAIAL


defgabcdefgabcdefga B:287-305
ALIAQALALIAIALAAIAL


defgabcdefgabcdefga B:344-362
ALAEIAIALAEIAIALVEI




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefgabcd B:58-79
AALALIAQALAAIALALAAIAL


abcdefgabcdefgabcdefga B:88-109
AYKLAEEVYKKAEKLYEEAKKK


defgabcdefgabcdefgabcd B:117-138
AALALIAQALALIALALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga B:115-133
ALIAQALALIALALAAIAL


abcdefga B:159-166
LYKEAEKK


defgabcdefgabcdefg B:174-191
LIAQALALIAIALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga B:172-190
ALIAQALALIAIALAAIAL


abcdefgabcdefga B:209-223
VYEKAKKVAEEAEKK


defgabcdefgabcdefg B:231-248
LIAQALALIAIALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga B:229-247
ALIAQALALIAIALAAIAL


abcdefgabcdefga B:266-280
VYEEAKKIYEEAKKK


defgabcdefgabcdefg B:288-305
LIAQALALIAIALAAIAL




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefga B:286-304
ALIAQALALIAIALAAIAL


abcdefgabcdefga B:323-337
EVKKEAEELAKEAEQ


defgabcdefgabcdefg B:345-362
LAEIAIALAEIAIALVEI




<IPython.core.display.Javascript object>

In [13]:
# @title NGLView
show_ligands = False # @param {type:"boolean"}
# Lets the raw form value Knobs (without quotes) evaluate to the string "Knobs".
Knobs="Knobs"
ball_and_sticks = True # @param ["True", "False", "Knobs"] {type:"raw"}
colour_by_register=True # @param {type:"boolean"}
show_only_CC=True # @param {type:"boolean"}

assert len(CC_dict)!=0, 'No coiled coils found. Check the cell above.'

# Colour scheme mapping each heptad position's residues to its colour.
reg_colors=[]
for name,reg in register.items():
    reg_colors.append([heptad_colors[name],reg.replace(',', ' or ')])
scheme=nv.color._ColorScheme(reg_colors,'register')

view=nv.show_structure_file(filename)
view.clear(1)
# Base layers: whole protein in white (hidden when only coiled coils are shown) and
# optional ligands / waters / ions.
representation=[{"type":"cartoon", "params":{"visible": not show_only_CC, "sele": 'protein', "color": 'white'}},
                {"type":"ball+stick", "params":{"visible": show_ligands, "sele": "not polymer and not ( protein or nucleic )"}},
                {"type":"spacefill", "params":{"visible": show_ligands,"sele": "water or ion","scale": 0.15}}]

# Knob selection shared by both colouring modes; empty entries are dropped so the
# selection can never degenerate to "()".
knob_terms=[k for k in knobs_in_CCs if k]
knob_sele='(' + " or ".join(knob_terms) + ')'
show_knobs=ball_and_sticks==Knobs and len(knob_terms)>0

for cc in coils.values():
  for i,helix in enumerate(cc):
    # Register colouring uses the scheme; otherwise colour by position within the
    # coiled coil, cycling through the palette when there are more helices than colours.
    helix_color=scheme if colour_by_register else colors[i % len(colors)]
    representation.append({"type": "cartoon", "params": {"sele": helix, "color": helix_color}})

    if ball_and_sticks==True:
      representation.append({"type": "ball+stick", "params": {"sele": helix + ' and sidechainAttached', "color": helix_color}})
    elif show_knobs and not colour_by_register:
      # "Knobs" with per-helix colours: restrict the side chains of this helix to the knob residues.
      representation.append({"type": "ball+stick", "params": {"sele": helix + ' and ' + knob_sele + ' and sidechainAttached', "color": helix_color}})

if show_knobs and colour_by_register:
  # With register colouring a single representation covers every knob.
  representation.append({"type": "ball+stick", "params": {"sele": knob_sele + ' and sidechainAttached', "color": scheme}})


view.representations=representation
view

NGLWidget()