In [1]:
# @title Install dependencies
# Colab helpers used later in the notebook: `files` for upload/download,
# `widgets` for the tabbed results view, `output` for widget support.
from google.colab import output
from google.colab import files
from google.colab import widgets

# Enabled before nglview is imported; presumably required for the third-party
# NGLView widget to render inside Colab.
output.enable_custom_widget_manager()

# NOTE(review): the nglview version is not pinned, so results may change with new releases.
!pip install nglview --quiet
import gzip
import os
import random
import re
import string
import urllib
import urllib.error
import urllib.request
from datetime import datetime
from pathlib import Path

import nglview as nv


!rm -rf /content/sample_data/
#download socket2 and DSSP
!apt-get install -q -y dssp
!wget https://raw.githubusercontent.com/woolfson-group/Socket2_colab/main/socket2_linux
!chmod u+x /content/socket2_linux

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/6.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:03[0m[2K     [91m━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.7/6.8 MB[0m [31m10.2 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━[0m [32m3.6/6.8 MB[0m [31m34.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.8/6.8 MB[0m [31m55.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6



Reading package lists...
Building dependency tree...
Reading state information...
The following additional packages will be installed:
  libcifpp-data libcifpp2
The following NEW packages will be installed:
  dssp libcifpp-data libcifpp2
0 upgraded, 3 newly installed, 0 to remove and 15 not upgraded.
Need to get 1,967 kB of archives.
After this operation, 15.0 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libcifpp-data all 2.0.5-1build1 [437 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 libcifpp2 amd64 2.0.5-1build1 [1,019 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 dssp amd64 4.0.4-1 [511 kB]
Fetched 1,967 kB in 1s (2,341 kB/s)
Preconfiguring packages ...
Selecting previously unselected package libcifpp-data.
(Reading database ... 120899 files and directories currently installed.)
Preparing to unpack .../libcifpp-data_2.0.5-1build1_all.deb ...
Unpacking libcifpp-data (2.0.5-1build1) ...
Selec

In [2]:
# @title Functions to read files
# Palette indexed by a helix's position within its coiled coil
# (used when helices are not coloured by register).
colors = [
    'red',
    'green',
    'blue',
    'yellow',
    'cyan',
    'magenta',
    'gray',
    'orange',
    'purple',
    'brown',
]

# One colour per heptad register position a-g.
heptad_colors = {
    'a': 'red',
    'b': 'orange',
    'c': 'yellow',
    'd': 'green',
    'e': 'cyan',
    'f': 'blue',
    'g': 'violet',
}

def get_rasmol_coils(file):
    """Collect coiled-coil, knob and register selections from a RasMol script.

    Only whitespace-separated lines with more than two tokens whose first
    token is ``define`` are inspected; the second token is the definition
    name and decides how the line is used:

    * name equal to ``coiled_coil`` once digits are removed: every remaining
      token (trailing commas stripped) is stored under that name in ``coils``.
    * name starting with ``knobs``: the third token, with surrounding
      parentheses stripped, is split on commas and appended to ``knobs``.
      Selections starting with ``k`` are skipped (presumably aggregate
      definitions that reference other knob groups -- TODO confirm).
    * name starting with ``register``: the third token, parentheses stripped,
      is accumulated (comma-joined) in ``register`` under the last character
      of the part of the name after the first underscore. Selections starting
      with ``r`` are skipped. Assumes such names contain an underscore followed
      by at least one character.

    Empty selections such as ``()`` are ignored instead of raising IndexError.

    Args:
        file: Path to the RasMol script.

    Returns:
        Tuple ``(coils, knobs, register)``: dict of name -> list of selection
        strings, list of lists of knob selections, and dict of register
        letter -> comma-separated selection string.
    """
    digit_pattern = r'[0-9]'
    coils={}
    knobs=[]
    register={}

    with open(file) as f:
        for line in f:
            split_line=line.split()
            if len(split_line)<=2 or split_line[0]!='define':
                continue

            name=split_line[1]

            if re.sub(digit_pattern, '', name)=='coiled_coil':
                coils[name]=[x.strip(',') for x in split_line[2:]]
                continue

            selection=split_line[2].strip('(').strip(')')
            if not selection:
                # e.g. "define knobs0 ()": nothing to record
                continue

            if name[:5]=='knobs':
                if selection[0]!='k':
                    knobs.append(selection.split(','))

            elif name[:8]=='register':
                if selection[0]!='r':
                    key=name.split('_')[1][-1]
                    if key in register:
                        register[key]+=','+selection
                    else:
                        register[key]=selection

    return coils,knobs,register

class Coil:
    """Plain record describing one helix of a coiled coil.

    Attributes are stored exactly as passed in:
    sequence, register, resi and chain.
    """

    def __init__(self,sequence,register,resi,chain):
        # Paired assignment keeps the sequence/register and resi/chain couples together.
        self.sequence,self.register=sequence,register
        self.resi,self.chain=resi,chain

def parse_socket(file:Path):
    """Parse a Socket2 short-format output file into Coil objects.

    The file is read line by line and split on whitespace:

    * A line with more than six tokens whose 4th and 5th tokens are
      ``coiled`` and ``coil`` starts a new coiled coil; its 7th token is
      parsed as the integer coiled-coil id.
    * ``extent ...``: tokens from the 6th onwards are joined and split on
      ``:`` into the residue range and the chain.
    * ``sequence <seq>``: stores the helix sequence.
    * ``register <reg>``: trims the stored sequence to the residues that have
      a register assignment and records a ``Coil`` under the current id.

    The offset of the register within the sequence is the amount of padding
    between column 9 of the register line and the register string (assumes
    the sequence text itself starts at column 9 -- TODO confirm). Trailing
    whitespace is ignored, so a final line without a newline is aligned
    correctly.

    Args:
        file: Path to the short output file.

    Returns:
        dict mapping coiled-coil id -> list of Coil, in file order.

    Raises:
        AssertionError: if the trimmed sequence and register differ in length.
    """
    coiled_coil_dict={}

    with open(file,'r') as f:
        for line in f:
            temp=line.split()
            if len(temp)<2:
                continue

            # Header line: needs at least 7 tokens because temp[6] is read.
            if len(temp)>6 and temp[3]=='coiled' and temp[4]=='coil':
                coil_id_counter=int(temp[6])
                continue

            keyword=temp[0]
            if keyword=='extent':
                seq_id,chain=''.join(temp[5:]).split(':')
            elif keyword=='sequence':
                sequence=temp[1]
            elif keyword=='register':
                #align registers
                register=temp[1]
                # Padding before the register string; rstrip() makes this
                # independent of whether the line ends with a newline.
                start=len(line[9:].rstrip())-len(register)

                sequence=sequence[start:start+len(register)]
                assert len(sequence)==len(register), 'Sequence and register lengths differ.'

                coil=Coil(sequence,register,seq_id,chain)
                coiled_coil_dict.setdefault(coil_id_counter,[]).append(coil)

    return coiled_coil_dict

In [3]:
# @title Click to upload a single PDB file or enter a 4 letter PDB-ID. { display-mode: "form" }

pdb_code = '' #@param {type:"string"}

#Download the first biological assembly
#biological_assembly=True # @param {type:"boolean"}

#Ignore stray whitespace pasted into the form field
pdb_code=pdb_code.strip()

if pdb_code:
  pdb_code=pdb_code.upper()
  print(pdb_code)
  assert len(pdb_code)==4, "Incorrect PDB code provided!"
  filename=f'{pdb_code}.pdb'

  try:
    #the context manager closes the HTTP connection once the archive has been read
    with urllib.request.urlopen(f'https://files.rcsb.org/download/{pdb_code}.pdb.gz') as response:
      pdb_bytes=gzip.decompress(response.read())
  except urllib.error.HTTPError as err:
    #Stop here: the following cells need the structure file and would otherwise
    #fail later with an unrelated-looking error.
    raise ValueError(f'PDB code does not exist. (HTTP {err.code} for {pdb_code})') from err

  #only write the file once the download and decompression succeeded
  with open(filename, 'wb') as outfile:
    outfile.write(pdb_bytes)

else:
  filename=''.join(random.choices(string.ascii_uppercase + string.digits, k=5))+'.pdb'
  uploaded = files.upload()
  assert len(list(uploaded.keys()))==1
  uploaded_name=next(iter(uploaded))

  #formatting for dssp, see https://github.com/PDB-REDO/dssp/issues/1
  #Must have HEADER and CRYST1, errors with AF2, Rosetta, etc. otherwise
  #
  #The coordinate records are filtered in Python through the same handle that
  #writes the header. Appending with a shell command while this file is still
  #open would let the buffered header overwrite the first records on close,
  #and would break on uploaded names containing spaces.
  kept_records=('ATOM','TER','HETATM','ENDMDL','MODEL')
  with open(uploaded_name, errors='replace') as source, open(filename,'w') as f:
    f.write('HEADER\n')
    f.write('CRYST1    1.000    1.000    1.000  90.00  90.00  90.00 P 1           1   \n')
    for record in source:
      if record.startswith(kept_records):
        #guarantee a line terminator even if the upload lacks a final newline
        f.write(record if record.endswith('\n') else record+'\n')
  os.remove(uploaded_name)

Saving 6g66.pdb to 6g66.pdb


In [4]:
# @title Run DSSP and Socket2.

#@markdown Packing Cutoff parameter in Ångstroms essentially represents the tightness of the knobs-into-holes interactions; the smaller it is, the more ideal the packing.
Packing_Cutoff = 7.0 # @param {type:"number"}

#@markdown Helix Extension is given to extend all alpha helical regions by X residues at each end. For example the two DSSP helices eitherside of the kink are joined, giving a single helix.
Helix_Extension = 0 # @param {type:"integer"}


filename_path=Path(filename)
rasmol=str(filename_path.with_suffix('.rasmol'))
longfile=str(filename_path.with_suffix('.long'))
shortfile=str(filename_path.with_suffix('.short'))

#run socket2 and make short files for AF2 structures
!mkdssp -i {filename} -o out.dssp
#remove extra chars - Socket can't read DSSP otherwise
!cat out.dssp | cut -c1-136 >> tmp.dssp
!socket_out="$(echo "$1"|cut -d"." -f1).short"
!echo "$socket_out"
!/content/socket2_linux -f {filename} -s tmp.dssp -c {Packing_Cutoff} -h {Helix_Extension} -r {rasmol} -o {longfile} -q > {shortfile}
!rm tmp.dssp
!rm out.dssp

coils,knobs,register=get_rasmol_coils(rasmol)
CC_dict=parse_socket(shortfile)

flat_knobs=[x for y in knobs for x in y]
assert len(flat_knobs)>0, "Coiled-coils don't have any knobs! Inspect the output files."

flat_register=','.join(register.values())
flat_register=flat_register.split(',')

#show only knobs in coiled-coil regions
knobs_in_CCs=list(set(flat_knobs).intersection(flat_register))

print('We found ' + str(len(CC_dict))+' coiled-coils!')

if len(CC_dict)>0:
  print('See below for more information.')
  tb = widgets.TabBar([str(i) for i in range(len(CC_dict))])

  #dict keys don't start with 0, need to reindex
  for i,coiled_coil in enumerate(CC_dict.values()):
    with tb.output_to(i):
      for helix in coiled_coil:
        print(helix.register+ ' ' + helix.chain + ':' + helix.resi)
        print(helix.sequence)
        print('\n')


We found 1 coiled-coils!
See below for more information.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

defgabcdefgabcdefgabcdefga A:3-28
AQAIKEVAKAIKEVAWAIKEVAQAIK


defgabcdefgabcdefgabcdefga B:3-28
AQAIKEVAKAIKEVAWAIKEVAQAIK


defgabcdefgabcdefgabcdefga C:3-28
AQAIKEVAKAIKEVAWAIKEVAQAIK


defgabcdefgabcdefgabcdefga D:3-28
AQAIKEVAKAIKEVAWAIKEVAQAIK


defgabcdefgabcdefgabcdefga E:3-28
AQAIKEVAKAIKEVAWAIKEVAQAIK


defgabcdefgabcdefgabcdefgab F:3-29
VAQAIKEVAKAIKEVAWAIKEVAQAIK


defgabcdefgabcdefgabcdefga G:3-28
AQAIKEVAKAIKEVAWAIKEVAQAIK




<IPython.core.display.Javascript object>

In [5]:
# @title NGLView
show_ligands = False # @param {type:"boolean"}
#Referenced by name from the raw-typed form field below
Knobs="Knobs"
#@markdown TRUE: show all side chains within coiled coil regions.

#@markdown KNOBS: show only side chains that act as knobs in knobs-into-holes packing.

ball_and_sticks = Knobs # @param ["True", "False", "Knobs"] {type:"raw"}
colour_by_register=True # @param {type:"boolean"}
show_only_CC=False # @param {type:"boolean"}

assert len(CC_dict)!=0, 'No coiled coils found. Check the cell above.'

#One [colour, selection] pair per heptad position
reg_colors=[[heptad_colors[name],reg.replace(',', ' or ')] for name,reg in register.items()]
#NOTE(review): _ColorScheme is a private nglview class and may change between releases.
scheme=nv.color._ColorScheme(reg_colors,'register')

view=nv.show_structure_file(filename)
view.clear(1)

#Base layers: whole protein in white, optional ligands and water/ions
representation=[{"type":"cartoon", "params":{"visible": not show_only_CC, "sele": 'protein', "color": 'white'}},
                {"type":"ball+stick", "params":{"visible": show_ligands, "sele": "not polymer and not ( protein or nucleic )"}},
                {"type":"spacefill", "params":{"visible": show_ligands,"sele": "water or ion","scale": 0.15}}]

# Color helices
for name,cc in coils.items():
  for i,helix in enumerate(cc):
    #register scheme, or one palette colour per helix (wrapping if there are more helices than colours)
    helix_color=scheme if colour_by_register else colors[i % len(colors)]
    representation.append({"type": "cartoon", "params": {"sele": helix, "color": helix_color}})

    if ball_and_sticks==True:
      representation.append({"type": "ball+stick", "params": {"sele": helix + ' and sidechainAttached', "color": helix_color}})

#Knob side chains; skipped when there are none, because "()" is not a valid selection
if ball_and_sticks==Knobs and knobs_in_CCs:
  knob_params={"sele": '(' + " or ".join(knobs_in_CCs) + ')'  + ' and sidechainAttached'}
  if colour_by_register:
    knob_params["color"]=scheme
  representation.append({"type": "ball+stick", "params": knob_params})


view.representations=representation
view

NGLWidget()

In [6]:
# @title Download your results { display-mode: "form" }

now = datetime.now()
!zip -r /content/socket2_{now.strftime("%m_%d_%H_%M")}.zip /content/
files.download(f'/content/socket2_{now.strftime("%m_%d_%H_%M")}.zip')

  adding: content/ (stored 0%)
  adding: content/.config/ (stored 0%)
  adding: content/.config/configurations/ (stored 0%)
  adding: content/.config/configurations/config_default (deflated 15%)
  adding: content/.config/logs/ (stored 0%)
  adding: content/.config/logs/2023.12.04/ (stored 0%)
  adding: content/.config/logs/2023.12.04/14.20.49.627769.log (deflated 91%)
  adding: content/.config/logs/2023.12.04/14.23.49.652015.log (deflated 58%)
  adding: content/.config/logs/2023.12.04/14.23.59.638040.log (deflated 86%)
  adding: content/.config/logs/2023.12.04/14.26.59.279282.log (deflated 57%)
  adding: content/.config/logs/2023.12.04/14.27.00.107426.log (deflated 56%)
  adding: content/.config/logs/2023.12.04/14.26.48.840108.log (deflated 58%)
  adding: content/.config/active_config (stored 0%)
  adding: content/.config/config_sentinel (stored 0%)
  adding: content/.config/.last_opt_in_prompt.yaml (stored 0%)
  adding: content/.config/default_configs.db (deflated 98%)
  adding: conte

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>