In [None]:
#@title Install dependencies
%%time
# NOTE(review): `%%time` follows the `#@title` line rather than opening the cell —
# confirm it still times the whole cell as intended.

# Biopython is needed by the FASTA upload cell (Bio.SeqIO).
!pip install biopython

import os
# Relaxation / template settings used only to decide which optional packages to install.
# With num_relax = 0 and use_templates = False, none of the conda-based installs below run.
num_relax = 0
use_amber = num_relax > 0
use_templates = False

USE_AMBER = use_amber
USE_TEMPLATES = use_templates
# Python version pinned in the mamba install commands below.
PYTHON_VERSION = "3.10"

# Each install stage is guarded by an empty marker file in the working directory
# (COLABFOLD_READY, CONDA_READY, HH_READY, AMBER_READY) so re-running the cell skips
# stages that were already executed.
# NOTE(review): os.system return codes are not checked, so a marker is written even
# if the corresponding install command failed.
if not os.path.isfile("COLABFOLD_READY"):
  print("installing colabfold...")
  # ColabFold from GitHub (without jax) together with a pinned CPU build of TensorFlow.
  os.system("pip install -q --no-warn-conflicts 'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold' 'tensorflow-cpu==2.11.0'")
  # Replace whatever jax/jaxlib is present with the pinned CUDA build.
  os.system("pip uninstall -yq jax jaxlib")
  os.system("pip install -q 'jax[cuda]==0.3.25' -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html")
  # Symlink the installed packages into the working directory.
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabfold colabfold")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/alphafold alphafold")
  os.system("touch COLABFOLD_READY")

# Mambaforge is only needed when amber relaxation or templates are requested.
if USE_AMBER or USE_TEMPLATES:
  if not os.path.isfile("CONDA_READY"):
    print("installing conda...")
    os.system("wget -qnc https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh")
    os.system("bash Mambaforge-colab-Linux-x86_64.sh -bfp /usr/local")
    os.system("mamba config --set auto_update_conda false")
    os.system("touch CONDA_READY")

# When both hhsuite and amber are needed and neither has been installed yet, install
# them in a single mamba call; otherwise install only the missing one(s) individually.
# In the mamba commands, `2>&1 1>/dev/null` discards stdout while stderr stays visible.
if USE_TEMPLATES and not os.path.isfile("HH_READY") and USE_AMBER and not os.path.isfile("AMBER_READY"):
  print("installing hhsuite and amber...")
  os.system(f"mamba install -y -q -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer 2>&1 1>/dev/null")
  os.system("touch HH_READY")
  os.system("touch AMBER_READY")
else:
  if USE_TEMPLATES and not os.path.isfile("HH_READY"):
    print("installing hhsuite...")
    os.system(f"mamba install -y -q -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 python='{PYTHON_VERSION}' 2>&1 1>/dev/null")
    os.system("touch HH_READY")
  if USE_AMBER and not os.path.isfile("AMBER_READY"):
    print("installing amber...")
    os.system(f"mamba install -y -q -c conda-forge openmm=7.7.0 python='{PYTHON_VERSION}' pdbfixer 2>&1 1>/dev/null")
    os.system("touch AMBER_READY")

In [None]:
#@title Mount Google Drive
from google.colab import drive

# Directory where Google Drive is mounted; the prediction cell copies each job's
# result folder to "/content/gdrive/My Drive/<jobname>/" underneath it.
gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

In [None]:
#@title ##Upload fasta file
from Bio import SeqIO
from google.colab import files

# Open the Colab upload widget. Each key of `uploaded` is a file name that the loop
# below reads from the current working directory.
uploaded = files.upload()

# Maps a record id (with "|" replaced by "_") to its sequence string.
# The prediction cell iterates over this dict, one job per entry.
seq_dict = dict()

for file in uploaded:
  for seq_rec in SeqIO.parse(f"./{file}", "fasta"):
    base_id = seq_rec.id.replace("|","_")
    sequence = str(seq_rec.seq)

    # An identical record seen twice is stored only once.
    if seq_dict.get(base_id) == sequence:
      continue

    # A different sequence under an already-used id must not overwrite the earlier
    # one: store it under the first free "<id>_<n>" key and tell the user.
    seq_id = base_id
    dup_n = 1
    while seq_id in seq_dict:
      seq_id = f"{base_id}_{dup_n}"
      dup_n += 1
    if seq_id != base_id:
      print(f"WARNING: record id '{base_id}' in '{file}' is already used by a different sequence; stored as '{seq_id}'")

    seq_dict[seq_id] = sequence

# Without any record the prediction cell would silently do nothing.
if not seq_dict:
  print("WARNING: no FASTA records were found in the uploaded file(s)")

In [None]:
#@title Run **AlphaFold2**
import sys
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from Bio import BiopythonDeprecationWarning
warnings.simplefilter(action='ignore', category=BiopythonDeprecationWarning)
from pathlib import Path
from colabfold.download import download_alphafold_params, default_data_dir
from colabfold.utils import setup_logging
from colabfold.batch import get_queries, run, set_model_type
from colabfold.plot import plot_msa_v2
import shutil
import os
import numpy as np
from google.colab import files
import re
import hashlib
import random
from sys import version_info
from colabfold.colabfold import plot_protein
from pathlib import Path
import matplotlib.pyplot as plt


from sys import version_info
# "<major>.<minor>" of the running interpreter; used further down to build the
# site-packages path that is added to sys.path when amber relaxation is enabled.
python_version = ".".join(str(part) for part in version_info[:2])

def add_hash(x,y):
  """Return `x` followed by "_" and the first five hex digits of the SHA-1 of `y`.

  `x` is the readable prefix (a string) and `y` is the string whose UTF-8 encoded
  bytes are hashed.
  """
  digest = hashlib.sha1(y.encode()).hexdigest()
  return "_".join((x, digest[:5]))

def check(folder):
  """Return True when `folder` does not exist yet, i.e. the name is still free."""
  return not os.path.exists(folder)

def input_features_callback(input_features):
  """Callback handed to `run`: plot the MSA when `display_images` is enabled."""
  if display_images:
    plot_msa_v2(input_features)
    plt.show()
    plt.close()

def prediction_callback(protein_obj, length,
                        prediction_result, input_features, mode):
  """Callback handed to `run`: plot unrelaxed predictions when `display_images` is enabled.

  `mode` is unpacked as `(model_name, relaxed)`; nothing is drawn for relaxed models.
  """
  model_name, relaxed = mode
  if not relaxed:
    if display_images:
      fig = plot_protein(protein_obj, Ls=length, dpi=150)
      plt.show()
      plt.close()

# The GPU probe does not depend on the job, so it runs once before the loop.
# It stays best-effort: any failure to query nvidia-smi is treated as "no K80",
# but KeyboardInterrupt / SystemExit are no longer swallowed.
try:
  K80_chk = os.popen('nvidia-smi | grep "Tesla K80" | wc -l').read()
except Exception:
  K80_chk = "0"
if "1" in K80_chk:
  print("WARNING: found GPU Tesla K80: limited to total length < 1000")
  if "TF_FORCE_UNIFIED_MEMORY" in os.environ:
    del os.environ["TF_FORCE_UNIFIED_MEMORY"]
  if "XLA_PYTHON_CLIENT_MEM_FRACTION" in os.environ:
    del os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]

# One prediction job per uploaded sequence: build a unique job folder, write the
# query CSV, run ColabFold and copy the result folder to Google Drive.
for job in seq_dict:
  query_sequence = seq_dict[job]
  jobname = job

  num_relax = 0
  template_mode = "none"

  use_amber = num_relax > 0

  # remove whitespaces
  query_sequence = "".join(query_sequence.split())

  # Strip whitespace and non-word characters from the name, then append a short
  # hash of the sequence.
  basejobname = "".join(jobname.split())
  basejobname = re.sub(r'\W+', '', basejobname)
  jobname = add_hash(basejobname, query_sequence)

  # If a local directory with this jobname already exists, append the first free "_<n>".
  if not check(jobname):
    n = 0
    while not check(f"{jobname}_{n}"): n += 1
    jobname = f"{jobname}_{n}"

  # make directory to save results
  os.makedirs(jobname, exist_ok=True)

  # save queries
  queries_path = os.path.join(jobname, f"{jobname}.csv")
  with open(queries_path, "w") as text_file:
    text_file.write(f"id,sequence\n{jobname},{query_sequence}")

  # Template handling. With template_mode = "none" (set above) only the final
  # branch is taken.
  if template_mode == "pdb70":
    use_templates = True
    custom_template_path = None
  elif template_mode == "custom":
    custom_template_path = os.path.join(jobname,f"template")
    os.makedirs(custom_template_path, exist_ok=True)
    uploaded = files.upload()
    use_templates = True
    for fn in uploaded.keys():
      os.rename(fn,os.path.join(custom_template_path,fn))
  else:
    custom_template_path = None
    use_templates = False

  print("jobname",jobname)
  print("sequence",query_sequence)
  print("length",len(query_sequence.replace(":","")))

  # MSA settings
  msa_mode = "mmseqs2_uniref"
  pair_mode = "unpaired_paired"
  a3m_file = os.path.join(jobname,f"{jobname}.a3m")

  # Model settings (re-initialised for every job)
  model_type = "alphafold2_multimer_v2"
  num_recycles = 3
  recycle_early_stop_tolerance = "auto"

  max_msa = "auto"
  num_seeds = 1
  use_dropout = False

  # "auto" values are passed to `run` as None.
  num_recycles = None if num_recycles == "auto" else int(num_recycles)
  recycle_early_stop_tolerance = None if recycle_early_stop_tolerance == "auto" else float(recycle_early_stop_tolerance)
  if max_msa == "auto": max_msa = None

  # Output settings
  save_all = False
  save_recycles = False
  save_to_google_drive = False
  dpi = 200

  if save_to_google_drive:
    from pydrive.drive import GoogleDrive
    from pydrive.auth import GoogleAuth
    from google.colab import auth
    from oauth2client.client import GoogleCredentials
    auth.authenticate_user()
    gauth = GoogleAuth()
    gauth.credentials = GoogleCredentials.get_application_default()
    # NOTE: this rebinds the name `drive` used by the mount cell.
    drive = GoogleDrive(gauth)
    print("You are logged into Google Drive and are good to go!")

  # Read by the two callbacks defined above the loop.
  display_images = False

  # For some reason we need that to get pdbfixer to import
  if use_amber and f"/usr/local/lib/python{python_version}/site-packages/" not in sys.path:
      sys.path.insert(0, f"/usr/local/lib/python{python_version}/site-packages/")

  result_dir = jobname
  # Logging is configured only once per kernel (guarded by the `logging_setup`
  # global), with a log.txt path inside the first job's directory.
  # NOTE(review): later jobs presumably keep logging to that first file — confirm
  # whether each job should get its own log.
  if 'logging_setup' not in globals():
      setup_logging(Path(os.path.join(jobname,"log.txt")))
      logging_setup = True

  queries, is_complex = get_queries(queries_path)
  model_type = set_model_type(is_complex, model_type)

  # The cluster profile is disabled only for multimer models with an explicit max_msa.
  if "multimer" in model_type and max_msa is not None:
    use_cluster_profile = False
  else:
    use_cluster_profile = True

  download_alphafold_params(model_type, Path("."))
  results = run(
      queries=queries,
      result_dir=result_dir,
      use_templates=use_templates,
      custom_template_path=custom_template_path,
      num_relax=num_relax,
      msa_mode=msa_mode,
      model_type=model_type,
      num_models=3,
      num_recycles=num_recycles,
      recycle_early_stop_tolerance=recycle_early_stop_tolerance,
      num_seeds=num_seeds,
      use_dropout=use_dropout,
      model_order=[1,2,3],
      is_complex=is_complex,
      data_dir=Path("."),
      keep_existing_results=False,
      rank_by="auto",
      pair_mode=pair_mode,
      stop_at_score=float(100),
      prediction_callback=prediction_callback,
      dpi=dpi,
      zip_results=False,
      save_all=save_all,
      max_msa=max_msa,
      use_cluster_profile=use_cluster_profile,
      input_features_callback=input_features_callback,
      save_recycles=save_recycles,
  )

  # Copy the job folder to Google Drive. The uniqueness check above only looks at
  # the local working directory, so a folder with the same name may already exist
  # on Drive (e.g. from an earlier session); dirs_exist_ok=True merges into it
  # instead of raising FileExistsError and aborting the remaining jobs.
  shutil.copytree(f"{jobname}/", f"/content/gdrive/My Drive/{jobname}/", dirs_exist_ok=True)
