# Set Up

Running the cells in this section takes roughly 1 min to 1 min 30 s.

In [None]:
#@title Mount google drive
# Attach Google Drive at /content/drive; the input/output paths configured
# in the form cell below live under that mount point.
from google.colab import drive
from sys import version_info

drive.mount('/content/drive')

# "<major>.<minor>" of the running interpreter. It is handed to the install
# cell and used to build the conda site-packages path before prediction.
python_version = "{}.{}".format(version_info.major, version_info.minor)

In [None]:
#@title Fixed params
# Settings that are intentionally not exposed in the interactive form.

# --- MSA generation ---
msa_mode = "MMseqs2 (UniRef+Environmental)" #param ["MMseqs2 (UniRef+Environmental)", "MMseqs2 (UniRef only)","single_sequence","custom"]
pair_mode = "unpaired"
use_custom_msa = False

# --- Templates ---
use_templates = False
custom_template_path = None

# --- Amber relaxation ---
# Relaxation (and therefore the OpenMM/conda install) is only switched on
# when at least one structure is requested to be relaxed.
num_relax = 0
use_amber = 0 < num_relax


In [None]:
#@title Install dependencies
%%bash -s $use_amber $use_templates $python_version

# Installs ColabFold and, only when requested, conda + template-search tools
# + OpenMM. Every stage drops a marker file in the working directory
# (COLABFOLD_READY, CONDA_READY, HH_READY, AMBER_READY) so that re-running
# the cell skips stages that already completed.
#
# Positional arguments (filled in by the notebook from Python variables):
#   $1  "True"/"False" - install OpenMM for Amber relaxation
#   $2  "True"/"False" - install kalign/hhsuite for template search
#   $3  Python "major.minor" version the conda packages are pinned to

# Abort on the first failing command so a marker file is never written
# after a broken install.
set -e

# Quoted so an empty or missing argument cannot break the tests below.
USE_AMBER="$1"
USE_TEMPLATES="$2"
PYTHON_VERSION="$3"

if [ ! -f COLABFOLD_READY ]; then
  # install dependencies
  # We have to use "--no-warn-conflicts" because colab already has a lot preinstalled with requirements different to ours
  pip install -q --no-warn-conflicts "colabfold[alphafold-minus-jax] @ git+https://github.com/yoann-ba/ColabFold_light" "tensorflow-cpu==2.11.0"
  # Swap whatever jax build is present for the pinned CUDA build.
  pip uninstall -yq jax jaxlib
  pip install -q "jax[cuda]==0.3.25" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
  touch COLABFOLD_READY
fi

# setup conda (needed by both optional stages below)
if [ "${USE_AMBER}" == "True" ] || [ "${USE_TEMPLATES}" == "True" ]; then
  if [ ! -f CONDA_READY ]; then
    wget -qnc https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
    # "2>&1 1>/dev/null": stderr goes to the cell output, stdout is discarded.
    bash Miniconda3-latest-Linux-x86_64.sh -bfp /usr/local 2>&1 1>/dev/null
    rm Miniconda3-latest-Linux-x86_64.sh
    touch CONDA_READY
  fi
fi
# setup template search
if [ "${USE_TEMPLATES}" == "True" ] && [ ! -f HH_READY ]; then
  conda install -y -q -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 python="${PYTHON_VERSION}" 2>&1 1>/dev/null
  touch HH_READY
fi
# setup openmm for amber refinement
if [ "${USE_AMBER}" == "True" ] && [ ! -f AMBER_READY ]; then
  conda install -y -q -c conda-forge openmm=7.5.1 python="${PYTHON_VERSION}" pdbfixer cryptography==38.0.4 2>&1 1>/dev/null
  touch AMBER_READY
fi

# Interactive settings

In [None]:
#@title Input protein sequence, then hit `Runtime` -> `Run all`
#@markdown ## Paths
input_dir = '/content/drive/MyDrive/Biotech_Work/Dev_files/ColabFold_runs/IO/input/shaped_negatives_reduced.csv' #@param {type:"string"}
result_dir = '/content/drive/MyDrive/Biotech_Work/Dev_files/ColabFold_runs/IO/output' #@param {type:"string"}
#@markdown input file : dir/file.csv, output dir : folder

# number of models to use
#@markdown ---
#@markdown ## Advanced settings
#@markdown #### Models
model_type = "alphafold2_multimer_v3" #@param ["auto", "alphafold2_ptm", "alphafold2_multimer_v1", "alphafold2_multimer_v2", "alphafold2_multimer_v3"]
#@markdown - if `auto` selected, will use `alphafold2_ptm` for monomer prediction and `alphafold2_multimer_v3` for complex prediction.
#@markdown Any of the model_types can be used (regardless if input is monomer or complex).

num_models = 5 #@param [1,2,3,4,5] {type:"raw"}

#@markdown #### Recycles & Early stop
num_recycles = "5" #@param ["auto", "0", "1", "3", "5", "6", "12", "24", "48"]
recycle_early_stop_tolerance = "0.5" #@param ["auto", "0.0", "0.5", "1.0"]
#@markdown - if `auto` selected, will use 20 recycles if `model_type=alphafold2_multimer_v3` (with tol=0.5), all else 3 recycles (with tol=0.0).
rank_by = "multimer" #@param ["auto", "plddt", "multimer", "ptm", "iptm"]
stop_at_score = 90 #@param {type:"string"}

#@markdown #### Sample settings
#@markdown -  enable dropouts and increase number of seeds to sample predictions from uncertainty of the model.
#@markdown -  decrease `max_msa` to increase uncertainty
max_msa = "auto" #@param ["auto", "512:1024", "256:512", "64:128", "32:64", "16:32"]
num_seeds = 2 #@param [1,2,4,8,16] {type:"raw"}
use_dropout = True #@param {type:"boolean"}

#@markdown #### Saving & Misc.
recompile_padding = 10
do_not_overwrite_results = True #@param {type:"boolean"}
zip_results = True #@param {type:"boolean"}

save_all = True #@param {type:"boolean"}
#@markdown - Necessary to have ticked on to access the distogram
actually_save_all = False #@param {type: "boolean"}
#@markdown - Actually save all in a heavy pickle file (~90MB per rank)
save_single_representations = True #@param {type: "boolean"}
save_pair_representations = False #@param {type: "boolean"}
save_recycles = False #@param {type:"boolean"}
save_to_google_drive = True #@param {type:"boolean"}
#@markdown -  if the save_to_google_drive option was selected, the result zip will be uploaded to your Google Drive
dpi = 200 #@param {type:"integer"}
#@markdown - set dpi for image resolution


# Normalise the form values: "auto" becomes None, and every field that the
# form delivers as text is converted to the numeric type used downstream.
num_recycles = None if num_recycles == "auto" else int(num_recycles)
recycle_early_stop_tolerance = None if recycle_early_stop_tolerance == "auto" else float(recycle_early_stop_tolerance)
if max_msa == "auto": max_msa = None
# stop_at_score is declared as a string field, so a value typed into the
# form arrives as str; convert it so the score threshold is always a number.
stop_at_score = float(stop_at_score)


if save_to_google_drive:
  from pydrive.drive import GoogleDrive
  from pydrive.auth import GoogleAuth
  from google.colab import auth
  from oauth2client.client import GoogleCredentials
  # Authenticate the Colab user and reuse those credentials for PyDrive.
  auth.authenticate_user()
  gauth = GoogleAuth()
  gauth.credentials = GoogleCredentials.get_application_default()
  # NOTE: this rebinds `drive`, shadowing the `google.colab.drive` module
  # imported by the mount cell; from here on `drive` is the PyDrive client.
  drive = GoogleDrive(gauth)
  print("You are logged into Google Drive and are good to go!")

#@markdown Don't forget to hit `Runtime` -> `Run all` after updating the form.

In [None]:
#@title Run Prediction

import sys

from colabfold.batch import get_queries, run, set_model_type
from colabfold.download import download_alphafold_params#, default_data_dir
from colabfold.utils import setup_logging
from pathlib import Path

# For some reason pdbfixer only imports when the conda site-packages
# directory is on sys.path, so prepend it (once) when relaxation is enabled.
if use_amber:
    if f"/usr/local/lib/python{python_version}/site-packages/" not in sys.path:
        sys.path.insert(0, f"/usr/local/lib/python{python_version}/site-packages/")

# Configure logging only the first time this cell runs in a kernel session;
# the `logging_setup` global acts as the "already done" flag.
try:
    logging_setup
except NameError:
    setup_logging(Path(result_dir) / "log.txt")
    logging_setup = True

# Read the input file and resolve the concrete model type from it.
queries, is_complex = get_queries(input_dir)
model_type = set_model_type(is_complex, model_type)

# Cluster profiles are switched off only for multimer models that run with
# an explicit max_msa setting.
use_cluster_profile = "multimer" not in model_type or max_msa is None

# Model weights are stored in (and later read from) the working directory.
download_alphafold_params(model_type, Path("."))

run(
    # --- I/O ---
    queries=queries,
    result_dir=result_dir,
    data_dir=Path("."),
    use_templates=use_templates,
    use_amber=use_amber,
    # --- MSA ---
    msa_mode=msa_mode,
    pair_mode=pair_mode,
    # --- Model ---
    model_type=model_type,
    is_complex=is_complex,
    num_models=num_models,
    model_order=[1, 2, 3, 4, 5],
    # --- Recycling and early stop ---
    num_recycles=num_recycles,
    recycle_early_stop_tolerance=recycle_early_stop_tolerance,
    rank_by=rank_by,
    stop_at_score=stop_at_score,
    # --- Sampling ---
    num_seeds=num_seeds,
    use_dropout=use_dropout,
    use_cluster_profile=use_cluster_profile,
    # --- Saving & misc. ---
    recompile_padding=recompile_padding,  # default is 10
    keep_existing_results=do_not_overwrite_results,
    save_all=save_all,
    actually_save_all=actually_save_all,
    save_single_representations=save_single_representations,
    save_pair_representations=save_pair_representations,
    save_recycles=save_recycles,
    zip_results=zip_results,
    # Options deliberately left at their defaults (not forwarded):
    #   max_seq, max_extra_seq, prediction_callback, use_gpu_relax, dpi,
    #   feature_dict_callback, num_ensemble, custom_template_path,
    #   num_relax, host_url, random_seed
)



In [None]:
# Disabled scratch commands, kept for reference only: they would point the
# system `python3` at Python 3.9 and install pip for it. Nothing in this
# cell executes.
# !sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1
# !sudo update-alternatives --config python3

# !python3 --version
# !sudo apt install python3-pip