<a href="https://colab.research.google.com/github/yuminshen/cs61a-2024/blob/main/cg2all.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# List of tasks
- Task 1: Conversion of an all-atom structure to a CG model using __convert_all2cg__
- Task 2: Conversion of a CG model to an all-atom structure using __convert_cg2all__
- Task 3: Conversion of a CG simulation trajectory to an atomistic simulation trajectory using __convert_cg2all__

# Supported coarse-grained models
- CalphaBasedModel: C$\alpha$-trace (atom names should be "CA")
- ResidueBasedModel: Residue center-of-mass (atom names should be "CA")
- SidechainModel: A bead at the sidechain center-of-mass (atom names should be "SC")
- CalphaCMModel: C$\alpha$-trace + Residue center-of-mass (atom names should be "CA" and "CM")
- CalphaSCModel: C$\alpha$-trace + Sidechain center-of-mass (atom names should be "CA" and "SC")
- BackboneModel: Backbone atoms only (N, CA, C)
- MainchainModel: Mainchain atoms only (N, CA, C, O)
- Martini: [Martini](http://cgmartini.nl/) model
- Martini3: [Martini3](http://www.cgmartini.nl/index.php/martini-3-0) model
- PRIMO: [PRIMO](http://dx.doi.org/10.1002/prot.22645) model
---

In [1]:
#@title Install cg2all package (takes 4-5 minutes)
%%bash

# Install the CUDA build of DGL only when nvidia-smi reports a CUDA version;
# otherwise the runtime is treated as CPU-only and no CUDA wheel is requested.
gpu_available=$(nvidia-smi | grep "CUDA Version" | wc -l)
if [[ $gpu_available == 1 ]]; then
    echo "This notebook is running on a GPU runtime."
    pip install  dgl -f https://data.dgl.ai/wheels/torch-2.3/cu121/repo.html &> /dev/null
    #pip install dgl -f https://data.dgl.ai/wheels/cu116/repo.html &> /dev/null
else
    echo "This notebook is running on a CPU runtime."
fi

# Fetch cg2all over HTTPS so the source code is not pulled through a plaintext connection.
pip install -q git+https://github.com/huhlim/cg2all@cuda-12 &> /dev/null
# biopython provides Bio.PDB, which the Task 2 cell imports; it is installed
# explicitly because nothing else in this cell is guaranteed to pull it in.
pip install -q py3Dmol gdown mrcfile biopython &> /dev/null

This notebook is running on a GPU runtime.


In [2]:
#@title Download model checkpoint files (optional, takes <1 minute)
#@markdown This step downloads all PyTorch model checkpoint files. If you skip this step, any missing checkpoint file will be downloaded automatically when it is first needed.

import cg2all.lib.libmodel
from cg2all.lib.libconfig import MODEL_HOME

# Pre-fetch the checkpoint of every supported CG model that is not already
# present under MODEL_HOME.
for model_type in (
    "CalphaBasedModel",
    "ResidueBasedModel",
    "SidechainModel",
    "CalphaCMModel",
    "CalphaSCModel",
    "BackboneModel",
    "MainchainModel",
    "Martini",
    "Martini3",
    "PRIMO",
):
    ckpt_fn = MODEL_HOME / f"{model_type}.ckpt"
    if ckpt_fn.exists():
        # Already downloaded; nothing to do for this model.
        continue
    cg2all.lib.libmodel.download_ckpt_file(model_type, ckpt_fn)


DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/CalphaBasedModel.ckpt
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/ResidueBasedModel.ckpt
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/SidechainModel.ckpt
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/CalphaCMModel.ckpt
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/CalphaSCModel.ckpt
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/BackboneModel.ckpt
Downloading from Google Drive ... /usr/local/lib/python3.11/dist-packages/cg2all/model/MainchainModel.ckpt
Downloading from Google Drive ... /usr/local/lib

In [3]:
#@title Set up py3Dmol for structure display

import py3Dmol

def display(pdb_fn, representation="cartoon", is_traj=False):
    """Build a py3Dmol view of a PDB file.

    Note: defining this name at notebook level shadows IPython's own
    ``display`` helper for the rest of the session.

    Args:
        pdb_fn: Path of the PDB file to visualize.
        representation: ``"cartoon"`` (cartoon plus sticks/spheres for selected
            atoms) or ``"ball+stick"`` (which, as implemented, draws every atom
            as a sphere of radius 1.0 and no sticks).
        is_traj: When True, the models in the file are loaded as animation
            frames and the view is animated.

    Returns:
        The configured py3Dmol view; call ``.show()`` on it to render.

    Raises:
        NotImplementedError: If ``representation`` is not one of the two
            supported values.
    """
    view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')

    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(pdb_fn, 'r') as fin:
        pdb_text = fin.read()

    if is_traj:
        view.addModelsAsFrames(pdb_text, 'pdb')
    else:
        view.addModel(pdb_text, 'pdb')

    if representation == "cartoon":
        view.setStyle({'cartoon': {'color':'spectrum'}})
        # Sticks for everything except backbone N/C/O in residues other than GLY and PRO.
        view.addStyle({'and':[{'resn':["GLY","PRO"],'invert':True},{'atom': ["N", "C", "O"],'invert':True}]},
                        {'stick':{'colorscheme':"WhiteCarbon",'radius':0.3}})
        # GLY: mark the CA atom with a small sphere.
        view.addStyle({'and':[{'resn':"GLY"},{'atom':'CA'}]},
                        {'sphere':{'colorscheme':"WhiteCarbon",'radius':0.3}})
        # PRO: sticks for every atom except C and O (so N is included).
        view.addStyle({'and':[{'resn':"PRO"},{'atom':['C','O'],'invert':True}]},
                        {'stick':{'colorscheme':"WhiteCarbon",'radius':0.3}})

    elif representation == "ball+stick":
        view.setStyle({"sphere": {"color": "spectrum", "radius": 1.0}})

    else:
        raise NotImplementedError(representation)

    view.zoomTo()
    if is_traj:
        # Play the frames forward in a loop, 500 ms per frame.
        view.animate({'loop': 'forward', "interval": 500})

    return view

---

In [None]:
#@title Task 1: Conversion of an all-atom structure to a CG model using __convert_all2cg__

# upload a PDB file
import requests
from google.colab import files

coarse_grained_model_type = "Martini3" #@param ["CalphaBasedModel", "ResidueBasedModel", "SidechainModel", "CalphaCMModel", "CalphaSCModel", "BackboneModel", "MainchainModel", "Martini", "Martini3", "PRIMO"]

use_example = True #@param {type:"boolean"}
#@markdown - An example input PDB file will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

if use_example:
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.pdb"
    input_pdb = url.split("/")[-1]
    with open(input_pdb, "wt") as fout:
        fout.write(requests.get(url).text)
else:
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]

# convert
import pathlib
output_pdb = pathlib.Path(input_pdb).stem + f".{coarse_grained_model_type}.pdb"

!convert_all2cg -p $input_pdb -o $output_pdb --cg $coarse_grained_model_type
print(f"Converted {input_pdb} to {output_pdb} in {coarse_grained_model_type}")

display(output_pdb, representation="ball+stick").show()

In [None]:
#@title Download the converted file
# Offer the CG model file written by the Task 1 cell (output_pdb) as a browser download.
files.download(output_pdb)

---

In [4]:
#@title Task 2: Conversion of a CG model to an all-atom structure using __convert_cg2all__

# upload a PDB file
import requests
from google.colab import files
import pathlib
import subprocess
import tempfile
from Bio.PDB import PDBParser, PDBIO

coarse_grained_model_type = "CalphaBasedModel" #@param ["CalphaBasedModel", "ResidueBasedModel", "SidechainModel", "CalphaCMModel", "CalphaSCModel", "BackboneModel", "MainchainModel", "Martini", "Martini3", "PRIMO"]

use_example = False #@param {type:"boolean"}
#@markdown - An example input PDB file will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

def split_models(pdb_file):
    """Write each model of a PDB file to its own temporary PDB file.

    Args:
        pdb_file: Path of the input PDB file (may contain several models).

    Returns:
        A list with the path of one temporary ``.pdb`` file per model, in the
        order the parsed structure yields its models. The files are created
        with ``delete=False``, so the caller is responsible for removing them.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('input', pdb_file)
    temp_files = []

    for model in structure:
        # Only a unique path is needed here: close the handle immediately so it
        # is not left open while PDBIO writes to the same path by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as temp_pdb:
            temp_path = temp_pdb.name
        temp_io = PDBIO()
        temp_io.set_structure(model)
        temp_io.save(temp_path)
        temp_files.append(temp_path)

    return temp_files

if use_example:
    # Example input for each supported CG model, hosted in the cg2all repository.
    example_urls = {
        "CalphaBasedModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.calpha.pdb",
        "ResidueBasedModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.residue.pdb",
        "SidechainModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.sc.pdb",
        "CalphaCMModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.cacm.pdb",
        "CalphaSCModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.casc.pdb",
        "BackboneModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.bb.pdb",
        "MainchainModel": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.mc.pdb",
        "Martini": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.martini.pdb",
        "Martini3": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.martini3.pdb",
        "PRIMO": "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1ab1_A.primo.pdb",
    }
    url = example_urls[coarse_grained_model_type]
    input_pdb = url.split("/")[-1]
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of saving the error page as a PDB file.
    response.raise_for_status()
    with open(input_pdb, "wt") as fout:
        fout.write(response.text)
else:
    # The first key of the upload result is used as the uploaded file's name.
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]

# Convert every model of the input separately, then merge the all-atom results
# into a single multi-model PDB file.
model_pdbs = split_models(input_pdb)
if not model_pdbs:
    raise ValueError(f"No models found in {input_pdb}")

output_files = []
try:
    for model_pdb in model_pdbs:
        # Per-model outputs are intermediate files written to the working directory.
        model_output = pathlib.Path(model_pdb).stem + ".all.pdb"
        # Register the path before running so the cleanup below also removes
        # a partially written file if the conversion fails.
        output_files.append(model_output)
        subprocess.run([
            "convert_cg2all",
            "-p", model_pdb,
            "-o", model_output,
            "--cg", coarse_grained_model_type
        ], check=True)

    # Merge the results. output_pdb names the merged file, so the print/display
    # below (and the download cell) refer to a file that still exists after the
    # intermediate files are removed.
    output_pdb = pathlib.Path(input_pdb).stem + "_multi.all.pdb"
    final_output = output_pdb
    with open(output_pdb, 'w') as outfile:
        for i, model_output in enumerate(output_files, start=1):
            # PDB format: the model serial number is right-justified in columns 11-14.
            outfile.write(f"MODEL     {i:>4d}\n")
            with open(model_output) as infile:
                for line in infile:
                    # Drop any model delimiters or END record carried by the
                    # per-model file; they would otherwise be nested inside
                    # the MODEL/ENDMDL pair written here.
                    if line[:6].strip() in ("MODEL", "ENDMDL", "END"):
                        continue
                    outfile.write(line)
            outfile.write("ENDMDL\n")
        outfile.write("END\n")
finally:
    # Clean up the temporary per-model inputs and outputs, even on failure.
    for f in model_pdbs + output_files:
        pathlib.Path(f).unlink(missing_ok=True)

print(f"Converted {input_pdb} in {coarse_grained_model_type} to {output_pdb}")

display(output_pdb, representation="cartoon").show()

Saving PED00082_e001_ensemble.pdb to PED00082_e001_ensemble.pdb
{'loading_ckpt': 0.6744749546051025, 'model_configuration': 0.13113641738891602, 'loading_input': 59.57213735580444, 'forward_pass': 1.6243839263916016, 'writing_output': 0.17839837074279785, 'total': 62.18053102493286}
Converted PED00082_e001_ensemble.pdb in CalphaBasedModel to PED00082_e001_ensemble.all.pdb


In [5]:
#@title Download the converted file
# final_output is the merged multi-model PDB file written by the Task 2 cell.
files.download(final_output)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

---

In [None]:
#@title Task 3: Conversion of a CG simulation trajectory to an atomistic simulation trajectory using __convert_cg2all__
#@markdown Input trajectory file should be in the DCD format.

# upload a PDB file
import requests
from google.colab import files

coarse_grained_model_type = "CalphaBasedModel" #@param ["CalphaBasedModel", "ResidueBasedModel", "SidechainModel", "CalphaCMModel", "CalphaSCModel", "BackboneModel", "MainchainModel", "Martini", "Martini3", "PRIMO"]

use_example = True #@param {type:"boolean"}
#@markdown - An example input PDB and DCD files will be downloaded from [our repository](https://github.com/huhlim/cg2all/tree/main/tests).

if use_example:
    if coarse_grained_model_type != "CalphaBasedModel":
        print("The example is based on CalphaBasedModel.")
        coarse_grained_model_type = "CalphaBasedModel"
    #
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1jni.calpha.pdb"
    input_pdb = url.split("/")[-1]
    with open(input_pdb, "wt") as fout:
        fout.write(requests.get(url).text)
    #
    url = "https://raw.githubusercontent.com/huhlim/cg2all/main/tests/1jni.calpha.dcd"
    input_dcd = url.split("/")[-1]
    with open(input_dcd, "wb") as fout:
        fout.write(requests.get(url).content)

else:
    input_pdb = files.upload()
    input_pdb = list(input_pdb)[0]
    input_dcd = files.upload()
    input_dcd = list(input_dcd)[0]

batch_size = 1 #@param {type: "number"}
#@markdown - Batch size should be a divisor of the total number of frames. The example trajectory file has five frames.

# convert
import pathlib
output_dcd = pathlib.Path(input_dcd).stem + ".all.dcd"
output_pdb = pathlib.Path(input_pdb).stem + ".all.pdb"

!convert_cg2all -p $input_pdb --dcd $input_dcd -o $output_dcd -opdb $output_pdb --cg $coarse_grained_model_type --batch $batch_size
print(f"Converted {input_dcd} in {coarse_grained_model_type} to {output_dcd}")

import mdtraj
pdb = mdtraj.load(input_pdb)
traj = mdtraj.load(output_dcd, top=output_pdb)
if len(traj) > 20:
    print("Displaying first 20 frames")
    traj = traj[:20]
traj = traj.superpose(traj)
traj.save("display.pdb")
#
display("display.pdb", representation="cartoon", is_traj=True).show()
#print("Displaying the last frame")
#display(output_pdb, representation="cartoon").show()

In [None]:
#@title Download the converted file
# Offer both Task 3 outputs as browser downloads: the all-atom PDB file
# (output_pdb) and the converted trajectory (output_dcd).
files.download(output_pdb)
files.download(output_dcd)