<a href="https://colab.research.google.com/github/ysy-scu/search/blob/main/ProteinMPNN_ddG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ProteinMPNN-ddG

Scores all possible point mutations of a protein to identify those that improve stability and/or expression.


In [1]:
#@title Install ProteinMPNN-ddG (and colabdesign)
import os
# Install ProteinMPNN-ddG only when it is not already importable, so re-running
# this cell in the same session does not reinstall it.
try:
  import proteinmpnn_ddg
except ImportError:
  # Catch ImportError only: a bare `except` would also swallow
  # KeyboardInterrupt and unrelated failures raised while importing.
  install_status = os.system("pip install -q proteinmpnn_ddg[cuda12]@git+https://github.com/PeptoneLtd/proteinmpnn_ddg.git@paper")
  if install_status != 0:
    # Surface the failure here instead of leaving only a later ImportError.
    print(f"pip install of proteinmpnn_ddg failed (exit status {install_status})")

from proteinmpnn_ddg import predict_logits_for_all_point_mutations_of_single_pdb

import numpy as np
import pandas as pd

import jax
import jax.numpy as jnp

from google.colab import files
from google.colab import data_table
data_table.disable_dataframe_formatter()

def _download_if_missing(url, filename):
    """Fetch `url` with wget into the working directory and verify `filename` exists.

    Raises:
        FileNotFoundError: if `filename` is not present after the attempt.
    """
    import subprocess  # local import: only needed on the download paths

    try:
        # Argument list instead of a shell string, so characters in a
        # user-supplied code are never interpreted by a shell.
        # -q: quiet, -nc: keep an already-downloaded copy instead of refetching.
        subprocess.run(["wget", "-qnc", url], check=False)
    except OSError:
        # wget could not be launched; the existence check below reports it.
        pass
    # Success is judged by the file being present, which also covers the
    # no-clobber case where an earlier download is reused.
    if not os.path.isfile(filename):
        raise FileNotFoundError(f"Failed to download {url}")


def get_pdb(pdb_code=""):
    """
    Resolve the user's input to a PDB file on local disk.

    - Empty or None `pdb_code`: prompt the user to upload a file (Colab).
    - `pdb_code` is the path of an existing local file: return it unchanged.
    - `pdb_code` has exactly 4 characters: treat it as a PDB ID and download
      `<code>.pdb` from RCSB.
    - Anything else: treat it as an AlphaFold DB identifier and download the
      matching `AF-<code>-F1-model_v3.pdb` file.

    Leading/trailing whitespace in `pdb_code` is ignored for the empty check
    and for the download branches.

    Returns:
        The name (or path) of the PDB file to use.

    Raises:
        ValueError: if no code was given and no file could be uploaded.
        FileNotFoundError: if a download did not produce the expected file.
    """
    code = (pdb_code or "").strip()
    if not code:
        # Let the user upload their own PDB file.
        if files is not None:  # Colab environment
            upload_dict = files.upload()
            if not upload_dict:
                # Nothing came back from the upload dialog.
                raise ValueError("No PDB file was uploaded.")
            uploaded_filename = next(iter(upload_dict))
            print(f"Uploaded PDB file: {uploaded_filename}")
            return uploaded_filename
        raise ValueError("No PDB code provided and files.upload() is unavailable.")

    if os.path.isfile(pdb_code):
        print(f"Using local PDB file: {pdb_code}")
        return pdb_code

    if len(code) == 4:
        # Standard PDB entry: download from RCSB.
        pdb_filename = f"{code}.pdb"
        _download_if_missing(f"https://files.rcsb.org/view/{pdb_filename}", pdb_filename)
        print(f"Downloaded PDB file from RCSB: {pdb_filename}")
        return pdb_filename

    # Otherwise download the model from the AlphaFold database.
    # NOTE(review): the model version suffix is pinned to v3 — confirm that
    # AlphaFold DB still serves this version.
    alphafold_filename = f"AF-{code}-F1-model_v3.pdb"
    _download_if_missing(f"https://alphafold.ebi.ac.uk/files/{alphafold_filename}", alphafold_filename)
    print(f"Downloaded PDB file from AlphaFold: {alphafold_filename}")
    return alphafold_filename

In [2]:
import os
import re
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

# Create the output directory without spawning a shell; no-op if it already exists.
os.makedirs("output", exist_ok=True)

# USER OPTIONS
#@markdown # ProteinMPNN options
model_name = "v_48_020" #@param ["v_48_002", "v_48_010", "v_48_020", "v_48_030"]
#@markdown (v_48_020 recommended)


#@markdown # Input Options
pdb='' #@param {type:"string"}
#@markdown (leave `pdb` as  blank to get an upload prompt)
chains = "A" #@param {type:"string"}
#@markdown (You can specify several chains, separating by commas e.g. "A,C")

#@markdown Only the chains specified will be loaded from the PDB file for prediction
# chains_to_predict = "" #@param {type:"string"}
# #@markdown (Leave `chains_to_predict` empty to predict all chains)

# Fixed (non-form) settings, passed to the prediction call as `nrepeat` and `seed`.
nrepeats = 1
seed = 42

# cleaning user options
def parse_chains(raw_chains):
    """Split the free-text `chains` field into a list of chain IDs.

    Any run of characters other than letters and digits acts as a separator,
    so "A,C", "A, C" and "A;C" all give ["A", "C"]. Leading or trailing
    separators no longer produce empty entries, and digit chain IDs
    (valid in PDB files) are kept rather than being treated as separators.
    """
    return re.findall(r"[A-Za-z0-9]+", raw_chains)

chains = parse_chains(chains)
if not chains:
    raise ValueError("No chain IDs given: set `chains` to e.g. \"A\" or \"A,C\".")

# Resolve the structure file: a blank `pdb` opens an upload prompt, an existing
# local path is used as-is, and any other value is downloaded by `get_pdb`.
pdb_path = get_pdb(pdb)

Saving 7sh6 (1).pdb to 7sh6 (1).pdb
Uploaded PDB file: 7sh6 (1).pdb


In [3]:
#@title Run ProteinMPNN-ddG

#@markdown (Positive scores indicate favourable mutations, predicted to improve stability and/or expression)

# Score every point mutation, with one prediction call per requested chain.
dfs = []
for chain in chains:
  # The full `chains` list is passed on every call; only `chain_to_predict`
  # changes between iterations.
  chain_df = predict_logits_for_all_point_mutations_of_single_pdb(
      model_name,
      chains,
      pdb_path,
      nrepeat=nrepeats,
      seed=seed,
      chain_to_predict=chain,
      pad_inputs=False,
      apply_ddG_correction=True)
  # Tag rows with their chain so they stay identifiable after concatenation.
  chain_df['chain'] = chain
  dfs.append(chain_df)

# ignore_index=True gives the combined table one unique row index; without it
# each chain's own labels repeat, leaving duplicate index values in multi-chain
# runs (and in the index column written to the CSV).
df = pd.concat(dfs, ignore_index=True)
df = df.rename(columns={'logit_difference_ddg': 'proteinmpnn_ddg_score'})
df = df[['chain','pre','pos','post','proteinmpnn_ddg_score']]
df.to_csv('predictions.csv')

# Show the ten highest-scoring mutations, index hidden, one decimal place.
top_mutations = df.sort_values('proteinmpnn_ddg_score', ascending=False).head(10)
display(top_mutations.style.hide().format(precision=1, decimal="."))

chain,pre,pos,post,proteinmpnn_ddg_score
A,G,79,P,5.4
A,G,254,V,5.4
A,A,226,C,4.3
A,G,254,M,4.2
A,T,110,V,4.1
A,G,254,L,3.9
A,V,194,I,3.8
A,G,254,I,3.8
A,G,254,Y,3.7
A,E,231,C,3.7


In [4]:
#@title download predictions (optional)
# Import repeated in this cell so it stays runnable by itself.
from google.colab import files
# Send the CSV written by the prediction cell to the browser as a download.
files.download('predictions.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>