In [None]:
import numpy as np
import torch,esm
import re,sys
from transformers import T5Tokenizer, T5EncoderModel, BertGenerationEncoder, BertTokenizer
# --- ProtT5 per-residue embedding of one sequence, appended to a CSV ---------
# Choose the compute device once; the model and every tensor below follow it.
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    # Keep the process-wide "current" CUDA device identical to the one that is
    # actually used, and only touch the CUDA API when a GPU exists so the CPU
    # fallback above stays reachable.
    torch.cuda.set_device(device)
print("Using device: {}".format(device))
transformer_link = "prot_t5_xl_half_uniref50-enc"
print("Loading: {}".format(transformer_link))
model = T5EncoderModel.from_pretrained(transformer_link)
if device.type == 'cpu':
    model.to(torch.float32)  # only cast to full-precision if no GPU is available
model = model.to(device)
model = model.eval()
tokenizer = T5Tokenizer.from_pretrained(transformer_link, do_lower_case=False, legacy=True )

sequence = "RRRLGVMGGTFDPIHNGHLVAASEVADRFALDEVIFVPTGQRKVSPAEHRYLMTVIATASNPRFTVSRADIDRGGATYTVDTLTDLRTAHPDADLYFITGADALASILSWENWEQLFTLAKFIGVSRPGYELSGLSLVEVPALAISSTDCRIRAGQARPIWYLVPDGVVQYVAKHRLYS"

save_path = "../ATPBind/embedding/Prott5_case_study2.csv"

with torch.no_grad():
    # Replace U/Z/O/B with X and put a space between residues before tokenising.
    sequence_examples = [" ".join(list(re.sub(r"[UZOB]", "X", sequence)))]
    ids = tokenizer(sequence_examples, add_special_tokens=False, padding="longest")
    input_ids = torch.tensor(ids['input_ids']).to(device)
    attention_mask = torch.tensor(ids['attention_mask']).to(device)
    embedding_repr = model(input_ids=input_ids, attention_mask=attention_mask)
    # Keep one row per residue.  The bound is the residue count, not the length
    # of the space-joined string (which is 2n-1 characters long).
    emb_0 = embedding_repr.last_hidden_state[0, :len(sequence)].cpu().numpy()
    # NOTE(review): append mode means re-running this cell adds a second copy of
    # the rows to the same file - delete the file first if that is not intended.
    with open(save_path, 'a') as f:
        np.savetxt(f, emb_0, delimiter=',')
    torch.cuda.empty_cache()



In [None]:
import numpy as np
import torch,esm
import re,sys
from transformers import AutoTokenizer, AutoModel
# --- ESM2 per-residue embedding of one sequence, appended to a CSV -----------
# Select GPU 2 only when CUDA exists, so the CPU fallback below is reachable.
if torch.cuda.is_available():
    torch.cuda.set_device(2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device: {}".format(device))
model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
transformer_link = "esm2"
print("Loading: {}".format(transformer_link))
# NOTE(review): this tokenizer is not used anywhere in this cell (tokenisation
# is done by the ESM batch converter below) - confirm whether it is still needed.
tokenizer = AutoTokenizer.from_pretrained(transformer_link)
model = model.to(device)
model = model.eval()
sequence = "RRRLGVMGGTFDPIHNGHLVAASEVADRFALDEVIFVPTGQRKVSPAEHRYLMTVIATASNPRFTVSRADIDRGGATYTVDTLTDLRTAHPDADLYFITGADALASILSWENWEQLFTLAKFIGVSRPGYELSGLSLVEVPALAISSTDCRIRAGQARPIWYLVPDGVVQYVAKHRLYS"
save_path = "../ATPBind/embedding/ESM2_case_study2.csv"

with torch.no_grad():
    batch_converter = alphabet.get_batch_converter()
    data = [("protein1", sequence)]
    batch_labels, batch_strs, batch_tokens = batch_converter(data)
    # Send the tokens to the same device as the model (a hard-coded .cuda()
    # fails on CPU-only machines).  Only the layer-33 representations are read
    # afterwards, so contact prediction is not requested.
    results = model(batch_tokens.to(device), repr_layers=[33], return_contacts=False)
    # Take the single batch entry and skip position 0, keeping len(sequence)
    # rows.  Indexing (rather than squeeze) keeps the result 2-D even for a
    # one-residue sequence.
    embeddings = results["representations"][33][0, 1:len(sequence) + 1].cpu().numpy()
    # NOTE(review): append mode means re-running this cell adds a second copy of
    # the rows to the same file - delete the file first if that is not intended.
    with open(save_path, 'a') as f:
        np.savetxt(f, embeddings, delimiter=',')
    torch.cuda.empty_cache()




In [None]:
import re, torch
import torch.nn as nn
import torch.nn.functional as F

class CoATT_Plan3(nn.Module):
    """Two-stream classifier over ESM2 (1280-d) and ProtTrans (1024-d) features.

    Each stream goes through three parallel 1-D convolutions (kernel sizes
    3, 5 and 7, 256 output channels each) followed by a shared BatchNorm and
    ReLU.  The three results are concatenated to 768 features and fed to one
    shared bidirectional LSTM; the two LSTM outputs are concatenated (512
    features) and mapped by the classifier to a 2-way softmax.

    ``conv7``-``conv9``, ``bn_con`` and ``sigmoid`` are created but never used
    by ``forward``.  The parameters of ``conv7``-``conv9`` and ``bn_con`` are
    nevertheless part of the ``state_dict``, so they must stay for checkpoints
    to load.
    """

    def __init__(self):
        super().__init__()
        # conv1-3 take 1280 input channels, conv4-6 take 1024 and conv7-9 take
        # 2304; within each triple the kernel sizes are 3/5/7 with padding
        # k // 2 so the sequence length is preserved.  Attribute names and
        # creation order are kept fixed: checkpoint keys depend on the former
        # and seeded random initialisation on the latter.
        conv_index = 0
        for in_channels, out_channels in ((1280, 256), (1024, 256), (2304, 512)):
            for width in (3, 5, 7):
                conv_index += 1
                layer = nn.Conv1d(in_channels, out_channels, kernel_size=width, padding=width // 2)
                setattr(self, "conv{}".format(conv_index), layer)

        self.bn = nn.BatchNorm1d(256)
        self.bn_con = nn.BatchNorm1d(512)
        self.act = nn.ReLU()
        self.lstmcell = nn.LSTM(768, 128, bidirectional=True)  # bidirectional LSTM
        self.sigmoid = nn.Sigmoid()
        head_layers = [
            nn.Linear(512, 128),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.Dropout(0.5),
            nn.Linear(64, 2),
            nn.Softmax(-1),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def _conv_branch(self, conv, embedding):
        """Run one conv -> shared BatchNorm -> ReLU branch, returning channels-last."""
        channels_first = embedding.permute(0, 2, 1)
        activated = self.act(self.bn(conv(channels_first)))
        return activated.permute(0, 2, 1)

    def forward(self, esm2_embeding, protTrans_embeding):
        """Score every position of the two embedding tensors.

        Args:
            esm2_embeding: 3-D tensor whose last axis has 1280 features.
            protTrans_embeding: 3-D tensor whose last axis has 1024 features,
                with the same leading two axes as ``esm2_embeding``.

        Returns:
            Softmax scores with 2 values on the last axis; every size-1 axis
            is removed by ``torch.squeeze``.

        NOTE(review): ``lstmcell`` is built without ``batch_first=True``, so it
        treats axis 0 of its input as the sequence axis - confirm this matches
        how callers lay out their batches.
        """
        esm2_branches = [
            self._conv_branch(conv, esm2_embeding)
            for conv in (self.conv1, self.conv2, self.conv3)
        ]
        fea_esm2 = torch.cat(esm2_branches, dim=2)

        prot_branches = [
            self._conv_branch(conv, protTrans_embeding)
            for conv in (self.conv4, self.conv5, self.conv6)
        ]
        fea_prot = torch.cat(prot_branches, dim=2)

        # The same LSTM (shared weights) encodes both streams.
        seq_esm2, _ = self.lstmcell(fea_esm2)
        seq_prot, _ = self.lstmcell(fea_prot)
        fin_fea = torch.cat([seq_esm2, seq_prot], dim=2)
        return torch.squeeze(self.classifier(fin_fea))

In [None]:
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import warnings
import random
import torch
import os
import numpy as np

# Reproducibility: drive every random number generator from one seed.
seed = 2024
os.environ.update({
    # Read by Python at interpreter start-up, so this value reaches processes
    # launched after this point rather than the already-running kernel.
    'PYTHONHASHSEED': str(seed),
    # cuBLAS workspace setting, exported before deterministic mode is enabled.
    'CUBLAS_WORKSPACE_CONFIG': ":4096:8",
})
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed,
                 torch.cuda.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)
torch.use_deterministic_algorithms(True)
# Silences every warning for the rest of the session.
warnings.filterwarnings("ignore")


class CustomDataset(Dataset):
    """Pairs two row-aligned feature tables and serves them as float32 tensors.

    Args:
        features1: table exposing ``.values`` (e.g. a pandas DataFrame); row i
            is the first feature vector of sample i.
        features2: second table with the same number of rows as ``features1``.

    Raises:
        ValueError: if the two tables do not have the same number of rows.
    """

    def __init__(self, features1, features2):
        self.features1 = torch.tensor(features1.values.astype(np.float32))
        self.features2 = torch.tensor(features2.values.astype(np.float32))
        # __len__ follows features1 only, so a row-count mismatch would either
        # truncate features2 silently or raise IndexError deep inside a
        # DataLoader; fail early with a clear message instead.
        if len(self.features1) != len(self.features2):
            raise ValueError(
                "features1 and features2 must have the same number of rows, "
                "got {} and {}".format(len(self.features1), len(self.features2))
            )

    def __len__(self):
        """Number of samples (rows)."""
        return len(self.features1)

    def __getitem__(self, index):
        """Return the ``(features1[index], features2[index])`` pair."""
        x1 = self.features1[index]
        x2 = self.features2[index]
        return x1, x2

# --- Case-study inference: load embeddings, run the saved model, print labels ---
device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')

print("Spe Rec Pre F1 MCC AUC")
# The embedding cells above write these CSVs with np.savetxt, i.e. without a
# header line.  header=None stops pandas from consuming the first residue's
# embedding as column names.
test_ESM2_data1 = pd.read_csv("../ATPBind/embedding/ESM2_case_study.csv", header=None)
test_ProtT5_data1 = pd.read_csv("../ATPBind/embedding/ProtT5_case_study.csv", header=None)
test_dataset = CustomDataset(test_ESM2_data1, test_ProtT5_data1)
# NOTE(review): each row is fed as a (batch, 1, features) tensor and the model's
# LSTM is built without batch_first, so it appears to treat the rows of one
# batch as a sequence - batch_size may therefore influence the predictions;
# confirm before changing it.
test_loader = DataLoader(test_dataset, batch_size=32)
best_model = CoATT_Plan3()
# map_location lets a checkpoint saved on another GPU load on this device
# (including the CPU fallback).
best_model.load_state_dict(torch.load("../save/model/ESM2_ProtT5_227.pt", map_location=device))
best_model.to(device)
best_model.eval()
all_predictions = []
with torch.no_grad():
    for data1, data2 in test_loader:
        data1 = data1.to(device)
        data2 = data2.to(device)
        score = best_model(data1.unsqueeze(1), data2.unsqueeze(1))
        # The model squeezes every size-1 axis, so a final batch holding a
        # single row comes back with shape (2,); restore (rows, classes)
        # before taking the per-row argmax.
        score = score.reshape(-1, score.shape[-1]).cpu().numpy()
        all_predictions.extend(score.argmax(1).tolist())
print(all_predictions)