In [2]:
import torch
from tqdm import tqdm
import pandas as pd

from src.datasets.pharmacophore_eval import match_score
from dgl.data.utils import load_graphs
from rdkit import Chem
import csv
from src.eval import sascorer
from rdkit.Chem import QED, AllChem
import joblib


: 

In [None]:
def process_mol(args):
    """Compute the property tuple for a single, already-parsed molecule.

    Args:
        args: A 3-tuple ``(mol, pp_graph_list, loaded_reg)`` packed into one
            argument. ``mol`` is an RDKit molecule or ``None``;
            ``pp_graph_list`` is the collection of pharmacophore graphs passed
            one by one to ``match_score``; ``loaded_reg`` is a fitted
            regressor exposing ``predict``.

    Returns:
        A 4-tuple ``(best_match, sa_scaled, qed_value, predicted)``:
        the maximum ``match_score`` over all pharmacophore graphs, the
        ``sascorer`` score multiplied by 0.1, ``QED.default(mol)``, and the
        scalar regressor prediction on the molecule's Morgan bit-vector
        fingerprint (radius 2, 1024 bits). ``(0, 0, 0, 0)`` is returned when
        ``mol`` is ``None``.
    """
    mol, pp_graph_list, loaded_reg = args

    # If the molecule is invalid, return default values.
    if mol is None:
        return (0, 0, 0, 0)

    match_scores = [match_score(mol, graph) for graph in pp_graph_list]
    best_match = max(match_scores)
    sa_scaled = sascorer.calculateScore(mol) * 0.1
    qed_value = QED.default(mol)
    # The regressor expects a batch, hence the single-element list.
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024)
    predicted = loaded_reg.predict([fingerprint]).item()
    return (best_match, sa_scaled, qed_value, predicted)

In [None]:
# Load the PDK1 pharmacophore graphs and attach the feature tensors 'h'
# that are built from the stored node/edge attributes.
pp_graph_list, _ = load_graphs("./data/PDK1_pdb/pdk1_phar_graphs.bin")
for pp_graph in pp_graph_list:
    # Node feature: 'type' with 'size' appended as one extra column, as float.
    pp_graph.ndata['h'] = \
        torch.cat((pp_graph.ndata['type'], pp_graph.ndata['size'].reshape(-1, 1)), dim=1).float()
    # Edge feature: 'dist' as a single float column.
    pp_graph.edata['h'] = pp_graph.edata['dist'].reshape(-1, 1).float()

# NOTE(review): joblib.load unpickles arbitrary objects; only load model
# files that come from a trusted source.
loaded_reg = joblib.load('./data/stacking_regressor_model.pkl')

suppl = Chem.SDMolSupplier('./data/csd/raw/CSD_process.sdf', removeHs=False, sanitize=True)
# Only molecules from index 167288 onwards are scored. Together with the
# append-mode output below this presumably resumes an earlier, interrupted
# run - TODO confirm the offset before re-running.
mols = [mol for mol in suppl][167288:]

test_dataset = './data/csd/raw/CSD_prop.csv'
with open(test_dataset, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    for mol in tqdm(mols, total=len(mols)):
        arg = mol, pp_graph_list, loaded_reg
        result = process_mol(arg)
        writer.writerow(result)
    # csv.writer objects have no close() method (calling it raised
    # AttributeError once the loop finished); the enclosing `with` block
    # flushes and closes the underlying file.

In [None]:
# Load the GLP1 pharmacophore graphs and build their 'h' feature tensors:
# node 'type' with 'size' appended as an extra column, and edge 'dist' as a
# single column, both cast to float.
pp_graph_list1, _ = load_graphs("./data/glp1_phar_graphs.bin")
for pp_graph1 in pp_graph_list1:
    pp_graph1.ndata['h'] = \
        torch.cat((pp_graph1.ndata['type'], pp_graph1.ndata['size'].reshape(-1, 1)), dim=1).float()
    pp_graph1.edata['h'] = pp_graph1.edata['dist'].reshape(-1, 1).float()

# Same preparation for the Cav3.2 pharmacophore graphs.
pp_graph_list2, _ = load_graphs("./data/cav32_phar_graphs.bin")
for pp_graph in pp_graph_list2:
    pp_graph.ndata['h'] = \
        torch.cat((pp_graph.ndata['type'], pp_graph.ndata['size'].reshape(-1, 1)), dim=1).float()
    pp_graph.edata['h'] = pp_graph.edata['dist'].reshape(-1, 1).float()

test_dataset = './data/moses/moses_val.csv'
test_data = pd.read_csv(test_dataset)

# The input file is only read, so plain 'r' is sufficient (the previous 'r+'
# needlessly required write permission on the source data).
with open(test_dataset, 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    rows = list(reader)

    # Do not treat a header line as a molecule: drop the first row when its
    # first field is literally the column label "smiles".
    if rows and rows[0] and rows[0][0].strip().lower() == "smiles":
        rows = rows[1:]

    # NOTE(review): the output is opened in append mode and the header is
    # written on every run, so re-running this cell duplicates both header
    # and data in moses_val1.csv.
    with open('./data/moses/moses_val1.csv', 'a', newline='') as csvfile1:
        writer = csv.writer(csvfile1, delimiter=',')
        writer.writerow(["smiles","pharma_score","SA","QED", "acute_tox", "glp1_score", 'cav32_score'])
        for row in tqdm(rows):
            if not row:
                # Blank CSV line: nothing to score.
                continue
            mol = Chem.MolFromSmiles(row[0])
            if mol is None:
                # Unparsable SMILES: use 0 as the default score, mirroring
                # the invalid-molecule handling in process_mol.
                d1 = 0
                d2 = 0
            else:
                # Best match over all graphs of each pharmacophore set.
                d1 = max([match_score(mol, pp_graph1) for pp_graph1 in pp_graph_list1])
                d2 = max([match_score(mol, pp_graph2) for pp_graph2 in pp_graph_list2])
            row.append(d1)
            row.append(d2)
            writer.writerow(row)

In [None]:
def process_mol(args):
    """Compute the property tuple for a molecule given as a SMILES string.

    Note: this redefines the ``process_mol`` from an earlier cell; this
    version parses the SMILES itself and echoes it back as the first field.

    Args:
        args: A 3-tuple ``(smiles, pp_graph_list, loaded_reg)`` packed into
            one argument (convenient for ``Pool.imap_unordered``).
            ``smiles`` is parsed with ``Chem.MolFromSmiles``;
            ``pp_graph_list`` holds the pharmacophore graphs passed one by one
            to ``match_score``; ``loaded_reg`` is a fitted regressor exposing
            ``predict``.

    Returns:
        A 5-tuple ``(smiles, best_match, sa_scaled, qed_value, predicted)``:
        the input SMILES, the maximum ``match_score`` over all graphs, the
        ``sascorer`` score multiplied by 0.1, ``QED.default(mol)``, and the
        scalar regressor prediction on the Morgan bit-vector fingerprint
        (radius 2, 1024 bits). ``(smiles, 0, 0, 0, 0)`` is returned when the
        SMILES cannot be parsed.
    """
    smiles, pp_graph_list, loaded_reg = args
    mol = Chem.MolFromSmiles(smiles)

    # If the molecule is invalid, return default values.
    if mol is None:
        return (smiles, 0, 0, 0, 0)

    match_scores = [match_score(mol, graph) for graph in pp_graph_list]
    best_match = max(match_scores)
    sa_scaled = sascorer.calculateScore(mol) * 0.1
    qed_value = QED.default(mol)
    # The regressor expects a batch, hence the single-element list.
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 2, 1024)
    predicted = loaded_reg.predict([fingerprint]).item()
    return (smiles, best_match, sa_scaled, qed_value, predicted)

In [None]:
# Load the PDK1 pharmacophore graphs (labels returned by load_graphs are ignored).
pp_graph_list, _ = load_graphs("/raid/yyw/PharmDiGress/data/PDK1_pdb/pdk1_phar_graphs.bin")
for pp_graph in pp_graph_list:
    # Node feature 'h': 'type' with 'size' appended as one extra column, as float.
    node_size_column = pp_graph.ndata['size'].reshape(-1, 1)
    node_features = torch.cat((pp_graph.ndata['type'], node_size_column), dim=1)
    pp_graph.ndata['h'] = node_features.float()
    # Edge feature 'h': 'dist' as a single float column.
    edge_dist_column = pp_graph.edata['dist'].reshape(-1, 1)
    pp_graph.edata['h'] = edge_dist_column.float()
# Fitted regressor used by process_mol for its last output field.
loaded_reg = joblib.load('/raid/yyw/PharmDiGress/data/stacking_regressor_model.pkl')

In [None]:
import csv
import multiprocessing  # was used below without ever being imported (NameError)

val_dataset = '/raid/yyw/PharmDiGress/data/moses/moses_pyg/raw/test.csv'
val_data = pd.read_csv(val_dataset)
smiles_values = val_data['SMILES'].values

# Score every SMILES in parallel and append the results to moses_val.csv.
# NOTE(review): the output is opened in append mode and the header is written
# on every run, so re-running this cell duplicates header and data.
with open('/raid/yyw/PharmDiGress/data/moses/moses_pyg/raw/moses_val.csv', 'a', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(["smiles","pharma_score","SA","QED", "acute_tox"])
    # The Pool context manager tears the workers down on exit, so no explicit
    # pool.close() is needed.
    with multiprocessing.Pool(10) as pool:
        # One task tuple per molecule; process_mol unpacks it.
        tasks = [(smiles, pp_graph_list, loaded_reg) for smiles in smiles_values]
        # Iterate with tqdm to monitor progress over the molecule list.
        # imap_unordered yields results as workers finish, so output rows are
        # not guaranteed to follow the input order.
        for result in tqdm(pool.imap_unordered(process_mol, tasks), total=len(smiles_values)):
            writer.writerow(result)
    # csv.writer objects have no close() method (calling it raised
    # AttributeError); the enclosing `with` block closes the file.

In [None]:
from rdkit import Chem

In [None]:
# Example SMILES string used by the following cells (parsing and fragment selection).
smiles = 'CC1=C(C(C)=O)C(N(C(CC)CC)C2=NC(NC3=NC=C(N4CCNCC4)C=C3)=NC=C12)=O'

In [None]:
# Parse the SMILES string into an RDKit molecule.
mol = Chem.MolFromSmiles(smiles)

In [None]:
# Show the parsed molecule as this cell's output.
mol

In [None]:
# Split the molecule into its fragments, returned as sanitized molecule objects.
mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)

In [None]:
# Keep the fragment with the most atoms; if there are no fragments at all,
# fall back to the whole molecule.
largest_mol = max(
    mol_frags,
    key=lambda fragment: fragment.GetNumAtoms(),
    default=mol,
)

In [None]:
# Show the selected largest fragment as this cell's output.
largest_mol