In [1]:
# Attach Google Drive to this Colab VM so that files stored on Drive can be
# read through the local filesystem under the mount point below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Report the PyTorch build and whether a CUDA device is visible to this runtime.
import torch

# Probe CUDA once and reuse the answer for both the report and the branch.
gpu_visible = torch.cuda.is_available()

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {gpu_visible}")

# The CUDA toolkit version and device name are only printed when a GPU exists.
if gpu_visible:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")

PyTorch version: 2.8.0+cu126
CUDA available: True
CUDA version: 12.6
GPU device: Tesla T4


In [3]:
import os
import shutil
import zipfile

# 1. Paths (adjust DRIVE_PATH / ZIP_FILE_NAME to where the archive actually
#    lives in your Drive).
DRIVE_PATH = '/content/drive/MyDrive/Colab_Projects'  # folder on Drive holding the archive
ZIP_FILE_NAME = 'DDLS_Drug_Repurposing.zip'  # archive file name

# 2. Extract the archive into the Colab VM's local filesystem.
zip_path = os.path.join(DRIVE_PATH, ZIP_FILE_NAME)
extract_dir = '/content/DDLS_Drug_Repurposing'  # extraction target on the Colab VM

# Only a directory that actually contains something counts as "already
# extracted". An empty directory can be left behind by an earlier run that
# failed after creating it, and must not cause extraction to be skipped.
already_extracted = os.path.isdir(extract_dir) and bool(os.listdir(extract_dir))

if already_extracted:
    print(f"目录 {extract_dir} 已存在，跳过解压。")
else:
    # Fail before touching the filesystem if the archive is not where we expect.
    if not os.path.isfile(zip_path):
        raise FileNotFoundError(
            f"Archive not found: {zip_path}. "
            "Check DRIVE_PATH / ZIP_FILE_NAME and that Google Drive is mounted."
        )
    os.makedirs(extract_dir, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
    except BaseException:
        # Extraction failed or was interrupted: remove the partial output so
        # the next run retries instead of skipping on a half-filled directory.
        # The directory was absent or empty before this branch, so nothing
        # pre-existing is lost.
        shutil.rmtree(extract_dir, ignore_errors=True)
        raise
    print(f"文件已解压到: {extract_dir}")

# 3. Move into the extraction directory (absolute paths are still the safer
#    choice when loading files).
os.chdir(extract_dir)
print(f"当前工作目录: {os.getcwd()}")

文件已解压到: /content/DDLS_Drug_Repurposing
当前工作目录: /content/DDLS_Drug_Repurposing


In [4]:
!pip install torch pandas numpy scikit-learn scipy
# 如果 torch 没有自动安装 GPU 版本，可能需要手动安装，但 Colab GPU 运行时通常自带。



In [9]:
import os

# The extraction directory contains a nested folder of the same name; that
# nested folder is the real project root.
OUTER_DIR = '/content/DDLS_Drug_Repurposing'  # assumed extraction directory
INNER_DIR_NAME = 'DDLS_Drug_Repurposing'  # name of the nested folder
NEW_CWD = os.path.join(OUTER_DIR, INNER_DIR_NAME)  # actual project root

# Make the project root the working directory, then report where we ended up.
os.chdir(NEW_CWD)
active_dir = os.getcwd()
print(f"工作目录已切换到: {active_dir}")

工作目录已切换到: /content/DDLS_Drug_Repurposing/DDLS_Drug_Repurposing


EGFR target-based training

In [10]:
# 确保安装了依赖
!pip install torch pandas numpy scikit-learn scipy -q

# 确保模型保存目录存在
!mkdir -p models

# 运行脚本 (使用相对路径)
print("\n--- 运行 DeepDTA 训练脚本 ---")
!python3 notebooks/model_DeepDTA.py


--- 运行 DeepDTA 训练脚本 ---
2025-10-26 20:42:20,118 - INFO - 使用的设备: cuda:0
2025-10-26 20:42:24,968 - INFO - 模型参数已初始化:
2025-10-26 20:42:24,968 - INFO -   - 化合物词汇量: 41, 最大长度: 248
2025-10-26 20:42:24,969 - INFO -   - 蛋白质词汇量: 21, 最大长度: 1210
2025-10-26 20:42:24,969 - INFO -   - 总样本数: 13286
2025-10-26 20:42:24,970 - INFO - 数据已划分 (训练: 10628, 验证: 1328, 测试: 1330)
2025-10-26 20:42:32,294 - INFO - Epoch 1/100 | 用时: 7.32s | 训练 MSE: 38.2375 | 验证 MSE: 14.1851 | 验证 Pearson: 0.2307
2025-10-26 20:42:32,302 - INFO - **模型已保存至 models/deepdta_egfr_best.pt (验证损失: 14.1851)**
2025-10-26 20:42:37,311 - INFO - Epoch 2/100 | 用时: 5.01s | 训练 MSE: 4.9314 | 验证 MSE: 2.2286 | 验证 Pearson: 0.2761
2025-10-26 20:42:37,315 - INFO - **模型已保存至 models/deepdta_egfr_best.pt (验证损失: 2.2286)**
2025-10-26 20:42:43,222 - INFO - Epoch 3/100 | 用时: 5.91s | 训练 MSE: 3.2615 | 验证 MSE: 2.3000 | 验证 Pearson: 0.3328
2025-10-26 20:42:48,410 - INFO - Epoch 4/100 | 用时: 5.19s | 训练 MSE: 3.0989 | 验证 MSE: 2.1367 | 验证 Pearson: 0.3815
2025-10-26 20:42:48,4

Train-target training

In [9]:
# Run src/deepdta_model.py in a separate python3 process via the shell.
# The path is relative, so the project root must be the current working
# directory (set by the earlier os.chdir cell) before this cell is executed.
!python3 src/deepdta_model.py

2025-10-26 21:57:47.882138: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761515867.902137   22702 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761515867.908236   22702 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1761515867.924143   22702 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1761515867.924168   22702 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1761515867.924172   22702 computation_placer.cc:177] computation placer alr

Train-target prediction

In [10]:
# Run src/deepdta_workflow.py in a separate python3 process via the shell.
# The path is relative, so the project root must be the current working
# directory (set by the earlier os.chdir cell) before this cell is executed.
!python3 src/deepdta_workflow.py

2025-10-26 22:04:42.197915: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1761516282.217504   25018 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1761516282.223926   25018 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1761516282.239139   25018 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1761516282.239163   25018 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1761516282.239167   25018 computation_placer.cc:177] computation placer alr

In [15]:
!pip install rdkit
!python3 src/visualize_pipeline.py

2025-10-26 22:25:31,911 - INFO - --- 启动药物结构分析与可视化 ---
2025-10-26 22:25:31,912 - INFO - 输出目录已确认: reports/figures

--- 简化药物结构理化性质摘要 ---

| Drug Name   | SMILES                                         |   MW (g/mol) |   LogP |   H Donors |   H Acceptors |
|:------------|:-----------------------------------------------|-------------:|-------:|-----------:|--------------:|
| Semaglutide | CCCCCCCCCCCCCCCCCC(=O)NCCc1ccc(C(=O)O)cc1      |      431.661 | 7.305  |          2 |             2 |
| Tirzepatide | CCCCCCCCCCCCCCCCCCCC(=O)NCCc1cc(N)c(C(=O)O)cc1 |      474.73  | 7.6674 |          3 |             3 |

----------------------------------

  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_p