Submit training job for Dopamine D4

In [2]:
from typing import List

def write_slurm_script(job_name:str,
                     run_time:str,
                     output_name:str,
                     package_dir: str,
                     script:str,
                     args: List,
                     file_name: str,
                     email: bool = False,
                     account: str = 'LEE-WJM41-SL2-GPU',
                     partition: str = 'ampere',
                     mail_user: str = 'wjm41@cam.ac.uk') -> None:
    """Write a single-node, single-GPU SLURM batch script to ``file_name``.

    The generated script consists of, in order:

    1. a bash shebang followed by the ``#SBATCH`` directives,
    2. the environment-module setup and ``source activate dgl_life``,
    3. ``cd {package_dir} && pip install .``,
    4. the command line built from ``script`` followed by ``args``.

    Args:
        job_name: Value for ``#SBATCH -J``.
        run_time: Value for ``#SBATCH --time`` (e.g. ``'24:00:00'``).
        output_name: Value for ``#SBATCH --output``.
        package_dir: Directory the job changes into before ``pip install .``.
        script: Command prefix to run, e.g. ``'python /path/to/train.py'``.
        args: Command-line arguments appended to ``script``. Any iterable is
            accepted and every element is converted with ``str()``, so
            tuples and numeric values work as well as lists of strings.
        file_name: Path of the batch script to create (overwritten if it
            already exists).
        email: When True, add ``#SBATCH --mail-type=ALL``.
        account: Value for ``#SBATCH -A``.
        partition: Value for ``#SBATCH -p``.
        mail_user: Value for ``#SBATCH --mail-user``.

    Returns:
        None. The only effect is the file written to ``file_name``.
    """
    slurm_options = [
        '#!/bin/bash',
        f'#SBATCH -J {job_name}',
        f'#SBATCH -A {account}',
        '#SBATCH --nodes=1',
        '#SBATCH --ntasks=1',
        '#SBATCH --gres=gpu:1',
        f'#SBATCH --time={run_time}',
        f'#SBATCH --mail-user={mail_user}',
        f'#SBATCH --output={output_name}',
        f'#SBATCH -p {partition}',
    ]
    if email:
        slurm_options.append('#SBATCH --mail-type=ALL')

    module_options = [
        '. /etc/profile.d/modules.sh',
        'module purge',
        'module load rhel8/default-amp',
        'module load miniconda/3',
        'source activate dgl_life',
    ]

    # '&&' rather than ';': if the cd fails, do not run `pip install .` in
    # whatever directory the job happened to start in.
    pre_empt = f'cd {package_dir} && pip install .'

    # str() each argument so non-string values (ints, paths) and non-list
    # iterables (tuples) do not raise TypeError when joined.
    command_to_run = ' '.join([script, *(str(arg) for arg in args)])

    script_lines = [*slurm_options, *module_options, pre_empt, command_to_run]

    with open(file_name, 'w') as f:
        # Terminate the final line so the result is a well-formed text file.
        f.write('\n'.join(script_lines) + '\n')

In [3]:
import os

current_dir = os.getcwd()
script_dir = '/rds-d2/user/wjm41/hpc-work/datasets/Ugis/logbooks/17-Mar-2022/'
data_dir = '/rds-d2/user/wjm41/hpc-work/datasets/Ugis/datasets'
log_dir = '/rds-d2/user/wjm41/hpc-work/datasets/Ugis/runs/ultra-large/D4/'

dataset='D4'
model_name = 'train_ultra'
model_dir = f'/rds-d2/user/wjm41/hpc-work/models/dock2hit/{dataset}'

model_path = f'{model_dir}/{model_name}'
# data_path = f'{data_dir}/{dataset}_test.csv'
data_path = f'{data_dir}/{dataset}_docked_valid.pkl'

script = f'python {script_dir}/train_ultra.py'

log_step = 100
args = [
    f'-p {data_path}',
    '-batch_size 32768',
    '-minibatch_size 8192',
    '-val',
    '-n_epochs 3',
    f'-save_dir {model_path}',
    f'-log_dir {log_dir}',
    f'-log_step {log_step}'
    ]

file_name = 'subm_train_ultra'
run_time = '24:00:00'
output_name = f'{current_dir}/subm_train_ultra.out'

write_slurm_script(job_name=f'{dataset}_train_ultra',
                   run_time=f'{run_time}',
                   output_name=output_name,
                   package_dir='/rds-d2/user/wjm41/hpc-work/datasets/Ugis/',
                   script=script,
                   args=args,
                   file_name=file_name,
                   email=True
                    )

print(f"Submitted training jobs for {model_path} on {data_path}")

!sbatch {file_name}

Submitted training jobs for /rds-d2/user/wjm41/hpc-work/models/dock2hit/D4/train_ultra on /rds-d2/user/wjm41/hpc-work/datasets/Ugis/datasets/D4_docked_valid.pkl
Submitted batch job 57261915
