# 13th May 2022

Notebook for submitting SLURM jobs that train (and then test) a transformer mapping molecular SMILES strings to prototype labels and Pearson labels.

In [4]:
from typing import List

def write_slurm_script(job_name: str,
                       run_time: str,
                       output_name: str,
                       script: str,
                       file_name: str,
                       args: List = None,
                       package_dir: str = None,
                       email: bool = False,
                       gpu: bool = False,
                       conda_env: str = 'ampere'):
    """Write a SLURM batch submission script to ``file_name``.

    The generated file consists of, in order: the ``#SBATCH`` header, the
    environment-module / conda activation lines, an optional
    ``pip install`` of a local package, and finally the command to run.

    Args:
        job_name: Value for ``#SBATCH -J``.
        run_time: Value for ``#SBATCH --time``.
        output_name: Value for ``#SBATCH --output``.
        script: Command (or newline-separated commands) placed at the end
            of the batch script.
        file_name: Path of the batch script to create (overwritten if it
            already exists).
        args: Optional extra arguments appended to ``script``, separated by
            single spaces. Items are converted with ``str``.
        package_dir: If given, the script first ``cd``s into this directory
            and runs ``pip install .`` there before the command.
        email: If True, add ``#SBATCH --mail-type=ALL``.
        gpu: If True, request one GPU on the ``ampere`` partition under the
            GPU account; otherwise request one CPU on ``icelake-himem``
            under the CPU account.
        conda_env: Name passed to ``source activate``.

    Returns:
        None. The only effect is writing ``file_name``.
    """
    # Only the account, the resource request and the partition differ
    # between GPU and CPU jobs; everything else in the header is shared.
    if gpu:
        account = 'LEE-WJM41-SL2-GPU'
        resource_request = '#SBATCH --gres=gpu:1'
        partition = 'ampere'
    else:
        account = 'LEE-WJM41-SL2-CPU'
        # No leading whitespace here: a directive line has to begin with
        # "#SBATCH" to be picked up by sbatch.
        resource_request = '#SBATCH --cpus-per-task=1'
        partition = 'icelake-himem'

    slurm_options = [
        '#!/bin/bash',
        f'#SBATCH -J {job_name}',
        f'#SBATCH -A {account}',
        '#SBATCH --nodes=1',
        '#SBATCH --ntasks=1',
        resource_request,
        f'#SBATCH --time={run_time}',
        '#SBATCH --mail-user=wjm41@cam.ac.uk',
        f'#SBATCH --output={output_name}',
        f'#SBATCH -p {partition}',
    ]
    if email:
        slurm_options.append('#SBATCH --mail-type=ALL')

    # NOTE(review): the same module stack (including rhel8/default-amp) is
    # loaded for CPU jobs as for GPU jobs -- confirm this is the intended
    # environment for the icelake-himem partition.
    module_options = [
        '. /etc/profile.d/modules.sh',
        'module purge',
        'module load rhel8/default-amp',
        'module load miniconda/3',
        f'source activate {conda_env}',
    ]

    if package_dir is not None:
        pre_empt = f'cd {package_dir}; pip install . --use-feature=in-tree-build'
    else:
        # Left empty on purpose; it still contributes a blank line below.
        pre_empt = ''

    if args is not None:
        # str() each item so numeric arguments (and tuples) can be passed.
        command_to_run = ' '.join([script, *(str(arg) for arg in args)])
    else:
        command_to_run = script

    string_to_write = '\n'.join([
        *slurm_options,
        *module_options,
        pre_empt,
        command_to_run,
    ])

    with open(file_name, 'w') as f:
        f.write(string_to_write)

    return


## Training

In [4]:
import os

current_dir = os.getcwd()
data_dir = '/home/wjm41/ml_physics/smi2wyk/data/smi2protopear'
log_dir = '/home/wjm41/ml_physics/smi2wyk/runs/smi2protopear'

dataset = 'smi2protopear'
model_name = 'smi2protopear'
model_dir = f'/rds-d2/user/wjm41/hpc-work/models/smi2wyk/{dataset}'

run_dir = '/home/wjm41/ml_physics/smi2wyk/runs/smi2protopear'

model_path = f'{model_dir}/{model_name}'

data_path = f'{data_dir}/ultra_large/{dataset}_negative.pkl'

preprocess_script = f'onmt_build_vocab -config {data_dir}/preprocess.yaml'
train_script = f'onmt_train -config {data_dir}/train_single.yaml -tensorboard True -tensorboard_log_dir {run_dir}'
script = f'{preprocess_script}\n{train_script}'

file_name = f'subm_train_{dataset}'
run_time = '3:00:00'
output_name = f'{current_dir}/{file_name}.out'

write_slurm_script(job_name=f'{file_name}',
                   run_time=f'{run_time}',
                   output_name=output_name,
                   script=script,
                   file_name=file_name,
                   email=True,
                   conda_env='DebiasedMT',
                   gpu=True
                   )

print(f"Submitted training jobs for {model_path} on {data_path}")

!sbatch {file_name}


Submitted training jobs for /rds-d2/user/wjm41/hpc-work/models/smi2wyk/smi2protopear/smi2protopear on /home/wjm41/ml_physics/smi2wyk/data/smi2protopear/ultra_large/smi2protopear_negative.pkl
Submitted batch job 61038925


## Translation and scoring

In [11]:
import os

current_dir = os.getcwd()
data_dir = '/home/wjm41/ml_physics/smi2wyk/data/smi2protopear'
log_dir = '/home/wjm41/ml_physics/smi2wyk/runs/smi2protopear'

dataset = 'smi2protopear'

model_name = 'smi2protopear'
step = 20000
model_dir = f'/rds-d2/user/wjm41/hpc-work/models/smi2wyk'
model_path = f'{model_dir}/{model_name}_step_{step}.pt'

run_dir = '/home/wjm41/ml_physics/smi2wyk/runs/smi2protopear'
script_dir = '/home/wjm41/ml_physics/smi2wyk/smi2wyk'

pred_name = f'{data_dir}/pred_step_{step}.txt'

translate_script = f'onmt_translate -model {model_path} -src {data_dir}/src-test.txt -output {pred_name} -n_best 5 -beam_size 5 -gpu 0'
score_script = f'python {script_dir}/score_predictions.py -targets {data_dir}/tgt-test.csv -beam_size 5 -predictions {pred_name}'
script = f'{translate_script}\n{score_script}'

file_name = f'subm_test_{dataset}_{step}'
run_time = '1:00:00'
output_name = f'{current_dir}/{file_name}.out'

write_slurm_script(job_name=f'{file_name}',
                   run_time=f'{run_time}',
                   output_name=output_name,
                   script=script,
                   file_name=file_name,
                   email=True,
                   conda_env='DebiasedMT',
                   gpu=True
                   )

print(f"Submitted translation & scoring jobs for {model_path} on {data_dir}")

!sbatch {file_name}


Submitted translation & scoring jobs for /rds-d2/user/wjm41/hpc-work/models/smi2wyk/smi2protopear_step_20000.pt on /home/wjm41/ml_physics/smi2wyk/data/smi2protopear
Submitted batch job 61050354
