# 13th May 2022

Notebook for submitting transformer jobs training on molecular smiles -> prototype label & pearson label.

In [2]:
from typing import List

def write_slurm_script(job_name: str,
                       run_time: str,
                       output_name: str,
                       script: str,
                       file_name: str,
                       args: List = None,
                       ntasks: int = 1,
                       ncores: int = 1,
                       package_dir: str = None,
                       email: bool = False,
                       gpu: bool = False,
                       conda_env: str = 'ampere'):

    if gpu:
        slurm_options = [
            '#!/bin/bash',
            f'#SBATCH -J {job_name}',
            '#SBATCH -A LEE-WJM41-SL2-GPU',
            '#SBATCH --nodes=1',
            f'#SBATCH --ntasks={ntasks}',
            f'#SBATCH --gres=gpu:{ncores}',
            f'#SBATCH --time={run_time}',
            '#SBATCH --mail-user=wjm41@cam.ac.uk',
            f'#SBATCH --output={output_name}',
            '#SBATCH -p ampere',
        ]
    else:
        slurm_options = [
            '#!/bin/bash',
            f'#SBATCH -J {job_name}',
            '#SBATCH -A LEE-WJM41-SL2-CPU',
            '#SBATCH --nodes=1',
            f'#SBATCH --ntasks={ntasks}',
            ' #SBATCH --cpus-per-task=1',
            f'#SBATCH --time={run_time}',
            '#SBATCH --mail-user=wjm41@cam.ac.uk',
            f'#SBATCH --output={output_name}',
            '#SBATCH -p icelake-himem',
        ]
    if email:
        slurm_options.append('#SBATCH --mail-type=ALL')

    if gpu:
        module_options = [
            '. /etc/profile.d/modules.sh',
            'module purge',
            'module load rhel8/default-amp',
            'module load miniconda/3',
            f'source activate {conda_env}',
        ]
    else:
        module_options = [
            '. /etc/profile.d/modules.sh',
            'module purge',
            'module load rhel8/default-amp',
            'module load miniconda/3',
            f'source activate {conda_env}',
        ]
    if package_dir is not None:
        pre_empt = f'cd {package_dir}; pip install . --use-feature=in-tree-build'
    else:
        pre_empt = ''

    slurm_options = '\n'.join(slurm_options)
    module_options = '\n'.join(module_options)
    if args is not None:
        command_to_run = ' '.join([script]+args)
    else:
        command_to_run = script
        
    string_to_write = f'{slurm_options}\n{module_options}\n{pre_empt}\n{command_to_run}'

    with open(file_name, 'w') as f:
        f.write(string_to_write)

    return


Training

In [3]:
import os

current_dir = os.getcwd()
data_dir = '/home/wjm41/ml_physics/smi2wyk/data/smi2protopear'

output_path = f'{data_dir}/csd_organic_aflow.csv'

ntasks = 76
python_script = f'python mpi_aflow_generation.py --output_path {output_path}'
mpi_script = f'mpirun -np {ntasks} -ppn {ntasks}'
script = f'{mpi_script} {python_script}'

file_name = f'subm_mpi_aflow'
run_time = '3:00:00'
output_name = f'{current_dir}/{file_name}.out'

write_slurm_script(job_name=f'{file_name}',
                   run_time=f'{run_time}',
                   output_name=output_name,
                   script=script,
                   file_name=file_name,
                   email=True,
                   ntasks=ntasks,
                   conda_env='csd_env',
                   gpu=False
                   )

print(f"Submitted MPI aflow jobs, writing to {output_path}")

!sbatch {file_name}


Submitted MPI aflow jobs, writing to /home/wjm41/ml_physics/smi2wyk/data/smi2protopear/csd_organic_aflow.csv
/bin/bash: sbatch: command not found
