<img src="assets/logo.jpg" alt="HPC Builder Logo" width="64"/>

# HPC Toolkit

ARC - Aschaffenburger Research Cluster<br>
Version 2024.5.1<br>
_by Kat 2024_

#### Setup HPC Toolkit

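First, you need to set up some mandatory information before you start using the **HPC Toolkit**. Create a file **__hpc_creds.yaml** in your workspace root folder (the cell below loads it from `../__hpc_creds.yaml`) and fill in all of the settings it reads: `lab`, `user`, `workspace_dir`, `container_name`, `container_tag`, `docker_file`, `registry_host`, `hpc_mn`, `hpc_mn_project_base`, and `ssh_creds`.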

**Also:** Add your YAML file to `.gitignore` so that this information is never pushed to the repository.

In [1]:
from io import StringIO
import pandas as pd
import subprocess
import yaml
import hpctk

# Load the user's HPC settings from the YAML file one directory above this notebook.
# UTF-8 is requested explicitly so decoding does not depend on the platform default.
with open('../__hpc_creds.yaml', 'r', encoding='utf-8') as file:
    # safe_load returns None for an empty document; fall back to an empty mapping
    # so the checks below can report exactly what is missing.
    config = yaml.safe_load(file) or {}

# The settings must be a top-level mapping of setting name -> value.
if not isinstance(config, dict):
    raise TypeError(
        f"../__hpc_creds.yaml must contain a mapping of settings, got {type(config).__name__}"
    )

# Fail early with a single message naming every missing setting, instead of a
# bare KeyError on whichever key happens to be read first.
required_keys = (
    "lab",
    "user",
    "workspace_dir",
    "container_name",
    "container_tag",
    "docker_file",
    "registry_host",
    "hpc_mn",
    "hpc_mn_project_base",
    "ssh_creds",
)
missing_keys = [key for key in required_keys if key not in config]
if missing_keys:
    raise KeyError(
        f"../__hpc_creds.yaml is missing required settings: {', '.join(missing_keys)}"
    )

# Expose each setting as a notebook-level variable for the cells below.
lab = config["lab"]
user = config["user"]
workspace_dir = config["workspace_dir"]
container_name = config["container_name"]
container_tag = config["container_tag"]
docker_file = config["docker_file"]
registry_host = config["registry_host"]
hpc_mn = config["hpc_mn"]
hpc_mn_project_base = config["hpc_mn_project_base"]
ssh_creds = config["ssh_creds"]

# Show the loaded settings as a table so they can be checked at a glance.
# NOTE(review): the rendered table includes the ssh_creds mapping; clear this
# cell's output before sharing the notebook if that is considered sensitive.
hpctk.print_hpc_setup_yaml(lab, user, container_name, container_tag, docker_file, registry_host, hpc_mn, ssh_creds, workspace_dir, hpc_mn_project_base)

Fact,Value
lab,aghm
user,u01233
container_name,automldoe
container_tag,latest
docker_file,Dockerfile
registry_host,10.32.118.9:5000
hpc_mn,10.32.118.9
ssh_creds,"{'port': '44100', 'ssh_key_name': 'hpc', 'ssh_path': '~/.ssh', 'user': 'u01233'}"
workspace_dir,automl_DOE
hpc_mn_project_base,/home_cu/u01233/Projects


#### Get detailed cluster status

In [2]:
# Cluster status: run `sinfo -lNe` through hpctk.run_remote (given the hpc_mn
# host and the ssh_creds settings) and render the captured text as a styled table.
# NOTE(review): the command is passed as one string element here, while the squeue
# cell passes separate tokens — confirm run_remote accepts both forms.
sinfo_cmd = ["sinfo -lNe"]

try:
    rc = hpctk.run_remote(sinfo_cmd, hpc_mn, ssh_creds, stdout=False)
    # presumably `1` and highlight_state select how the table is parsed and
    # coloured — TODO confirm against hpctk.show_tab_styled
    hpctk.show_tab_styled(rc.stdout, 1, hpctk.highlight_state)

except subprocess.CalledProcessError as err:
    # The remote command failed: print its exit status, then its stderr text.
    print(f"\n{hpctk.STATE_FAIL} Failed: {err.returncode}\n")
    print(f":: Error message: {err.stderr}")

Unnamed: 0,NODELIST,NODES,PARTITION,STATE,CPUS,S:C:T,MEMORY,TMP_DISK,WEIGHT,AVAIL_FE,REASON
0,DT-IW-24-062,1,LARs-p0*,idle,32,1:16:2,500000,0,1,(null),none
1,DT-ZW-21-006,1,LARs-p0*,idle,36,1:18:2,123000,0,1,(null),none
2,LG-MIS-DT-001,1,LARs-p0*,idle,16,1:8:2,57400,0,1,(null),none


#### Get squeue

In [3]:
# Job queue: list this user's pending and running jobs with `squeue`, run through
# hpctk.run_remote (given the hpc_mn host and the ssh_creds settings).
squeue_cmd = [
    "squeue",
    "-u", user,                 # only jobs owned by the configured user
    "-t", "pending,running",    # only jobs in these states
    "-r",                       # one line per job-array element
]

try:
    rc = hpctk.run_remote(squeue_cmd, hpc_mn, ssh_creds, stdout=False)
    # presumably `0`, highlight_job_state and ['ST'] control parsing and which
    # column gets styled — TODO confirm against hpctk.show_tab_styled_bar
    hpctk.show_tab_styled_bar(rc.stdout, 0, hpctk.highlight_job_state, ['ST'])

except subprocess.CalledProcessError as err:
    # The remote command failed: print its exit status, then its stderr text.
    print(f"\n{hpctk.STATE_FAIL} Failed: {err.returncode}\n")
    print(f":: Error message: {err.stderr}")

Unnamed: 0,JOBID,PARTITION,NAME,USER,ST,TIME,NODES,NODELIST(REASON)


#### Run job on HPC