## Slurm Parsing

In [1]:
import csv
import glob
import os

from dataclasses import asdict

from cheeto.puppet import (PuppetGroupRecord,
                           PuppetUserRecord,
                           PuppetAccountMap,
                           PuppetGroupMap,
                           SlurmPartition, 
                           SlurmQOS, 
                           SlurmQOSTRES, 
                           SlurmRecord, 
                           parse_yaml_tree, 
                           MergeStrategy)

from cheeto.slurm import SAcctMgr
import sh
import yaml
from rich import print as pprint

In [2]:
def sacctmgr_show_parser(fp):
    return csv.DictReader(fp, delimiter='|')

In [3]:
def sanitize_tres(tres_string):
    if not tres_string:
        return {}
    tokens = tres_string.strip().split(',')
    tres = {}
    for token in tokens:
        resource, _, value = token.partition('=')
        resource = resource.removeprefix('gres/')
        resource, _, resource_type = resource.partition(':') # for now we discard the type from resource:type
        tres[resource] = value
    return tres

with open('slurm.qos.txt') as fp:
    for row in sacctmgr_show_parser(fp):
        print('Name={Name}, GrpTRES={GrpTRES}'.format(**row))
        print(sanitize_tres(row['GrpTRES']))

In [4]:
def build_qos_map(qos_file_pointer, filter_on={'Name': 'normal'}):
    qos_map = {}
    filtered_map = {}
    for row in sacctmgr_show_parser(qos_file_pointer):
        
        filter_row = False
        for key, val in row.items():
            if filter_on.get(key, False) == val:
                filter_row = True
                break
    
        slurm_tres = sanitize_tres(row['GrpTRES'])
        puppet_tres = SlurmQOSTRES(cpus=slurm_tres.get('cpu', None),
                                   mem=slurm_tres.get('mem', None),
                                   gpus=slurm_tres.get('gpu', None))
        if puppet_tres.cpus is puppet_tres.mem is puppet_tres.gpus is None:
            puppet_tres = None
        puppet_qos = SlurmQOS(group=puppet_tres,
                              priority=row['Priority'])
        
        if filter_row:
            filtered_map[row['Name']] = puppet_qos
        else:
            qos_map[row['Name']] = puppet_qos
    return qos_map, filtered_map

In [5]:
with open('slurm.qos.txt') as fp:
    qos_map, filtered_map = build_qos_map(fp)
    #pprint(qos_map)
    #pprint(filtered_map)

In [6]:
qos_map

{'Name': SlurmQOS(group=None, job=None, priority='Priority'),
 'zhougrpbh': SlurmQOS(group=SlurmQOSTRES(cpus='96', gpus=None, mem='1000000M'), job=None, priority='0'),
 'zhougrpbm': SlurmQOS(group=None, job=None, priority='10'),
 'zhougrpm': SlurmQOS(group=None, job=None, priority='80'),
 'zhougrph': SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0'),
 'jrigrpbm': SlurmQOS(group=None, job=None, priority='20'),
 'jrigrpbh': SlurmQOS(group=SlurmQOSTRES(cpus='128', gpus=None, mem='980000M'), job=None, priority='20'),
 'mahergrpbm': SlurmQOS(group=None, job=None, priority='10'),
 'mahergrpbh': SlurmQOS(group=SlurmQOSTRES(cpus='64', gpus=None, mem='500000M'), job=None, priority='0'),
 'adamgrpm': SlurmQOS(group=None, job=None, priority='10'),
 'adamgrph': SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0'),
 'jyllwgrpbh': SlurmQOS(group=SlurmQOSTRES(cpus='64', gpus=None, mem='500000M'), job=None, priority='0'),
 'jyllw

In [7]:
def filter_nulls(d):
    return {key: val for key, val in d.items() if val}

def pprint_nonnull(d):
    pprint(filter_nulls(d))

In [8]:
with open('slurm.assoc.txt') as fp:
    for row in sacctmgr_show_parser(fp):
        assoc = {key: val for key, val in row.items() if val}
        #pprint(assoc)
        #print('User={User}, Account={Account}, Partition={Partition} QOS={QOS}'.format(**row))

In [9]:
def check_filter(d, filter_on):
    for key, val in d.items():
        if val in filter_on.get(key, []):
            return True
    return False

In [10]:
def sacctmgr_build_group_slurm_map(assoc_file_pointer,
                                   qos_map,
                                   filter_accounts_on={'Account': ['root', 'farm-test']},
                                   filter_users_on={'QOS': ['normal'],
                                                    'Partition': ['seriallcluster=farm']}):
    # map account => (partition, QOS)
    assoc_map = {'accounts': {}, 'users': {}}
    filtered_assocs = []
    for row in sacctmgr_show_parser(assoc_file_pointer):
        row = filter_nulls(row)
        #pprint(row)

        if 'Partition' not in row:
            # This is a parent Account definition, add it to the map.
            # Set partitions empty and fill in as we encounter them.
            
            filter_row = check_filter(row, filter_accounts_on)
            if filter_row:
                filtered_assocs.append(row)
            else:
                assoc_map['accounts'][row['Account']] = dict(
                    partitions = {},
                    max_jobs = row.get('MaxJobs', None)
                )
        elif 'User' in row:
            # partition<->QOS associations actually only exist per-user, but on our clusters,
            # they should always 
            filter_row = check_filter(row, filter_users_on)

            if filter_row:
                filtered_assocs.append(row)
            else:
                user_name = row['User']
                account_name = row['Account']
                partition_name = row['Partition']
                qos_name = row['QOS']

                try:
                    account_map = assoc_map['accounts'][account_name]
                except KeyError:
                    pprint(row)
                    pprint(filtered_assocs)
                    raise

                try:
                    qos = qos_map[qos_name]
                    print(qos)
                    if not qos:
                        qos = None
                    partition = SlurmPartition(
                        qos = qos
                    )
                except KeyError as e:
                    print(e)
                else:
                    if partition_name not in account_map['partitions']:
                        account_map['partitions'][partition_name] = partition

                    if user_name not in assoc_map['users']:
                        assoc_map['users'][user_name] = [partition]
                    else:
                        assoc_map['users'][user_name].append(partition)
                    
    # Now, convert to our structured dataclasses.
    for account_name, account_data in assoc_map['accounts'].items():
        assoc_map['accounts'][account_name] = SlurmRecord(
            account = account_name,
            max_jobs = account_data['max_jobs'],
            partitions = account_data['partitions']
        )

    return assoc_map, filtered_assocs

In [11]:
with open('slurm.assoc.txt') as fp:
    assoc_map, filtered_assocs = sacctmgr_build_group_slurm_map(fp, qos_map)

SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='30')
SlurmQOS(group=None, job=None, priority='30')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0')
SlurmQOS(group=None, job=None, priority='10')
SlurmQOS(group=None, job=None, priority='10')
Slur

In [12]:
qos = assoc_map['accounts']['adamgrp'].partitions['high'].qos

In [13]:
files = glob.glob('/work/HPCCF/puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/*')
tree = parse_yaml_tree(files, merge_on=MergeStrategy.ALL)['merged-all']
puppet_mapping = PuppetAccountMap.Schema().load(tree)

In [14]:
import dataclasses

In [15]:
updated_puppet_groups = []
for slurm_account_name, slurm_account in assoc_map['accounts'].items():
    group = puppet_mapping.group.get(slurm_account_name, False)
    if not group:
        print('No match for Slurm account:', slurm_account_name)
        continue
    updated = dataclasses.replace(group, slurm=slurm_account)
    mapping = PuppetGroupMap(group={slurm_account_name: updated})
    updated_puppet_groups.append((slurm_account_name, mapping))

No match for Slurm account: admin
No match for Slurm account: admingrp
No match for Slurm account: agertongrp
No match for Slurm account: billaccount
No match for Slurm account: briangrp
No match for Slurm account: cantugrp
No match for Slurm account: emoniergrp
No match for Slurm account: entnem
No match for Slurm account: fzdkeltgrp
No match for Slurm account: geminigrp
No match for Slurm account: gpul
No match for Slurm account: gquongrp
No match for Slurm account: gtaylorgrp
No match for Slurm account: imtiyazgrp
No match for Slurm account: mldichtegrp
No match for Slurm account: sivasankargrp
No match for Slurm account: testing


In [59]:
pprint(updated_puppet_groups[:5])

In [16]:
dumper = PuppetGroupMap.Schema(only=['group.slurm'])
for group_name, group in updated_puppet_groups:
    path = f'/work/HPCCF/puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/{group_name}.slurm.yaml'
    with open(path, 'w') as fp:
        print(dumper.dumps(group), file=fp)

In [68]:
print(PuppetGroupMap.Schema(only=['group.slurm']).dumps(updated_puppet_groups[0][1]))

group:
  adamgrp:
    slurm:
      partitions:
        bml:
          qos:
            group: {}
        high:
          qos:
            group:
              cpus: 192
              mem: 500G
        high2:
          qos:
            group:
              cpus: 352
              mem: 762000M
        low:
          qos:
            group: {}
        low2:
          qos:
            group: {}
        med:
          qos:
            group: {}
        med2:
          qos:
            group: {}
      account: adamgrp
      max_jobs: 1024



In [60]:
updated_puppet_groups[0][1]

PuppetGroupMap(group={'adamgrp': PuppetGroupRecord(gid=9998, ensure='present', tag=None, storage=None, slurm=SlurmRecord(partitions={'high': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None)), 'low': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=None, gpus=None, mem=None), job=None)), 'med': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=None, gpus=None, mem=None), job=None)), 'med2': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=None, gpus=None, mem=None), job=None)), 'low2': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=None, gpus=None, mem=None), job=None)), 'high2': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus='352', gpus=None, mem='762000M'), job=None)), 'bml': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=None, gpus=None, mem=None), job=None))}, account='adamgrp', max_jobs='1024'))})

In [72]:
assoc_map['gpul']

SlurmRecord(partitions={'gpul': SlurmPartition(name='gpul', qos=SlurmQOS(group=SlurmQOSTRES(cpus=None, gpus=None, mem=None), job=None))}, account='gpul', max_jobs=None)

In [33]:
user = PuppetUserRecord.Schema().load(tree['fmoore']['user']['fmoore'])

In [34]:
print(PuppetUserRecord.Schema().dumps(user))

fullname: Frances C Moore
email: fmoore@ucdavis.edu
uid: 1244121
gid: 1244121
groups:
- adamgrp
password: x
shell: /bin/bash
ensure: present
storage:
  zfs: false
  autofs:
    nas: nas-12-3-ib
    path: /nas-12-3/adamgrp/4/



In [35]:
grp = PuppetGroupRecord.Schema().load(tree['dubcovskygrp']['group']['dubcovskygrp'])

In [None]:
assoc_map[]

In [None]:
import dataclasses

In [None]:
print(PuppetGroupRecordSchema.dumps(grp))

In [16]:
qos.to_slurm()

['GrpCpus=192', 'GrpMem=500G']

In [17]:
sacctmgr = SAcctMgr(sacctmgr_path='/work/HPCCF/cheeto/sacctmgr')

In [18]:
sacctmgr.add_qos('testqos', assoc_map['adamgrp'].partitions['high'].qos)

<Command '/work/HPCCF/cheeto/sacctmgr -i add qos testqos GrpCpus=192 GrpMem=500G'>

In [19]:
sacctmgr.add_account('testaccount')

<Command '/work/HPCCF/cheeto/sacctmgr -i add account testaccount'>

In [20]:
sacctmgr.modify_qos('testqos', qos)

<Command '/work/HPCCF/cheeto/sacctmgr -i modify qos testqos set GrpCpus=192 GrpMem=500G'>

In [21]:
sacctmgr.add_user('testuser', 'testaccount', 'testpartition', 'testqos')

<Command '/work/HPCCF/cheeto/sacctmgr -i add user testuser account=testaccount partition=testpartition qos=testqos'>

In [22]:
sacctmgr.remove_user('testuser')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser'>

In [23]:
sacctmgr.remove_user('testuser', account_name='testaccount')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser account=testaccount'>

In [24]:
sacctmgr.remove_user('testuser', partition_name='testpartition')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser partition=testpartition'>

In [25]:
sacctmgr.remove_user('testuser', account_name='testaccount', partition_name='testpartition')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser account=testaccount partition=testpartition'>