## Slurm Parsing

In [1]:
import csv
import glob
import os

import dataclasses
from dataclasses import asdict

from cheeto.puppet import (PuppetGroupRecord,
                           PuppetUserRecord,
                           PuppetUserMap,
                           PuppetAccountMap,
                           PuppetGroupMap,
                           SlurmPartition, 
                           SlurmQOS, 
                           SlurmQOSTRES, 
                           SlurmRecord, 
                           parse_yaml_forest, 
                           MergeStrategy)
from cheeto.slurm import SAcctMgr, build_slurm_qos_state, build_puppet_association_state, build_puppet_qos_state
from cheeto.utils import check_filter, filter_nulls

import sh
import yaml
from rich import print as rprint
from pprint import pprint

# QoS Map

Build a QoS mapping from the *current state of the world as known to Slurm*. Parses the output of `sacctmgr show qos -P`.

In [4]:
sacctmgr = SAcctMgr()
slurm_qos_map, slurm_filtered_map = sacctmgr.get_slurm_qos_state()

In [5]:
slurm_qos_map

{'datalabgrp-gpu-a100-h-qos': SlurmQOS(group=SlurmQOSTRES(cpus=32, gpus=1, mem='128000M'), job=None, priority=0),
 'datalabgrp-high2-qos': SlurmQOS(group=SlurmQOSTRES(cpus=256, gpus=None, mem='512000M'), job=None, priority=0),
 'datalabgrp-low2-qos': SlurmQOS(group=None, job=None, priority=10),
 'datalabgrp-med2-qos': SlurmQOS(group=None, job=None, priority=10),
 'qtlchenggrp-bmh-qos': SlurmQOS(group=SlurmQOSTRES(cpus=96, gpus=None, mem='1000000M'), job=None, priority=0),
 'qtlchenggrp-bml-qos': SlurmQOS(group=None, job=None, priority=10),
 'qtlchenggrp-bmm-qos': SlurmQOS(group=None, job=None, priority=10),
 'qtlchenggrp-gpuh-qos': SlurmQOS(group=SlurmQOSTRES(cpus=6, gpus=1, mem='96000M'), job=None, priority=0),
 'qtlchenggrp-gpum-qos': SlurmQOS(group=SlurmQOSTRES(cpus=60, gpus=10, mem='983040M'), job=None, priority=0),
 'qtlchenggrp-high-qos': SlurmQOS(group=SlurmQOSTRES(cpus=128, gpus=None, mem='256000M'), job=None, priority=40),
 'qtlchenggrp-high2-qos': SlurmQOS(group=SlurmQOSTRES(

# Bootstrapping: Exporting Old Slurm to YAML

First, we need to write out the old State of the World as known to slurm to our YAML format. We parse out all the associations from `sacctmgr show users -P` and construct the QoS <=> Partition mapping as implied by the user associations, and also keep track of the partitions and QoS's associated with particular users for those that are members of multiple accounts.

In [2]:
with open('peloton.qos.txt') as fp:
    old_slurm_qos_map, old_slurm_filtered_map = build_slurm_qos_state(fp)

In [3]:
old_slurm_qos_map

{'statsgrph2': SlurmQOS(group=SlurmQOSTRES(cpus=256, gpus=None, mem='1024000M'), job=None, priority=0),
 'adminm': SlurmQOS(group=None, job=None, priority=0),
 'gpu': SlurmQOS(group=None, job=None, priority=0),
 'adminh': SlurmQOS(group=None, job=None, priority=0),
 'statsgrpm2': SlurmQOS(group=None, job=None, priority=0),
 'awetzelgrpm2': SlurmQOS(group=None, job=None, priority=0),
 'awetzelgrph2m': SlurmQOS(group=SlurmQOSTRES(cpus=128, gpus=None, mem=None), job=None, priority=0),
 'awetzelgrph2': SlurmQOS(group=SlurmQOSTRES(cpus=256, gpus=None, mem='1024000M'), job=None, priority=0),
 'donadiogrpm': SlurmQOS(group=None, job=None, priority=0),
 'donadiogrph': SlurmQOS(group=SlurmQOSTRES(cpus=544, gpus=None, mem='1114112M'), job=None, priority=0),
 'kellogggrph': SlurmQOS(group=SlurmQOSTRES(cpus=128, gpus=None, mem='256000M'), job=None, priority=0),
 'kellogggrpm': SlurmQOS(group=None, job=None, priority=0),
 'egpgrpm2': SlurmQOS(group=None, job=None, priority=0),
 'egpgrph2': SlurmQOS

In [23]:
def sacctmgr_build_group_slurm_map(assoc_file_pointer,
                                   qos_map,
                                   filter_accounts_on={'Account': ['root', 'farm-test']},
                                   filter_users_on={'QOS': ['normal'],
                                                    'Partition': ['seriallcluster=farm']}):
    #references_qoses = {
    #    'adamgrpm': 'adamgrp-med-qos',
    #    'adamgrph': 'adamgrp-high-qos',
    #    'adamgrph2': 'adamgrp-high2-qos',
    #    'adamgrpm2': 'adamgrp-med2-qos'
    #}
    
    references_qoses = {
        'normal': 'normal'
    }
    
    
    
    # map account => (partition, QOS)
    assoc_map = {'accounts': {}, 'users': {}}
    filtered_assocs = []
    for row in SAcctMgr.get_show_parser(assoc_file_pointer):
        row = filter_nulls(row)
        #pprint(row)

        if 'Partition' not in row and 'User' not in row:
            # This is a parent Account definition, add it to the map.
            # Set partitions empty and fill in as we encounter them.
            
            filter_row = check_filter(row, filter_accounts_on)
            if filter_row:
                filtered_assocs.append(row)
            else:
                assoc_map['accounts'][row['Account']] = dict(
                    partitions = {},
                    max_jobs = row.get('MaxJobs', None)
                )
        elif 'Partition' in row and 'User' in row:
            # partition<->QOS associations actually only exist per-user, but on our clusters,
            # they should always 
            filter_row = check_filter(row, filter_users_on)

            if filter_row:
                filtered_assocs.append(row)
            else:
                user_name = row['User']
                account_name = row['Account']
                partition_name = row['Partition']
                qos_name = row['QOS']

                try:
                    account_map = assoc_map['accounts'][account_name]
                except KeyError:
                    pprint(row)
                    pprint(filtered_assocs)
                    raise

                
                try:

                    if account_name != 'adamgrp' and qos_name in ['adamgrph', 'adamgrph2']:
                        continue
                    elif qos_name in references_qoses:
                        qos = references_qoses[qos_name]
                    else:
                        qos = qos_map[qos_name]
                    
                    if not qos:
                        qos = None

                    partition = SlurmPartition(
                        qos = qos
                    )
                except KeyError as e:
                    print(e)
                else:
                    if partition_name not in account_map['partitions']:
                        account_map['partitions'][partition_name] = partition
                    else:
                        if account_map['partitions'][partition_name].qos == 'normal' and qos_name != 'normal':
                            print(qos_name, qos)
                            account_map['partitions'][partition_name] = partition

                    if user_name not in assoc_map['users']:
                        assoc_map['users'][user_name] = [(account_name, partition_name, partition)]
                    else:
                        assoc_map['users'][user_name].append((account_name, partition_name, partition))
        else:
            print(row)
                    
    # Now, convert to our structured dataclasses.
    for account_name, account_data in assoc_map['accounts'].items():
        assoc_map['accounts'][account_name] = SlurmRecord(
            account = account_name,
            max_jobs = account_data['max_jobs'],
            partitions = account_data['partitions']
        )

    return assoc_map, filtered_assocs

In [5]:
old_slurm_qos_map['adamgrpm']

KeyError: 'adamgrpm'

In [34]:
with open('peloton.assoc.txt') as fp:
    old_assoc_map, old_filtered_assocs = \
        sacctmgr_build_group_slurm_map(fp, old_slurm_qos_map,
                                       filter_accounts_on={'Account': ['root', 'admin', 'upgrade-test']},
                                       filter_users_on={'Account': ['root', 'admin', 'upgrade-test'],
                                                        'Partition': ['rome-256-512', 'rome-256-+']})

{'Cluster': 'peloton', 'Account': 'root', 'User': 'root', 'Share': '1', 'QOS': 'normal'}
awetzelgrph2 SlurmQOS(group=SlurmQOSTRES(cpus=256, gpus=None, mem='1024000M'), job=None, priority=0)
rudolphgrpm2 SlurmQOS(group=None, job=None, priority=0)


An example QOS as represented internally by `cheeto`.

In [22]:
rprint(old_assoc_map['accounts']['cmb'])

In [30]:
old_assoc_map['users']['soukhind']

[('cmb', 'requeue', SlurmPartition(qos='normal')),
 ('cmb', 'p-cmb', SlurmPartition(qos='normal'))]

In [35]:
pprint(old_assoc_map['accounts'])

{'awetzelgrp': SlurmRecord(account='awetzelgrp',
                           partitions={'high': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=256,
                                                                                              gpus=None,
                                                                                              mem='1024000M'),
                                                                           job=None,
                                                                           priority=0)),
                                       'high2': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=256,
                                                                                               gpus=None,
                                                                                               mem='1024000M'),
                                                                            job=None,
                                                

In [12]:
old_assoc_map['groups'].keys()

KeyError: 'groups'

### Load up the existing YAML tree from our accounts repo.

In [2]:
def load_puppet_mapping(root='../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu',
                        pattern='*.yaml'):
    files = glob.glob(os.path.join(root, pattern))
    tree = parse_yaml_forest(files, merge_on=MergeStrategy.ALL)['merged-all']
    puppet_mapping = PuppetAccountMap.Schema().load(tree)
    return puppet_mapping

Merge the Slurm models into the existing accounts tree. This works for all the Slurm accounts where the name matches the associated PI group name; some don't match and we'll need to do them more or less manually.

In [3]:
puppet_mapping = load_puppet_mapping()

In [4]:
account_name_to_group_map = {'entnem': 'entnemgrp'}

## Defunct Accounts

- admin
- admingrp
- agertongrp
- billaccount
- briangrp
- testing
- emoniergrp
- entnem
- imtiyazgrp
- testing


## Fixed

- cantugrp
- gquongrp
- gpul
- gtaylorgrp
- geminigrp
- mldichtegrp

In [74]:
updated_puppet_groups = {}
for slurm_account_name, slurm_account in old_assoc_map['accounts'].items():
    group = puppet_mapping.group.get(slurm_account_name, False)
    if not group:
        #group = puppet_mapping.group.get(account_name_to_group_map.get(slurm_account_name, 'NULL'), False)
        #if not group:
        print('No match for Slurm account:', slurm_account_name)
        continue
    updated = dataclasses.replace(group, slurm=slurm_account)
    mapping = PuppetGroupMap(group={slurm_account_name: updated})
    updated_puppet_groups[slurm_account_name] = mapping

No match for Slurm account: admin
No match for Slurm account: admingrp
No match for Slurm account: agertongrp
No match for Slurm account: billaccount
No match for Slurm account: briangrp
No match for Slurm account: cantugrp
No match for Slurm account: emoniergrp
No match for Slurm account: entnem
No match for Slurm account: gpul
No match for Slurm account: gquongrp
No match for Slurm account: imtiyazgrp
No match for Slurm account: mldichtegrp
No match for Slurm account: testing


In [11]:
updated_puppet_users = []
skipped_users = []
for user_name, assocs in assoc_map['users'].items():
    user = puppet_mapping.user.get(user_name, False)
    if not user:
        print(f'No user {user_name}, skipping.')
        skipped_users.append(user_name)
        continue
    if user.shell == '/usr/sbin/nologin-account-disabled':
        print(f'User {user_name} disabled, skipping.')
        skipped_users.append(user_name)
        continue
    accounts = set((account for account, _, _ in assocs \
                    if account in puppet_mapping.group))
    if user.groups is not None:
        accounts = accounts - set(user.groups)

    if accounts:
        updated = dataclasses.replace(user, slurm=SlurmRecord(account=list(accounts)))
        mapping = PuppetUserMap(user={user_name: updated})
        updated_puppet_users.append((user_name, mapping))

User aariani disabled, skipping.
User aldocb disabled, skipping.
No user buduchin, skipping.
No user gthantha, skipping.
User jvanheer disabled, skipping.
User kvdistor disabled, skipping.
No user mclewis, skipping.
No user wscuello, skipping.
User cylinder disabled, skipping.
User eoziolor disabled, skipping.
User mltrego disabled, skipping.
User msyedaqu disabled, skipping.
User restruch disabled, skipping.
User rsniderm disabled, skipping.
No user bigmemh, skipping.
No user kensia, skipping.
No user memm, skipping.
No user nymsuhhall, skipping.
No user user, skipping.
No user cmaylor, skipping.
No user aburkert, skipping.
User dlabolle disabled, skipping.
No user swong, skipping.
User yoslee disabled, skipping.
No user zzikhoury, skipping.
User ohill disabled, skipping.
No user set, skipping.
No user aldoc, skipping.
No user carrascogonzales, skipping.
No user cesaram, skipping.
User gabmendo disabled, skipping.
No user jri-01, skipping.
No user jri-02, skipping.
No user jri-03, ski

### Edge Cases

In [48]:
def update_group_slurm(new_name, old_name):
    group = puppet_mapping.group.get(new_name)
    
    slurm = old_assoc_map['accounts'][old_name]
    
    updated = dataclasses.replace(group, slurm=dataclasses.replace(slurm, account=new_name))
    return PuppetGroupMap(group={new_name: updated})

In [49]:
group_dumper = PuppetGroupMap.Schema(only=['group.slurm'])

In [39]:
# cantugrp => darcantugrp
new_group = update_group_slurm('darcantugrp', 'cantugrp')

In [41]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/darcantugrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(new_group), file=fp)

In [50]:
new_group = update_group_slurm('quonbiogrp','gquongrp')

In [51]:
new_group

PuppetGroupMap(group={'quonbiogrp': PuppetGroupRecord(gid=6113820, ensure='present', tag=None, storage=[PuppetGroupStorage(name='gquongrp', owner='gquon', group=None, autofs=PuppetAutofs(nas='nas-4-0-ib', path='/nas-4-0/gquongrp'), zfs=PuppetZFS(quota='15T'))], slurm=SlurmRecord(account='quonbiogrp', partitions={'gpu-a100-h': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=32, gpus=1, mem='128000M'), job=None, priority=0)), 'low': SlurmPartition(qos='adamgrp-med-qos'), 'med': SlurmPartition(qos='adamgrp-med-qos'), 'gpum': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=60, gpus=10, mem='983040M'), job=None, priority=0)), 'gpuh': SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus=18, gpus=3, mem='288000M'), job=None, priority=0))}, max_jobs=None))})

In [52]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/quonbiogrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(new_group), file=fp)

In [58]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/fzdkeltgrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(updated_puppet_groups['fzdkeltgrp']), file=fp)

In [64]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/gtaylorgrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(updated_puppet_groups['gtaylorgrp']), file=fp)

In [67]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/geminigrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(updated_puppet_groups['geminigrp']), file=fp)

In [76]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/sivasankargrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(updated_puppet_groups['sivasankargrp']), file=fp)

In [68]:
new_group = update_group_slurm('dichtergrp','mldichtegrp')

In [71]:
with open('../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/dichtergrp.slurm.yaml', 'w') as fp:
    print(group_dumper.dumps(new_group), file=fp)

### Dump the Slurm YAMLs

We need to do a partial dump of the schema to get the correct formatting: if we just use a `SlurmRecord`, we miss the `group: [PI_GRP]:` keys. Luckily `marshmallow` supports this natively with use of the `only` keyword on schema creation. We'll dump just the `slurm` key into our new `pigrp.slurm.yaml` file.

In [42]:
for group_name, group in updated_puppet_groups:
    path = f'../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/{group_name}.slurm.yaml'
    with open(path, 'w') as fp:
        print(group_dumper.dumps(group), file=fp)

In [57]:
rprint(updated_puppet_groups['fzdkeltgrp'])

In [15]:
print(group_dumper.dumps(updated_puppet_groups[0][1]))

group:
  adamgrp:
    slurm:
      account: adamgrp
      partitions:
        bml:
          qos:
            priority: 0
        high:
          qos:
            group:
              cpus: 192
              mem: 500G
            job:
              cpus: 192
            priority: 0
        high2:
          qos:
            group:
              cpus: 352
              mem: 762000M
            priority: 0
        low:
          qos:
            job:
              cpus: 192
            priority: 10
        low2:
          qos:
            priority: 30
        med:
          qos:
            job:
              cpus: 192
            priority: 10
        med2:
          qos:
            priority: 30
      max_jobs: 1024



In [16]:
user_dumper = PuppetUserMap.Schema(only=['user.slurm'])
for user_name, user in updated_puppet_users:
    path = f'../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu/{user_name}.slurm.yaml'
    if os.path.exists(path):
        os.remove(path)
    with open(path, 'w') as fp:
        print(user_dumper.dumps(user), file=fp)

# Create Association Tuples from YAML World View

In [2]:
def load_puppet_mapping(root='../puppet.hpc-accounts/domains/farm.hpc.ucdavis.edu',
                        pattern='*.yaml'):
    files = glob.glob(os.path.join(root, pattern))
    tree = parse_yaml_forest(files, merge_on=MergeStrategy.ALL)['merged-all']
    puppet_mapping = PuppetAccountMap.Schema().load(tree)
    return puppet_mapping

In [4]:
puppet_mapping = load_puppet_mapping()

In [8]:
rprint(puppet_mapping.group['adamgrp'])

In [34]:
sacctmgr = SAcctMgr()
slurm_qos_map, slurm_filtered_map = sacctmgr.get_slurm_qos_state()

In [6]:
slurm_associations = sacctmgr.get_slurm_association_state()

In [7]:
slurm_associations

{'users': {('abrosen', 'adamgrp', 'high2'): 'adamgrp-high2-qos',
  ('abrosen', 'adamgrp', 'bml'): 'adamgrp-bml-qos',
  ('abrosen', 'adamgrp', 'low2'): 'adamgrp-low2-qos',
  ('abrosen', 'adamgrp', 'med2'): 'adamgrp-med2-qos',
  ('abrosen', 'adamgrp', 'high'): 'adamgrp-high-qos',
  ('abrosen', 'adamgrp', 'med'): 'adamgrp-med-qos',
  ('abrosen', 'adamgrp', 'low'): 'adamgrp-low-qos',
  ('adam', 'adamgrp', 'bml'): 'adamgrp-bml-qos',
  ('adam', 'adamgrp', 'high2'): 'adamgrp-high2-qos',
  ('adam', 'adamgrp', 'med2'): 'adamgrp-med2-qos',
  ('adam', 'adamgrp', 'low2'): 'adamgrp-low2-qos',
  ('adam', 'adamgrp', 'low'): 'adamgrp-low-qos',
  ('adam', 'adamgrp', 'med'): 'adamgrp-med-qos',
  ('adam', 'adamgrp', 'high'): 'adamgrp-high-qos',
  ('adamerum', 'adamgrp', 'high2'): 'adamgrp-high2-qos',
  ('adamerum', 'adamgrp', 'bml'): 'adamgrp-bml-qos',
  ('adamerum', 'adamgrp', 'low2'): 'adamgrp-low2-qos',
  ('adamerum', 'adamgrp', 'med2'): 'adamgrp-med2-qos',
  ('adamerum', 'adamgrp', 'high'): 'adamgrp-

In [37]:
def get_qos_name(account_name, partition_name):
    return f'{account_name}-{partition_name}-qos'

In [38]:
def build_puppet_association_state(puppet_mapping):
    sacctmgr = SAcctMgr()
    puppet_associations = dict(users={}, accounts={})
    #command_queue = []

    for group_name, group in puppet_mapping.group.items():
        if group.slurm is not None:
            puppet_associations['accounts'][group_name] = group.slurm.max_jobs

    for user_name, user in puppet_mapping.user.items():

        inherited_partitions = []

        # Get groups first...
        if user.groups is not None:
            for group_name in user.groups:
                if group_name in puppet_mapping.group:
                    group = puppet_mapping.group[group_name]
                    if group.slurm is not None and group.slurm.partitions is not None:
                        inherited_partitions.append((group_name, group.slurm.partitions))
        # Now via account associations
        if user.slurm is not None and user.slurm.account is not None:
            for account in user.slurm.account:
                group = puppet_mapping.group[account]
                inherited_partitions.append((account, group.slurm.partitions))

        for account_name, partitions in inherited_partitions:
            for partition_name, partition in partitions.items():
                qos_name = get_qos_name(account_name, partition_name)
                puppet_associations['users'][(user_name, account_name, partition_name)] = qos_name
                #command_queue.append(sacctmgr.add_user(user_name, account_name, partition_name, qos_name))
    return puppet_associations

In [39]:
puppet_associations = build_puppet_association_state(puppet_mapping)

In [40]:
def build_puppet_qos_state(puppet_mapping):
    qos_map = {}
    for group_name, group in puppet_mapping.group.items():
        if group.slurm is None or group.slurm.partitions is None:
            continue
        for partition_name, partition in group.slurm.partitions.items():
            qos_name = get_qos_name(group_name, partition_name)
            qos_map[qos_name] = partition.qos
    return qos_map

In [41]:
puppet_qos_map = build_puppet_qos_state(puppet_mapping)

In [42]:
def reconcile_qoses(old_qoses, new_qoses):
    deletions = []
    updates = []
    additions = []
    
    for qos_name, old_qos in old_qoses.items():
        if qos_name not in new_qoses:
            deletions.append(qos_name)
        else:
            new_qos = new_qoses[qos_name]
            if old_qos != new_qos:
                updates.append((qos_name, new_qos))
    
    for qos_name, new_qos in new_qoses.items():
        if qos_name not in old_qoses:
            additions.append((qos_name, new_qos))
    
    return deletions, updates, additions

In [43]:
def reconcile_users(old_assocs, new_assocs):
    deletions = []
    updates = []
    additions = []
    
    # Deletions and updates: check old associations against new
    for assoc_key in old_assocs:
        if assoc_key not in new_assocs:
            deletions.append(assoc_key)
        else:
            if old_assocs[assoc_key] != new_assocs[assoc_key]:
                #print(f'Change {assoc_key}: {old_assocs[assoc_key]} => {new_assocs[assoc_key]}')
                updates.append(assoc_key + (new_assocs[assoc_key],))
    
    # Additions: new user associations checked against old
    for assoc_key in new_assocs:
        if assoc_key not in old_assocs:
            additions.append(assoc_key + (new_assocs[assoc_key],))
    
    return deletions, updates, additions                        

In [44]:
def reconcile_accounts(old_accounts, new_accounts):
    deletions = []
    updates = []
    additions = []
    
    for account_name, old_extra in old_accounts.items():
        if account_name not in new_accounts:
            deletions.append(account_name)
        else:
            if old_extra != new_accounts[account_name]:
                updates.append((account_name, new_accounts[account_name]))
    for account_name, new_extra in new_accounts.items():
        if account_name not in old_accounts:
            additions.append((account_name, new_extra))
    
    return deletions, updates, additions

In [20]:
d, u, a = reconcile_users(slurm_associations['users'], puppet_associations['users'])

In [24]:
d, u, a = reconcile_accounts(slurm_associations['accounts'], puppet_associations['accounts'])

In [25]:
d

['admin',
 'admingrp',
 'agertongrp',
 'billaccount',
 'briangrp',
 'camw-test-2',
 'cantugrp',
 'emoniergrp',
 'entnem',
 'farm-test',
 'fzdkeltgrp',
 'geminigrp',
 'gpul',
 'gquongrp',
 'gtaylorgrp',
 'imtiyazgrp',
 'mldichtegrp',
 'sivasankargrp',
 'testing']

In [40]:
d, u, a = reconcile_qoses(slurm_qos_map, puppet_qos_map)

In [45]:
def generate_commands(sacctmgr, slurm_assocations, slurm_qoses, puppet_associations, puppet_qoses):
    user_deletions, user_updates, user_additions = reconcile_users(slurm_associations['users'], 
                                                                   puppet_associations['users'])
    account_deletions, account_updates, account_additions = reconcile_accounts(slurm_associations['accounts'], 
                                                                               puppet_associations['accounts'])
    qos_deletions, qos_updates, qos_additions = reconcile_qoses(slurm_qoses, 
                                                                puppet_qoses)
    
    command_queue = []
    
    for addition in qos_additions:
        command_queue.append(sacctmgr.add_qos(*addition))
    for update in qos_updates:
        command_queue.append(sacctmgr.modify_qos(*update))
    for update in user_updates:
        command_queue.append(sacctmgr.modify_user_qos(*update))
    for deletion in user_deletions:
        command_queue.append(sacctmgr.remove_user(*deletion))
    for deletion in qos_deletions:
        command_queue.append(sacctmgr.remove_qos(deletion))
    for addition in account_additions:
        command_queue.append(sacctmgr.add_account(*addition))
    for update in account_updates:
        command_queue.append(sacctmgr.modify_account(*update))
    for addition in user_additions:
        command_queue.append(sacctmgr.add_user(*addition))
    for deletion in account_deletions:
        command_queue.append(sacctmgr.remove_account(deletion))

    return command_queue

In [46]:
commands = generate_commands(SAcctMgr(sudo=True), slurm_associations, slurm_qos_map, puppet_associations, puppet_qos_map)

In [47]:
commands

[]

In [48]:
len(commands)

0

In [30]:
from rich.progress import track
import time

In [31]:
for i, command in enumerate(track(commands)):
    #if i % 100 == 0:
    #    print(f'{i}. Run:', command)
    #command()
    try:
        command()
        print('Ran:', command)
    except sh.ErrorReturnCode_1:
        print('Error:', command)

Output()

In [25]:
commands[6000]

<Command '/usr/bin/sudo /share/apps/22.04/slurm/22.05.8/bin/sacctmgr -i modify user jf set qos=paullricgrp-low-qos defaultqos=-1 where account=paullricgrp partition=low'>

In [42]:
puppet_mapping.user['camw']

PuppetUserRecord(fullname='Camille Scott', email='cswel@ucdavis.edu', uid=1134153, gid=1134153, groups=['spack-user', 'conda-user', 'software-user', 'ctbrowngrp', 'adamgrp'], password='$y$j9T$v8sSBTyrM6.a6tDQllEi7/$3ENsV0Df.DlJ2/CyrWLo.nu9fwi/zMtdTbq6Jow.Ev9', shell='/bin/zsh', tag=['ssh-tag', 'sudo-tag', 'root-ssh-tag'], home=None, ensure='present', membership=None, storage=PuppetUserStorage(zfs=False, autofs=PuppetAutofs(nas='nas-6-0-ib', path='/nas-6-0/ctbrowngrp/')), slurm=None)

In [50]:
def size_to_megs(size):
    size = size.strip()
    if size[-1] in 'Mm':
        return int(size[:-1])
    if size[-1] in 'Gg':
        return int(size[:-1]) * 1000
    if size[-1] in 'Tt':
        return int(size[:-1]) * 1000000
    else:
        raise ValueError(f'{size} is not an allowed value.')

In [58]:
size_to_megs('1p')

ValueError: 1p is not an allowed value.

In [53]:
for record in updates:
    if 'datalab-18' in record:
        print('Update', record)

for record in deletions:
    if 'datalab-18' in record:
        print('Delete', record)
        
for record in additions:
    if 'datalab-18' in record:
        print('Add', record)

Update ('datalab-18', 'ctbrowngrp', 'high2', 'ctbrowngrp-high2-qos')
Update ('datalab-18', 'ctbrowngrp', 'med2', 'ctbrowngrp-med2-qos')
Update ('datalab-18', 'ctbrowngrp', 'low2', 'ctbrowngrp-low2-qos')
Update ('datalab-18', 'datalabgrp', 'gpu-a100-h', 'datalabgrp-gpu-a100-h-qos')
Update ('datalab-18', 'datalabgrp', 'high2', 'datalabgrp-high2-qos')
Update ('datalab-18', 'datalabgrp', 'med2', 'datalabgrp-med2-qos')
Update ('datalab-18', 'datalabgrp', 'low2', 'datalabgrp-low2-qos')
Delete ('datalab-18', 'farm-test', 'med2-test')
Add ('datalab-18', 'ctbrowngrp', 'bmh', 'ctbrowngrp-bmh-qos')
Add ('datalab-18', 'ctbrowngrp', 'bml', 'ctbrowngrp-bml-qos')
Add ('datalab-18', 'ctbrowngrp', 'bmm', 'ctbrowngrp-bmm-qos')
Add ('datalab-18', 'ctbrowngrp', 'gpu-a100-h', 'ctbrowngrp-gpu-a100-h-qos')
Add ('datalab-18', 'ctbrowngrp', 'gpuh', 'ctbrowngrp-gpuh-qos')
Add ('datalab-18', 'ctbrowngrp', 'gpum', 'ctbrowngrp-gpum-qos')
Add ('datalab-18', 'ctbrowngrp', 'high', 'ctbrowngrp-high-qos')
Add ('datalab

In [182]:
sorted(skipped_users)

['aariani',
 'aburkert',
 'aldoc',
 'aldocb',
 'aliceyu8',
 'allinerp',
 'amweis',
 'askol',
 'barao',
 'bcbaikie',
 'bebenson',
 'bigmemh',
 'borax',
 'buduchin',
 'bwelly',
 'carrascogonzales',
 'cesaram',
 'cluste',
 'cmaylor',
 'cylinder',
 'daisysj',
 'dlabolle',
 'ecgranad',
 'ehellwig',
 'eoziolor',
 'erikent',
 'fengq',
 'gabmendo',
 'grizimer',
 'gthantha',
 'herreral',
 'jameyc',
 'johnmola',
 'jri-01',
 'jri-02',
 'jri-03',
 'jri-04',
 'jri-05',
 'jri-06',
 'jri-07',
 'jri-08',
 'jri-09',
 'jri-10',
 'jri-11',
 'jri-12',
 'jri-13',
 'jri-14',
 'jri-15',
 'jri-16',
 'jri-17',
 'jri-18',
 'jri-19',
 'jri-20',
 'jri-21',
 'jri-22',
 'jri-23',
 'jri-24',
 'jri-25',
 'jri-26',
 'jri-27',
 'jri-28',
 'jri-29',
 'jri-30',
 'jri-31',
 'jri-32',
 'jri-33',
 'jri-34',
 'jri-35',
 'jri-36',
 'jri-37',
 'jri-38',
 'jri-39',
 'jri-40',
 'jvanheer',
 'katng23',
 'kensia',
 'kvdistor',
 'laurens',
 'laurynne',
 'mclewis',
 'memm',
 'mltrego',
 'msyedaqu',
 'mtreiber',
 'nymsuhhall',
 'ohil

In [123]:
diff.keys()

dict_keys(['set_item_removed', 'set_item_added'])

In [52]:
user

PuppetUserRecord(fullname='William R Conner', email='wrconner@ucdavis.edu', uid=606183, gid=606183, groups=['jcchiugrp'], password='x', shell='/bin/bash', tag=None, home=None, ensure='present', membership=None, storage=PuppetUserStorage(zfs=False, autofs=PuppetAutofs(nas='nas-4-0-ib', path='/nas-4-0/jcchiugrp2/')))

In [55]:
group

PuppetGroupRecord(gid=29956, ensure='present', tag=None, storage=[PuppetGroupStorage(name='jcchiugrp-SCRATCH', owner='jcchiu', group=None, autofs=PuppetAutofs(nas='nas-11-2-ib', path='/export/3/jcchiugrp-SCRATCH'), zfs=None), PuppetGroupStorage(name='jcchiugrp', owner='jcchiu', group=None, autofs=PuppetAutofs(nas='nas-4-0-ib', path='/nas-4-0/jcchiugrp2'), zfs=PuppetZFS(quota='30T'))], slurm=None)

In [53]:
puppet_mapping.user['lgnewton']

PuppetUserRecord(fullname='Lacie Gail Newton', email='lgnewton@ucdavis.edu', uid=1496015, gid=1496015, groups=['jbondgrp'], password='x', shell='/bin/bash', tag=None, home=None, ensure='present', membership=None, storage=PuppetUserStorage(zfs=False, autofs=PuppetAutofs(nas='nas-10-1-ib', path='/export/4/')))

In [62]:
assoc_map['users'][user_name]

[('entnemgrp',
  'low',
  SlurmPartition(qos=SlurmQOS(group=None, job=None, priority='10'))),
 ('entnemgrp',
  'med',
  SlurmPartition(qos=SlurmQOS(group=None, job=None, priority='10'))),
 ('entnemgrp',
  'high',
  SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus='192', gpus=None, mem='500G'), job=None, priority='0'))),
 ('entnemgrp',
  'bigmemm',
  SlurmPartition(qos=SlurmQOS(group=None, job=None, priority='10'))),
 ('entnemgrp',
  'bigmemht',
  SlurmPartition(qos=SlurmQOS(group=SlurmQOSTRES(cpus='96', gpus=None, mem='1000000M'), job=None, priority='0'))),
 ('entnemgrp',
  'bigmeml',
  SlurmPartition(qos=SlurmQOS(group=None, job=None, priority='10')))]

In [64]:
pprint(assoc_map['accounts']['entnemgrp'])

---

In [14]:
assoc_map['accounts']['gpul']

SlurmRecord(account='gpul', partitions={'gpul': SlurmPartition(qos=SlurmQOS(group=None, job=None, priority='0'))}, max_jobs=None)

In [15]:
sacctmgr = SAcctMgr()

In [18]:
sacctmgr.add_qos('testqos', assoc_map['adamgrp'].partitions['high'].qos)

<Command '/work/HPCCF/cheeto/sacctmgr -i add qos testqos GrpCpus=192 GrpMem=500G'>

In [19]:
sacctmgr.add_account('testaccount')

<Command '/work/HPCCF/cheeto/sacctmgr -i add account testaccount'>

In [20]:
sacctmgr.modify_qos('testqos', qos)

<Command '/work/HPCCF/cheeto/sacctmgr -i modify qos testqos set GrpCpus=192 GrpMem=500G'>

In [21]:
sacctmgr.add_user('testuser', 'testaccount', 'testpartition', 'testqos')

<Command '/work/HPCCF/cheeto/sacctmgr -i add user testuser account=testaccount partition=testpartition qos=testqos'>

In [22]:
sacctmgr.remove_user('testuser')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser'>

In [23]:
sacctmgr.remove_user('testuser', account_name='testaccount')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser account=testaccount'>

In [24]:
sacctmgr.remove_user('testuser', partition_name='testpartition')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser partition=testpartition'>

In [25]:
sacctmgr.remove_user('testuser', account_name='testaccount', partition_name='testpartition')

<Command '/work/HPCCF/cheeto/sacctmgr -i remove user testuser account=testaccount partition=testpartition'>

## QoS Audit

In [5]:
qoses = build_puppet_qos_state(puppet_mapping)

In [7]:
def partition_from_qos_name(qos_name):
    tokens = qos_name.split('-')
    return tokens[-2]

In [8]:
qoses

{'datalabgrp-gpu-a100-h-qos': SlurmQOS(group=SlurmQOSTRES(cpus=32, gpus=1, mem='128000M'), job=None, priority=0),
 'datalabgrp-high2-qos': SlurmQOS(group=SlurmQOSTRES(cpus=256, gpus=None, mem='512000M'), job=None, priority=0),
 'datalabgrp-low2-qos': SlurmQOS(group=None, job=None, priority=10),
 'datalabgrp-med2-qos': SlurmQOS(group=None, job=None, priority=10),
 'qtlchenggrp-bmh-qos': SlurmQOS(group=SlurmQOSTRES(cpus=96, gpus=None, mem='1000000M'), job=None, priority=0),
 'qtlchenggrp-bml-qos': SlurmQOS(group=None, job=None, priority=10),
 'qtlchenggrp-bmm-qos': SlurmQOS(group=None, job=None, priority=10),
 'qtlchenggrp-gpuh-qos': SlurmQOS(group=SlurmQOSTRES(cpus=6, gpus=1, mem='96000M'), job=None, priority=0),
 'qtlchenggrp-high-qos': SlurmQOS(group=SlurmQOSTRES(cpus=128, gpus=None, mem='256000M'), job=None, priority=40),
 'qtlchenggrp-high2-qos': SlurmQOS(group=SlurmQOSTRES(cpus=96, gpus=None, mem='256000M'), job=None, priority=0),
 'qtlchenggrp-low-qos': SlurmQOS(group=None, job=No