# System Information

Autogenerated information about each system, collated from the `Sysinfo` test outputs and tabulated per ReFrame `system:partition`.

In [1]:
# Plotting setup
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import ticker
from IPython.display import display, display_markdown

import pandas as pd
import os, sys
from pprint import pprint
sys.path.extend(('../../reframe', '../../')) # the `modules` package imports `reframe` so need that in sys.modules
import modules

import json, pprint
from collections import defaultdict

In [2]:
# TODO: move to modules.utils
# TODO: put tests in docstrings
# Small nested dict used to exercise get_nested() below
TESTD = dict(
    numbers=dict(zero=0, one=1),
    letters=dict(a='alpha', b='bravo'),
)

def get_nested(dct, key_pattern):
    """ Get a value from a nested dict using a dotted key
    
        Args:
            dct: dict having str keys and values which may be other `dct`s
            key_pattern: str giving dotted key, e.g. 'letters.a'
        
        Returns the value. Note that if key_pattern does not go to full depth then a dict is returned.
        
        Raises:
            KeyError: if any part of key_pattern is missing, or if key_pattern goes deeper than
                the nesting (i.e. tries to descend into a value which is not a dict).
    """
    d = dct
    for kp in key_pattern.split('.'):
        # Only descend through dicts: `in` on a str is a substring test and on an int is a
        # TypeError, so a too-deep key_pattern has to be rejected here rather than at `d[kp]`.
        if not isinstance(d, dict) or kp not in d:
            raise KeyError("No such key '%s'" % key_pattern)
        d = d[kp]
    return d

# quick check: a key_pattern which stops short of full depth gives back the nested dict
x = get_nested(dct=TESTD, key_pattern='letters')
print(x)

def split_numeric(s):
    """ Separate the digit characters of a string from all its other characters.

        Args:
            s: str to split, e.g. '131G'

        Returns a 2-tuple of str `(digits, others)`, each keeping the characters in their
        original order, e.g. `('131', 'G')`. Note the digits are gathered from anywhere in
        the string, so '1.5G' gives `('15', '.G')`.
    """
    # single pass over `s`, filing each character by whether it is a digit
    buckets = {True: [], False: []}
    for char in s:
        buckets[char.isdigit()].append(char)
    return ''.join(buckets[True]), ''.join(buckets[False])

{'a': 'alpha', 'b': 'bravo'}


In [3]:
# Tables to show: {table title: {column label: dotted key looked up in each node's data}}
TABLES = {
    'Operating system': {
        'OS': 'os.release.PRETTY_NAME',
        'Kernel': 'os.kernel',
    },
    'Chassis': {
        'model': 'chassis.product_name',
        'vendor': 'chassis.sys_vendor',
    },
    'CPU': {
        'architecture': 'cpu.Architecture',
        'model': 'cpu.Model name',
        'cpus /node': 'cpu.CPU(s)',
        'sockets /node': 'cpu.Socket(s)',
        'cores /socket': 'cpu.Core(s) per socket',
        'threads /core': 'cpu.Thread(s) per core',
    },
    'Memory': {
        'memory /node': 'memory.total',
        'type': 'memory.types',
    },
}

In [4]:
 # load all sysinfo.json files:
sysinfos = {} # key-> reframe "system:partition", value-> nested dict of values
sysinfo_paths = modules.utils.find_run_outputs(root='../../output', test='Sysinfo', ext='.json')
for path in sysinfo_paths:
    # metadata comes from the path; only system and partition are used (environment is thrown away):
    path_meta = modules.utils.parse_path_metadata(path)
    sysname, partition = path_meta['sysname'], path_meta['partition']
    syspart = '%s:%s' % (sysname, partition)
    with open(path) as json_file:
        sysinfos[syspart] = json.load(json_file)

# derive and tabulate calculated values (one row per reframe "system:partition"):
general_cols = ['Number of nodes', 'Total CPUs', 'Total memory']
general_index = [] # row labels, collected alongside the rows so the two always line up
general_table = []
for syspart, sysinfo in sysinfos.items():
    
    # one entry in sysinfo per node
    num_nodes = len(sysinfo)
    
    total_cpus = sum(int(get_nested(hostdata, 'cpu.CPU(s)')) for hostdata in sysinfo.values())
    
    # memory sizes are strings like '131G': sum the numeric parts, which needs a single common unit
    mems = [get_nested(hostdata, 'memory.total') for hostdata in sysinfo.values()]
    mem_parts = [split_numeric(m) for m in mems]
    mem_units = list(set(unit for _, unit in mem_parts))
    if len(mem_units) > 1:
        raise NotImplementedError('Cannot cope with different units for memory size across cluster: %r' % mems)
    mem_unit = mem_units[0] if mem_units else '' # no nodes -> no unit, rather than an IndexError
    total_mem = '%s %s' % (sum(int(num) for num, _ in mem_parts), mem_unit)
    
    general_index.append(syspart)
    general_table.append([num_nodes, total_cpus, total_mem])

# Display once, after every row has been collected. Building the frame inside the loop fails as soon
# as there is more than one system:partition, because the index would then be longer than the rows
# gathered so far (and the heading would be repeated for each one).
display_markdown('### %s' % 'General', raw=True)
general_df = pd.DataFrame(general_table, index=general_index, columns=general_cols, dtype=str)
display(general_df)
    
        
# group system info across reframe partitions and tabulate:
for table_title, table_contents in TABLES.items():
    # collate results across each system and tabulate (one row per reframe "system:partition"):
    table = []
    for syspart, sysinfo in sysinfos.items():
        row = []
        for table_label, datakey in table_contents.items():
            # group data across nodes by VALUES:
            nodevals = defaultdict(list)
            for hostname, nodedata in sysinfo.items():
                nodevals[get_nested(nodedata, datakey)].append(hostname)
            # currently we'll throw away the data on which nodes have which values but we could add that later if reqd:
            distinct_vals = list(nodevals.keys())
            if len(distinct_vals) == 1:
                # all nodes agree: show the value as-is
                cell = distinct_vals[0]
            else:
                # nodes differ: list every distinct value. Each is passed through str() because
                # str.join() only accepts strings and the values may not be (e.g. numbers).
                # A system:partition with no nodes ends up here too and gets an empty cell.
                cell = ', '.join(str(v) for v in distinct_vals)
            row.append(cell)
        table.append(row)
    display_markdown('### %s' % table_title, raw=True)
    df = pd.DataFrame(table, index=list(sysinfos.keys()), columns=list(table_contents.keys()), dtype=str)
    display(df)

### General

Unnamed: 0,Number of nodes,Total CPUs,Total memory
alaska:ib-gcc9-openmpi4-ucx,16,1024,2096 G


### Operating system

Unnamed: 0,OS,Kernel
alaska:ib-gcc9-openmpi4-ucx,CentOS Linux 7 (Core),3.10.0-1127.8.2.el7.x86_64


### Chassis

Unnamed: 0,model,vendor
alaska:ib-gcc9-openmpi4-ucx,PowerEdge R630,Dell Inc.


### CPU

Unnamed: 0,architecture,model,cpus /node,sockets /node,cores /socket,threads /core
alaska:ib-gcc9-openmpi4-ucx,x86_64,Intel(R) Xeon(R) CPU E5-2683 v4 @ 2.10GHz,64,2,16,2


### Memory

Unnamed: 0,memory /node,type
alaska:ib-gcc9-openmpi4-ucx,131G,Registered-DDR4


In [5]:
# just to show which way around we want it:
# one row per system (the index), one column per property

index = ['medium sys', 'large sys', 'small sys']
cols = ['memory', 'num nodes']

example = [
    ['128GB', '24'],
    ['256GB', '128'],
    ['64GB', '6'],
]

df = pd.DataFrame(data=example, index=index, columns=cols, dtype=str)
display(df)

Unnamed: 0,memory,num nodes
medium sys,128GB,24
large sys,256GB,128
small sys,64GB,6
