# Code to set up the blocks for a 4D Multigrid run
March 2, 2022

In [1]:
import numpy as np

In [2]:
def f_to_numpy(dict1,key_lst,dtype=np.int64):
    """Replace the listed entries of ``dict1`` with integer numpy arrays, in place.

    Parameters
    ----------
    dict1 : dict
        Dictionary whose values (lists or arrays of integers) are converted.
    key_lst : iterable
        Keys of ``dict1`` to convert; every key must already be present.
    dtype : numpy dtype, optional
        Integer type of the resulting arrays. Defaults to 64-bit integers:
        a 16-bit type cannot hold values above 32767, and a product
        accumulated element by element over such an array can wrap around.

    Returns
    -------
    None
        ``dict1`` is modified in place.
    """
    for key in key_lst:
        # np.array always makes a fresh copy, so the caller's list is left untouched
        dict1[key]=np.array(dict1[key],dtype=dtype)
        
def f_divide_arrays(a1,a2):
    """Floor-divide ``a1`` by ``a2`` and warn when the division is not exact.

    A message and the remainders are printed if any remainder is non-zero;
    the floored quotient is returned in either case.
    """
    quotient,remainder=divmod(a1,a2)

    inexact=np.any(remainder)
    if inexact:
        print("Found non-zero remainder. Integer division is not correct")
        print("Remainder list",remainder)

    return quotient

In [3]:
# Rows of the "Lattice geometry" table, in the order they are printed.
# Each key is looked up in both dict_tune and dict_full by the print cells.
keys=[
    'lattice',
    'node_geo',
    'size_per_gpu',
    'nodes',
]

## Fixed entries

In [4]:
## GPUs per node for the target machine; used to check that
## nodes * gpus_per_node equals the product of the node geometry.
gpus_per_node = 8    ## Frontier
# gpus_per_node = 6  ## Summit

In [15]:
# Run presets. Keep exactly one preset active (uncommented) at a time.
#   dict_full   : lattice, node count and node geometry of the full run
#   dict_tune   : node count and node geometry of the tuning run
#   dict_blocks : blocking per level ('b1' is the top level, 'b3' is optional)

# Crusher and Frontier

# dict_full={'lattice':[192, 192, 192, 384], 'nodes':288}
# dict_full['node_geo']=[4,6,6,16]
# dict_tune={'nodes':8}
# dict_blocks={'b1':[6,4,4,4], 'b2':[2,2,2,3], 'b3': [2,2,2,1]}
# dict_tune['node_geo']=[2,2,2,8]

# 4 level MG  (active preset)
# NOTE(review): only two blockings (b1, b2) are set here - confirm the "4 level" label.
dict_full=dict(lattice=[192, 192, 192, 384], nodes=288, node_geo=[4,6,6,16])
dict_tune=dict(nodes=8, node_geo=[2,2,2,8])
dict_blocks=dict(b1=[6,4,4,4], b2=[4,4,4,3])

# dict_full={'lattice':[144, 144, 144, 288],'nodes':108}
# dict_tune={'node_geo':[2,2,2,8],'nodes':8}
# dict_full['node_geo']=[6,3,6,8]
# dict_blocks={'b1':[4,6,6,6], 'b2':[3,2,2,3]}


# Summit run (remember to switch gpus_per_node to 6)

# dict_full={'lattice':[192, 192, 192, 384], 'nodes':384}
# dict_full['node_geo']=[4,6,6,16]
# dict_tune={'nodes':8}
# dict_blocks={'b1':[6,4,4,4], 'b2':[2,2,2,3], 'b3': [2,2,2,1]}
# dict_tune['node_geo']=[2,3,2,4]

# dict_full={'lattice':[144, 144, 144, 288],'nodes':144}
# dict_tune={'node_geo':[6,2,2,2],'nodes':8}
# dict_full['node_geo']=[6,3,6,8]
# dict_blocks={'b1':[4,6,6,6], 'b2':[3,2,2,3]}

## Compute

In [16]:
# Convert the geometry lists to numpy arrays so the arithmetic below is element-wise
f_to_numpy(dict_full,['lattice','node_geo'])
f_to_numpy(dict_tune,['node_geo'])

# Local lattice handled by one GPU in the full run (lattice / node geometry, per dimension)
dict_full['size_per_gpu']=f_divide_arrays(dict_full['lattice'],dict_full['node_geo'])

# The tuning run uses the same per-GPU volume; its global lattice follows
# from multiplying that volume by the tuning node geometry
dict_tune['size_per_gpu']=dict_full['size_per_gpu']
dict_tune['lattice']=dict_tune['size_per_gpu']*dict_tune['node_geo']


# Block sizes inside a GPU: each level divides the previous local volume by its blocking
f_to_numpy(dict_blocks,['b1','b2'])
loc_vol1=f_divide_arrays(dict_tune['size_per_gpu'],dict_blocks['b1'])
loc_vol2=f_divide_arrays(loc_vol1,dict_blocks['b2'])

if 'b3' in dict_blocks:
    # Optional third blocking: convert it like b1/b2 before dividing
    f_to_numpy(dict_blocks,['b3'])
    loc_vol3=f_divide_arrays(loc_vol2,dict_blocks['b3'])
else:
    # Reset so a value left over from an earlier run with 'b3' cannot be picked up
    loc_vol3=None



## Perform checks

In [17]:
## Check number of GPUs match with node geometry
# The product is taken in int64: multiplying the array entries one by one can
# keep the narrow element dtype and wrap around for large node geometries.
num=int(np.prod(dict_full['node_geo'],dtype=np.int64))
assert dict_full['nodes']*gpus_per_node==num, \
    "Full run: nodes*gpus_per_node={} but node_geo has {} ranks".format(dict_full['nodes']*gpus_per_node,num)

num=int(np.prod(dict_tune['node_geo'],dtype=np.int64))
assert dict_tune['nodes']*gpus_per_node==num, \
    "Tuning run: nodes*gpus_per_node={} but node_geo has {} ranks".format(dict_tune['nodes']*gpus_per_node,num)

## top level block can't have odd
for i in dict_blocks['b1']:
    if i%2!=0:
        print("Error: All entries in top blocking need to be even",i)

## lower levels blocks need at least 1 even
## (applied to every lower blocking that is defined: b2 always, b3 when present)
lower_blocks=['b2']
if 'b3' in dict_blocks:
    lower_blocks.append('b3')

for blk in lower_blocks:
    num_even=0
    for i in dict_blocks[blk]:
        if i%2==0: num_even+=1

    if num_even<1 : print("Error: At least one entry in lower blocking needs to be even",dict_blocks[blk])


## Print output

In [18]:
## Print run parameters (tuning run next to full run)
geo_fmt="{:<16} {:<15} {:<10}"
print("Lattice geometry\n")
print(geo_fmt.format(' ','Tuning run','Full run'))
for key in keys:
    tune_val,full_val=dict_tune[key],dict_full[key]
    if key=='nodes':
        # 'nodes' is a plain number, everything else is a per-dimension array
        print(geo_fmt.format(key,str(tune_val),str(full_val)))
    else:
        tune_txt=" ".join(map(str,tune_val))
        full_txt=" ".join(map(str,full_val))
        print(geo_fmt.format(key,tune_txt,full_txt))

print("\n")
print('\033[1m'+'Blocking Scheme') ## Getting bold text
print('\033[m')

# Rows of the blocking table; None marks a separator line
scheme_rows=[
    ('Layer 0',dict_tune['size_per_gpu']),
    ('Block 1',dict_blocks['b1']),
    None,
    ('Layer 1',loc_vol1),
    ('Block 2',dict_blocks['b2']),
    None,
    ('Layer 2',loc_vol2),
]
if 'b3' in dict_blocks:
    scheme_rows+=[('Block 3',dict_blocks['b3']),None,('Layer 3',loc_vol3)]

for row in scheme_rows:
    if row is None:
        print("---------------------\t -----------------")
        continue
    label,values=row
    print("{:<16} {:<15}".format(label,"\t".join(str(v) for v in values)))

Lattice geometry

                 Tuning run      Full run  
lattice          96 64 64 192    192 192 192 384
node_geo         2 2 2 8         4 6 6 16  
size_per_gpu     48 32 32 24     48 32 32 24
nodes            8               288       


[1mBlocking Scheme
[m
Layer 0          48	32	32	24    
Block 1          6	4	4	4        
---------------------	 -----------------
Layer 1          8	8	8	6        
Block 2          4	4	4	3        
---------------------	 -----------------
Layer 2          2	2	2	2        


## Print without tuning info    

In [10]:
## Print run parameters (full run only; no tuning-run values are read here)
print("Lattice geometry\n")
# print("{:<16}{:<10}".format(' ','Full run'))
for k in keys:
    v2=dict_full[k]
    if k!='nodes':
        # per-dimension arrays are printed space separated
        w2=" ".join([str(i) for i in v2])
        print("{:<16}{:<10}".format(k,w2))
    else :
        print("{:<16}{:<10}".format(k,str(v2)))

print("\n")
print('\033[1m'+'Blocking Scheme') ## Getting bold text
print('\033[m')

# Layer 0 is the per-GPU volume of the full run
print("{:<16} {:<15}".format('Layer 0' ,"\t".join([str(i) for i in dict_full['size_per_gpu']])))
print("{:<16} {:<15}".format('Block 1' ,"\t".join([str(i) for i in dict_blocks['b1']])))
print("---------------------\t -----------------")
print("{:<16} {:<15}".format('Layer 1' ,"\t".join([str(i) for i in loc_vol1])))
print("{:<16} {:<15}".format('Block 2' ,"\t".join([str(i) for i in dict_blocks['b2']])))
print("---------------------\t -----------------")
print("{:<16} {:<15}".format('Layer 2' ,"\t".join([str(i) for i in loc_vol2])))

# Third blocking level is optional
if 'b3' in dict_blocks:
    print("{:<16} {:<15}".format('Block 3' ,"\t".join([str(i) for i in dict_blocks['b3']])))
    print("---------------------\t -----------------")
    print("{:<16} {:<15}".format('Layer 3' ,"\t".join([str(i) for i in loc_vol3])))

Lattice geometry

lattice         192 192 192 384
node_geo        4 6 6 16  
size_per_gpu    48 32 32 24
nodes           288       


[1mBlocking Scheme
[m
Layer 0          48	32	32	24    
Block 1          6	4	4	4        
---------------------	 -----------------
Layer 1          8	8	8	6        
Block 2          2	2	2	3        
---------------------	 -----------------
Layer 2          4	4	4	2        
Block 3          2	2	2	1        
---------------------	 -----------------
Layer 3          2	2	2	2        
