In [1]:
import sys
import os
from pathlib import Path
# Make the parent of the current working directory importable so that the
# `scale.olm` package imported below can be resolved from there.
notebook_path = Path.cwd()
sys.path.append(os.fspath(notebook_path.parent))
from scale.olm.core import TempDir,ScaleRte,ThreadPoolExecutor

In [2]:
# Create a SCALE instance.
# The location of the scalerte executable is machine-specific, so it can be
# overridden with the SCALE_RTE_PATH environment variable; the original
# developer path is kept as the default so existing setups behave as before.
scale_rte_path = os.environ.get(
    'SCALE_RTE_PATH',
    '/Users/Shared/ornldev/code/scale/builds/master/INSTALL/bin/scalerte',
)
scale_rte = ScaleRte(scale_rte_path)
print('version:',scale_rte.version)
# data_size is divided by 1e9 for display -- presumably bytes -> GB; confirm against ScaleRte.
print('data (GB):',scale_rte.data_size/1e9)

version: 7.0.pre-b7
data (GB): 109.270413351


In [14]:
# Temporary directory object; the test input files below are written through it.
ti = TempDir()

In [15]:
# Create test input files in a temporary directory. Each input file just runs
# "sleep <sleep_seconds>" (2 seconds as configured here), but it does it through
# SCALE's shell command.
sleep_seconds = 2
num_tasks = 6
# One input per task, named 0.inp .. <num_tasks-1>.inp; write_file's return value
# is what gets handed to scale_rte.run later on.
input_list = [
    ti.write_file(f"=shell\nsleep {sleep_seconds}\nend\n", f"{i}.inp")
    for i in range(num_tasks)
]

In [16]:
# Run the first input on its own to get a baseline runtime for a single task.
# The names avoid `input`, which would shadow the Python builtin for the rest
# of the notebook session.
first_input, first_output = scale_rte.run(input_list[0])
# Runtime of one task; used later to estimate the ideal parallel time.
srs = first_output['scale_runtime_seconds']

In [17]:
# Build the thread executor used to run the inputs; with six workers there is
# one worker per input created above.
num_workers = 6
tpe = ThreadPoolExecutor(max_workers=num_workers)

In [18]:
# Run all the inputs and compare the elapsed time with the ideal parallel time.
import time
# perf_counter is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way time.time() differences can.
start = time.perf_counter()
# Serial cost estimate: the single-run baseline times the number of tasks.
total_needed_seconds = srs*num_tasks
results = tpe.execute(scale_rte.run,input_list)
total_used_seconds = time.perf_counter() - start
# Ideal time assumes the serial cost divides perfectly across the workers.
print('ideal time:',total_needed_seconds/num_workers)
print('used time:',total_used_seconds)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:08<00:00,  1.41s/it]

ideal time: 4.777565002441406
used time: 8.48180890083313





In [None]:
# Show the 'success' entry recorded for every result.
for key, result in results.items():
    print(key, result['success'])

In [None]:
# Histogram of the per-input runtimes from the first execution.
import matplotlib.pyplot as plt
times1 = [result['runtime_seconds'] for result in results.values()]
plt.hist(times1, bins=10)
plt.title('Runtime distribution (first run)')
plt.xlabel('time(s)')
plt.ylabel('# of runs')
plt.show()

In [None]:
# Rerun and plot a histogram of runtimes, overlaid on the first run.
import numpy as np
results = tpe.execute(scale_rte.run,input_list)
times2 = [v['runtime_seconds'] for k,v in results.items()]
# Shared bin edges from 0 to the slowest runtime of either run. The two lists
# are concatenated rather than stacked so the maximum is still well-defined if
# they ever differ in length.
bins = np.linspace(0, np.amax(times1 + times2), 30)
plt.hist(times1, bins, alpha=0.5, label='first run')
plt.hist(times2, bins, alpha=0.5, label='second (re)run')
plt.legend(loc='upper right')
plt.title('Runtime distribution, showing huge time savings for avoiding reruns')
plt.ylabel('# of runs')
plt.xlabel('time(s)')
plt.show()


In [None]:
# Finally, if we modify a file, only that file will be rerun even when we submit the entire group.
import random
import numpy as np
rand = random.uniform(1.0,3.0)
# Modify an input that is actually part of input_list (0.inp .. <num_tasks-1>.inp).
# Writing to a name outside that range, such as "10.inp" with num_tasks = 6,
# creates a file that is never submitted, so nothing would be rerun.
# NOTE(review): assumes ti.write_file overwrites an existing file of the same name -- confirm.
modified_name = f"{num_tasks - 1}.inp"
ti.write_file(f"=shell\nsleep {rand} \nend\n", modified_name)
results = tpe.execute(scale_rte.run,input_list)
times3 = [v['runtime_seconds'] for k,v in results.items()]
# Recompute the bin edges over all three runs: the modified input may sleep
# longer than the earlier maximum, and values beyond the last edge are not drawn.
bins_all = np.linspace(0, max(times1 + times2 + times3), 30)
plt.hist(times1, bins_all, alpha=0.5, label='first run')
plt.hist(times2, bins_all, alpha=0.5, label='second run--no mods')
plt.hist(times3, bins_all, alpha=0.5, label='third run--one file mod')
plt.legend(loc='upper right')
plt.title('Runtime distribution, showing huge time savings for avoiding reruns')
plt.ylabel('# of runs')
plt.xlabel('time(s)')
plt.show()

In [None]:
# List the entries whose result is flagged as having been rerun.
for name, result in results.items():
    if not result['rerun']:
        continue
    print(name,'was rerun!')

In [None]:
import time
def my_func(input):
    """Stand-in for a slow task: wait two seconds, then decorate the input.

    Returns a tuple of the original input and its upper-cased form wrapped
    in a leading and trailing "x".
    """
    time.sleep(2)
    decorated = "".join(("x", input.upper(), "x"))
    return input, decorated
# Run my_func over 40 string inputs ("0" .. "39") through an executor built
# with the argument 1 (presumably a single worker -- confirm against
# ThreadPoolExecutor), and display the elapsed wall-clock seconds.
input_list = list(map(str, range(40)))
tpe = ThreadPoolExecutor(1)
start = time.time()
results = tpe.execute(my_func, input_list)
time.time() - start

In [None]:
# Display what tpe.execute returned for the my_func run over input_list.
results