# Logical Error Rate

In [None]:
d: float = 3
p: float = 0.001
code: str = f"rsc(d={d},p={p})"
noise: str = "none"

slurm_maximum_jobs = 300
slurm_cores_per_node: int = 10  # (slurm_maximum_jobs // slurm_cores_per_node) should not exceed 200
slurm_mem_per_job: int = 4  # 4GB per job because we have very large instances
slurm_extra = dict(
    walltime = "1-00:00:00",  # adaptively shutdown if no more jobs
    queue = "scavenge",  # use with caution: dask does not seem to handle scavenge workers well
    job_extra_directives = ["--requeue"],  # use with scavenge partition will help spawn scavenged jobs
)

json_filename: str | None = None
force_finished: bool = False  # only plot the figure and do not run experiments

In [None]:
# Erasure conversion rates: a coarse sweep 0, 0.2, ..., 0.8 followed by finer points close to 1.
ecr_vec: list[float] = [*(0.2 * e for e in range(5)), 0.9, 0.95, 0.98, 1]

# Parameter grids that are expanded into decoder specification strings below.
c_vec = [0, 50, 1000]
max_iter_vec = [0, 5]
osd_order_vec = [0, 10]

# mwpf: every `c` without the pass_circuit flag first, then every `c` with it.
mwpf_decoder_vec = [f"mwpf(c={c})" for c in c_vec]
mwpf_decoder_vec += [f"mwpf(c={c},pass_circuit=1)" for c in c_vec]

# bposd without an explicit OSD configuration.
bposd0_decoder_vec = [f"bposd(max_iter={max_iter})" for max_iter in max_iter_vec]

# bposd with the osd_e method: max_iter is the outer loop, osd_order the inner one.
bposdn_decoder_vec = [
    f"bposd(max_iter={max_iter},osd_order={osd_order},osd_method=osd_e)"
    for max_iter in max_iter_vec
    for osd_order in osd_order_vec
]

bpuf_decoder_vec = [f"bpuf(max_iter={max_iter})" for max_iter in max_iter_vec]

# Full list of decoders under evaluation; "mwpm" takes no parameters and goes last.
decoder_vec = [
    *mwpf_decoder_vec,
    *bposd0_decoder_vec,
    *bposdn_decoder_vec,
    *bpuf_decoder_vec,
    "mwpm",
]
print("number of decoders:", len(decoder_vec))

In [None]:
from slugify import slugify
from dotmap import DotMap as dmap

# When no output file was configured, derive one from the slugified code specification.
if json_filename is None:
    json_filename = f"zdat-{slugify(code)}.json"
print(json_filename)

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from qec_lego_bench.hpc.monte_carlo import *
from qec_lego_bench.hpc.submitter import *
from qec_lego_bench.hpc.plotter import *
from typing import Iterable
from qec_lego_bench.cli.logical_error_rate import logical_error_rate

### Define the job list

In [None]:
# One job per (decoder, erasure conversion rate, no_detectors flag) combination;
# the three positional values line up with the parameters of `monte_carlo_function`.
jobs = [
    MonteCarloJob(decoder, ecr, no_detectors)
    for decoder in decoder_vec
    for ecr in ecr_vec
    for no_detectors in (True, False)
]

def monte_carlo_function(shots: int, decoder: str, ecr: float, no_detectors: bool) -> tuple[int, LogicalErrorResult]:
    """Evaluate one job by calling `logical_error_rate` for a batch of shots.

    The module-level `code` and `noise` are used as-is; a second noise
    specification `erasure_conversion(rate=<ecr>...)` is added on top, with
    `,no_detectors=1` appended when `no_detectors` is true.

    Args:
        shots: passed as both `max_shots` and `max_errors`.
        decoder: decoder specification string.
        ecr: erasure conversion rate placed in the second noise specification.
        no_detectors: whether to append `no_detectors=1` to that specification.

    Returns:
        A pair of the shot count reported by the run and a `LogicalErrorResult`
        holding the reported errors and discards.
    """
    if no_detectors:
        closing = ",no_detectors=1)"
    else:
        closing = ")"
    erasure_noise = f"erasure_conversion(rate={ecr}" + closing
    outcome = logical_error_rate(
        decoder=decoder,
        code=code,
        noise=noise,
        noise2=erasure_noise,
        max_shots=shots,
        max_errors=shots,
        no_progress=True,
        no_print=True,
    )
    result = LogicalErrorResult(errors=outcome.errors, discards=outcome.discards)
    return outcome.shots, result


# Smoke test: run the first decoder locally on a small batch before submitting anything.
print(monte_carlo_function(shots=1000, decoder=decoder_vec[0], ecr=0.5, no_detectors=True))

### Define the strategy to submit jobs

In [None]:

# Building blocks combined by `submitter` below.
min_shot_submitter = MinShotsSubmitter(shots=1000)
barrier = SubmitterBarrier()
precision_submitter = PrecisionSubmitter(
    time_limit=100 * 3600,  # presumably seconds, i.e. 100 hours -- TODO confirm the unit
    min_precision=None,  # min_precision = None to force reaching the target precision
    target_precision=0.03,
)

def submitter(executor: MonteCarloJobExecutor) -> list[tuple[MonteCarloJob, int]]:
    """Decide which jobs to submit next, in two stages.

    The minimum-shots submitter always runs first. Only when the barrier
    accepts its output (i.e. the previous stage has all finished) are the
    precision submitter's requests appended.

    Args:
        executor: the executor whose current state the submitters inspect.

    Returns:
        The list of (job, shots) pairs to submit.
    """
    pending = min_shot_submitter(executor)
    if not barrier(executor, pending):
        # the earlier stage is still in progress: submit only its requests
        return pending
    pending += precision_submitter(executor)
    return pending

## The rest of the notebook runs the evaluation

Start a cluster, automatically choosing between a Slurm cluster and a local cluster.

In [None]:
def client_connector():
    """Create a dask client, preferring a Slurm cluster and falling back to a local one.

    A `SLURMCluster` is configured from the module-level `slurm_*` settings.
    If anything in that path raises (for example `dask_jobqueue` is not
    installed, or the job-count check fails), the error is printed and a
    `LocalCluster` is started instead. This is a deliberate best-effort
    fallback.

    The module-level `slurm_extra` dict is not modified, so calling this
    function repeatedly yields the same Slurm configuration every time.

    Returns:
        A `dask.distributed.Client` connected to the cluster that was started.
    """
    try:
        import os  # imported here so the function does not rely on a star import providing it

        from dask_jobqueue import SLURMCluster
        from dask.distributed import Client

        n_workers = slurm_maximum_jobs // slurm_cores_per_node
        assert n_workers <= 200, "Yale HPC forbids submitting more than 200 jobs per hour"
        slurm_job_folder = os.path.join(os.path.abspath(os.getcwd()), "slurm_job")
        # Work on a shallow copy: removing the key from the shared `slurm_extra`
        # would drop the user's directives (e.g. "--requeue") on any later call.
        extra = dict(slurm_extra)
        job_extra_directives = [
            f'--out="{slurm_job_folder}/%j.out"',
            f'--error="{slurm_job_folder}/%j.err"',
        ]
        # User directives are passed through the explicit keyword below, so they
        # are taken out of `extra` to avoid a duplicate keyword argument.
        job_extra_directives += extra.pop("job_extra_directives", [])
        cluster = SLURMCluster(
            cores=slurm_cores_per_node,
            processes=slurm_cores_per_node,
            memory=f"{slurm_mem_per_job * slurm_cores_per_node} GB",
            job_extra_directives=job_extra_directives,
            **extra,
        )
        print(cluster.job_script())
        # adapt() with a fixed size is used instead of a one-off scale() to allow respawn
        cluster.adapt(minimum=slurm_maximum_jobs, maximum=slurm_maximum_jobs)
    except Exception as e:
        print(e)
        from dask.distributed import Client, LocalCluster

        # `local_maximum_jobs` is an optional notebook parameter; when it is not
        # defined, None lets LocalCluster choose its default number of workers.
        cluster = LocalCluster(n_workers=globals().get("local_maximum_jobs"))
    print("cluster dashboard link:", cluster.dashboard_link)
    client = Client(cluster)
    return client


In [None]:
# Raise the cap on submitted jobs to at least three times the number of Slurm jobs,
# keeping the default when it is already larger.
config = MonteCarloExecutorConfig()
config.max_submitted_job = max(3 * slurm_maximum_jobs, config.max_submitted_job)

# The executor is given `json_filename` as its file name.
executor = MonteCarloJobExecutor(monte_carlo_function, jobs, config=config, filename=json_filename)

### Define the callback, e.g. plotting the intermediate result and the list of remaining tasks

(The callbacks have to be defined in the same cell as the actual execution; otherwise the plots won't update in VS Code.)

In [None]:
import time  # short sleeps let the plotters work properly in the VS Code Jupyter notebook

# Create the three plotters one at a time, pausing briefly before and after
# each construction. The order and the pauses are intentional (VS Code
# workaround); do not merge or reorder these statements.
time.sleep(0.2)
progress_plotter = JobProgressPlotter()
time.sleep(0.2)
memory_plotter = MemoryUsagePlotter()
time.sleep(0.2)
progress_plotter_by_name = JobProgressPlotter(sort_by_name=True)  # second progress view, sorted by job name
time.sleep(0.2)


def callback(executor: MonteCarloJobExecutor):
    """Refresh every plotter with the executor's current state.

    The plotters are called in a fixed order (progress, progress sorted by
    name, memory usage), each followed by a short pause.

    Args:
        executor: the running executor, handed to each plotter.
    """
    for refresh in (progress_plotter, progress_plotter_by_name, memory_plotter):
        refresh(executor)
        time.sleep(0.1)

# Tell the user up front that the cluster is shut down once execution ends.
print("will shut down the cluster after job finishes; if this is not desired, set `shutdown_cluster` to False")

# Run the evaluation: connect to a cluster, submit jobs according to `submitter`,
# and call `callback` from the executor's loop.
executor.execute(
    client_connector=client_connector,
    submitter=submitter,
    loop_callback=callback,
    force_finished=force_finished,
    shutdown_cluster=True,
)