In [77]:
import itertools
import multiprocessing as mp

from tqdm.notebook import tqdm

import edgedroid.data as e_data
import edgedroid.util as util
from edgedroid.execution_times import *

# Fetch the default execution-time dataset that ships with edgedroid.
raw_data_params = e_data.load_default_exec_time_data()

# The first element of the loader's result is the raw per-step frame; the
# remaining elements are only forwarded to preprocess_data below.
raw_data, *_ = raw_data_params

# Preprocessed frame that is handed to the execution-time models later on.
data = preprocess_data(*raw_data_params)
data

Unnamed: 0,run_id,seq,neuroticism,impairment,next_exec_time,transition,duration
0,134146,1,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",2.956,NoTransition,"[0.0, 5.0)"
1,134146,2,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",5.443,NoTransition,"[0.0, 5.0)"
2,134146,3,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",5.206,NoTransition,"[0.0, 5.0)"
3,134146,4,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",4.783,NoTransition,"[0.0, 5.0)"
4,134146,5,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",2.760,NoTransition,"[5.0, 10.0)"
...,...,...,...,...,...,...,...
6715,137353,164,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",4.722,Higher2Lower,"[5.0, 10.0)"
6716,137353,165,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",3.475,Higher2Lower,"[5.0, 10.0)"
6717,137353,166,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",2.296,Higher2Lower,"[10.0, inf)"
6718,137353,167,"[0.3333333333333333, 0.6666666666666666)","[-inf, 1.0)",4.608,Higher2Lower,"[10.0, inf)"


In [78]:
# Display the unprocessed frame (run_id, seq, exec_time, delay, neuroticism)
# for comparison with the preprocessed `data` frame shown in the previous cell.
raw_data

Unnamed: 0,run_id,seq,exec_time,delay,neuroticism
0,134146,1,4.433,0.0,0.375
1,134146,2,2.956,0.0,0.375
2,134146,3,5.443,0.0,0.375
3,134146,4,5.206,0.0,0.375
4,134146,5,4.783,0.0,0.375
...,...,...,...,...,...
6715,137353,164,6.501,0.0,0.625
6716,137353,165,4.722,0.0,0.625
6717,137353,166,3.475,0.0,0.625
6718,137353,167,2.296,0.0,0.625


In [84]:
# Evaluate how closely each execution-time model tracks the recorded runs.
#
# For every experimental run, `num_traces` synthetic traces are generated from
# that run's recorded delays, and every generated step is divided by the
# recorded execution time of the same step. A ratio of 1.0 therefore means the
# model reproduced the recorded value exactly. The mean and standard deviation
# of all ratios are stored per model in `results`.

num_traces = 100
run_ids = raw_data.run_id.unique()

# Size the buffer from the longest recorded run. Deriving it from the data
# (instead of from a randomly chosen run) keeps the cell deterministic and
# lets runs of different lengths fit in the same array.
run_len = int(raw_data.groupby('run_id').size().max())

results = {}

with mp.Pool() as pool:
    for model_cls in (TheoreticalExecutionTimeModel, EmpiricalExecutionTimeModel):
        model_name = model_cls.__name__

        # Fresh NaN-filled buffer per model: values from a previous model can
        # never leak into this model's statistics, and any slot that is not
        # written (e.g. padding of a shorter run) is ignored by nanmean/nanstd.
        diff_vals = np.full((len(run_ids), num_traces, run_len), np.nan)

        for i, participant in tqdm(enumerate(run_ids),
                                   total=len(run_ids),
                                   desc=model_name,
                                   leave=True):
            # calculate average model deviation w.r.t. real data
            # by generating num_traces for each experimental run,
            # and then averaging the ratio per step

            # Filter the frame once and reuse the slice for all three columns.
            run_data = raw_data[raw_data.run_id == participant]
            raw_delays = run_data.delay
            neuro = run_data.neuroticism.values[0]
            raw_exec_times = run_data.exec_time.to_numpy()

            model = model_cls(data=data, neuroticism=neuro)

            # Each worker receives the same delays and the same model object;
            # zip() stops after num_traces items because the first repeat()
            # is bounded.
            traces = np.array(pool.starmap(
                util.gen_model_trace,
                iterable=zip(itertools.repeat(raw_delays, num_traces),
                             itertools.repeat(model))
            ))[:, :-1]  # generated trace has one extra step at the end

            # ratio of generated to recorded execution time, per step
            # NOTE(review): a recorded exec_time of 0 would yield inf here.
            trace_diff = traces / raw_exec_times  # TODO: scale results?

            # Store all traces of this run at once; columns beyond this run's
            # length (if any) stay NaN.
            diff_vals[i, :, :trace_diff.shape[1]] = trace_diff

        results[model_name] = (np.nanmean(diff_vals), np.nanstd(diff_vals))

# `name` (not `model`) so the model instance created above is not shadowed.
for name, (mean, std) in results.items():
    print(f'{name} relative to empirical data --- mean = {mean:0.03f} | std = {std:0.03f}')

TheoreticalExecutionTimeModel:   0%|          | 0/40 [00:00<?, ?it/s]

EmpiricalExecutionTimeModel:   0%|          | 0/40 [00:00<?, ?it/s]

TheoreticalExecutionTimeModel --- mean = 1.268 | std = 3.467
EmpiricalExecutionTimeModel --- mean = 1.300 | std = 3.424
