# Data-informed parameter synthesis for population pDTMCs, notebook 4/6

The main part of this notebook generates the data for the case-study models by simulating them.
1. [Generate the data](#one)

There are four things to set up in this notebook:
1. `agents_quantities` / `populations` - the set of population sizes (numbers of agents) to be considered,
2. `model_types` - the set of model types to be considered,
3. `dimension_sample_size` - the number of samples per parameter,
4. `n_samples` - the numbers of samples, and hence the number of simulations.

The following notebook is: `analysis`

<a name="one"></a>

## GENERATE THE DATA

In [None]:
import numpy, os, sys, pickle

In [None]:
# Make the parent of the current working directory importable, so that the
# `src` package imported further down can be found.
cwd = os.getcwd()
# Derive the parent path directly instead of chdir("..") / chdir(cwd): the
# process working directory is never changed, so an interruption cannot leave
# the kernel in the wrong directory.
parent_dir = os.path.dirname(cwd)
# Guard against piling up duplicate entries when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
import configparser

# Read the project configuration located one directory above the working directory.
config = configparser.ConfigParser()
print(os.getcwd())

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; check it so a missing config fails with a clear
# message instead of a later NoSectionError.
config_file = "../config.ini"
if not config.read(config_file):
    raise OSError("Config file could not be read: " + os.path.abspath(config_file))

# Directory used below for all pickled inputs and outputs.
data_path = config.get("paths", "data")
if not os.path.exists(data_path):
    raise OSError("Directory does not exist: " + str(data_path))

In [None]:
## random 10x5 array (original note: create a dimension for each parameter)
default_10dim_param_space = numpy.random.random((10, 5))
print(default_10dim_param_space)
# rows 2 and 3 become copies of row 1; rows 4..9 are zeroed
default_10dim_param_space[2:4] = default_10dim_param_space[1]
default_10dim_param_space[4:] = 0.0
print()
print(default_10dim_param_space)

Here we generate the data by simulating the model:

In [None]:
from src.generate_data import generate_experiments_and_data, generate_experiments, generate_data

## two-param

In [None]:
# Settings for the two-parameter case.
# NOTE(review): `sample_size` is not referenced anywhere else in the visible
# notebook; the generation calls use `dimension_sample_size` instead - confirm.
sample_size = 5
populations = [2, 3, 5, 10, 20, 40]
n_samples = [3500, 1500, 100]
model_types = ["synchronous_"]

In [None]:
# p_values=[]
# for v_p in numpy.random.uniform(0.0, 1.0,dimension_sample_size):
#     p_values.append(v_p)
# q_values=[]
# for v_q in numpy.random.uniform(0.0, 1.0,dimension_sample_size):
#     q_values.append(v_q)

In [None]:
## FIXED PARAMETER VALUES (originally drawn uniformly at random), FIVE PER PARAMETER
p_values = [
    0.028502714675268215,
    0.45223461506339047,
    0.8732745414252937,
    0.6855555397734584,
    0.13075717833714784,
]
q_values = [
    0.5057623641293089,
    0.29577906622244676,
    0.8440550299528644,
    0.8108008054929994,
    0.03259111103419188,
]

In [None]:
## STACKING BOTH VALUE LISTS INTO ONE 2x5 NUMPY ARRAY (row 0: p_values, row 1: q_values)
default_2dim_param_space = numpy.array([p_values, q_values], dtype=float)
default_2dim_param_space

In [None]:
## ALL COMBINATIONS OF default_2dim_param_space:
## every value of row 0 is paired with every value of row 1 (row-0 value varies slowest)
param_space = default_2dim_param_space
permutated_default_2dim_param_space = [
    [first_value, second_value]
    for first_value in param_space[0]
    for second_value in param_space[1]
]

In [None]:
# Display the list of [row-0 value, row-1 value] pairs.
permutated_default_2dim_param_space

In [None]:
# Same pairs as an array with one row per parameter and one column per pair.
numpy.asarray(permutated_default_2dim_param_space).T

In [None]:
# Print every column of the parameter space as one parametrisation tuple.
param_space = default_2dim_param_space
for column_values in map(tuple, param_space.T):
    print("parametrisation:", column_values)

In [None]:
# Number of samples per parameter (see the setup list at the top of the notebook).
dimension_sample_size = 5

In [None]:
# Experiments_two_param, Data_two_param = generate_experiments_and_data(model_types,n_samples,populations,dimension_sample_size,default_2dim_param_space)

In [None]:
# import time, socket
# start_time = time.time()
# Experiments_two_param, Data_two_param = generate_experiments_and_data(model_types,n_samples,populations,dimension_sample_size, numpy.array(permutated_default_2dim_param_space).transpose(), debugging=True)
# print("  It took", socket.gethostname(), time.time() - start_time, "seconds to run")

This took Freya more than 6 days to run

As you can see, this process took a while, so we have pickled the data and load it back from disk.

In [None]:
# pickle.dump(Experiments_two_param,open(os.path.join(data_path,"Experiments_two_param.p"), "wb" ))
# pickle.dump(Data_two_param,open(os.path.join(data_path,"Data_two_param.p"), "wb"))

Now we load the pickled data

In [None]:
from src.load import load_pickled_data

In [None]:
# Load the two-parameter data through the project's pickle loader.
# NOTE(review): presumably this resolves the name to Data_two_param.p under the
# configured data path - confirm in src.load.
D3 = load_pickled_data("Data_two_param")

MANAGING THE "parallel_" KEYS: entries stored under the model type "synchronous_parallel_" are re-keyed as "synchronous_"

In [None]:
# Re-key every entry whose key contains the element "synchronous_parallel_":
# the first key element becomes "synchronous_", the remaining elements are kept.
# Iterate over a snapshot of the keys: deleting and inserting entries while
# iterating the live dict view can skip keys, revisit freshly inserted ones,
# or raise RuntimeError.
for key in list(D3.keys()):
    if "synchronous_parallel_" in key:
        print(key)
        key2 = ("synchronous_",) + tuple(key[1:])
        print(key2)
        # pop + insert moves the stored value to the renamed key
        D3[key2] = D3.pop(key)

In [None]:
# Write D3 back to Data_two_param.p, overwriting the existing file.
# The context manager closes the handle, so the pickle is fully flushed to disk.
with open(os.path.join(data_path, "Data_two_param.p"), "wb") as data_file:
    pickle.dump(D3, data_file)

Example of the used structure:

In [None]:
# Look up a single entry; the key is a 5-tuple whose first element is the model
# type string, followed by two integers and two floats.
# NOTE(review): presumably (model type, population, n_samples, p, q) - confirm against src.generate_data.
D3[('synchronous_', 2, 3500, 0.028502714675268215, 0.5057623641293089)]

In [None]:
# List every key currently stored in D3.
D3.keys()

In [None]:
# Number of entries in D3.
len(D3)

REFORMATTING THE D3 DICTIONARY

In [None]:
# Regroup the flat D3 entries into nested dictionaries:
#   D8[item[0]][item[1]][item[2]][(item[3], item[4])] = D3[item]
# The previous nested try/except created only ONE missing level per entry and
# then moved on, so every entry that triggered the creation of a level was
# never stored; its bare `except:` also hid any unrelated error.
# setdefault() creates whatever levels are missing and the entry is always stored.
# The debug prints that doubled as existence probes are no longer needed.
D8 = {}
for item in D3.keys():
    by_second_key = D8.setdefault(item[0], {}).setdefault(item[1], {})
    by_second_key.setdefault(item[2], {})[item[3], item[4]] = D3[item]
D8

REFORMATTING THE NEW DICTIONARY

In [None]:
# Merge the separately stored Data_two_param_20 / Data_two_param_40 pickles
# into Data_two_param.p.
# NOTE: this cell reads Data_two_param.p and then overwrites it, so re-running
# it operates on the already merged file.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
# Files are opened with context managers so every handle is closed.
with open(os.path.join(data_path, "Data_two_param_20.p"), "rb") as pickle_file:
    Data_two_param_20 = pickle.load(pickle_file)
with open(os.path.join(data_path, "Data_two_param_40.p"), "rb") as pickle_file:
    Data_two_param_40 = pickle.load(pickle_file)

# Flatten the four-level nested dictionaries: each leaf is stored in D9 under
# the 4-tuple of the keys leading to it.
D9 = {}
for nested in (Data_two_param_20, Data_two_param_40):
    for item1, level_1 in nested.items():
        for item2, level_2 in level_1.items():
            for item3, level_3 in level_2.items():
                for item4, value in level_3.items():
                    D9[(item1, item2, item3, item4)] = value

with open(os.path.join(data_path, "Data_two_param.p"), "rb") as pickle_file:
    D3 = pickle.load(pickle_file)

# Cut the first key element at its first "p" (e.g. "synchronous_parallel_"
# becomes "synchronous_"). A new dict is built instead of deleting/inserting
# while iterating D3, which can skip or revisit keys.
D3 = {
    (item[0].split("p")[0], item[1], item[2], item[3], item[4]): value
    for item, value in D3.items()
}

# NOTE(review): the flattened keys above are 4-tuples while the D3 keys are
# 5-tuples - confirm that consumers of the merged file expect both layouts.
D9.update(D3)
with open(os.path.join(data_path, "Data_two_param.p"), "wb") as pickle_file:
    pickle.dump(D9, pickle_file)

In [None]:
# List the keys of the merged dictionary D9.
D9.keys()

In [None]:
# Merge the separately stored Experiments_two_param_20 / Experiments_two_param_40
# pickles into Experiments_two_param.p.
# NOTE: this cell reads Experiments_two_param.p and then overwrites it, and it
# rebinds both D9 and D3 to the experiments dictionaries.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
# Files are opened with context managers so every handle is closed.
with open(os.path.join(data_path, "Experiments_two_param_20.p"), "rb") as pickle_file:
    Experiments_two_param_20 = pickle.load(pickle_file)
with open(os.path.join(data_path, "Experiments_two_param_40.p"), "rb") as pickle_file:
    Experiments_two_param_40 = pickle.load(pickle_file)

# Flatten the four-level nested dictionaries: each leaf is stored in D9 under
# the 4-tuple of the keys leading to it.
D9 = {}
for nested in (Experiments_two_param_20, Experiments_two_param_40):
    for item1, level_1 in nested.items():
        for item2, level_2 in level_1.items():
            for item3, level_3 in level_2.items():
                for item4, value in level_3.items():
                    D9[(item1, item2, item3, item4)] = value

with open(os.path.join(data_path, "Experiments_two_param.p"), "rb") as pickle_file:
    D3 = pickle.load(pickle_file)

# Cut the first key element at its first "p" (e.g. "synchronous_parallel_"
# becomes "synchronous_"). A new dict is built instead of deleting/inserting
# while iterating D3, which can skip or revisit keys.
D3 = {
    (item[0].split("p")[0], item[1], item[2], item[3], item[4]): value
    for item, value in D3.items()
}

# NOTE(review): the flattened keys above are 4-tuples while the D3 keys are
# 5-tuples - confirm that consumers of the merged file expect both layouts.
D9.update(D3)
with open(os.path.join(data_path, "Experiments_two_param.p"), "wb") as pickle_file:
    pickle.dump(D9, pickle_file)

In [None]:
# List the keys of D9 (rebuilt from the Experiments pickles by the preceding cell).
D9.keys()

REFORMATTING THE D3 DICTIONARY VOL 2

In [None]:
# Regroup D3 into two levels: the first three key elements select a group,
# the last two key elements select the entry inside that group.
# NOTE(review): D3 is whatever the most recently executed cell bound it to
# (the preceding cell loads the Experiments pickle into it) - confirm this is intended.
D6 = {}
for item in D3.keys():
    print(item)
    print(item[0:3])
    print(item[3:5])
    group_key, point_key = item[0:3], item[3:5]
    # Explicit membership test instead of a bare `except:`, which would also
    # swallow unrelated errors and keyboard interrupts.
    if group_key in D6:
        print(D6[group_key])
    else:
        D6[group_key] = {}
    D6[group_key][point_key] = D3[item]
    print(D3[item])
In [None]:
# Inner keys (last two elements of the original D3 keys) stored for one group.
D6[('synchronous_', 2, 3500)].keys()

In [None]:
# Number of entries stored for that group.
len(D6[('synchronous_', 2, 3500)])

## multiparam

## Experiments_one_point_step_down_10dim_param_space

In [None]:
# Settings for the one-point "step down" 10-parameter case.
dimension_sample_size = 5
populations = [10]
n_samples = [3500, 1500, 100]
model_types = ["synchronous_"]

In [None]:
# 10x1 array holding a single point: 0.1 in the first four rows, 0 in the remaining six.
one_point_step_down_10dim_param_space = numpy.zeros((10, 1))
one_point_step_down_10dim_param_space[:4] = 0.1
one_point_step_down_10dim_param_space

In [None]:
# Experiments_one_point_step_down_10dim_param_space, Data_one_point_step_down_10dim_param_space = generate_experiments_and_data(model_types,n_samples,populations,dimension_sample_size,one_point_step_down_10dim_param_space)

In [None]:
# pickle.dump( Experiments_one_point_step_down_10dim_param_space,open(os.path.join(data_path,"Experiments_one_point_step_down_10dim_param_space.p"), "wb" ))
# pickle.dump( Data_one_point_step_down_10dim_param_space,open(os.path.join(data_path,"Data_one_point_step_down_10dim_param_space.p"), "wb" ))

In [None]:
# Load the previously pickled step-down experiments and data.
# Context managers close the file handles, which the bare open() calls never did.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
with open(os.path.join(data_path, "Experiments_one_point_step_down_10dim_param_space.p"), "rb") as pickle_file:
    Experiments_one_point_step_down_10dim_param_space = pickle.load(pickle_file)
with open(os.path.join(data_path, "Data_one_point_step_down_10dim_param_space.p"), "rb") as pickle_file:
    Data_one_point_step_down_10dim_param_space = pickle.load(pickle_file)

## linear generations

In [None]:
# Settings for the one-point linear case.
dimension_sample_size = 5
populations = [2, 3, 5, 10]
n_samples = [3500, 1500, 100]
model_types = ["semisynchronous_"]

In [None]:
# 10x1 array holding a single point: 0.4 in row 0, then step * 0.1 in rows 1..9.
one_point_linear_param_space = numpy.array(
    [[0.4]] + [[step * 0.1] for step in range(1, 10)]
)
one_point_linear_param_space

In [None]:
# Experiments_one_point_linear_param_space, Data_one_point_linear_param_space = generate_experiments_and_data(model_types,n_samples,populations,dimension_sample_size,one_point_linear_param_space)

In [None]:
# len(Experiments_one_point_linear_param_space['multiparam_semisynchronous_'][10][3500][(0.4,0.1,  0.2,  0.30000000000000004,  0.4,  0.5,  0.6000000000000001,  0.7000000000000001,  0.8,  0.9)])

In [None]:
# Data_one_point_linear_param_space['multiparam_semisynchronous_'][2][100]

In [None]:
# pickle.dump( Experiments_one_point_linear_param_space,open(os.path.join(data_path,"Experiments_one_point_linear_param_space.p"), "wb" ))
# pickle.dump( Data_one_point_linear_param_space,open(os.path.join(data_path,"Data_one_point_linear_param_space.p"), "wb" ))

In [None]:
# Load the previously pickled one-point linear experiments and data.
# Context managers close the file handles, which the bare open() calls never did.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
with open(os.path.join(data_path, "Experiments_one_point_linear_param_space.p"), "rb") as pickle_file:
    Experiments_one_point_linear_param_space = pickle.load(pickle_file)
with open(os.path.join(data_path, "Data_one_point_linear_param_space.p"), "rb") as pickle_file:
    Data_one_point_linear_param_space = pickle.load(pickle_file)

## One point linear bee models

In [None]:
# Settings for the one-point linear bee models.
dimension_sample_size = 5
populations = [10]
n_samples = [3500, 1500, 100]
model_types = ["bee_multiparam_synchronous_"]

In [None]:
# 10x1 array holding a single point: 0.1 in row 0, then step * 0.05 + 0.1 in rows 1..9.
# This rebinds the name used in the "linear generations" section.
one_point_linear_param_space = numpy.array(
    [[0.1]] + [[step * 0.05 + 0.1] for step in range(1, 10)]
)
one_point_linear_param_space

In [None]:
# Experiments_bee_one_point_linear_param_space, Data_bee_one_point_linear_param_space = generate_experiments_and_data(model_types,n_samples,populations,dimension_sample_size, sim_length=max(populations), modular_param_space=one_point_linear_param_space)

In [None]:
# pickle.dump( Experiments_bee_one_point_linear_param_space,open(os.path.join(data_path,"Experiments_bee_one_point_linear_param_space.p"), "wb" ))
# pickle.dump( Data_bee_one_point_linear_param_space,open(os.path.join(data_path,"Data_bee_one_point_linear_param_space.p"), "wb" ))

In [None]:
# Load the previously pickled bee-model experiments and data.
# Context managers close the file handles, which the bare open() calls never did.
# NOTE(review): pickle.load can execute arbitrary code; only load files you produced yourself.
with open(os.path.join(data_path, "Experiments_bee_one_point_linear_param_space.p"), "rb") as pickle_file:
    Experiments_bee_one_point_linear_param_space = pickle.load(pickle_file)
with open(os.path.join(data_path, "Data_bee_one_point_linear_param_space.p"), "rb") as pickle_file:
    Data_bee_one_point_linear_param_space = pickle.load(pickle_file)