# Run Simulations

Given an encoded dataset of targets and queries, run simulations.

In [1]:
import numpy as np
import pandas as pd

import primo.models

import cupyck

from tqdm.notebook import tqdm

In [2]:
# Endpoints handed to the cupyck client; each entry is presumably a
# (hostname, port) pair -- confirm against cupyck.Client.
hosts = [("localhost", 2046)]

# The simulator issues its work through this client.
client = cupyck.Client(hosts)
simulator = primo.models.Simulator(client)

In [3]:
# HDF5 files holding the feature sequences of the targets and of the queries.
target_seqs_path = '/tf/primo/data/extended_targets/feature_seqs.h5'
query_seqs_path = '/tf/primo/data/queries/feature_seqs.h5'

target_seqs = pd.read_hdf(target_seqs_path)
query_seqs = pd.read_hdf(query_seqs_path)

In [4]:
# Feature sequence of the one query ('callie_janelle') that every target is paired with.
# Assumes the lookup yields a single value that `assign` can broadcast to
# every row -- TODO confirm against the layout of query_seqs.
query_features = query_seqs.loc['callie_janelle'].FeatureSequence

# One row per target: its FeatureSequence column is renamed to
# `target_features`, and the query's features are attached as `query_features`.
pairs = target_seqs.rename(columns={'FeatureSequence': 'target_features'}).assign(
    query_features=query_features
)

In [5]:
# Upper bound on the number of pairs sent to the simulator in one call.
# 4,000 is only a memory-management batch size, chosen so that each progress
# step covers a reasonable amount of work; it does not affect the results.
split_size = 4000

# Number of batches = ceil(len(pairs) / split_size), and never fewer than one:
#  * `//` keeps the count an integer (plain `/` is a float under Python 3),
#  * rounding up keeps every batch at or below `split_size` rows,
#  * the floor of 1 avoids np.array_split raising
#    "number sections must be larger than 0" when len(pairs) < split_size.
# An empty `pairs` frame therefore yields a single empty batch.
nsplits = max(1, -(-len(pairs) // split_size))
splits = np.array_split(pairs, nsplits)

In [6]:
# Simulate one batch at a time and append only the `duplex_yield` column of
# each batch's results to the 'df' table of the output store. The file is
# opened with mode='w', so any existing file at this path is replaced.
results_path = '/tf/primo/data/simulation/extended_targets/callie_janelle.h5'

# The context manager closes the store even if a batch fails part-way through.
with pd.HDFStore(results_path, complevel=9, mode='w') as result_store:
    for split in tqdm(splits):
        results = simulator.simulate(split)
        result_store.append('df', results[['duplex_yield']])

HBox(children=(HTML(value=u''), FloatProgress(value=0.0, max=1394.0), HTML(value=u'')))


