In [404]:
import numpy as np
from scipy import stats, random
import pylab as plt
from itertools import compress
from tqdm import tqdm
import zarr
import numcodecs

import pylab as plt

In [681]:
###### Zarr-backed datastore of parameter samples z and their simulation outputs x

class DataStore:
    """Zarr-backed store of parameter samples ``z`` and simulation outputs ``x``.

    The store keeps four arrays under the zarr root group:

    * ``samples/x``            -- simulation outputs, one slot per sample
    * ``samples/z``            -- parameter samples, one row per sample
    * ``metadata/needs_sim``   -- boolean flag per sample, True while the
                                  matching ``x`` slot still has to be simulated
    * ``metadata/intensity``   -- object array with every intensity function
                                  that has been registered with the store

    The intensity of the store is the pointwise maximum of the registered
    intensity functions (see `intensity`).

    NOTE(security): intensity functions are stored with ``numcodecs.Pickle``.
    Reading them back unpickles arbitrary objects, so only open datastores
    that come from a trusted source.
    """

    def __init__(self, filename):
        """Open the datastore directory ``filename``.

        If the store has no ``samples`` group yet, the array handles are not
        set and `init` has to be called before the store can be used.
        """
        # Open (new) datastore
        self.store = zarr.DirectoryStore(filename)
        self.root = zarr.group(store=self.store)

        if 'samples' not in self.root.keys():
            print("Creating empty datastore:", filename)
            print("...don't forget to run `init` to set up storage parameters.")
            return

        print("Loading datastore:", filename)
        self.x = self.root['samples/x']
        self.z = self.root['samples/z']
        self.m = self.root['metadata/needs_sim']
        self.u = self.root['metadata/intensity']

    def init(self, xdim, zdim):
        """Create the (empty) arrays of a fresh datastore.

        Parameters
        ----------
        xdim : sequence of int
            Shape of a single simulation output ``x``.
        zdim : int
            Number of components of a parameter sample ``z``.

        Does nothing (besides printing a warning) when the store already
        contains a ``samples`` group.
        """
        if 'samples' in self.root.keys():
            print("WARNING: Datastore is already initialized.")
            return
        xdim = tuple(xdim)  # also accept lists for the simulation shape
        self.x = self.root.zeros('samples/x', shape=(0,)+xdim, chunks=(1,)+xdim, dtype='f4')
        self.z = self.root.zeros('samples/z', shape=(0,)+(zdim,), chunks=(10000,)+(zdim,), dtype='f4')
        self.m = self.root.zeros('metadata/needs_sim', shape=(0,1), chunks=(10000,)+(1,), dtype='bool')
        self.u = self.root.create('metadata/intensity', shape=(0,), dtype=object, object_codec=numcodecs.Pickle())
        print("Datastore initialized.")

    def _append_z(self, z):
        """Append the rows of ``z`` and open matching (unsimulated) ``x`` slots."""
        n_new = len(z)

        # Add simulation slots
        xshape = list(self.x.shape)
        xshape[0] += n_new
        self.x.resize(*xshape)

        # Add z samples
        self.z.append(z)

        # Register as missing
        self.m.append(np.ones((n_new, 1), dtype='bool'))

    def _register_intensity(self, p):
        """Append the intensity function ``p`` to the stored intensity list."""
        self.u.resize(len(self.u)+1)
        self.u[-1] = p

    def __len__(self):
        """Returns number of samples in the datastore."""
        return len(self.z)

    def intensity(self, zlist):
        """Evaluate the datastore intensity at the points ``zlist``.

        Returns the pointwise maximum over all registered intensity
        functions, or zeros when none has been registered yet.
        """
        n_funcs = len(self.u)
        if n_funcs == 0:
            return np.zeros(len(zlist))
        return np.array([self.u[i](zlist) for i in range(n_funcs)]).max(axis=0)

    def _grow(self, p):
        """Grow the datastore so that it covers the intensity function ``p``.

        Proposals are drawn with ``p.sample()`` and each one is kept with
        probability ``1 - min(1, I_ds / I_target)``, where ``I_ds`` is the
        current datastore intensity and ``I_target = p(z)``.
        """
        # Proposed new samples z from p
        z_prop = p.sample()
        n_prop = len(z_prop)

        # Rejection sampling from proposal list (vectorised)
        ds_intensities = np.asarray(self.intensity(z_prop), dtype=float)
        target_intensities = np.asarray(p(z_prop), dtype=float)
        with np.errstate(divide='ignore', invalid='ignore'):
            # A zero target intensity yields inf/nan here; both end up rejected.
            rej_prob = np.minimum(1, ds_intensities/target_intensities)
        w = np.random.rand(n_prop)
        accepted = rej_prob < w
        z_accepted = z_prop[accepted, :]
        n_new = len(z_accepted)

        # Add new entries to datastore (nothing to write when all were rejected)
        if n_new > 0:
            self._append_z(z_accepted)

        # Update the intensity function whenever p is seen to exceed the
        # current store intensity, even if no proposal happened to survive.
        if n_new > 0 or np.any(target_intensities > ds_intensities):
            self._register_intensity(p)

        if n_new > 0:
            print("Adding %i new samples. Run simulator!"%n_new)
        else:
            print("No new simulator runs required.")

    def sample(self, p):
        """Return indices of stored samples distributed according to ``p``.

        The store is first grown to cover ``p``; afterwards every stored
        sample is selected with probability ``I_target / I_ds``.

        Returns
        -------
        list of int
            Indices into the datastore (usable with ``ds[i]``).
        """
        self._grow(p)

        zlist = self.z[:]
        I_ds = np.asarray(self.intensity(zlist), dtype=float)
        I_target = np.asarray(p(zlist), dtype=float)

        # `_grow` may not have registered p (e.g. the Poisson draw was empty).
        # If p exceeds the store intensity at stored points it has to be
        # registered now, otherwise the acceptance ratio would be > 1.
        if np.any(I_target > I_ds):
            self._register_intensity(p)
            I_ds = np.maximum(I_ds, I_target)

        # Points with zero store intensity get acceptance probability 0.
        accept_prob = np.zeros(len(zlist))
        np.divide(I_target, I_ds, out=accept_prob, where=I_ds > 0)
        assert np.all(accept_prob <= 1.), "target intensity exceeds datastore intensity"

        w = np.random.rand(len(zlist))
        return np.flatnonzero(accept_prob > w).tolist()

    def __getitem__(self, i):
        """Return the pair ``(x[i], z[i])``."""
        return self.x[i], self.z[i]

    def requires_sim(self):
        """Return the indices of all samples still flagged as needing a simulation."""
        flags = np.asarray(self.m[:]).reshape(-1)[:len(self.z)]
        return np.flatnonzero(flags).tolist()

    def add_sim(self, i, x):
        """Store simulation output ``x`` for sample ``i`` and clear its flag."""
        self.x[i] = x
        self.m[i] = False

In [682]:
class Intensity:
    """Homogeneous Poisson intensity on the axis-aligned box ``[z0, z1]``.

    Parameters
    ----------
    mu : float
        Expected number of points in the box.
    z0, z1 : sequence of float
        Lower and upper corner of the box (one entry per dimension).
    """

    def __init__(self, mu, z0, z1):
        self.mu = mu
        self.z0 = np.array(z0)
        self.z1 = np.array(z1)

    def sample(self):
        """Draw a Poisson(mu) number of points uniformly from the box.

        Returns an array of shape ``(N, len(z0))``.
        """
        N = np.random.poisson(self.mu, 1)[0]
        q = np.random.rand(N, len(self.z0))
        q *= self.z1 - self.z0
        q += self.z0
        return q

    def __call__(self, z):
        """Intensity at ``z``: ``mu`` times the uniform density of the box."""
        return self._pdf(z)*self.mu

    def _pdf(self, z):
        """Uniform density of the box at ``z`` (last axis indexes dimensions).

        Returns ``1 / volume`` for points inside the box and 0 outside.
        """
        val = 1./(self.z1 - self.z0).prod()
        # The density is a single value per point: combine the per-coordinate
        # checks with `all` instead of multiplying `val` once per dimension,
        # which would yield val**d for any box whose volume is not 1.
        inside = np.logical_and(z >= self.z0, z <= self.z1).all(axis=-1)
        return np.where(inside, val, 0.)

In [751]:
# Open the on-disk datastore; `init` sets up the storage arrays on a fresh
# store and only prints a warning when the store is already initialized.
ds = DataStore(filename="/home/ubuntu/volume1/dsH.zarr")
ds.init(xdim=(400, 400), zdim=2)

Loading datastore: /home/ubuntu/volume1/dsH.zarr


In [752]:
# Target intensity: Poisson mean of 2000 points, uniform on the unit square.
p = Intensity(mu=2000, z0=[0.0, 0.0], z1=[1., 1.])
# Indices of stored samples selected for this target intensity ...
indices = ds.sample(p)
# ... and indices of samples whose simulation output is still missing.
indices_req = ds.requires_sim()

No new simulator runs required.


In [753]:
# Number of selected samples vs. number of samples still awaiting a simulation.
n_selected = len(indices)
n_pending = len(indices_req)
print(n_selected, n_pending)

2001 0


In [754]:
def model(z):
    """Toy simulator: return a 400x400 array of standard-normal noise.

    The parameter ``z`` is accepted but not used.
    """
    image_shape = (400, 400)
    return np.random.standard_normal(image_shape)

# Run the simulator for every sample still flagged as missing and store the result.
for idx in indices_req:
    z_i = ds[idx][1]
    ds.add_sim(idx, model(z_i))