In [185]:
import numpy as np
from scipy import stats, random
import pylab as plt
from itertools import compress
import swyft
from tqdm import tqdm
import zarr
import numcodecs
import torch
import pylab as plt

SyntaxError: invalid syntax (interface.py, line 212)

In [177]:
###### Zarr-backed datastore holding parameter samples z and their simulation outputs x

class DataStore:
    """Persistent store of parameter samples and their simulation outputs.

    The store is a zarr directory group with four arrays:

    * ``samples/x``: simulation outputs, one entry per sample (float32).
    * ``samples/z``: parameter samples, one row per sample (float32).
    * ``metadata/needs_sim``: boolean column, True while ``x[i]`` still has
      to be simulated.
    * ``metadata/intensity``: the intensity objects that were used to populate
      the store.  An intensity object ``p`` must provide ``p.sample()``
      (proposal points, one per row) and ``p(z)`` (intensity at each row).

    A newly created store has none of these arrays until ``init`` is called.

    NOTE(review): intensity objects are stored with ``numcodecs.Pickle``;
    unpickling executes arbitrary code, so only open trusted datastores.
    """

    def __init__(self, filename):
        """Open the datastore directory ``filename`` (created if missing)."""
        # Open (new) datastore
        self.store = zarr.DirectoryStore(filename)
        self.root = zarr.group(store = self.store)

        if 'samples' not in self.root.keys():
            print("Creating empty datastore:", filename)
            print("...don't forget to run `init` to set up storage parameters.")
            return

        print("Loading datastore:", filename)
        self.x = self.root['samples/x']
        self.z = self.root['samples/z']
        self.m = self.root['metadata/needs_sim']
        self.u = self.root['metadata/intensity']

    def init(self, xdim, zdim):
        """Create the (empty) storage arrays of a new datastore.

        Args:
            xdim (tuple of int): shape of a single simulation output.
            zdim (int): number of parameters per sample.

        Does nothing except print a warning if the store already contains
        a ``samples`` group.
        """
        if 'samples' in self.root.keys():
            print("WARNING: Datastore is already initialized.")
            return
        self.x = self.root.zeros('samples/x', shape=(0,)+xdim, chunks=(1,)+xdim, dtype='f4')
        self.z = self.root.zeros('samples/z', shape=(0,)+(zdim,), chunks=(10000,)+(zdim,), dtype='f4')
        self.m = self.root.zeros('metadata/needs_sim', shape=(0,1), chunks=(10000,)+(1,), dtype='bool')
        self.u = self.root.create('metadata/intensity', shape=(0,), dtype=object, object_codec=numcodecs.Pickle())
        print("Datastore initialized.")

    def _append_z(self, z):
        """Append the rows of ``z`` and reserve matching, not yet simulated, slots in ``x``."""
        # Add simulation slots
        xshape = list(self.x.shape)
        xshape[0] += len(z)
        self.x.resize(*xshape)

        # Add z samples
        self.z.append(z)

        # Register as missing
        m = np.ones((len(z),1), dtype='bool')
        self.m.append(m)

    def __len__(self):
        """Returns number of samples in the datastore."""
        return len(self.z)

    def intensity(self, zlist):
        """Return the datastore intensity at each row of ``zlist``.

        The datastore intensity is the pointwise maximum over all registered
        intensity functions; it is zero everywhere while none is registered.
        """
        if len(self.u) == 0:
            return np.zeros(len(zlist))
        else:
            return np.array([self.u[i](zlist) for i in range(len(self.u))]).max(axis=0)

    def _grow(self, p):
        """Add samples so that the datastore also covers the intensity ``p``.

        Proposals drawn from ``p`` are kept with probability
        ``1 - min(1, I_ds / I_p)``, where ``I_ds`` is the current datastore
        intensity, so points are only added where ``p`` exceeds what is
        already stored.
        """
        # Proposed new samples z from p
        z_prop = p.sample()

        # Rejection sampling from proposal list.  One uniform draw per
        # proposal, in proposal order.
        ds_intensities = self.intensity(z_prop)
        target_intensities = np.asarray(p(z_prop))
        rej_prob = np.minimum(1, ds_intensities/target_intensities)
        w = np.random.rand(len(z_prop))
        accepted = rej_prob < w
        z_accepted = z_prop[accepted, :]

        # Add new entries to datastore
        self._append_z(z_accepted)

        # Update the intensity function.  This must not depend on the outcome
        # of the random draws above: if p exceeds the stored intensity at some
        # proposal but every proposal happened to be rejected, the thinning has
        # still been carried out and p has to be recorded.  Otherwise `sample`
        # would later find I_p / I_ds > 1.  Any accepted proposal implies the
        # condition below, so p is registered in every case it was before.
        if np.any(target_intensities > ds_intensities):
            self.u.resize(len(self.u)+1)
            self.u[-1] = p

        if len(z_accepted) > 0:
            print("Adding %i new samples. Run simulator!"%len(z_accepted))
        else:
            print("No new simulator runs required.")

    def sample(self, p):
        """Return indices of stored samples that form a draw from intensity ``p``.

        The datastore is first grown to cover ``p``; each stored point is then
        selected with probability ``I_p / I_ds``.

        Returns:
            list of int: indices into the datastore, in increasing order.

        Raises:
            AssertionError: if ``p`` exceeds the datastore intensity at a
                stored point.
        """
        self._grow(p)

        zlist = self.z[:]
        accept_prob = np.asarray(p(zlist))/self.intensity(zlist)
        assert np.all(accept_prob <= 1.), "Target intensity exceeds datastore intensity."
        w = np.random.rand(len(zlist))
        return np.flatnonzero(accept_prob > w).tolist()

    def __getitem__(self, i):
        """Return the pair ``(x[i], z[i])``."""
        return self.x[i], self.z[i]

    def requires_sim(self):
        """Return the indices of all samples still flagged as needing a simulation."""
        return np.flatnonzero(self.m[:]).tolist()

    def add_sim(self, i, x):
        """Store simulation output ``x`` for sample ``i`` and clear its needs-simulation flag."""
        self.x[i] = x
        self.m[i] = False

In [3]:
class Intensity:
    """Uniform Poisson intensity on the axis-aligned box ``[z0, z1]``.

    Args:
        mu: expected number of points (Poisson mean).
        z0: lower corner of the box, one entry per dimension.
        z1: upper corner of the box, one entry per dimension.

    NOTE(review): a second class named ``Intensity`` is defined further down
    in this notebook and replaces this one once its cell has been run.
    """
    def __init__(self, mu, z0, z1):
        self.mu = mu
        self.z0 = np.array(z0)
        self.z1 = np.array(z1)

    def sample(self):
        """Draw a Poisson(mu) number of points uniformly from the box.

        Returns:
            Array of shape (N, len(z0)) with N ~ Poisson(mu).
        """
        N = np.random.poisson(self.mu, 1)[0]
        q = np.random.rand(N, len(self.z0))
        # Map the unit cube onto the box.
        q *= self.z1 - self.z0
        q += self.z0
        return q

    def __call__(self, z):
        """Intensity (density times expected count) at each row of ``z``."""
        return self._pdf(z)*self.mu

    def _pdf(self, z):
        """Uniform density on the box: 1/volume inside (bounds included), 0 outside.

        The last axis of ``z`` indexes the dimensions and is reduced away.
        """
        val = 1./(self.z1 - self.z0).prod()
        # A point is inside only if *every* coordinate is within bounds.  The
        # density is applied once per point, not once per coordinate (which
        # would yield val**d after a product over the last axis).
        inside = np.logical_and(z >= self.z0, z <= self.z1).all(axis=-1)
        return np.where(inside, val, 0.)

In [4]:
# Open the on-disk datastore and declare the per-sample shapes: 2 parameters per
# sample and a 400 x 400 simulation output.  `init` only prints a warning and
# returns if the store already contains samples.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
ds = DataStore("/home/ubuntu/volume1/dsX.zarr")
ds.init(zdim = 2, xdim = (400, 400))

Loading datastore: /home/ubuntu/volume1/dsX.zarr


In [5]:
# Target intensity: 3000 expected points, uniform on the box [0.5, 1] x [0.5, 1].
p = Intensity(3000, [0.5, 0.5], [1., 1.])
# Grow the datastore if needed and select the stored samples that represent `p`.
indices = ds.sample(p)
# Indices of stored samples whose simulation output is still missing.
indices_req = ds.requires_sim()

No new simulator runs required.


In [6]:
# Number of selected samples vs. number of samples still awaiting a simulation.
print(len(indices), len(indices_req))

2979 2673


In [7]:
def model(z):
    """Toy simulator returning a 400 x 400 array of standard-normal noise.

    The parameter vector ``z`` is accepted but not used.
    """
    image_shape = (400, 400)
    noise = np.random.randn(*image_shape)
    return noise

In [8]:
def simulate(model, ds):
    """Run ``model`` for every datastore entry that still lacks a simulation.

    For each index reported by ``ds.requires_sim()`` the parameters are read
    from ``ds``, passed to ``model`` and the result is written back with
    ``ds.add_sim``.  Progress is shown with a tqdm bar.
    """
    pending = tqdm(ds.requires_sim(), desc="Running simulations")
    for idx in pending:
        params = ds[idx][1]  # ds[idx] is the pair (x, z); only z is needed
        ds.add_sim(idx, model(params))

In [9]:
# Fill in every missing simulation output in the datastore using the toy model.
simulate(model, ds)

Running simulations: 100%|██████████| 2673/2673 [00:39<00:00, 67.72it/s]


In [790]:
class Data(torch.utils.data.Dataset):
    """Torch dataset exposing a subset of a datastore as ``dict(x=..., z=...)``.

    Args:
        datastore: object with indexable ``x`` and ``z`` attributes.
        indices: datastore indices that make up this dataset.
        noisemodel: optional callable, invoked as
            ``noisemodel(x, z, noiselevel=...)``; its result replaces ``x``.
        noiselevel: value forwarded to ``noisemodel``.  It is kept as a plain
            attribute so that it can be changed between epochs.
            NOTE(review): the default of 1. is an assumption (the original
            code never set this attribute); confirm against the noise model.

    Note: The noisemodel allows scheduled noise level increase during training.
    """
    def __init__(self, datastore, indices, noisemodel = None, noiselevel = 1.):
        super().__init__()
        self.ds = datastore
        self.indices = indices
        self.noisemodel = noisemodel
        # Must exist before __getitem__ is called with a noise model; without
        # it the lookup of self.noiselevel raised AttributeError.
        self.noiselevel = noiselevel

    def __len__(self):
        """Number of samples in this dataset."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict of float tensors with keys 'x' and 'z'."""
        # Obtain x, z
        i = self.indices[idx]
        x = self.ds.x[i]
        z = self.ds.z[i]

        # Add optional noise
        if self.noisemodel is not None:
            x = self.noisemodel(x, z, noiselevel = self.noiselevel)

        # Tensors
        x = torch.tensor(x).float()
        z = torch.tensor(z).float()

        # Done
        xz = dict(x=x, z=z)
        return xz

In [10]:
# NOTE(review): non-runnable sketch of a possible high-level interface, not working code.
# `Datastore` (lower-case "s") and `SWYFT` are not defined in the cells shown here; the
# class defined above is `DataStore`.  Consider moving this sketch into a markdown cell
# so that the notebook still runs from top to bottom.
ds = Datastore(...)
sw = SWYFT(..., ds = ds)
status = sw.run(3000, run_sims_inline = True)
if status == 'requires_sim':
    sim...
status == 'requires_sim', 'success', '...'

NameError: name 'Datastore' is not defined

In [791]:
# Torch dataset over the datastore entries selected by `ds.sample(p)`; no noise model.
dd = Data(ds, indices)

In [794]:
# Smoke test: read every sample through the dataset and print the sum of its x tensor.
# NOTE(review): prints one line per sample; consider limiting this to the first few entries.
for i in range(len(dd)):
    print(dd[i]['x'].sum())

tensor(-107.9124)
tensor(17.1382)
tensor(-80.0679)
tensor(133.7194)
tensor(371.1330)
tensor(-378.2868)
tensor(-185.0058)
tensor(-300.8576)
tensor(112.9492)
tensor(-95.6544)
tensor(644.2219)
tensor(172.4902)
tensor(-135.6606)
tensor(-153.9301)
tensor(-9.1181)
tensor(613.3198)
tensor(730.1019)
tensor(-91.0823)
tensor(-368.1519)
tensor(302.6432)
tensor(51.1544)
tensor(491.2526)
tensor(-405.9981)
tensor(764.8562)
tensor(-156.9392)


In [178]:
class Mask1d:
    """Indicator function of a union of non-overlapping closed 1-d intervals.

    Args:
        intervals: sequence of ``[lower, upper]`` pairs (n x 2).
    """
    def __init__(self, intervals):
        # Stored as float so that integer bounds work with the in-place float
        # arithmetic and normalisation used below.
        self.intervals = np.array(intervals, dtype=float)  # n x 2 matrix

    def __call__(self, z):
        """Returns 1. if inside interval, otherwise 0.

        Works elementwise on arrays of any shape; the result is a float array
        of the same shape as ``z``.

        Raises:
            AssertionError: if some element of ``z`` lies in more than one interval.
        """
        z = np.asarray(z)
        # Integer input still needs a float accumulator; float input keeps its dtype.
        dtype = z.dtype if np.issubdtype(z.dtype, np.floating) else float
        m = np.zeros(z.shape, dtype=dtype)
        for z0, z1 in self.intervals:
            m += np.where((z >= z0) & (z <= z1), 1., 0.)
        # np.any (not the builtin) so that z of any dimensionality is supported.
        assert not np.any(m > 1.), "Overlapping intervals."
        return m

    def area(self):
        """Combined length of all intervals."""
        return (self.intervals[:, 1] - self.intervals[:, 0]).sum()

    def sample(self, N):
        """Draw ``N`` points uniformly from the union of the intervals.

        An interval is chosen with probability proportional to its length,
        then a point is drawn uniformly inside it.

        Returns:
            Array of shape (N,).
        """
        lengths = self.intervals[:, 1] - self.intervals[:, 0]
        p = lengths / lengths.sum()
        i = np.random.choice(len(p), size = N, replace = True, p = p)
        w = np.random.rand(N)
        z = self.intervals[i, 0] + w*lengths[i]
        return z

In [179]:
class FactorMask:
    """Product of independent 1-d masks, one per parameter dimension.

    Args:
        masks: sequence of mask objects; mask ``i`` acts on column ``i`` and
            must provide ``__call__``, ``area()`` and ``sample(N)``.
    """
    def __init__(self, masks):
        self.masks = masks
        self.d = len(masks)

    def __call__(self, z):
        """Evaluate the product mask at each row of ``z``.

        Args:
            z: array of shape (N, d).

        Returns:
            Array of shape (N,): the product of the per-dimension mask values.
        """
        m = [self.masks[i](z[:,i]) for i in range(self.d)]
        # m has shape (d, N): reduce over the dimension axis (0), leaving one
        # value per sample.  Reducing over axis 1 would multiply across samples.
        return np.array(m).prod(axis=0)

    def area(self):
        """Volume of the product region: the product of the 1-d areas."""
        m = [self.masks[i].area() for i in range(self.d)]
        return np.array(m).prod()

    def sample(self, N):
        """Draw ``N`` points, sampling every dimension independently from its mask.

        Returns:
            Array of shape (N, d).
        """
        z = np.empty((N, self.d))
        for i in range(self.d):
            z[:,i] = self.masks[i].sample(N)
        return z

In [180]:
class Intensity:
    """Poisson intensity that is constant on a mask and zero elsewhere.

    ``mu`` is the expected number of points.  ``mask`` supplies the support
    through ``mask(z)``, ``mask.area()`` and ``mask.sample(N)``.
    """
    def __init__(self, mu, mask):
        self.mu, self.mask = mu, mask
        # Cached once; the normalisation is needed for every evaluation.
        self.area = mask.area()

    def __call__(self, z):
        """Intensity at ``z``: mask value divided by the mask area, times mu."""
        density = self.mask(z) / self.area
        return density * self.mu

    def sample(self):
        """Draw a Poisson(mu)-distributed number of points from the mask."""
        (n_points,) = np.random.poisson(self.mu, 1)
        return self.mask.sample(n_points)

In [181]:
# Two 1-d masks: m1 covers [0, 0.1] and [0.9, 1.0]; m2 has an extremely narrow
# first interval [0, 1e-7] next to [0.9, 1.0].
bins1 = [[0,0.1], [0.9, 1.0]]
bins2 = [[0,0.0000001], [0.9, 1.0]]
m1 = Mask1d(bins1)
m2 = Mask1d(bins2)
# 3-d product mask; draw 10000 points from it and evaluate the mask on them.
mf = FactorMask([m1, m2, m2])
z = mf.sample(10000)
mf(z).max()

1.0

In [182]:
# Intensity with expected count mu = 1 on the product mask; where the mask is 1
# its value is mu / area.
I = Intensity(1, mf)
# NOTE(review): the division by 1000 is an unexplained magic number.
I(z).min()/1000

0.4999990000015002

In [186]:
def get_lnL(net, x0, z, n_batch = 64):
    """Return current estimate of normalized marginal 1-dim lnL.

    The network is evaluated on ``z`` in minibatches, always with the same
    observation ``x0``.

    Args:
        net (nn.Module): trained ratio estimation net, called as
            ``net(x0.unsqueeze(0), zbatch)``.
        x0 (torch.tensor): data.
        z : (nsamples, pnum, pdim)
        n_batch (int): minibatch size.

    Returns:
        lnL: (nsamples, pnum), detached and on the CPU.

    Raises:
        RuntimeError: if ``z`` is empty (there is nothing to concatenate).
    """
    nsamples = len(z)
    x_batch = x0.unsqueeze(0)  # add the batch dimension once

    # Step through the start offsets of the real batches only.  The earlier
    # `range(nsamples//n_batch+1)` also produced an empty trailing batch
    # whenever nsamples was a multiple of n_batch.
    lnL = []
    for start in range(0, nsamples, n_batch):
        zbatch = z[start:start + n_batch]
        lnL.append(net(x_batch, zbatch).detach().cpu())

    return torch.cat(lnL)

In [183]:
# Look up swyft's own get_lnL.
# NOTE(review): this cell raised NameError because `swyft` was not imported in the
# kernel at that point; it only works after the import cell has run successfully.
swyft.get_lnL

NameError: name 'swyft' is not defined