In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import gurobipy as gp
from gurobipy import GRB
from ast import literal_eval
import scipy
import json
import tqdm

In [2]:
# Gurobi optimization status codes (the integer value of `model.status`)
# mapped to their symbolic names, used to print a readable solver outcome.
# Codes 16 and 17 were added in later Gurobi releases and are included so
# that a lookup never fails on a work- or memory-limited solve.
status_codes = {
    1: 'LOADED',
    2: 'OPTIMAL',
    3: 'INFEASIBLE',
    4: 'INF_OR_UNBD',
    5: 'UNBOUNDED',
    6: 'CUTOFF',
    7: 'ITERATION_LIMIT',
    8: 'NODE_LIMIT',
    9: 'TIME_LIMIT',
    10: 'SOLUTION_LIMIT',
    11: 'INTERRUPTED',
    12: 'NUMERIC',
    13: 'SUBOPTIMAL',
    14: 'INPROGRESS',
    15: 'USER_OBJ_LIMIT',
    16: 'WORK_LIMIT',
    17: 'MEM_LIMIT'
}

# Moment Bootstrap

As an alternative (or in addition) to bootstrapping confidence intervals for probabilities, we can bootstrap confidence intervals for moments, e.g.

$$ \mathbb{E}[X_{1}^{OB}] \quad \mathbb{E}[X_{2}^{OB}] \quad \mathbb{E}[X_{1}^{OB}X_{2}^{OB}] \quad \cdots $$

These can then be easily rescaled by the capture efficiency to relate them to the $OG$ counts, and they form constraints on the probabilities (and so on the CME), e.g.

$$ \mathbb{E}[X_{1}^{OG}] = \sum_{x_{1}^{OG}} x_{1}^{OG} p_{1}(x_{1}^{OG}) \in \text{CI} $$

## Code: Simulation

In [3]:
def gillespie(params, n, beta, tmax=100, ts=10, plot=False, initial_state=(0, 0), seed=None):
    '''
    Simulate a sample path of birth-death regulation model.

    Gillespie algorithm to simulate a sample path of the markov chain described
    by the birth-death regulation stochastic reaction network model with given
    parameters. After a burn-in time of 'tmax' samples are taken from the sample
    path at time intervals of 'ts'. The states / samples are pairs of counts
    (x1, x2) from a pair of genes. Each count is then thinned by a
    Binomial(count, beta) draw to model capture efficiency.

    Args:
        params: dict of reaction rate constants 'k_tx_1', 'k_tx_2', 'k_deg_1',
                'k_deg_2', 'k_reg'
        n: sample size
        beta: per cell capture efficiency vector of size n / single value
        tmax: burn-in time of simulation
        ts: time between samples
        plot: toggle plotting of sample path
        initial_state: starting state (x1, x2) of simulation
        seed: optional seed for the random generator; the same seed gives the
              same sample path and the same observed counts

    Returns:
        A dictionary containing results

        Samples without capture efficiency

        'x1_OG': n samples from gene 1
        'x2_OG': n samples from gene 2
        'OG': n pairs of samples

        Samples with capture efficiency

        'x1_OB': n samples from gene 1 affected by capture efficiency
        'x2_OB': n samples from gene 2 affected by capture efficiency
        'OB': n pairs of samples affected by capture efficiency
    '''

    # one generator drives both the jumps and the capture step so that a
    # single seed reproduces the whole simulation
    rng = np.random.default_rng(seed)

    # total simulated time: burn-in plus the time spanned by n samples
    t_end = tmax + (n - 1) * ts

    # state change caused by each reaction, in the same order as the rates:
    # transcription 1 / 2, degradation 1 / 2, regulation (removes one of each)
    jumps = ((1, 0), (0, 1), (-1, 0), (0, -1), (-1, -1))

    # initialise time and state
    t = 0
    path = [initial_state]
    jump_times = [0]

    # simulate for burn-in time and time between n samples
    while t < t_end:

        # current state
        x1, x2 = path[-1][0], path[-1][1]

        # transition rates
        rates = [
            params['k_tx_1'],
            params['k_tx_2'],
            x1 * params['k_deg_1'],
            x2 * params['k_deg_2'],
            x1 * x2 * params['k_reg']
        ]
        q_hold = sum(rates)

        # absorbing state (no reaction can fire): hold it until the end
        # instead of dividing by zero
        if q_hold <= 0:
            jump_times.append(t_end)
            path.append((x1, x2))
            break

        # exponential holding time in current state
        t += rng.exponential(1 / q_hold)
        jump_times.append(t)

        # choose the reaction in proportion to its rate and apply it
        prob = [q / q_hold for q in rates]
        d1, d2 = jumps[rng.choice(len(jumps), p=prob)]
        path.append((x1 + d1, x2 + d2))

    # take the transcript states
    x1_path = [state[0] for state in path]
    x2_path = [state[1] for state in path]

    # create step function of sample path from jump times and jump values
    x1_path_function = scipy.interpolate.interp1d(jump_times, x1_path, kind='previous')
    x2_path_function = scipy.interpolate.interp1d(jump_times, x2_path, kind='previous')

    # take values at sampling times as samples from stationary dist
    sample_times = [tmax + i * ts for i in range(n)]

    # convert to integers
    x1_samples = [int(x1) for x1 in x1_path_function(sample_times)]
    x2_samples = [int(x2) for x2 in x2_path_function(sample_times)]

    # apply capture efficiency: for each count, draw from Binomial(count, beta)
    x1_samples_beta = rng.binomial(x1_samples, beta).tolist()
    x2_samples_beta = rng.binomial(x2_samples, beta).tolist()

    # re-combine to pairs of samples
    samples = list(zip(x1_samples, x2_samples))
    samples_beta = list(zip(x1_samples_beta, x2_samples_beta))

    # plot sample paths
    if plot:
        x = np.linspace(0, t_end, 10000)
        plt.plot(x, x1_path_function(x), label="X1 sample path", color="blue")
        plt.plot(x, x2_path_function(x), label="X2 sample path", color="purple")
        plt.xlabel("Time")
        plt.ylabel("Counts")
        plt.legend()
        plt.show()

    # collect all samples: original and observed
    data = {
        'x1_OG': x1_samples,
        'x2_OG': x2_samples,
        'OG': samples,
        'x1_OB': x1_samples_beta,
        'x2_OB': x2_samples_beta,
        'OB': samples_beta
    }

    return data

## Code: Bootstrap 

### Probabilities

In [4]:
def bootstrap_probabilities(data, resamples=None, splits=1, thresh_OB=10, threshM_OB=10, plot=False, printing=False, seed=None):
    '''
    Compute confidence intervals on the distribution of a sample of count pairs.

    Compute confidence intervals for the joint and marginal probabilities of the
    observed sample using the percentile bootstrap. Compute a state space
    truncation using a given threshold on the number of samples per state,
    replacing intervals on probabilities of states below the threshold by
    [0, 1] to improve coverage.

    Args:
        data: dict of observed integer counts per cell with keys

              'OB': list of (x1, x2) pairs
              'x1_OB': list of gene 1 counts
              'x2_OB': list of gene 2 counts

        resamples: integer number of bootstrap resamples to use (defaults to
                   the sample size)
        splits: integer number of chunks the resampling is split into to
                reduce peak memory; any remainder of resamples / splits is
                spread over the first chunks so every resample is computed
        thresh_OB: threshold on observation frequency of a state pair for
                   state space truncation
        threshM_OB: threshold on observation frequency of a state for marginal
                    state space truncation
        plot: toggle plotting of confidence intervals and estimates
        printing: toggle printing of observed state space truncation
        seed: optional seed for the random generator

    Returns:
        A dictionary containing results

        Sample information:

        'data': original data dict used
        'sample_counts': occurrences of each state pair in the original sample
        'sample_counts_x1': occurrences of each state in the original sample (gene 1)
        'sample_counts_x2': occurrences of each state in the original sample (gene 2)

        Confidence intervals:

        'joint': (2, _, _) numpy array of CI bounds on joint distribution
        'x1': (2, _) numpy array of CI bounds on marginal distribution (gene 1)
        'x2': (2, _) numpy array of CI bounds on marginal distribution (gene 2)

        Truncation information

        'min_x1_OB', 'max_x1_OB', 'min_x2_OB', 'max_x2_OB': joint truncation
        'minM_x1_OB', 'maxM_x1_OB': marginal truncation (gene 1)
        'minM_x2_OB', 'maxM_x2_OB': marginal truncation (gene 2)
        'thresh_flag': bool, True if the joint truncation bounds were updated
                       from their initial values
        'thresh_flag_x1': as above for the marginal truncation (gene 1)
        'thresh_flag_x2': as above for the marginal truncation (gene 2)

    Raises:
        ValueError: if splits is smaller than 1
    '''

    if splits < 1:
        raise ValueError("splits must be a positive integer")

    # get sample size
    n = len(data['OB'])

    # get bootstrap size: default to sample size
    if resamples is None:
        resamples = n

    # initialize random generator
    rng = np.random.default_rng(seed)

    # compute maximum x1 and x2 values
    M = int(np.max(data['x1_OB']))
    N = int(np.max(data['x2_OB']))

    # map (x1, x2) pairs to integers: x2 + (N + 1) * x1
    integer_sample = np.array([x[1] + (N + 1)*x[0] for x in data['OB']], dtype='uint32')

    # maximum of integer sample
    D = (M + 1)*(N + 1) - 1

    # number of bootstrap samples per split (split to reduce memory usage):
    # the first 'extra' splits take one more so the sizes sum to 'resamples'
    base, extra = divmod(resamples, splits)
    split_sizes = [base + 1 if i < extra else base for i in range(splits)]

    # setup count array: every row is filled by the loop below
    counts = np.empty((resamples, M + 1, N + 1), dtype='uint32')

    # bootstrap in chunks of 'size' resamples, each a size x n array
    start = 0
    for size in split_sizes:

        # more splits than resamples: nothing to draw for this chunk
        if size == 0:
            continue

        # 'size' bootstrap samples as size x n array
        bootstrap_split = rng.choice(integer_sample, size=(size, n))

        # the row offsets below reach size * (D + 1): widen the dtype if that
        # does not fit in uint32, otherwise the addition wraps silently
        if size * (D + 1) > np.iinfo(np.uint32).max:
            bootstrap_split = bootstrap_split.astype(np.int64)

        # offset row i by (D + 1)i so one bincount separates the resamples
        bootstrap_split += np.arange(size, dtype=bootstrap_split.dtype)[:, None]*(D + 1)

        # flatten, count occurrences of each state and reshape, reversing map to give counts of each (x1, x2) pair
        counts_split = np.bincount(bootstrap_split.ravel(), minlength=size*(D + 1)).reshape(-1, M + 1, N + 1)

        # add to counts
        counts[start:start + size, :, :] = counts_split
        start += size

    # sum over columns / rows to give counts of each x1 / x2 state
    x1_counts = counts.sum(axis=2)
    x2_counts = counts.sum(axis=1)

    # compute 2.5% and 97.5% quantiles for each p(x1, x2), p(x1) and p(x2)
    bounds = np.quantile(counts, [0.025, 0.975], axis=0)
    x1_bounds = np.quantile(x1_counts, [0.025, 0.975], axis=0)
    x2_bounds = np.quantile(x2_counts, [0.025, 0.975], axis=0)

    # scale to probability
    bounds = bounds / n
    x1_bounds = x1_bounds / n
    x2_bounds = x2_bounds / n

    # count occurrences per (x1, x2) in the original sample
    sample_counts = np.bincount(integer_sample, minlength=D + 1).reshape(M + 1, N + 1)

    # sum over columns / rows to give counts per x1 / x2 state
    x1_sample_counts = sample_counts.sum(axis=1)
    x2_sample_counts = sample_counts.sum(axis=0)

    # set truncation bounds: start "inverted" so any state above threshold
    # pulls the minimum down / the maximum up
    min_x1_OB, max_x1_OB, min_x2_OB, max_x2_OB = M, 0, N, 0
    minM_x1_OB, maxM_x1_OB = M, 0
    minM_x2_OB, maxM_x2_OB = N, 0

    # set flag for changes
    thresh_flag = False
    thresh_flag_x1 = False
    thresh_flag_x2 = False

    # replace CI's for states below threshold occurrences by [0, 1] bounds
    for x1 in range(M + 1):
        for x2 in range(N + 1):
            # below: replace
            if sample_counts[x1, x2] < thresh_OB:
                bounds[:, x1, x2] = [0.0, 1.0]
            # above: update truncation
            else:
                # check if smaller than current min
                if x1 < min_x1_OB:
                    min_x1_OB = x1
                    thresh_flag = True
                if x2 < min_x2_OB:
                    min_x2_OB = x2
                    thresh_flag = True
                # check if larger than current max
                if x1 > max_x1_OB:
                    max_x1_OB = x1
                    thresh_flag = True
                if x2 > max_x2_OB:
                    max_x2_OB = x2
                    thresh_flag = True

    for x1 in range(M + 1):
        # below: replace
        if x1_sample_counts[x1] < threshM_OB:
            x1_bounds[:, x1] = [0.0, 1.0]
        # above: update truncation
        else:
            # check if smaller than current min
            if x1 < minM_x1_OB:
                minM_x1_OB = x1
                thresh_flag_x1 = True
            # check if larger than current max
            if x1 > maxM_x1_OB:
                maxM_x1_OB = x1
                thresh_flag_x1 = True

    for x2 in range(N + 1):
        # below: replace
        if x2_sample_counts[x2] < threshM_OB:
            x2_bounds[:, x2] = [0.0, 1.0]
        # above: update truncation
        else:
            # check if smaller than current min
            if x2 < minM_x2_OB:
                minM_x2_OB = x2
                thresh_flag_x2 = True
            # check if larger than current max
            if x2 > maxM_x2_OB:
                maxM_x2_OB = x2
                thresh_flag_x2 = True

    # if no truncation bound was updated: default to max range
    if not thresh_flag:
        min_x1_OB, max_x1_OB, min_x2_OB, max_x2_OB = 0, M, 0, N
    if not thresh_flag_x1:
        minM_x1_OB, maxM_x1_OB = 0, M
    if not thresh_flag_x2:
        minM_x2_OB, maxM_x2_OB = 0, N

    # plotting: squeeze=False keeps the axes array 2-dimensional even when
    # there is a single row / column of states
    if plot:
        fig, axs = plt.subplots(M + 1, N + 1, figsize=(10, 10), squeeze=False)
        fig.tight_layout()
        for x1 in range(M + 1):
            for x2 in range(N + 1):
                # within truncation: green CI lines
                if (x1 >= min_x1_OB) and (x2 >= min_x2_OB) and (x1 <= max_x1_OB) and (x2 <= max_x2_OB):
                    color = "green"
                else:
                    color = "red"
                axs[x1, x2].hist(counts[:, x1, x2] / n)
                axs[x1, x2].set_title(f"p({x1}, {x2})")
                axs[x1, x2].axvline(bounds[0, x1, x2], color=color)
                axs[x1, x2].axvline(bounds[1, x1, x2], color=color)

        plt.suptitle("X1 X2 Confidence Intervals")
        plt.show()

        fig, axs = plt.subplots(1, M + 1, figsize=(10, 3), squeeze=False)
        fig.tight_layout()
        for x1 in range(M + 1):
            # within truncation: green CI lines
            if (x1 >= minM_x1_OB) and (x1 <= maxM_x1_OB):
                color = "green"
            else:
                color = "red"
            axs[0, x1].hist(x1_counts[:, x1] / n)
            axs[0, x1].set_title(f"p({x1})")
            axs[0, x1].axvline(x1_bounds[0, x1], color=color)
            axs[0, x1].axvline(x1_bounds[1, x1], color=color)

        plt.suptitle("X1 Confidence Intervals")
        plt.show()

        fig, axs = plt.subplots(1, N + 1, figsize=(10, 3), squeeze=False)
        fig.tight_layout()
        for x2 in range(N + 1):
            # within truncation: green CI lines
            if (x2 >= minM_x2_OB) and (x2 <= maxM_x2_OB):
                color = "green"
            else:
                color = "red"
            axs[0, x2].hist(x2_counts[:, x2] / n)
            axs[0, x2].set_title(f"p({x2})")
            axs[0, x2].axvline(x2_bounds[0, x2], color=color)
            axs[0, x2].axvline(x2_bounds[1, x2], color=color)

        plt.suptitle("X2 Confidence Intervals")
        plt.show()

    # printing
    if printing:
        print(f"Box truncation: [{min_x1_OB}, {max_x1_OB}] x [{min_x2_OB}, {max_x2_OB}]")
        print(f"Marginal x1 truncation: [{minM_x1_OB}, {maxM_x1_OB}]")
        print(f"Marginal x2 truncation: [{minM_x2_OB}, {maxM_x2_OB}]")

    # collect results
    result_dict =  {
        'data': data,
        'sample_counts': sample_counts,
        'sample_counts_x1': x1_sample_counts,
        'sample_counts_x2': x2_sample_counts,
        'joint': bounds,
        'x1': x1_bounds,
        'x2': x2_bounds,
        'min_x1_OB': min_x1_OB,
        'max_x1_OB': max_x1_OB,
        'min_x2_OB': min_x2_OB,
        'max_x2_OB': max_x2_OB,
        'minM_x1_OB': minM_x1_OB,
        'maxM_x1_OB': maxM_x1_OB,
        'minM_x2_OB': minM_x2_OB,
        'maxM_x2_OB': maxM_x2_OB,
        'thresh_flag': thresh_flag,
        'thresh_flag_x1': thresh_flag_x1,
        'thresh_flag_x2': thresh_flag_x2
    }

    return result_dict

### Moments

In [142]:
def bootstrap_moments(data, beta, resamples=None, seed=None):
    '''
    Compute confidence intervals on the moments of a sample of count pairs.

    Compute 95% confidence intervals for moments of the observed (OB) counts
    using the percentile bootstrap, and rescale the first moments and the
    cross moment by the capture efficiency to give intervals on the moments
    of the original (OG) counts.

    Args:
        data: dict of observed integer counts per cell with keys

              'OB': list of (x1, x2) pairs (used for the sample size)
              'x1_OB': list of gene 1 counts
              'x2_OB': list of gene 2 counts

        beta: capture efficiency, single value or per cell sequence / array
        resamples: integer number of bootstrap resamples to use (defaults to
                   the sample size)
        seed: optional seed for the random generator

    Returns:
        A dictionary of (lower, upper) numpy arrays

        Observed counts:

        'E_x1_OB', 'E_x2_OB': CI bounds on E[X1], E[X2]
        'E_x1_x2_OB': CI bounds on E[X1X2]
        'E_x1_sq_OB', 'E_x2_sq_OB': CI bounds on E[X1^2], E[X2^2]
        'E_x1_x2_sq_OB': CI bounds on E[X1^2 X2^2]

        Rescaled to original counts:

        'E_x1_OG', 'E_x2_OG': OB bounds divided by E[beta]
        'E_x1_x2_OG': OB bounds divided by E[beta^2]
    '''

    # get sample size
    n = len(data['OB'])

    # get bootstrap size: default to sample size
    if resamples is None:
        resamples = n

    # initialize random generator
    rng = np.random.default_rng(seed)

    # convert sample to n x 2 array
    sample = np.array([data['x1_OB'], data['x2_OB']]).T

    # bootstrap to resamples x n x 2 array (rows of 'sample' are resampled)
    boot = rng.choice(sample, size=(resamples, n))

    # mean over axis 1 to get E[X1], E[X2] for each resample
    means = np.mean(boot, axis=1)

    # E[X1^2], E[X2^2]
    means_sq = np.mean(boot**2, axis=1)

    # product over axis 2 to get x1x2 counts
    prods = np.prod(boot, axis=2)

    # mean over axis 1 to get E[X1X2] for each resample
    prod_means = np.mean(prods, axis=1)

    # E[X1^2 X2^2]
    prod_means_sq = np.mean(prods**2, axis=1)

    # quantiles over resamples
    mean_bounds = np.quantile(means, [0.025, 0.975], axis=0)
    prod_mean_bounds = np.quantile(prod_means, [0.025, 0.975], axis=0)

    mean_bounds_sq = np.quantile(means_sq, [0.025, 0.975], axis=0)
    prod_mean_bounds_sq = np.quantile(prod_means_sq, [0.025, 0.975], axis=0)

    # collect OB moments
    result_dict = {
        'E_x1_OB': mean_bounds[:, 0],
        'E_x2_OB': mean_bounds[:, 1],
        'E_x1_x2_OB': prod_mean_bounds,
        'E_x1_sq_OB': mean_bounds_sq[:, 0],
        'E_x2_sq_OB': mean_bounds_sq[:, 1],
        'E_x1_x2_sq_OB': prod_mean_bounds_sq
    }

    # capture efficiency moments: convert first so that a plain list of per
    # cell efficiencies can be squared elementwise
    beta = np.asarray(beta, dtype=float)
    E_beta = np.mean(beta)
    E_beta_sq = np.mean(beta**2)

    # rescale to OG moments
    result_dict['E_x1_OG'] = result_dict['E_x1_OB'] / E_beta
    result_dict['E_x2_OG'] = result_dict['E_x2_OB'] / E_beta
    result_dict['E_x1_x2_OG'] = result_dict['E_x1_x2_OB'] / E_beta_sq

    return result_dict

## Testing

In [143]:
# settings: unit rate constants for every reaction of the model
params = dict(k_tx_1=1, k_tx_2=1, k_deg_1=1, k_deg_2=1, k_reg=1)

# sample size and capture efficiency shared by all cells
n, beta = 1000, 0.5

# simulate original (OG) and observed (OB) counts
data = gillespie(params, n, beta)

In [144]:
# bootstrap CIs on the joint and marginal probabilities of the observed
# counts, using the default number of resamples and truncation thresholds
probabilities = bootstrap_probabilities(data)

In [146]:
# bootstrap CIs on the moments of the observed counts, rescaled by the
# capture efficiency 'beta' to also give bounds on the original counts
moments = bootstrap_moments(data, beta)

In [117]:
# moment bounds implied by the probability CIs: weight each state by its
# lower / upper probability bound, summing only over the marginal truncation
cut_x1 = probabilities['maxM_x1_OB'] + 1
cut_x2 = probabilities['maxM_x2_OB'] + 1

# first moments from the marginal bounds
prob_E_x1 = (probabilities['x1'][:, :cut_x1] * np.arange(cut_x1)).sum(axis=1)
prob_E_x2 = (probabilities['x2'][:, :cut_x2] * np.arange(cut_x2)).sum(axis=1)

# cross moment from the joint bounds: state weights are the products x1 * x2
prob_E_x1_x2 = (
    np.outer(np.arange(cut_x1), np.arange(cut_x2)) * probabilities['joint'][:, :cut_x1, :cut_x2]
).sum(axis=(1, 2))

In [147]:
# compare: bounds bootstrapped directly on the moments
print(
    "(OB) Moment bounds:\n",
    "E[X1] = ({}, {})".format(*moments['E_x1_OB']),
    "E[X2] = ({}, {})".format(*moments['E_x2_OB']),
    "E[X1X2] = ({}, {})".format(*moments['E_x1_x2_OB']),
    "E[X1]E[X2] = ({}, {})".format(*(moments['E_x1_OB'] * moments['E_x2_OB'])),
    sep="\n"
)

# bounds derived from the bootstrapped probability intervals
print(
    "\n(OB) Probability moment bounds:\n",
    "E[X1] = ({}, {})".format(*prob_E_x1),
    "E[X2] = ({}, {})".format(*prob_E_x2),
    "E[X1X2] = ({}, {})".format(*prob_E_x1_x2),
    "E[X1]E[X2] = ({}, {})".format(*(prob_E_x1 * prob_E_x2)),
    sep="\n"
)

# marginal truncation used for the probability-based bounds
print(
    "\n(OB) Truncation\n",
    "[0, {}] x [0, {}]".format(probabilities['maxM_x1_OB'], probabilities['maxM_x2_OB']),
    sep="\n"
)

(OB) Moment bounds:

E[X1] = (0.303, 0.378025)
E[X2] = (0.29, 0.356)
E[X1X2] = (0.069, 0.112)
E[X1]E[X2] = (0.08786999999999999, 0.1345769)

(OB) Probability moment bounds:

E[X1] = (0.271, 0.37302499999999994)
E[X2] = (0.26597499999999996, 0.369)
E[X1X2] = (0.033975, 8.06)
E[X1]E[X2] = (0.072079225, 0.13764622499999998)

(OB) Truncation

[0, 2] x [0, 2]


In [149]:
# moment bounds rescaled by the capture efficiency to the original (OG) counts
print(
    "(OG) Moment bounds:\n",
    "E[X1] = ({}, {})".format(*moments['E_x1_OG']),
    "E[X2] = ({}, {})".format(*moments['E_x2_OG']),
    "E[X1X2] = ({}, {})".format(*moments['E_x1_x2_OG']),
    "E[X1]E[X2] = ({}, {})".format(*(moments['E_x1_OG'] * moments['E_x2_OG'])),
    sep="\n"
)

(OG) Moment bounds:

E[X1] = (0.606, 0.75605)
E[X2] = (0.58, 0.712)
E[X1X2] = (0.276, 0.448)
E[X1]E[X2] = (0.35147999999999996, 0.5383076)


## Code: Constraints

### Base constraints

In [11]:
def add_base_constraints(model, variables):
    '''
    Add the constraints shared by every model.

    Fixes both degradation rates to 1 (k_deg_1 = 1, k_deg_2 = 1) for
    identifiability and bounds the total mass of each marginal distribution
    variable by 1.

    Args:
        model: optimization model the constraints are added to
        variables: dict with the rate variables under 'rates' and the marginal
                   distribution variables under 'p1' and 'p2'
    '''

    rates = variables['rates']

    # fix degradation rates for identifiability
    for gene in (1, 2):
        model.addConstr(rates[f'k_deg_{gene}'] == 1, name=f"Fix_k_deg_{gene}")

    # distributional constraints: marginal mass at most 1
    for gene in (1, 2):
        model.addConstr(variables[f'p{gene}'].sum() <= 1, name=f"Dist_x{gene}")

In [12]:
def add_independence_constraint(model, variables):
    '''
    Constrain the joint distribution to be the product of its marginals.

    Args:
        model: optimization model the constraint is added to
        variables: dict with the marginal variables 'p1', 'p2' and the joint
                   variable 'p'
    '''

    marginal_x1 = variables['p1']
    marginal_x2 = variables['p2']
    joint = variables['p']

    # column vector times row vector: outer product of the marginals
    product = marginal_x1[:, None] @ marginal_x2[None, :]

    # the joint variable must equal the product on every state pair
    model.addConstr(joint == product, name="Independece")

In [13]:
def add_variables(model, extent_OG, joint=False, rate_ub=100):
    '''
    Create the decision variables of the model.

    Args:
        model: gurobipy model the variables are added to
        extent_OG: dict with the largest original state used per gene under
                   'max_x1_OG' and 'max_x2_OG'
        joint: if True also create the joint distribution variable 'p'
        rate_ub: upper bound placed on every reaction rate constant

    Returns:
        A dictionary of variables

        'p1', 'p2': marginal stationary distributions (size = largest
                    original state used + 1), each entry in [0, 1]
        'rates': reaction rate constants 'k_tx_1', 'k_tx_2', 'k_deg_1',
                 'k_deg_2', each in [0, rate_ub]
        'E_x1', 'E_x2': first moment variables
        'p': joint distribution (only if joint is True)
    '''

    # variable sizes: OG state extent + 1
    p1_size = extent_OG['max_x1_OG'] + 1
    p2_size = extent_OG['max_x2_OG'] + 1

    print(f"Variable sizes: {p1_size}, {p2_size}")

    # marginal stationary distributions: original counts (size = largest original state used + 1)
    p1 = model.addMVar(shape=(p1_size), vtype=GRB.CONTINUOUS, name="p1", lb=0, ub=1)
    p2 = model.addMVar(shape=(p2_size), vtype=GRB.CONTINUOUS, name="p2", lb=0, ub=1)

    # joint variable to avoid triple products (not supported by GUROBI): should be removed by presolve
    if joint:
        p = model.addMVar(shape=(p1_size, p2_size), vtype=GRB.CONTINUOUS, name="p_dummy", lb=0, ub=1)

    # reaction rate constants
    rate_names = ['k_tx_1', 'k_tx_2', 'k_deg_1', 'k_deg_2']
    rates = model.addVars(rate_names, vtype=GRB.CONTINUOUS, lb=0, ub=rate_ub, name=rate_names)

    # moments
    E_x1 = model.addVar(vtype=GRB.CONTINUOUS, name="E_x1")
    E_x2 = model.addVar(vtype=GRB.CONTINUOUS, name="E_x2")

    # collect variables
    variables = {
        'p1': p1,
        'p2': p2,
        'rates': rates,
        'E_x1': E_x1,
        'E_x2': E_x2
    }

    if joint:
        variables['p'] = p

    return variables

### CME

In [14]:
def add_CME_constraints(model, variables, extent_OG):
    '''
    Add stationary chemical master equation (CME) constraints on the joint
    distribution of two genes with independent birth-death dynamics.

    One constraint is added per state (x1, x2) with x1 < max_x1_OG and
    x2 < max_x2_OG, so that every state referenced (up to x1 + 1, x2 + 1)
    lies inside the joint variable. Boundary states are handled separately
    to avoid referencing negative states.

    Args:
        model: optimization model the constraints are added to
        variables: dict with the joint distribution 'p' and the rate
                   variables under 'rates'
        extent_OG: dict with the largest original state per gene under
                   'max_x1_OG' and 'max_x2_OG'
    '''

    # get extent of OG states
    max_x1_OG = extent_OG['max_x1_OG']
    max_x2_OG = extent_OG['max_x2_OG']

    # get variables: each degradation rate is paired with its own gene
    p = variables['p']
    k_tx_1 = variables['rates']['k_tx_1']
    k_tx_2 = variables['rates']['k_tx_2']
    k_deg_1 = variables['rates']['k_deg_1']
    k_deg_2 = variables['rates']['k_deg_2']

    # manually add x1_OG = x2_OG = 0 constraint (to avoid p(-1) terms)
    model.addConstr(
        0 == (
            k_deg_1 * p[1, 0]
            + k_deg_2 * p[0, 1]
            - (k_tx_1 + k_tx_2) * p[0, 0]
        ),
        name="CME_0_0"
    )

    # manually add x1_OG = 0 constraints (to avoid p(-1, x2) terms)
    model.addConstrs(
        (
            0 == (
                k_tx_2 * p[0, x2_OG - 1]
                + k_deg_1 * p[1, x2_OG]
                + k_deg_2 * (x2_OG + 1) * p[0, x2_OG + 1]
                - (k_tx_1 + k_tx_2 + k_deg_2 * x2_OG) * p[0, x2_OG]
            )
            for x2_OG in range(1, max_x2_OG)
        ),
        name="CME_0_x2"
    )

    # manually add x2_OG = 0 constraints (to avoid p(x1, -1) terms)
    model.addConstrs(
        (
            0 == (
                k_tx_1 * p[x1_OG - 1, 0]
                + k_deg_1 * (x1_OG + 1) * p[x1_OG + 1, 0]
                + k_deg_2 * p[x1_OG, 1]
                - (k_tx_1 + k_tx_2 + k_deg_1 * x1_OG) * p[x1_OG, 0]
            )
            for x1_OG in range(1, max_x1_OG)
        ),
        name="CME_x1_0"
    )

    # add CME constraints for interior states
    model.addConstrs(
        (
            0 == (
                k_tx_1 * p[x1_OG - 1, x2_OG]
                + k_tx_2 * p[x1_OG, x2_OG - 1]
                + k_deg_1 * (x1_OG + 1) * p[x1_OG + 1, x2_OG]
                + k_deg_2 * (x2_OG + 1) * p[x1_OG, x2_OG + 1]
                - (k_tx_1 + k_tx_2 + k_deg_1 * x1_OG + k_deg_2 * x2_OG) * p[x1_OG, x2_OG]
            )
            for x1_OG in range(1, max_x1_OG)
            for x2_OG in range(1, max_x2_OG)
        ),
        name="CME_x1_x2"
    )

In [15]:
def add_marginal_CME_constraints(model, variables, extent_OG):
    '''
    Add stationary birth-death CME constraints on each marginal distribution.

    For each gene the constraint k_tx * (Q_tx @ p) + k_deg * (Q_deg @ p) == 0
    is added, with one row per state below the largest state of that gene.

    Args:
        model: optimization model the constraints are added to
        variables: dict with the marginal distributions 'p1', 'p2' and the
                   rate variables under 'rates'
        extent_OG: dict with the largest original state per gene under
                   'max_x1_OG' and 'max_x2_OG'
    '''

    def rate_matrices(max_state):
        # transcription and degradation matrices over states 0..max_state;
        # the last row is dropped so each remaining row only references
        # states inside the variable (1 more column than rows)
        states = np.arange(max_state + 1)
        Q_tx = (np.diag(np.ones(max_state, dtype=int), -1) - np.diag(np.ones(max_state + 1, dtype=int)))[:-1, :]
        Q_deg = (np.diag(states[1:], 1) - np.diag(states))[:-1, :]
        return Q_tx, Q_deg

    # get extent of OG states
    max_x1_OG = extent_OG['max_x1_OG']
    max_x2_OG = extent_OG['max_x2_OG']

    # get variables: each degradation rate is paired with its own gene
    p1 = variables['p1']
    p2 = variables['p2']
    k_tx_1 = variables['rates']['k_tx_1']
    k_tx_2 = variables['rates']['k_tx_2']
    k_deg_1 = variables['rates']['k_deg_1']
    k_deg_2 = variables['rates']['k_deg_2']

    # construct Q matrices
    Q_tx_1, Q_deg_1 = rate_matrices(max_x1_OG)
    Q_tx_2, Q_deg_2 = rate_matrices(max_x2_OG)

    # add matrix constraints
    model.addConstr(
        k_tx_1 * (Q_tx_1 @ p1) + k_deg_1 * (Q_deg_1 @ p1) == 0,
        name="Marginal_CME_x1"
    )

    model.addConstr(
        k_tx_2 * (Q_tx_2 @ p2) + k_deg_2 * (Q_deg_2 @ p2) == 0,
        name="Marginal_CME_x2"
    )

### Moment constraints

In [197]:
def add_moment_constraint(model, variables, extent_OG, moment_bounds):
    '''
    Constrain the first moments of the marginals, and their product, to lie
    within bootstrap confidence intervals.

    Args:
        model: optimization model the constraints are added to
        variables: dict with the marginal distributions 'p1', 'p2' and the
                   moment variables 'E_x1', 'E_x2'
        extent_OG: dict with the largest original state per gene under
                   'max_x1_OG' and 'max_x2_OG'
        moment_bounds: dict with (lower, upper) bounds under 'E_x1_OG',
                       'E_x2_OG' and 'E_x1_x2_OG'
    '''

    # get extent of OG states
    max_x1_OG, max_x2_OG = extent_OG['max_x1_OG'], extent_OG['max_x2_OG']

    # get variables
    p1, p2 = variables['p1'], variables['p2']
    E_x1, E_x2 = variables['E_x1'], variables['E_x2']

    genes = (('x1', p1, E_x1, max_x1_OG), ('x2', p2, E_x2, max_x2_OG))

    # expressions for moments: sum over states of state * probability
    expressions = [
        gp.quicksum(dist * np.arange(max_state + 1))
        for _, dist, _, max_state in genes
    ]

    # equate moment variables to their expressions
    for (label, _, mean, _), expression in zip(genes, expressions):
        model.addConstr(mean == expression, name=f"E_{label}_equality")

    # moment bounds: upper then lower for each gene
    for label, _, mean, _ in genes:
        model.addConstr(mean <= moment_bounds[f'E_{label}_OG'][1], name=f"E_{label}_UB")
        model.addConstr(mean >= moment_bounds[f'E_{label}_OG'][0], name=f"E_{label}_LB")

    # moment independence constraint: the product of the means is bounded by
    # the interval on the cross moment
    cross_bounds = moment_bounds['E_x1_x2_OG']
    model.addConstr(E_x1 * E_x2 <= cross_bounds[1], name="Indep_UB")
    model.addConstr(E_x1 * E_x2 >= cross_bounds[0], name="Indep_LB")

In [17]:
def add_moment_constraint_linear(model, variables, extent_OG, moment_bounds):
    '''
    Constrain the first moments and the cross moment E[X1X2] to lie within
    bootstrap confidence intervals, with every moment written as a linear
    expression of the probability variables.

    Adds a new variable 'E_x1_x2' to the model and stores it in 'variables'.

    Args:
        model: gurobipy model the constraints are added to
        variables: dict with the joint distribution 'p', the marginal
                   distributions 'p1', 'p2' and the moment variables 'E_x1',
                   'E_x2'
        extent_OG: dict with the largest original state per gene under
                   'max_x1_OG' and 'max_x2_OG'
        moment_bounds: dict with (lower, upper) bounds for 'E_x1', 'E_x2' and
                       'E_x1_x2'; each is looked up under that exact key
                       first, then under the '_OG' suffixed key

    Raises:
        KeyError: if a required moment has no bounds under either key
    '''

    def bounds_for(moment):
        # accept both the un-suffixed keys and the '_OG' keys used by
        # add_moment_constraint
        for key in (moment, f'{moment}_OG'):
            if key in moment_bounds:
                return moment_bounds[key]
        raise KeyError(f"no bounds for '{moment}' or '{moment}_OG' in moment_bounds")

    # get extent of OG states
    max_x1_OG = extent_OG['max_x1_OG']
    max_x2_OG = extent_OG['max_x2_OG']

    # resolve bounds before touching the model so a missing key leaves it unchanged
    bounds_E_x1 = bounds_for('E_x1')
    bounds_E_x2 = bounds_for('E_x2')
    bounds_E_x1_x2 = bounds_for('E_x1_x2')

    # add new variable
    E_x1_x2 = model.addVar(vtype=GRB.CONTINUOUS, name="E_x1_x2")
    variables['E_x1_x2'] = E_x1_x2

    # get variables
    p = variables['p']
    p1 = variables['p1']
    p2 = variables['p2']
    E_x1 = variables['E_x1']
    E_x2 = variables['E_x2']

    # expressions for moments
    expr_E_x1 = gp.quicksum(p1 * np.arange(max_x1_OG + 1))
    expr_E_x2 = gp.quicksum(p2 * np.arange(max_x2_OG + 1))
    expr_E_x1_x2 = gp.quicksum(x1 * x2 * p[x1, x2] for x1 in range(max_x1_OG + 1) for x2 in range(max_x2_OG + 1))

    # equate expressions
    model.addConstr(E_x1 == expr_E_x1, name="E_x1_equality")
    model.addConstr(E_x2 == expr_E_x2, name="E_x2_equality")
    model.addConstr(E_x1_x2 == expr_E_x1_x2, name="E_x1_x2_equality")

    # moment bounds
    model.addConstr(E_x1 <= bounds_E_x1[1], name="E_x1_UB")
    model.addConstr(E_x1 >= bounds_E_x1[0], name="E_x1_LB")
    model.addConstr(E_x2 <= bounds_E_x2[1], name="E_x2_UB")
    model.addConstr(E_x2 >= bounds_E_x2[0], name="E_x2_LB")
    model.addConstr(E_x1_x2 <= bounds_E_x1_x2[1], name="E_x1_x2_UB")
    model.addConstr(E_x1_x2 >= bounds_E_x1_x2[0], name="E_x1_x2_LB")

    # no product-of-means (E_x1 * E_x2) constraint is added here: the cross
    # moment is bounded directly through the joint distribution

### Higher moment constraints

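**Only valid for 100% capture efficiency!** The second-moment bounds are not rescaled from observed ($OB$) to original ($OG$) counts.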

In [122]:
def add_moment_constraint_higher(model, variables, extent_OG, moment_bounds, beta):
    '''
    Constrain first and second moments of the marginals, and the products of
    those moments, to lie within bootstrap confidence intervals.

    Only valid for 100% capture efficiency: the second-moment bounds are used
    as given and are not rescaled from observed to original counts.

    Adds new variables 'E_x1_sq', 'E_x2_sq' and 'E_x1_x2_sq' to the model and
    stores them in 'variables' ('E_x1_x2_sq' is created but not constrained).

    Args:
        model: gurobipy model the constraints are added to
        variables: dict with the marginal distributions 'p1', 'p2' and the
                   moment variables 'E_x1', 'E_x2'
        extent_OG: dict with the largest original state per gene under
                   'max_x1_OG' and 'max_x2_OG'
        moment_bounds: dict with (lower, upper) bounds for 'E_x1', 'E_x2',
                       'E_x1_sq', 'E_x2_sq', 'E_x1_x2' and 'E_x1_x2_sq'; each
                       is looked up under that exact key first, then under
                       the '_OG' and finally the '_OB' suffixed key (the two
                       coincide under 100% capture)
        beta: capture efficiency (currently unused)

    Raises:
        KeyError: if a required moment has no bounds under any accepted key
    '''

    def bounds_for(moment):
        # accept un-suffixed keys as well as the '_OG' / '_OB' suffixed keys
        for key in (moment, f'{moment}_OG', f'{moment}_OB'):
            if key in moment_bounds:
                return moment_bounds[key]
        raise KeyError(f"no bounds for '{moment}' (or its '_OG' / '_OB' variants) in moment_bounds")

    # get extent of OG states
    max_x1_OG = extent_OG['max_x1_OG']
    max_x2_OG = extent_OG['max_x2_OG']

    # resolve bounds before touching the model so a missing key leaves it unchanged
    bounds_E_x1 = bounds_for('E_x1')
    bounds_E_x2 = bounds_for('E_x2')
    bounds_E_x1_sq = bounds_for('E_x1_sq')
    bounds_E_x2_sq = bounds_for('E_x2_sq')
    bounds_E_x1_x2 = bounds_for('E_x1_x2')
    bounds_E_x1_x2_sq = bounds_for('E_x1_x2_sq')

    # get variables
    p1 = variables['p1']
    p2 = variables['p2']
    E_x1 = variables['E_x1']
    E_x2 = variables['E_x2']

    # add variables
    E_x1_sq = model.addVar(vtype=GRB.CONTINUOUS, name="E_x1_sq")
    E_x2_sq = model.addVar(vtype=GRB.CONTINUOUS, name="E_x2_sq")
    E_x1_x2_sq = model.addVar(vtype=GRB.CONTINUOUS, name="E_x1_x2_sq")

    # store
    variables['E_x1_sq'] = E_x1_sq
    variables['E_x2_sq'] = E_x2_sq
    variables['E_x1_x2_sq'] = E_x1_x2_sq

    # expressions for moments
    expr_E_x1 = gp.quicksum(p1 * np.arange(max_x1_OG + 1))
    expr_E_x2 = gp.quicksum(p2 * np.arange(max_x2_OG + 1))

    expr_E_x1_sq = gp.quicksum(p1 * np.arange(max_x1_OG + 1)**2)
    expr_E_x2_sq = gp.quicksum(p2 * np.arange(max_x2_OG + 1)**2)

    # equate expressions
    model.addConstr(E_x1 == expr_E_x1, name="E_x1_equality")
    model.addConstr(E_x2 == expr_E_x2, name="E_x2_equality")

    model.addConstr(E_x1_sq == expr_E_x1_sq, name="E_x1_sq_equality")
    model.addConstr(E_x2_sq == expr_E_x2_sq, name="E_x2_sq_equality")

    # moment bounds
    model.addConstr(E_x1 <= bounds_E_x1[1], name="E_x1_UB")
    model.addConstr(E_x1 >= bounds_E_x1[0], name="E_x1_LB")
    model.addConstr(E_x2 <= bounds_E_x2[1], name="E_x2_UB")
    model.addConstr(E_x2 >= bounds_E_x2[0], name="E_x2_LB")

    model.addConstr(E_x1_sq <= bounds_E_x1_sq[1], name="E_x1_sq_UB")
    model.addConstr(E_x1_sq >= bounds_E_x1_sq[0], name="E_x1_sq_LB")
    model.addConstr(E_x2_sq <= bounds_E_x2_sq[1], name="E_x2_sq_UB")
    model.addConstr(E_x2_sq >= bounds_E_x2_sq[0], name="E_x2_sq_LB")

    # moment independence constraints: products of the marginal moments are
    # bounded by the intervals on the corresponding cross moments
    model.addConstr(E_x1 * E_x2 <= bounds_E_x1_x2[1], name="Indep_UB")
    model.addConstr(E_x1 * E_x2 >= bounds_E_x1_x2[0], name="Indep_LB")

    model.addConstr(E_x1_sq * E_x2_sq <= bounds_E_x1_x2_sq[1], name="Indep_sq_UB")
    model.addConstr(E_x1_sq * E_x2_sq >= bounds_E_x1_x2_sq[0], name="Indep_sq_LB")

### Model

In [152]:
def model_setup(extent_OG, moment_bounds, silent=True):
    '''
    Build the moment-constrained model and report whether it is feasible.

    Creates a Gurobi environment from the WLS credentials file, constructs a
    model with the variables and the base, moment, independence, CME and
    marginal CME constraints, then optimizes a constant objective so that the
    solve acts as a feasibility test. The resulting status is printed.

    Args:
        extent_OG: truncation settings forwarded to the variable / constraint
                   builders (the test call passes a dict with keys
                   'max_x1_OG' and 'max_x2_OG')
        moment_bounds: moment bounds forwarded to add_moment_constraint
        silent: toggle suppressing solver output (sets 'OutputFlag' to 0)

    Returns:
        None, the model status is printed
    '''

    # WLS license: read inside a context manager so the file handle is closed
    with open("../../../WLS_credentials.json") as credentials_file:
        options = json.load(credentials_file)

    # silent
    if silent:
        options['OutputFlag'] = 0

    # environment context
    with gp.Env(params=options) as env:

        # model context
        with gp.Model('test-construction', env=env) as model:

            # model settings
            model.Params.TimeLimit = 300
            # model.Params.Presolve = 2

            # variables
            variables = add_variables(model, extent_OG, joint=True)

            # base constraints
            add_base_constraints(model, variables)

            # moment constraints
            add_moment_constraint(model, variables, extent_OG, moment_bounds)

            # independence constraints
            add_independence_constraint(model, variables)

            # CME constraints
            add_CME_constraints(model, variables, extent_OG)

            # marginal CME constraints
            add_marginal_CME_constraints(model, variables, extent_OG)

            # write to file
            # model.write("./Test-Info/Models/moment_model_test_linear.lp")

            # optimize: constant objective, only feasibility is of interest
            model.setObjective(0, GRB.MINIMIZE)
            model.optimize()

            # fall back to the raw code for statuses missing from the lookup
            # table, rather than raising a KeyError after the solve
            status = status_codes.get(model.status, model.status)
            print(f"Model is {status}")

            # IIS
            # if model.status == 3:
            #    model.computeIIS()
            #    model.write('./Test-Info/Models/iis_model.ilp')

## Testing

In [159]:
# settings: reaction rate constants, sample size and capture efficiency
params = dict(
    k_tx_1=1,
    k_tx_2=1,
    k_deg_1=1,
    k_deg_2=1,
    k_reg=2.3,
)
n, beta = 1000, 0.5

# simulate a sample of size n from the model
data = gillespie(params=params, n=n, beta=beta)

# bootstrap moments from the simulated sample
moments = bootstrap_moments(data, beta)

In [160]:
# feasibility test with solver output shown
model_setup(
    extent_OG={'max_x1_OG': 10, 'max_x2_OG': 10},
    moment_bounds=moments,
    silent=False,
)

Set parameter Username
Set parameter LicenseID to value 2616229
Set parameter WLSAccessID
Set parameter WLSSecret
Set parameter LicenseID to value 2587777
Academic license 2587777 - for non-commercial use only - registered to wj___@ic.ac.uk
Set parameter TimeLimit to value 300
Variable sizes: 11, 11
Gurobi Optimizer version 12.0.1 build v12.0.1rc0 (win64 - Windows 11.0 (22631.2))

CPU model: Intel(R) Core(TM) i5-1035G1 CPU @ 1.00GHz, instruction set [SSE2|AVX|AVX2|AVX512]
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads

Non-default parameters:
TimeLimit  300

Academic license 2587777 - for non-commercial use only - registered to wj___@ic.ac.uk
Optimize a model with 10 rows, 149 columns and 50 nonzeros
Model fingerprint: 0x92048dba
Model has 243 quadratic constraints
Coefficient statistics:
  Matrix range     [1e+00, 1e+01]
  QMatrix range    [1e+00, 1e+01]
  QLMatrix range   [1e+00, 1e+00]
  Objective range  [0e+00, 0e+00]
  Bounds range     [1e+00, 1e+02]
 

# Perfect Information, Perfect output?

Remove the uncertainty of the bootstrap: replace the CI bounds by the exact moment values (widened by a small tolerance for numerical stability) to test whether the optimization can recover the exact parameter values.

## Code: Computing exact moments

In [200]:
def exact_moments(params, beta, delta=0.01):
    '''
    Compute exact moments for a reaction network with given parameters, applying
    given capture efficiency to produce observed moments.

    Only valid without interaction: if 'k_reg' > 0 an error message is printed
    and None is returned.

    Args:
        params: dict of reaction rate constants 'k_tx_1', 'k_tx_2', 'k_deg_1',
                'k_deg_2', 'k_reg'
        beta: capture efficiency, a single value or a vector (list, tuple or
              array) of per cell values
        delta: half-width of the interval placed around each exact moment

    Returns:
        A dictionary mapping 'E_x1_OB', 'E_x2_OB', 'E_x1_x2_OB', 'E_x1_OG',
        'E_x2_OG', 'E_x1_x2_OG' to arrays (moment - delta, moment + delta),
        or None if 'k_reg' > 0
    '''

    # only valid when no interaction
    if params['k_reg'] > 0:
        print("ERROR: Invalid for k_reg > 0")
        return None

    # coerce so that plain lists / tuples of per cell values are supported
    # (squaring a python list raises a TypeError)
    beta = np.asarray(beta, dtype=float)

    # capture efficiency moments
    E_beta = np.mean(beta)
    E_beta_sq = np.mean(beta**2)

    # OG moments: mean is k_tx / k_deg, product moment factorises as the
    # genes do not interact
    E_x1_OG = params['k_tx_1'] / params['k_deg_1']
    E_x2_OG = params['k_tx_2'] / params['k_deg_2']
    E_x1_x2_OG = E_x1_OG * E_x2_OG

    # OB moments: first moments scale by E[beta], the product by E[beta^2]
    E_x1_OB = E_x1_OG * E_beta
    E_x2_OB = E_x2_OG * E_beta
    E_x1_x2_OB = E_x1_x2_OG * E_beta_sq

    # width for numerics
    eps = np.array([-delta, delta])

    # collect moments
    result_dict = {
        'E_x1_OB': E_x1_OB + eps,
        'E_x2_OB': E_x2_OB + eps,
        'E_x1_x2_OB': E_x1_x2_OB + eps,
        'E_x1_OG': E_x1_OG + eps,
        'E_x2_OG': E_x2_OG + eps,
        'E_x1_x2_OG': E_x1_x2_OG + eps
    }

    return result_dict

## Testing

In [201]:
# settings: no interaction (k_reg = 0)
params = dict(
    k_tx_1=1,
    k_tx_2=1,
    k_deg_1=1,
    k_deg_2=1,
    k_reg=0,
)
beta = 0.5

# compute exact moments
moments = exact_moments(params=params, beta=beta)

# display each moment interval as (lower, upper)
for key, val in moments.items():
    print("{} = ({}, {})".format(key, val[0], val[1]))

E_x1_OB = (0.49, 0.51)
E_x2_OB = (0.49, 0.51)
E_x1_x2_OB = (0.24, 0.26)
E_x1_OG = (0.99, 1.01)
E_x2_OG = (0.99, 1.01)
E_x1_x2_OG = (0.99, 1.01)


## Code: Optimizing for exact moments

In [209]:
def model_perfect(extent_OG, moment_bounds, silent=True):
    '''
    Bound each rate constant subject to the given moment bounds.

    Creates a Gurobi environment from the WLS credentials file, constructs a
    model with the variables and the base, moment, independence and marginal
    CME constraints (the joint CME constraints are left out), then minimizes
    and maximizes every variable in variables['rates'] in turn.

    Args:
        extent_OG: truncation settings forwarded to the variable / constraint
                   builders (the test call passes a dict with keys
                   'max_x1_OG' and 'max_x2_OG')
        moment_bounds: moment bounds forwarded to add_moment_constraint
        silent: toggle suppressing solver output (sets 'OutputFlag' to 0)

    Returns:
        A dictionary mapping each rate name to a dict with keys 'min',
        'min_status', 'max', 'max_status'. 'min' / 'max' are None when the
        corresponding solve finished without any solution.
    '''

    # WLS license: read inside a context manager so the file handle is closed
    with open("../../../WLS_credentials.json") as credentials_file:
        options = json.load(credentials_file)

    # silent
    if silent:
        options['OutputFlag'] = 0

    # environment context
    with gp.Env(params=options) as env:

        # model context
        with gp.Model('test-construction', env=env) as model:

            # model settings
            model.Params.TimeLimit = 300
            # model.Params.Presolve = 2

            # variables
            variables = add_variables(model, extent_OG, joint=True)

            # base constraints
            add_base_constraints(model, variables)

            # moment constraints
            add_moment_constraint(model, variables, extent_OG, moment_bounds)

            # independence constraints
            add_independence_constraint(model, variables)

            # CME constraints
            # add_CME_constraints(model, variables, extent_OG)

            # marginal CME constraints
            add_marginal_CME_constraints(model, variables, extent_OG)

            # write to file
            # model.write("./Test-Info/Models/moment_model_test_linear.lp")

            # solution dict
            solution_dict = {}

            # optimize rates: lower bound first, then upper bound
            for name, rate in variables['rates'].items():
                rate_dict = {}
                for label, sense in (('min', GRB.MINIMIZE), ('max', GRB.MAXIMIZE)):
                    model.setObjective(rate, sense)
                    model.optimize()

                    # ObjVal is only available when a solution was found
                    # (e.g. not for infeasible / unbounded solves)
                    rate_dict[label] = model.ObjVal if model.SolCount > 0 else None

                    # fall back to the raw code for statuses missing from the
                    # lookup table
                    rate_dict[f'{label}_status'] = status_codes.get(model.status, model.status)

                # store
                solution_dict[name] = rate_dict

                # display
                print(f"{name} in ({rate_dict['min']}, {rate_dict['max']}), status {rate_dict['min_status']}, {rate_dict['max_status']}")

    return solution_dict

## Testing

In [210]:
# settings: no interaction (k_reg = 0)
params = dict(
    k_tx_1=2,
    k_tx_2=2,
    k_deg_1=1,
    k_deg_2=1,
    k_reg=0,
)
beta = 0.75

# compute exact moments, each widened by +/- delta
moments = exact_moments(params=params, beta=beta, delta=0.01)

In [211]:
# bound the rate constants under the exact moment intervals
perfect_solution = model_perfect(
    extent_OG={'max_x1_OG': 10, 'max_x2_OG': 10},
    moment_bounds=moments,
    silent=True,
)

Variable sizes: 11, 11
k_tx_1 in (1.9900730367647164, 100.0), status OPTIMAL, OPTIMAL
k_tx_2 in (1.9900730367631554, 100.0), status OPTIMAL, OPTIMAL
k_deg_1 in (1.0, 1.0), status OPTIMAL, OPTIMAL
k_deg_2 in (1.0, 1.0), status OPTIMAL, OPTIMAL
