In [8]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import gurobipy as gp
from gurobipy import GRB
import json

In [9]:
# Gurobi optimization status codes (values of `model.Status`) mapped to their names.
# Codes 16 (WORK_LIMIT) and 17 (MEM_LIMIT) are included so that a solve stopped by
# a work or memory limit can still be looked up by status.
status_codes = {
    1: 'LOADED',
    2: 'OPTIMAL',
    3: 'INFEASIBLE',
    4: 'INF_OR_UNBD',
    5: 'UNBOUNDED',
    6: 'CUTOFF',
    7: 'ITERATION_LIMIT',
    8: 'NODE_LIMIT',
    9: 'TIME_LIMIT',
    10: 'SOLUTION_LIMIT',
    11: 'INTERRUPTED',
    12: 'NUMERIC',
    13: 'SUBOPTIMAL',
    14: 'INPROGRESS',
    15: 'USER_OBJ_LIMIT',
    16: 'WORK_LIMIT',
    17: 'MEM_LIMIT'
}

In [10]:
# Fixed seed for the shared random generator, so that sampling and
# bootstrap results are reproducible on a fresh kernel run.
SEED = 43
rng = np.random.default_rng(SEED)

# Scaling bounds & Capture efficiency

## Code

### Bootstrap

In [35]:
def bootstrap(sample, beta, N, tau, drop=False, generator=None):
    """
    Bootstrap confidence intervals for the probability of each state in the
    sample and for the mean capture efficiency of the cells in each state.

    Estimates are only computed over the truncation [tmin, tmax]: the smallest
    and largest values whose observed count is strictly greater than tau.
    Bounds for states outside the truncation are left at [0, 1].

    sample: 1D array of non-negative integer counts
    beta: 1D array of per-cell capture efficiencies, aligned with sample
    N: number of bootstrap resamples
    tau: truncation threshold on the observed count of a state
    drop: if true, values outside truncation are removed from the sample
        (together with their beta values) before bootstrap
    generator: optional numpy random Generator used for resampling;
        when None the notebook-level `rng` is used

    Returns (tmin, tmax, prob_intervals, fm_intervals). Both interval arrays
    have shape (2, max(sample) + 1); row 0 holds the 2.5% quantile and row 1
    the 97.5% quantile of the bootstrap estimates.

    Raises ValueError if the sample is empty or no state has more than tau
    observations.
    """

    # fall back to the shared notebook generator when none is supplied
    if generator is None:
        generator = rng

    sample = np.asarray(sample)
    beta = np.asarray(beta)
    if sample.size == 0:
        raise ValueError("bootstrap requires a non-empty sample")

    # OB truncation bounds
    xmax = int(sample.max())
    sample_counts = np.bincount(sample, minlength=(xmax + 1))
    t_indices = np.where(sample_counts > tau)[0]
    if t_indices.size == 0:
        raise ValueError(
            f"no state is observed more than tau={tau} times; "
            "lower tau or use a larger sample"
        )
    tmin, tmax = int(t_indices.min()), int(t_indices.max())

    # if drop: remove values outside the truncation before bootstrapping
    if drop:
        inside = (sample >= tmin) & (sample <= tmax)
        sample = sample[inside]
        beta = beta[inside]

    # default bounds [0, 1] for every state; overwritten inside the truncation
    prob_intervals = np.empty((2, xmax + 1))
    prob_intervals[0, :] = 0
    prob_intervals[1, :] = 1
    fm_intervals = prob_intervals.copy()

    # resample the counts once: N resamples, each the size of the sample
    prob_resamples = generator.choice(sample, size=(N, len(sample)))

    # estimate over truncation
    for x in range(tmin, tmax + 1):

        # probs: proportion of x per resample, then 95% interval
        prob_estimates = (prob_resamples == x).mean(axis=1)
        prob_intervals[:, x] = np.quantile(prob_estimates, [0.025, 0.975])

        # fms: capture for cells with x observed counts
        beta_x = beta[sample == x]
        if beta_x.size == 0:
            # a state inside the truncation may have no cells (a gap in the
            # counts): there is nothing to resample, so keep the [0, 1] bounds
            continue
        # resample
        beta_resamples = generator.choice(beta_x, size=(N, len(beta_x)))
        # estimate E[beta|m]
        fm_estimate = beta_resamples.mean(axis=1)
        # quantile for confidence intervals
        fm_intervals[:, x] = np.quantile(fm_estimate, [0.025, 0.975], axis=0)

    return tmin, tmax, prob_intervals, fm_intervals

### Standard Optimization (Downsampled)

Treat bootstrap intervals as bounds on (downsampled) probabilities

\begin{align*}
    & \hat{p}_{L}(x) \le p(x) \le \hat{p}_{U}(x) \quad , \quad \forall x \in \{0, \ldots, m\} \\
    & \sum \limits_{x=0}^{m} p(x) \le 1 \\
    & Q(f)p = 0
\end{align*}

In [46]:
def optimize_standard(xmax, tmin, tmax, prob_intervals, fm_intervals, CME=True, silent=True, printing=True,
                      time_limit=300, dist_opt=True, K=100):
    """
    Bound the rate k_tx, using the bootstrap intervals as direct bounds on
    the (downsampled) state probabilities p(x) and on the capture rates f(x).

    xmax: largest state in the model; variables cover states 0..xmax
    tmin: lower end of the truncation (not used: bounds are applied to all
        states 0..tmax)
    tmax: upper end of the truncation; interval bounds apply to states 0..tmax
    prob_intervals: array with at least tmax + 1 columns; row 0 / row 1 are
        the lower / upper bounds on p(x)
    fm_intervals: array with at least tmax + 1 columns; row 0 / row 1 are
        the lower / upper bounds on f(x)
    CME: if true, add the stationary master-equation constraints
    silent: if true, suppress solver output
    printing: if true, print the k_tx bounds and the solver statuses
    time_limit: Gurobi TimeLimit (seconds) applied to each solve
    dist_opt: if true, also minimize and maximize every p(x)
    K: upper bound on k_tx

    Returns a dict with keys 'min' and 'max' (objective values, or None when
    the solve produced no solution), 'min_status' and 'max_status' (status
    names) and, if dist_opt, 'p': array of shape (xmax + 1, 2) holding
    [min, max] of each p(x), with -1 where no solution was available.

    Raises ValueError if tmax > xmax.
    """

    if tmax > xmax:
        raise ValueError(f"tmax ({tmax}) must not exceed xmax ({xmax})")

    # WLS license (context manager so the file handle is closed)
    with open("../../../WLS_credentials.json") as credentials_file:
        options = json.load(credentials_file)

    # silent
    if silent:
        options['OutputFlag'] = 0

    # environment context
    with gp.Env(params=options) as env:

        # model context
        with gp.Model('test-construction', env=env) as model:

            def read_objective(fallback):
                # ObjVal is only readable when the last solve found a solution
                try:
                    return model.ObjVal
                except (AttributeError, gp.GurobiError):
                    return fallback

            def status_name():
                # tolerate status codes missing from the lookup table
                return status_codes.get(model.status, f"UNKNOWN_STATUS_{model.status}")

            # model settings
            model.Params.TimeLimit = time_limit

            # variables
            p = model.addMVar(shape=(xmax + 1), vtype=GRB.CONTINUOUS, name="p", lb=0, ub=1)
            f = model.addMVar(shape=(xmax + 1), vtype=GRB.CONTINUOUS, name="f", lb=0, ub=1)
            k_tx = model.addVar(vtype=GRB.CONTINUOUS, name="k_tx", lb=0, ub=K)

            # distributional constraint
            model.addConstr(p.sum() <= 1, name="Distribution")

            # probability bounds
            model.addConstr(p[:(tmax+1)] <= prob_intervals[1, :(tmax+1)], name="prob_UB")
            model.addConstr(p[:(tmax+1)] >= prob_intervals[0, :(tmax+1)], name="prob_LB")

            # f rate bounds
            model.addConstr(f[:(tmax+1)] <= fm_intervals[1, :(tmax+1)], name="f_UB")
            model.addConstr(f[:(tmax+1)] >= fm_intervals[0, :(tmax+1)], name="f_LB")

            # dummy zero variable for non-linear constraints:
            # each CME equation is written as z == <expression> with z fixed to 0
            z = model.addVar()
            model.addConstr(z == 0)

            # CME: balance equations with production rate k_tx * f(x) and
            # degradation rate x, for states 0 and 1..xmax-1
            if CME:
                model.addConstr(
                    z == p[1] - k_tx * f[0] * p[0],
                    name="CME_0"
                )

                model.addConstrs(
                    (
                        z == k_tx * f[x - 1] * p[x - 1] + (x + 1) * p[x + 1] - (k_tx * f[x] + x) * p[x] for x in range(1, xmax)
                    ),
                    name="CME_x"
                )

            # optimize: lower bound on k_tx
            solution = {}
            model.setObjective(k_tx, GRB.MINIMIZE)
            model.optimize()
            solution['min'] = read_objective(None)
            solution['min_status'] = status_name()

            # on infeasibility, write an irreducible infeasible subsystem for inspection
            if solution['min_status'] == "INFEASIBLE":
                model.computeIIS()
                model.write('iis-mmt.ilp')

            # optimize: upper bound on k_tx
            model.setObjective(k_tx, GRB.MAXIMIZE)
            model.optimize()
            solution['max'] = read_objective(None)
            solution['max_status'] = status_name()

            # display
            if printing:
                print(f"k_tx in ({solution['min']}, {solution['max']}), status {solution['min_status']}, {solution['max_status']}")

            # optimize p: column 0 = minimum, column 1 = maximum of each p(x)
            if dist_opt:
                p_bounds = np.zeros((xmax + 1, 2))
                for x in range(xmax + 1):
                    model.setObjective(p[x], GRB.MAXIMIZE)
                    model.optimize()
                    p_bounds[x, 1] = read_objective(-1)

                    model.setObjective(p[x], GRB.MINIMIZE)
                    model.optimize()
                    p_bounds[x, 0] = read_objective(-1)

                # store
                solution['p'] = p_bounds

    return solution

### Scaling Optimization (Downsampled)

Treat bootstrap intervals as bounds on (downsampled) probabilities **relative** to the observed mass of the sample, and bound the unobserved mass from above (it is assumed to be relatively small):

\begin{align*}
    & \hat{p}_{L}(x) \le \frac{p(x)}{p(\text{obs})} \le \hat{p}_{U}(x) \quad , \quad \forall x \in \{0, \ldots, m\} \\
    & p(\text{obs}) = \sum \limits_{x=0}^{m} p(x) \quad , \quad p(\text{unobs}) = \sum \limits_{x=m+1}^{M} p(x) \\
    & p(\text{obs}) + p(\text{unobs}) = 1 \\
    & p(\text{unobs}) \le UB \\
    & Q(f)p = 0
\end{align*}

In [47]:
def optimize_scaling(xmax, tmin, tmax, prob_intervals, fm_intervals, silent=True, printing=True, time_limit=300,
                  CME=True, dist_opt=True, UB=1, K=100):
    """
    Bound the rate k_tx, using the bootstrap intervals as bounds on the state
    probabilities relative to the observed mass p(obs) = sum of p(x) over
    states 0..tmax, with the total unobserved mass (states tmax+1..xmax)
    bounded above by UB.

    xmax: largest state in the model; variables cover states 0..xmax
    tmin: lower end of the truncation (not used: bounds are applied to all
        states 0..tmax)
    tmax: upper end of the truncation; interval bounds apply to states 0..tmax
    prob_intervals: array with at least tmax + 1 columns; row 0 / row 1 are
        the lower / upper bounds on p(x) / p(obs)
    fm_intervals: array with at least tmax + 1 columns; row 0 / row 1 are
        the lower / upper bounds on f(x)
    silent: if true, suppress solver output
    printing: if true, print the k_tx bounds and the solver statuses
    time_limit: Gurobi TimeLimit (seconds) applied to each solve
    CME: if true, add the stationary master-equation constraints
    dist_opt: if true, also minimize and maximize every p(x)
    UB: upper bound on the total unobserved probability mass
    K: upper bound on k_tx

    Returns a dict with keys 'min' and 'max' (objective values, or None when
    the solve produced no solution), 'min_status' and 'max_status' (status
    names) and, if dist_opt, 'p': array of shape (xmax + 1, 2) holding
    [min, max] of each p(x), with -1 where no solution was available.

    Raises ValueError if tmax > xmax.
    """

    if tmax > xmax:
        raise ValueError(f"tmax ({tmax}) must not exceed xmax ({xmax})")

    # WLS license (context manager so the file handle is closed)
    with open("../../../WLS_credentials.json") as credentials_file:
        options = json.load(credentials_file)

    # silent
    if silent:
        options['OutputFlag'] = 0

    # environment context
    with gp.Env(params=options) as env:

        # model context
        with gp.Model('test-construction', env=env) as model:

            def read_objective(fallback):
                # ObjVal is only readable when the last solve found a solution
                try:
                    return model.ObjVal
                except (AttributeError, gp.GurobiError):
                    return fallback

            def status_name():
                # tolerate status codes missing from the lookup table
                return status_codes.get(model.status, f"UNKNOWN_STATUS_{model.status}")

            # model settings
            model.Params.TimeLimit = time_limit

            # variables
            p = model.addMVar(shape=(xmax + 1), vtype=GRB.CONTINUOUS, name="p", lb=0, ub=1)
            f = model.addMVar(shape=(xmax + 1), vtype=GRB.CONTINUOUS, name="f", lb=0, ub=1)
            k_tx = model.addVar(vtype=GRB.CONTINUOUS, name="k_tx", lb=0, ub=K)

            # distributional constraint
            model.addConstr(p.sum() == 1, name="Distribution")

            # bound on the unobserved mass: the SUM over states above tmax,
            # not each state separately (skipped when there are no such states)
            if tmax < xmax:
                model.addConstr(p[(tmax+1):].sum() <= UB, name="Unobserved_ub")

            # probability bounds, relative to the observed mass
            p_obs = p[:(tmax+1)].sum()
            model.addConstr(p[:(tmax+1)] <= prob_intervals[1, :(tmax+1)] * p_obs, name="prob_UB")
            model.addConstr(p[:(tmax+1)] >= prob_intervals[0, :(tmax+1)] * p_obs, name="prob_LB")

            # f rate bounds
            model.addConstr(f[:(tmax+1)] <= fm_intervals[1, :(tmax+1)], name="f_UB")
            model.addConstr(f[:(tmax+1)] >= fm_intervals[0, :(tmax+1)], name="f_LB")

            # dummy zero variable for non-linear constraints:
            # each CME equation is written as z == <expression> with z fixed to 0
            z = model.addVar()
            model.addConstr(z == 0)

            # CME: balance equations with production rate k_tx * f(x) and
            # degradation rate x, for states 0 and 1..xmax-1
            if CME:
                model.addConstr(
                    z == p[1] - k_tx * f[0] * p[0],
                    name="CME_0"
                )

                model.addConstrs(
                    (
                        z == k_tx * f[x - 1] * p[x - 1] + (x + 1) * p[x + 1] - (k_tx * f[x] + x) * p[x] for x in range(1, xmax)
                    ),
                    name="CME_x"
                )

            # optimize: lower bound on k_tx
            solution = {}
            model.setObjective(k_tx, GRB.MINIMIZE)
            model.optimize()
            solution['min'] = read_objective(None)
            solution['min_status'] = status_name()

            # on infeasibility, write an irreducible infeasible subsystem for inspection
            if solution['min_status'] == "INFEASIBLE":
                model.computeIIS()
                model.write('iis-mmt.ilp')

            # optimize: upper bound on k_tx
            model.setObjective(k_tx, GRB.MAXIMIZE)
            model.optimize()
            solution['max'] = read_objective(None)
            solution['max_status'] = status_name()

            # display
            if printing:
                print(f"k_tx in ({solution['min']}, {solution['max']}), status {solution['min_status']}, {solution['max_status']}")

            # optimize p: column 0 = minimum, column 1 = maximum of each p(x)
            if dist_opt:
                p_bounds = np.zeros((xmax + 1, 2))
                for x in range(xmax + 1):
                    model.setObjective(p[x], GRB.MAXIMIZE)
                    model.optimize()
                    p_bounds[x, 1] = read_objective(-1)

                    model.setObjective(p[x], GRB.MINIMIZE)
                    model.optimize()
                    p_bounds[x, 0] = read_objective(-1)

                # store
                solution['p'] = p_bounds

    return solution

## Testing

### Poisson data

In [113]:
# simulation settings
k = 2        # Poisson rate of the true counts
tau = 5      # truncation threshold passed to the bootstrap
N = 1000     # number of bootstrap resamples
n = 1000     # number of cells
xmax = 30    # largest state used in the optimization models

# true counts, per-cell capture efficiency, and binomially downsampled counts
sample = rng.poisson(lam=k, size=n)
beta = rng.beta(a=1, b=20, size=n)
sample_downsampled = rng.binomial(n=sample, p=beta)

# bootstrap intervals on the downsampled data, dropping values outside the truncation
tmin, tmax, prob_intervals, fm_intervals = bootstrap(
    sample=sample_downsampled, beta=beta, N=N, tau=tau, drop=True
)

In [None]:
# Standard formulation: bounds on k_tx only (per-state p(x) bounds skipped)
solution = optimize_standard(
    xmax=xmax,
    tmin=tmin,
    tmax=tmax,
    prob_intervals=prob_intervals,
    fm_intervals=fm_intervals,
    dist_opt=False,
)

k_tx in (1.734683896380789, 3.020347681070055), status OPTIMAL, OPTIMAL


In [None]:
# Scaling formulation with UB = 0.1: bounds on k_tx only (per-state p(x) bounds skipped)
solution = optimize_scaling(
    xmax=xmax,
    tmin=tmin,
    tmax=tmax,
    prob_intervals=prob_intervals,
    fm_intervals=fm_intervals,
    dist_opt=False,
    UB=0.1,
)

k_tx in (1.7913122646998674, 99.99998569731527), status OPTIMAL, OPTIMAL


### Non-poisson data

In [117]:
from interaction_inference import simulation

In [132]:
# rate parameters passed to the telegraph simulation (all rates 1, k_reg = 2)
params = {
    'k_on_1': 1,
    'k_on_2': 1,
    'k_off_1': 1,
    'k_off_2': 1,
    'k_tx_1': 1,
    'k_tx_2': 1,
    'k_deg_1': 1,
    'k_deg_2': 1,
    'k_reg': 2
}

# simulate n cells (same n as the per-cell capture efficiencies drawn with size=n),
# rather than repeating the literal 1000
# NOTE(review): assumes the second argument of gillespie_telegraph is the number
# of samples — confirm against interaction_inference.simulation
sample = simulation.gillespie_telegraph(params, n)

# keep the first component of each simulated entry
x1_sample = np.array([x[0] for x in sample])

In [133]:
# per-cell capture efficiency and binomially downsampled counts
beta = rng.beta(a=1, b=2, size=n)
sample_downsampled = rng.binomial(n=x1_sample, p=beta)

# bootstrap intervals on the downsampled data, dropping values outside the truncation
tmin, tmax, prob_intervals, fm_intervals = bootstrap(
    sample=sample_downsampled, beta=beta, N=N, tau=tau, drop=True
)

In [134]:
# Standard formulation: bounds on k_tx only (per-state p(x) bounds skipped)
solution = optimize_standard(
    xmax=xmax,
    tmin=tmin,
    tmax=tmax,
    prob_intervals=prob_intervals,
    fm_intervals=fm_intervals,
    dist_opt=False,
)

k_tx in (0.3297851812244912, 0.4605224023490482), status OPTIMAL, OPTIMAL


In [135]:
# Scaling formulation with UB = 0.1: bounds on k_tx only (per-state p(x) bounds skipped)
solution = optimize_scaling(
    xmax=xmax,
    tmin=tmin,
    tmax=tmax,
    prob_intervals=prob_intervals,
    fm_intervals=fm_intervals,
    dist_opt=False,
    UB=0.1,
)

k_tx in (0.32972013160906655, 99.99999999930894), status OPTIMAL, OPTIMAL
