In [82]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import gurobipy as gp
from gurobipy import GRB
import json
from copy import copy

In [2]:
# Gurobi optimization status codes (the integer value of Model.Status) mapped
# to their symbolic names, used to report the outcome of each solve.
status_codes = {
    1: 'LOADED',
    2: 'OPTIMAL',
    3: 'INFEASIBLE',
    4: 'INF_OR_UNBD',
    5: 'UNBOUNDED',
    6: 'CUTOFF',
    7: 'ITERATION_LIMIT',
    8: 'NODE_LIMIT',
    9: 'TIME_LIMIT',
    10: 'SOLUTION_LIMIT',
    11: 'INTERRUPTED',
    12: 'NUMERIC',
    13: 'SUBOPTIMAL',
    14: 'INPROGRESS',
    15: 'USER_OBJ_LIMIT',
    # codes added in later Gurobi releases; without them a solve that stops on
    # a work or memory limit raises KeyError when its status is looked up
    16: 'WORK_LIMIT',
    17: 'MEM_LIMIT'
}

In [3]:
# shared random generator with a fixed seed, so that the simulated sample and
# the bootstrap resamples are repeatable from a fresh kernel
rng = np.random.default_rng(seed=43)

# Scaling Bounds: Observed Set

Use the sample to split the counts $x$ into an observed set (counts seen at least a threshold number $\tau$ of times) and an unobserved set (all remaining counts up to a truncation $M \gg$ the maximum observed count).

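From these sets, define $p(\text{obs})$ and $p(\text{unobs})$ as the sums of $p(x)$ over the observed and unobserved sets respectively, and use them to construct the scaled probability bounds and the CME constraints.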

In [112]:
def bootstrap(sample, beta, N, tau, drop=False):
    """
    Bootstrap estimate of probabilities of all states in the sample
    Set bounds outside tau threshold truncation to [0, 1]

    sample: integer sample (1D, non-negative counts)
    beta: capture efficiency of each observation, aligned with sample
    N: number of bootstrap resamples
    tau: truncation threshold (a state is 'observed' if seen >= tau times)
    drop: if true, values outside truncation are removed from the sample before bootstrap

    returns: (observed_states, unobserved_states, prob_intervals, fm_intervals)
        observed_states / unobserved_states: lists of states in 0..max(sample)
        prob_intervals: (2, max(sample) + 1) array, rows = [lower, upper] 95% bounds on p(x)
        fm_intervals: (2, max(sample) + 1) array, rows = [lower, upper] 95% bounds on E[beta|x]
        every unobserved state has bounds [0, 1] in both arrays

    Uses the module-level generator `rng` for resampling.
    """

    sample = np.asarray(sample)
    beta = np.asarray(beta)

    # min and max states
    xmin, xmax = int(sample.min()), int(sample.max())
    n_states = xmax + 1

    # count # observations of each state
    sample_counts = np.bincount(sample, minlength=n_states)

    # observed states (# obs >= tau)
    # NOTE: changed to >=, was > in prev work
    observed_states = np.where(sample_counts >= tau)[0]

    # unobserved states (# obs < tau)
    unobserved_states = np.where(sample_counts < tau)[0]

    # (optional) remove 'unobserved' states from sample before bootstrapping
    if drop:

        # boolean mask (to keep)
        mask = np.isin(sample, observed_states)

        # remove (from sample and capture)
        sample = sample[mask]
        beta = beta[mask]

    # confidence intervals: start every state at the uninformative [0, 1] so
    # that states never visited below (unobserved ones, including those below
    # the sample minimum) do not carry uninitialised memory
    prob_intervals = np.empty((2, n_states))
    prob_intervals[0] = 0.0
    prob_intervals[1] = 1.0
    fm_intervals = prob_intervals.copy()

    # resample
    prob_resamples = rng.choice(sample, size=(N, len(sample)))

    # set for constant-time membership tests inside the loop
    unobserved_lookup = set(unobserved_states.tolist())

    # for all x
    for x in range(xmin, xmax + 1):

        # unobserved: keep the default [0, 1]
        if x in unobserved_lookup:
            continue

        # probs
        # proportion of x per resample
        prob_estimates = (prob_resamples == x).mean(axis=1)
        # interval
        prob_intervals[:, x] = np.quantile(prob_estimates, [0.025, 0.975])

        # fms
        # capture for cells with x observed counts
        beta_x = beta[sample == x]
        # resample
        beta_resamples = rng.choice(beta_x, size=(N, len(beta_x)))
        # estimate E[beta|m]
        fm_estimate = beta_resamples.mean(axis=1)
        # quantile for confidence intervals
        fm_intervals[:, x] = np.quantile(fm_estimate, [0.025, 0.975], axis=0)

    # convert state arrays to list
    observed_states = observed_states.tolist()
    unobserved_states = unobserved_states.tolist()

    return observed_states, unobserved_states, prob_intervals, fm_intervals

In [113]:
def optimize_downsampled_scaling(observed_states, unobserved_states, M, prob_intervals, fm_intervals,
                                 silent=True, printing=True, time_limit=300, dist_opt=True, UB=1, K=100):
    '''
    Downsampled Scaling Optimization

    Bounds k_tx subject to the CME constraints, with the bootstrap probability
    intervals of the observed states scaled by the total observed mass p_obs.

    observed states: set of states observed (above a threshold #)
    unobserved states: set of states not observed (")
    M: consider additional unobserved states up to this value
       (must not already belong to either set)
    prob_intervals: Bootstrap CI on probabilities, indexed [0 = lower / 1 = upper, x]
    fm_intervals: Bootstrap CI on fms, indexed [0 = lower / 1 = upper, x]

    silent: optimization log toggle
    printing: solution print toggle
    time_limit: optimization time limit
    dist_opt: toggle for optimizing probabilities

    UB: upper bound on unobserved mass
    K: upper bound on k_tx

    returns: dict with
        'min', 'max': optimal k_tx bounds (None if no objective value is available)
        'min_status', 'max_status': name of the solver status after each solve
        'p': (M + 1, 2) array of [min, max] of each p[x], -1 where unavailable
             (only when dist_opt is true)

    raises: ValueError if M is already in the observed or unobserved set

    Reads the WLS license options from "../../../WLS_credentials.json" and, if
    the minimisation is infeasible, writes an IIS to 'iis-mmt.ilp'.
    '''

    # copy sets so the caller's lists are not modified
    observed_states = list(observed_states)
    unobserved_states = list(unobserved_states)

    # errors for small M
    if M in observed_states:
        raise ValueError(f"M = {M} too low: in observed set")

    if M in unobserved_states:
        raise ValueError(f"M = {M} too low: in unobserved set")

    # add states to unobserved set up to M
    known_states = set(observed_states) | set(unobserved_states)
    unobserved_states.extend(x for x in range(M + 1) if x not in known_states)

    # WLS license (context manager so the file handle is closed)
    with open("../../../WLS_credentials.json") as credentials_file:
        options = json.load(credentials_file)

    # silent
    if silent:
        options['OutputFlag'] = 0

    # environment context
    with gp.Env(params=options) as env:

        # model context
        with gp.Model('test-construction', env=env) as model:

            # model settings
            model.Params.TimeLimit = time_limit

            # variables
            p = model.addMVar(shape=(M + 1), vtype=GRB.CONTINUOUS, name="p", lb=0, ub=1)
            f = model.addMVar(shape=(M + 1), vtype=GRB.CONTINUOUS, name="f", lb=0, ub=1)
            k_tx = model.addVar(vtype=GRB.CONTINUOUS, name="k_tx", lb=0, ub=K)

            # observed mass
            p_obs = gp.quicksum([p[x] for x in observed_states])

            # unobserved mass
            p_unobs = gp.quicksum([p[x] for x in unobserved_states])

            # unobserved mass upper bound
            model.addConstr(p_unobs <= UB, name="Unobserved_ub")

            # distributional constraint
            model.addConstr(p.sum() == 1, name="Distribution")

            # scaled observed probability bounds
            model.addConstrs(
                (
                    p[x] <= prob_intervals[1, x] * p_obs
                    for x in observed_states
                ),
                name="prob_ub"
            )
            model.addConstrs(
                (
                    p[x] >= prob_intervals[0, x] * p_obs
                    for x in observed_states
                ),
                name="prob_lb"
            )

            # f rate bounds
            model.addConstrs(
                (
                    f[x] <= fm_intervals[1, x]
                    for x in observed_states
                ),
                name="f_ub"
            )
            model.addConstrs(
                (
                    f[x] >= fm_intervals[0, x]
                    for x in observed_states
                ),
                name="f_lb"
            )

            # dummy zero variable for non-linear constraints
            z = model.addVar()
            model.addConstr(z == 0)

            # CME
            model.addConstr(
                z == p[1] - k_tx * f[0] * p[0],
                name="CME_0"
            )

            model.addConstrs(
                (
                    z == k_tx * f[x - 1] * p[x - 1] + (x + 1) * p[x + 1] - (k_tx * f[x] + x) * p[x] for x in range(1, M)
                ),
                name="CME_x"
            )

            def _solve(objective, sense, fallback):
                # optimise one objective; return (objective value, status name),
                # with `fallback` as the value when the solver has none to report
                model.setObjective(objective, sense)
                model.optimize()
                try:
                    value = model.ObjVal
                except (AttributeError, gp.GurobiError):
                    value = fallback
                # unknown codes are reported by number instead of raising KeyError
                return value, status_codes.get(model.status, f"UNKNOWN_{model.status}")

            # optimize
            solution = {}
            solution['min'], solution['min_status'] = _solve(k_tx, GRB.MINIMIZE, None)

            # record an irreducible infeasible subsystem for debugging
            if solution['min_status'] == "INFEASIBLE":
                model.computeIIS()
                model.write('iis-mmt.ilp')

            solution['max'], solution['max_status'] = _solve(k_tx, GRB.MAXIMIZE, None)

            # display
            if printing:
                print(f"k_tx in ({solution['min']}, {solution['max']}), status {solution['min_status']}, {solution['max_status']}")

            # optimize p
            if dist_opt:
                p_bounds = np.zeros((M + 1, 2))
                for x in range(M + 1):
                    p_bounds[x, 1], _ = _solve(p[x], GRB.MAXIMIZE, -1)
                    p_bounds[x, 0], _ = _solve(p[x], GRB.MINIMIZE, -1)

                # store
                solution['p'] = p_bounds

    return solution

In [125]:
def optimize_downsampled_standard(observed_states, unobserved_states, M, prob_intervals, fm_intervals,
                                 silent=True, printing=True, time_limit=300, dist_opt=True, K=100):
    '''
    Downsampled Standard Optimization

    Bounds k_tx subject to the CME constraints, with the bootstrap probability
    intervals applied directly (unscaled) to the observed states and the total
    mass of the truncated distribution constrained to be at most 1.

    observed states: set of states observed (above a threshold #)
    unobserved states: set of states not observed (")
    M: consider additional unobserved states up to this value
       (must not already belong to either set)
    prob_intervals: Bootstrap CI on probabilities, indexed [0 = lower / 1 = upper, x]
    fm_intervals: Bootstrap CI on fms, indexed [0 = lower / 1 = upper, x]

    silent: optimization log toggle
    printing: solution print toggle
    time_limit: optimization time limit
    dist_opt: toggle for optimizing probabilities
    K: upper bound on k_tx

    returns: dict with
        'min', 'max': optimal k_tx bounds (None if no objective value is available)
        'min_status', 'max_status': name of the solver status after each solve
        'p': (M + 1, 2) array of [min, max] of each p[x], -1 where unavailable
             (only when dist_opt is true)

    raises: ValueError if M is already in the observed or unobserved set

    Reads the WLS license options from "../../../WLS_credentials.json" and, if
    the minimisation is infeasible, writes an IIS to 'iis-mmt.ilp'.
    '''

    # copy sets so the caller's lists are not modified
    observed_states = list(observed_states)
    unobserved_states = list(unobserved_states)

    # errors for small M
    if M in observed_states:
        raise ValueError(f"M = {M} too low: in observed set")

    if M in unobserved_states:
        raise ValueError(f"M = {M} too low: in unobserved set")

    # add states to unobserved set up to M
    known_states = set(observed_states) | set(unobserved_states)
    unobserved_states.extend(x for x in range(M + 1) if x not in known_states)

    # WLS license (context manager so the file handle is closed)
    with open("../../../WLS_credentials.json") as credentials_file:
        options = json.load(credentials_file)

    # silent
    if silent:
        options['OutputFlag'] = 0

    # environment context
    with gp.Env(params=options) as env:

        # model context
        with gp.Model('test-construction', env=env) as model:

            # model settings
            model.Params.TimeLimit = time_limit

            # variables
            p = model.addMVar(shape=(M + 1), vtype=GRB.CONTINUOUS, name="p", lb=0, ub=1)
            f = model.addMVar(shape=(M + 1), vtype=GRB.CONTINUOUS, name="f", lb=0, ub=1)
            k_tx = model.addVar(vtype=GRB.CONTINUOUS, name="k_tx", lb=0, ub=K)

            # distributional constraint
            model.addConstr(p.sum() <= 1, name="Distribution")

            # observed probability bounds (unscaled)
            model.addConstrs(
                (
                    p[x] <= prob_intervals[1, x]
                    for x in observed_states
                ),
                name="prob_ub"
            )
            model.addConstrs(
                (
                    p[x] >= prob_intervals[0, x]
                    for x in observed_states
                ),
                name="prob_lb"
            )

            # f rate bounds
            model.addConstrs(
                (
                    f[x] <= fm_intervals[1, x]
                    for x in observed_states
                ),
                name="f_ub"
            )
            model.addConstrs(
                (
                    f[x] >= fm_intervals[0, x]
                    for x in observed_states
                ),
                name="f_lb"
            )

            # dummy zero variable for non-linear constraints
            z = model.addVar()
            model.addConstr(z == 0)

            # CME
            model.addConstr(
                z == p[1] - k_tx * f[0] * p[0],
                name="CME_0"
            )

            model.addConstrs(
                (
                    z == k_tx * f[x - 1] * p[x - 1] + (x + 1) * p[x + 1] - (k_tx * f[x] + x) * p[x] for x in range(1, M)
                ),
                name="CME_x"
            )

            def _solve(objective, sense, fallback):
                # optimise one objective; return (objective value, status name),
                # with `fallback` as the value when the solver has none to report
                model.setObjective(objective, sense)
                model.optimize()
                try:
                    value = model.ObjVal
                except (AttributeError, gp.GurobiError):
                    value = fallback
                # unknown codes are reported by number instead of raising KeyError
                return value, status_codes.get(model.status, f"UNKNOWN_{model.status}")

            # optimize
            solution = {}
            solution['min'], solution['min_status'] = _solve(k_tx, GRB.MINIMIZE, None)

            # record an irreducible infeasible subsystem for debugging
            if solution['min_status'] == "INFEASIBLE":
                model.computeIIS()
                model.write('iis-mmt.ilp')

            solution['max'], solution['max_status'] = _solve(k_tx, GRB.MAXIMIZE, None)

            # display
            if printing:
                print(f"k_tx in ({solution['min']}, {solution['max']}), status {solution['min_status']}, {solution['max_status']}")

            # optimize p
            if dist_opt:
                p_bounds = np.zeros((M + 1, 2))
                for x in range(M + 1):
                    p_bounds[x, 1], _ = _solve(p[x], GRB.MAXIMIZE, -1)
                    p_bounds[x, 0], _ = _solve(p[x], GRB.MINIMIZE, -1)

                # store
                solution['p'] = p_bounds

    return solution

In [128]:
# simulation settings
k = 2       # Poisson rate of the original counts
tau = 5     # min # observations for a state to be treated as observed
N = 1000    # number of bootstrap resamples
n = 1000    # sample size

# simulate counts, draw a Beta(1, 2) capture efficiency per observation and
# binomially downsample each count with it
sample = rng.poisson(lam=k, size=n)
beta = rng.beta(a=1, b=2, size=n)  # use np.ones(n) instead for no capture (beta = 1)
sample_downsampled = rng.binomial(n=sample, p=beta)

# bootstrap intervals; states below the tau threshold are dropped before resampling
obs_states, unobs_states, prob_intervals, fm_intervals = bootstrap(
    sample=sample_downsampled,
    beta=beta,
    N=N,
    tau=tau,
    drop=True,
)

In [130]:
# show which downsampled counts reached the tau threshold (observed) and which did not
print(f"Observed {obs_states}")
print(f"Unobserved {unobs_states}")

Observed [0, 1, 2, 3, 4, 5]
Unobserved [6, 7]


In [140]:
# truncation of the state space; must lie above every state returned by the bootstrap
M = 30

# k_tx bounds from the scaled (observed-set) constraints, with the unobserved
# mass capped at 0.1 and the per-state probability bounds skipped
solution = optimize_downsampled_scaling(
    observed_states=obs_states,
    unobserved_states=unobs_states,
    M=M,
    prob_intervals=prob_intervals,
    fm_intervals=fm_intervals,
    time_limit=30,
    dist_opt=False,
    UB=0.1,
)

k_tx in (1.6304115461785786, 2.1925285195431985), status OPTIMAL, OPTIMAL


In [136]:
# truncation of the state space; must lie above every state returned by the bootstrap
M = 30

# k_tx bounds from the standard (unscaled) constraints, for comparison with the
# scaling version; per-state probability bounds are skipped
solution = optimize_downsampled_standard(
    observed_states=obs_states,
    unobserved_states=unobs_states,
    M=M,
    prob_intervals=prob_intervals,
    fm_intervals=fm_intervals,
    time_limit=30,
    dist_opt=False,
)

k_tx in (1.630525961091679, 2.1905106402014756), status OPTIMAL, OPTIMAL


In [123]:
# compute true unobserved mass
# p_obs[i] accumulates the Poisson(b * k) pmf at the i-th observed state,
# averaged over every capture efficiency b in beta
p_obs = np.zeros(len(obs_states))
for b in beta:
    p_obs += scipy.stats.poisson.pmf(obs_states, b * k) / len(beta)
# total mass on the observed states, and the remainder that lies outside them
m_obs = sum(p_obs)
m_unobs = 1 - sum(p_obs)
print(f"p(obs) = {m_obs}")
print(f"p(unobs) = {m_unobs}")

p(obs) = 0.9965406374002526
p(unobs) = 0.0034593625997474398
