# Scenario modelling

## Set up environment

In [8]:
# Path to the CIBUSmod directory, relative to the notebook's working directory.
# It is also used below as the root of the data paths passed to cm.Session.
CM_BASEPATH = '../cibusmod'

import sys
import os
# Put that directory first on sys.path so the `import CIBUSmod` in the next
# cell is resolved from there (presumably a local checkout — confirm).
sys.path.insert(0, os.path.join(os.getcwd(), CM_BASEPATH))

In [9]:
import CIBUSmod as cm
import CIBUSmod.utils.plot as plot

import time
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import cvxpy

In [10]:
from CIBUSmod.utils.misc import inv_dict, aggregate_data_coords_pair
from CIBUSmod.optimisation.indexed_matrix import IndexedMatrix
from CIBUSmod.optimisation.utils import make_cvxpy_constraint
from itertools import product

In [11]:
# Create the session that holds the scenarios used in this notebook
session = cm.Session(
    name = 'ww_scenarios',
    data_path = CM_BASEPATH + "/data",
    data_path_default = CM_BASEPATH + "/data/default",
    data_path_scenarios = "../scenarios"
)

# Register the "base" scenario (year 2020, all parameters) from the "base" workbook.
# When the session already holds a scenario with this name, the call only prints
# a message pointing to .update_scenario() / .remove_scenario() (see cell output).
session.add_scenario(
    "base",
    years=[2020],
    pars = "all",
    scenario_workbooks="base"
)

# Register the "default_fix" scenario (year 2020, all parameters) from the
# "default_fix" workbook; this is the scenario selected via `scn` further down.
session.add_scenario(
    "default_fix",
    years=[2020],
    pars = "all",
    scenario_workbooks="default_fix"
)

A scenario with the name 'base' already exists use .update_scenario() or .remove_scenario() instead.
A scenario with the name 'default_fix' already exists use .update_scenario() or .remove_scenario() instead.


In [12]:
%%time

# Name of the scenario (registered on `session` above) that the model is run for
scn = "default_fix"

# One ParameterRetriever per model component, keyed by the component name
retrievers = {
    'Regions': cm.ParameterRetriever('Regions'),
    'DemandAndConversions': cm.ParameterRetriever('DemandAndConversions'),
    'CropProduction': cm.ParameterRetriever('CropProduction'),
    'FeedMgmt': cm.ParameterRetriever('FeedMgmt'),
    'GeoDistributor': cm.ParameterRetriever('GeoDistributor'),
}

# Apply the selected scenario's parameter values for year 2020 to all retrievers
cm.ParameterRetriever.update_all_parameter_values(**session[scn], year=2020)

# Instantiate Regions
regions = cm.Regions(
    par = retrievers['Regions'],
)

# Instantiate DemandAndConversions
demand = cm.DemandAndConversions(
    par = retrievers['DemandAndConversions'],
)

# Instantiate CropProduction, indexed like the 'x0_crops' data attribute of regions
crops = cm.CropProduction(
    par = retrievers['CropProduction'],
    index = regions.data_attr.get('x0_crops').index
)

# Instantiate AnimalHerds
# Each AnimalHerd object is stored in an indexed pandas.Series
herds = cm.make_herds(regions)

# Instantiate feed management
feed_mgmt = cm.FeedMgmt(
    herds = herds,
    par = retrievers['FeedMgmt'],
)

# Instantiate the feed distributor, i.e. the optimisation problem that the rest
# of the notebook builds, modifies and solves
optproblem = cm.FeedDistributor(
    regions = regions,
    demand = demand,
    crops = crops,
    herds = herds,
    feed_mgmt = feed_mgmt,
    par = retrievers['GeoDistributor'],
)

# Alias: all following cells refer to the optimisation problem as `self`
self = optproblem

-----------------------------------------------------------------------------
Some filter values included in data were not available in relation_tables.xlsx.
Missing for 'feed': 'maize gluten meal', 'minerals', 'rapeseed cake'
Missing for 'by_prod': 'palm kernel expeller', 'soybean meal', 'cream', 'luzern meal', 'fish meal', 'soybean protein concentrate'
------------------------------------------------------------------------------


CPU times: user 5.74 s, sys: 30.1 ms, total: 5.77 s
Wall time: 5.77 s


In [13]:
# Refresh parameter values and relation tables before re-applying the scenario.
# NOTE(review): the first call takes no arguments — presumably it resets the
# retrievers to their default values; confirm.
cm.ParameterRetriever.update_all_parameter_values()
cm.ParameterRetriever.update_relation_tables()

# Apply the selected scenario's parameter values for year 2020
cm.ParameterRetriever.update_all_parameter_values(**session[scn], year=2020)

# Run the calculations of each component; the herds are calculated one by one
# with progress logging enabled
regions.calculate()
demand.calculate()
crops.calculate()
for h in herds:
    h.calculate(verbose=True)

# Build the optimisation problem with the listed constraint numbers enabled
self.make(use_cons=[1, 2, 3, 4, 5, 6, 11, 12, 14], verbose=True)

-----------------------------------------------------------------------------
Some filter values included in data were not available in relation_tables.xlsx.
Missing for 'feed': 'maize gluten meal', 'minerals', 'rapeseed cake'
Missing for 'by_prod': 'palm kernel expeller', 'soybean meal', 'cream', 'luzern meal', 'fish meal', 'soybean protein concentrate'
------------------------------------------------------------------------------
-----------------------------------------------------------------------------
Some filter values included in data were not available in relation_tables.xlsx.
Missing for 'feed': 'maize gluten meal', 'minerals', 'rapeseed cake'
Missing for 'by_prod': 'palm kernel expeller', 'soybean meal', 'cream', 'luzern meal', 'fish meal', 'soybean protein concentrate'
------------------------------------------------------------------------------
  share_per_prod_system.update(share_con)
  share_per_origin.loc[:, "domestic"] = 1 - share_per_origin.loc[:, "imported"]


[21:23:45][AnimalHerd (cattle, beef, conventional, ley based)] Calculating herd structure ... 0.6s
[21:23:45][AnimalHerd (cattle, beef, conventional, ley based)] Calculating feed requirements ... [fat, AAT, PBV, DM, ME] 1.3s
[21:23:47][AnimalHerd (cattle, beef, conventional, ley based)] Calculating production ... 0.1s
[21:23:47][AnimalHerd (cattle, beef, conventional, ley based)] Done! Elapsed time: 2 sec
[21:23:47][AnimalHerd (cattle, beef, conventional, maize based)] Calculating herd structure ... 0.6s
[21:23:47][AnimalHerd (cattle, beef, conventional, maize based)] Calculating feed requirements ... [fat, AAT, PBV, DM, ME] 1.3s
[21:23:49][AnimalHerd (cattle, beef, conventional, maize based)] Calculating production ... 0.1s
[21:23:49][AnimalHerd (cattle, beef, conventional, maize based)] Done! Elapsed time: 2 sec
[21:23:49][AnimalHerd (cattle, beef, organic, ley based)] Calculating herd structure ... 0.7s
[21:23:50][AnimalHerd (cattle, beef, organic, ley based)] Calculating feed requi

In [14]:
# Solve the problem as built by self.make() above, without applying the solution.
# NOTE(review): solver_settings is a plain dict here but a one-element list of
# dicts in the later solve cells — confirm that both forms are accepted.
self.solve(
    apply_solution=False,
    verbose=True,
    solver_settings={
        "solver": "GUROBI",
        "reoptimize": True,
        "verbose": True,
        "BarConvTol": 1e-8,
        "NumericFocus": 1,
    }
)
# Deliberate stop: raising here ends this cell with "Exception: stop", which
# halts a "Run All" at this point.
raise Exception("stop")

[21:25:12][FeedDistributor.solve] Defining problem ... 0.0s
                                     CVXPY                                     
                                     v1.6.0                                    
(CVXPY) Dec 18 09:25:12 PM: Your problem has 402800 variables, 1201818 constraints, and 0 parameters.
(CVXPY) Dec 18 09:25:12 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Dec 18 09:25:12 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Dec 18 09:25:12 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
(CVXPY) Dec 18 09:25:12 PM: Your problem is compiled with the CPP canonicalization backend.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(C

Exception: stop

In [None]:
def print_ranges():
    """Print the value spread of every matrix returned by ``self.matrices()``.

    For each (label, matrix) pair the difference between the largest and the
    smallest entry of ``matrix.M`` is truncated to an integer and printed with
    thousands separators, one line per matrix.
    """
    for label, matrix in self.matrices().items():
        values = matrix.M
        spread = int(values.max() - values.min())
        print(f"{label}: {spread:,}")

print("RANGES BEFORE:")
print_ranges()

In [None]:
def rescale_constraints():
    """Rescale the parameters of selected constraints to a magnitude of 100.

    Every constraint in ``self.constraints`` whose label contains "C1" or
    "C12" is selected. Each entry of its ``"pars"`` dict is divided in place by
    ``max(|max|, |min|) / 100``, so that its largest absolute value becomes
    100. For entries that expose an ``M`` attribute the division is applied to
    ``M``; otherwise it is applied to the entry itself.

    Parameters whose values are all zero are left untouched: there is nothing
    to rescale, and dividing by the resulting zero scale would fill them with
    NaN/inf values.
    """
    # NOTE(review): this is a substring match, so "C1" also selects labels such
    # as "C11" or "C14" — confirm that this is intended.
    labels_to_scale = [
        cons_label
        for cons_label in self.constraints.keys()
        if any(c_nr in cons_label for c_nr in ("C1", "C12"))
    ]

    for cons_label in labels_to_scale:
        pars = self.constraints[cons_label]["pars"]
        # Iterate over a snapshot of the keys because entries are reassigned below
        for par_k in list(pars.keys()):
            v = pars[par_k]
            has_matrix = hasattr(v, 'M')
            values = v.M if has_matrix else v

            scale = max(abs(values.max()), abs(values.min())) / 100
            if scale == 0:
                # All-zero parameter: skip instead of dividing by zero
                continue

            if has_matrix:
                v.M /= scale
            else:
                pars[par_k] /= scale
    

# Replace the objective function

While the original optimisation objective focused on minimising the change, we now instead want to maximise the total protein content.

## Mapping `x` to protein contents

First we need to create a row array that maps each element of `x` to its protein content, so that we can compute the aggregate protein amount from the decision variable.

In [None]:
# Protein content used to weight each product in the protein mask.
# NOTE(review): units are not stated — presumably g protein per kg; confirm.
PROTEIN_CONTENTS = {
    "Peas (add)": 220,
    "Wheat (add)": 67.15,
    "meat": 155.5,
    "milk": 35.0,
}

# NOTE(review): this second assignment replaces the dict above, so these are
# the weights actually used by the mask functions below: the added crops get
# weight 0 and cattle meat/milk get weight 1. With these values the crop part
# of the mask is all zeros. This looks like a leftover debugging override —
# confirm which set of values is intended and remove the other.
PROTEIN_CONTENTS = {
    "Peas (add)": 0,
    "Wheat (add)": 0,
    "meat": 1,
    "milk": 1,
}

def make_protein_mask_ani():
    """Build the animal part of the protein mask.

    For every cattle herd in ``self.herds`` the "meat" and "milk" columns of
    the herd's "production" data attribute are summed per region, weighted by
    ``PROTEIN_CONTENTS`` and written to the column of ``self.x_idx["ani"]``
    that belongs to that herd and region. Herds of other species contribute
    nothing.

    Returns
    -------
    scipy.sparse CSC array of shape ``(1, len(self.x_idx["ani"]))``.
    """
    RELEVANT_ANIMAL_PRODUCTS = ["meat", "milk"]

    # (prod_system, species, animal_prod) combinations that are counted
    row_idx = pd.MultiIndex.from_tuples(
        [
            ("conventional", "cattle", "meat"),
            ("conventional", "cattle", "milk"),
            ("organic", "cattle", "meat"),
            ("organic", "cattle", "milk"),
        ],
        names=["prod_system", "species", "animal_prod"]
    )

    # Get col index from animal herds (sp,br,ps,ss,re)
    col_idx = self.x_idx["ani"]

    # To store data and corresponding row/col numbers for constructing matrix
    val = []
    row_nr = []
    col_nr = []

    # Go through animal herds
    for herd in self.herds:
        sp = herd.species
        br = herd.breed
        ps = herd.prod_system
        ss = herd.sub_system

        if sp != "cattle":
            continue

        # Look the production frame up once per herd instead of once per use
        production = herd.data_attr.get("production")

        # Get all animal products that we are concerned with
        aps = set(production.columns.unique("animal_prod")) & set(RELEVANT_ANIMAL_PRODUCTS)
        opss = production.columns.unique("prod_system")

        for ap, ops in product(aps, opss):
            if (ops, sp, ap) not in row_idx:
                print("CONT", ops, sp, ap)
                continue

            # Get production of animal product (ap) from output production system (ops) per head
            # of defining animal of species (sp) and breed (br) in production system (ps), sub system (ss)
            # and region (re)
            res = (
                production
                .loc[:, (ops, slice(None), ap)]
                .sum(axis=1)
            ) * PROTEIN_CONTENTS[ap]

            if (res == 0).all():
                continue

            val.extend(res)
            col_nr.extend(col_idx.get_loc((sp, br, ps, ss, re)) for re in res.index)
            # The mask has a single row, so every entry gets the integer row
            # index 0 (np.zeros would produce float indices here)
            row_nr.extend([0] * len(res))

    # Aggregate data_coords_pair to ensure that any overlapping values are summed rather than replace each other
    val, (row_nr, col_nr) = aggregate_data_coords_pair(val, row_nr, col_nr)

    # Create Compressed Sparse Column matrix
    return scipy.sparse.coo_array((val, (row_nr, col_nr)), shape=(1, len(col_idx))).tocsc()

def make_protein_mask_crp():
    """Build the crop part of the protein mask.

    Every column of ``self.x_idx["crp"]`` that belongs to "Wheat (add)" or
    "Peas (add)" receives that crop's weight from ``PROTEIN_CONTENTS``; all
    other columns stay zero.

    Returns
    -------
    scipy.sparse CSC array of shape ``(1, len(self.x_idx["crp"]))``.
    """
    crp_idx = self.x_idx["crp"]

    val = []
    col_nr = []
    for crop in ("Wheat (add)", "Peas (add)"):
        locs = crp_idx.get_locs((crop, ))
        val.extend([PROTEIN_CONTENTS[crop]] * len(locs))
        col_nr.extend(locs)

    # The mask has a single row; use integer zeros so the coordinate arrays are
    # valid indices rather than floats
    row_nr = np.zeros(len(val), dtype=int)

    return scipy.sparse.coo_array(
        (val, (row_nr, col_nr)),
        shape=(1, len(crp_idx))
    ).tocsc()

def make_protein_mask():
    """Assemble the single-row protein mask over the whole decision vector.

    The animal and crop parts come from ``make_protein_mask_ani()`` and
    ``make_protein_mask_crp()``; the "fds" part is an empty (all-zero) block
    with one column per entry of ``self.x_idx["fds"]``. The three parts are
    stacked side by side in that order and returned in CSC format.
    """
    parts = [
        make_protein_mask_ani(),
        make_protein_mask_crp(),
        scipy.sparse.csc_matrix((1, len(self.x_idx["fds"]))),
    ]
    return scipy.sparse.hstack(parts, format="csc")

make_protein_mask()

In [None]:
# pandas cannot build a numeric frame directly from a SciPy sparse array, so the
# single-row masks are converted to dense arrays first. The columns are labelled
# with the matching part of the decision-vector index.
df_ani = pd.DataFrame(make_protein_mask_ani().toarray(), columns=self.x_idx["ani"])
df_crp = pd.DataFrame(make_protein_mask_crp().toarray(), columns=self.x_idx["crp"])

In [None]:
# Sanity check on the crop part of the mask: a crop must have non-zero entries
# exactly when it is one of the added crops
ADDED_CROPS = ["Wheat (add)", "Peas (add)"]

for crop in df_crp.columns.unique("crop"):
    crop_block = df_crp.loc[:, (crop, slice(None), slice(None))]
    has_nonzero = not (crop_block == 0).all().all()
    assert has_nonzero == (crop in ADDED_CROPS)

In [None]:
# Sanity check on the animal part of the mask: a species must have non-zero
# entries exactly when it is cattle
for sp in df_ani.columns.unique("species"):
    species_block = df_ani.loc[:, (sp, slice(None), slice(None), slice(None), slice(None))]
    has_nonzero = not (species_block == 0).all().all()
    assert has_nonzero == (sp == "cattle")

## Construct and replace the `cvxpy.Problem`

### archive

### latest

In [16]:
def protein_mask_as_opt_goal():
    """Replace ``self.problem`` with a problem that maximises the protein mask.

    A fresh non-negative decision variable is created, the objective becomes
    ``Maximize(make_protein_mask() @ x)`` and every entry of
    ``self.constraints`` is converted with ``make_cvxpy_constraint`` and
    attached to the new problem.
    """
    # NOTE(review): the variable length is taken from x_idx_short while the mask
    # is sized from x_idx — confirm that the two have equal lengths.
    n = sum(len(self.x_idx_short[part]) for part in ("ani", "crp", "fds"))
    x = cvxpy.Variable(n, nonneg=True)

    # Objective: total of the mask applied to the decision variable
    objective = cvxpy.Maximize(make_protein_mask() @ x)

    # Carry over all existing constraints, expressed in terms of the new variable
    constraints = []
    for cons in self.constraints.values():
        constraints.append(make_cvxpy_constraint(cons, x))

    self.problem = cvxpy.Problem(objective=objective, constraints=constraints)

protein_mask_as_opt_goal()

NameError: name 'make_protein_mask' is not defined

# Run model

In [None]:
# Solve the problem currently stored on `self` without applying the solution,
# passing extra solver parameters alongside the solver choice
self.solve(
    apply_solution=False,
    verbose=True,
    solver_settings=[{
        "solver": "GUROBI",
        "reoptimize": True,
        "verbose": True,

        # Custom params
        "BarConvTol": 1e-8,
        "Aggregate": 0,
        "NumericFocus": 3,
        # Useful for recognizing infeasibility or unboundedness. It is a bit slower than the default algorithm.
        # Values: -1 auto, 0 off, 1 force on.
        "BarHomogeneous": 1,

    }]
)

In [None]:
# Draw a random vector with one entry per column of the protein mask.
# The mask is built here explicitly because no `M` exists at notebook scope,
# and its width is read from `shape` since len() is not defined for sparse rows.
M = make_protein_mask()
n = M.shape[1]
x = np.random.rand(n)
x.shape

In [None]:
# Display the first variable of the current cvxpy problem
self.problem.variables()[0]

In [None]:
# Stop here unless the solver reported an optimal solution. CVXPY reports
# "optimal" (cvxpy.OPTIMAL) on success; it never reports "success", so
# comparing against that string made this cell raise unconditionally.
if self.problem.status != cvxpy.OPTIMAL:
    raise Exception("Here be dragons")

In [None]:
def protein_map_as_cons(FACTOR=0.9):
    """Build a constraint that keeps the protein output near the found optimum.

    The constraint requires ``M @ x >= FACTOR * optimum``, where ``M`` is the
    protein mask and ``optimum`` is the objective value of the problem that
    was solved last (``self.problem.value``).

    Parameters
    ----------
    FACTOR : float, default 0.9
        Share of the optimal objective value that must still be reached.

    Returns
    -------
    dict
        Constraint definition with the keys "left", "right", "rel" and "pars",
        in the same form as the other entries of ``self.constraints``.

    Raises
    ------
    Exception
        If ``self.problem.value`` is None, i.e. no optimal value is available.
    """
    max_protein_amount = self.problem.value
    if max_protein_amount is None:
        raise Exception("Could not get the optimal value from the problem")

    # Lower bound for the protein amount; a right-hand side of 0 would make the
    # constraint trivially satisfied for a non-negative x and mask
    min_protein_amount = FACTOR * max_protein_amount

    return {
        "left": lambda x, M: M @ x,
        "right": lambda M: min_protein_amount,
        "rel": ">=",
        "pars": { "M": make_protein_mask() }
    }

self.constraints["CX: Protein"] = protein_map_as_cons()

In [None]:
# Rebuild the cvxpy problem from the model and store it on `self`.
# NOTE(review): presumably this restores the original objective and picks up
# the "CX: Protein" entry added to self.constraints — confirm.
self.problem = self.get_cvx_problem()

In [None]:
# Final solve; unlike the earlier solve cells, the solution is applied this
# time (apply_solution=True)
self.solve(
    apply_solution=True,
    verbose=True,
    solver_settings=[{
        "solver": "GUROBI",
        "reoptimize": True,
        "verbose": True,
    }]
)

# Plot results

In [None]:
# Bar chart of the 'area' attribute: the first row of the retrieved data is
# unstacked so that crops become columns, and bars are grouped by 'land_use'
cm.plot.bar(
    session.get_attr('c','area',{'crop':['land_use',None],'region':None}).iloc[0].unstack('crop'),
    group_levels='land_use'
)

plt.show()

In [None]:
# Bar chart of the 'heads' attribute by region and species: the first row of
# the retrieved data is unstacked so that species become columns
cm.plot.bar(
    session.get_attr('a','heads',['region','species']).iloc[0].unstack('species')
)
plt.show()

# Archive