# Scenario modelling

## Set up environment

In [1]:
# Location of the CIBUSmod checkout, relative to the current working directory.
CM_BASEPATH = '../cibusmod'

import sys
import os
# Put the checkout first on sys.path so that `import CIBUSmod` resolves to it.
sys.path.insert(0, os.path.join(os.getcwd(), CM_BASEPATH))

In [2]:
import CIBUSmod as cm
import CIBUSmod.utils.plot as plot

import time
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import cvxpy

from typing import Literal

In [3]:
from CIBUSmod.utils.misc import inv_dict, aggregate_data_coords_pair
from CIBUSmod.optimisation.indexed_matrix import IndexedMatrix
from CIBUSmod.optimisation.utils import make_cvxpy_constraint
from itertools import product

In [4]:
#from parallel_pandas import ParallelPandas
#ParallelPandas.initialize(disable_pr_bar=True)

In [5]:
# Open the modelling session; scenario data is read from "data", with
# defaults taken from the CIBUSmod checkout.
session = cm.Session(
    name="main-foo",
    data_path="data",
    data_path_default=CM_BASEPATH + "/data/default",
)

# Scenario name -> workbook(s) defining it. Registered in this order.
# NOTE(review): the recorded cell output shows "already exists" messages for
# every scenario, i.e. re-running this cell against an existing session does
# not replace the scenarios.
_scenario_workbooks = {
    "BASELINE": "default_fix",
    "SCN_CORE": "base",
    "SCN_MIN_LEY": ["base", "scn-min-ley"],
    "SCN_SNG": "base",
    "SCN_ORG": "base",
    "SCN_DEMAND": "scn-demand",
}

# Every scenario covers the single year 2020 and loads all parameters.
for _scenario_name, _workbooks in _scenario_workbooks.items():
    session.add_scenario(
        _scenario_name,
        years=[2020],
        pars="all",
        scenario_workbooks=_workbooks,
    )
   

A scenario with the name 'BASELINE' already exists use .update_scenario() or .remove_scenario() instead.
A scenario with the name 'SCN_CORE' already exists use .update_scenario() or .remove_scenario() instead.
A scenario with the name 'SCN_MIN_LEY' already exists use .update_scenario() or .remove_scenario() instead.
A scenario with the name 'SCN_SNG' already exists use .update_scenario() or .remove_scenario() instead.
A scenario with the name 'SCN_ORG' already exists use .update_scenario() or .remove_scenario() instead.
A scenario with the name 'SCN_DEMAND' already exists use .update_scenario() or .remove_scenario() instead.


In [6]:
%%time

# Scenario whose parameter values are loaded for all modules below.
scn = "SCN_CORE"

# Parameter retrievers created up front, before the parameter values are
# loaded; the remaining modules further down create theirs inline.
retrievers = {
    'Regions': cm.ParameterRetriever('Regions'),
    'DemandAndConversions': cm.ParameterRetriever('DemandAndConversions'),
    'CropProduction': cm.ParameterRetriever('CropProduction'),
    'FeedMgmt': cm.ParameterRetriever('FeedMgmt'),
    'GeoDistributor': cm.ParameterRetriever('GeoDistributor'),
}

# Load the parameter values of the selected scenario for year 2020.
cm.ParameterRetriever.update_all_parameter_values(**session[scn], year=2020)

# Instantiate Regions
regions = cm.Regions(
    par = retrievers['Regions'],
)

# Instantiate DemandAndConversions
demand = cm.DemandAndConversions(
    par = retrievers['DemandAndConversions'],
)

# Instantiate CropProduction, indexed like the 'x0_crops' data attribute of Regions
crops = cm.CropProduction(
    par = retrievers['CropProduction'],
    index = regions.data_attr.get('x0_crops').index
)

# Instantiate AnimalHerds
# Each AnimalHerd object is stored in an indexed pandas.Series
# Only sheep get explicit sub-systems; the cattle entries are disabled.
herds = cm.make_herds(regions, sub_systems={
#    'cattle': ['ley based'], 
#    ('cattle', 'dairy', 'conventional'): ['maize based'], 
#    ('cattle', 'beef', 'conventional'): ['maize based'],
    'sheep': ['autumn lamb', 'spring lamb', 'winter lamb', 'other sheep']
})


# Instantiate feed management
feed_mgmt = cm.FeedMgmt(
    herds = herds,
    par = retrievers['FeedMgmt'],
)

# Instantiate the feed distributor (configured with the 'GeoDistributor' parameters)
optproblem = cm.FeedDistributor(
    regions = regions,
    demand = demand,
    crops = crops,
    herds = herds,
    feed_mgmt = feed_mgmt,
    par = retrievers['GeoDistributor'],
)

# Alias used by the cells further down, which access the distributor as
# `self` (self.x_idx, self.herds, self._get_losses_factors, ...).
self = optproblem

# Instantiate WasteAndCircularity
waste = cm.WasteAndCircularity(
    demand = demand,
    crops = crops,
    herds = herds,
    par = cm.ParameterRetriever('WasteAndCircularity')
)

# Instantiate by-product management
byprod_mgmt = cm.ByProductMgmt(
    demand = demand,
    herds = herds,
    par = cm.ParameterRetriever('ByProductMgmt')
)

# Instantiate manure management
manure_mgmt = cm.ManureMgmt(
    herds = herds,
    feed_mgmt = feed_mgmt,
    par = cm.ParameterRetriever('ManureMgmt'),
    settings = {
        'NPK_excretion_from_balance' : True
    }
)

# Instantiate crop residue management
crop_residue_mgmt = cm.CropResidueMgmt(
    demand = demand,
    crops = crops,
    herds = herds,
    par = cm.ParameterRetriever('CropResidueMgmt')
)

# Instantiate plant nutrient management
plant_nutrient_mgmt = cm.PlantNutrientMgmt(
    demand = demand,
    regions = regions,
    crops = crops,
    waste = waste,
    herds = herds,
    par = cm.ParameterRetriever('PlantNutrientMgmt')
)

# Instantiate machinery and energy management
machinery_and_energy_mgmt  = cm.MachineryAndEnergyMgmt(
    regions = regions,
    crops = crops,
    waste = waste,
    herds = herds,
    par = cm.ParameterRetriever('MachineryAndEnergyMgmt')
)

# Instantiate inputs management
inputs = cm.InputsMgmt(
    demand = demand,
    crops = crops,
    waste = waste,
    herds = herds,
    par = cm.ParameterRetriever('InputsMgmt')
)

-----------------------------------------------------------------------------
Some filter values included in data were not available in relation_tables.xlsx.
Missing for 'by_prod': 'soybean meal', 'palm kernel expeller', 'maize gluten meal', 'luzern meal', 'fish meal', 'soybean protein concentrate', 'cream'
------------------------------------------------------------------------------


CPU times: user 11.2 s, sys: 112 ms, total: 11.3 s
Wall time: 11.4 s


In [7]:
# Refresh parameter values and relation tables without scenario arguments
# first. NOTE(review): presumably this resets everything to the defaults
# before the scenario is applied -- confirm.
cm.ParameterRetriever.update_all_parameter_values()
cm.ParameterRetriever.update_relation_tables()

# Re-apply the values of the selected scenario (`scn`) for year 2020.
cm.ParameterRetriever.update_all_parameter_values(**session[scn], year=2020)

# Run the calculations of the modules used below: regions, demand, crops,
# then every herd in turn.
regions.calculate()
demand.calculate()
crops.calculate()
for h in herds:
    h.calculate()
    

-----------------------------------------------------------------------------
Some filter values included in data were not available in relation_tables.xlsx.
Missing for 'crop': 'Lentils'
Missing for 'animal': 'calves'
Missing for 'by_prod': 'soybean meal', 'palm kernel expeller', 'maize gluten meal', 'luzern meal', 'fish meal', 'soybean protein concentrate', 'cream'
------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------
Data includes crop(s) without an x0 area specified through 'x0_crops' in the Regions module.
Missing: 'Peas (add)', 'Wheat (add)', 'Lentils'
--------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------
Some filter values included in data were not available in relation_tables.xlsx.
Missing for 'crop': 'Lentils'
Missing for 'animal': 'calves'
Missi

[fat, ME, rough, DM, PBV, AAT] [fat, ME, rough, DM, PBV, AAT] [fat, ME, rough, DM, PBV, AAT] [fat, ME, rough, DM, PBV, AAT] [ME] [ME] [ME] [ME] [ME] [ME] [ME] [ME] [NE] [NE] [DM] [DM] [DM] [DM] [DM] [DM] [DM] [DM] [DM] [DM] [DM] [DM] 

In [8]:
# Call get_x0() on the feed distributor (aliased as `self`); the return value
# is not stored. NOTE(review): presumably this prepares the state (e.g.
# self.x_idx) that the following cells read -- confirm.
self.get_x0()

In [None]:
%%time
t0 = time.time()
rel_type="min"
col_idx = self.x_idx["fds"].sort_values()

feed_pars = set()
herd_dfs = {}

for herd in self.herds:
    data = herd.data_attr.get(f"feed_req_of_DM_{rel_type}")

    # Where there are no 'constraints' for this parameter- and herd combo, we
    # do not need to add anything to the df.
    if data.empty:
        continue
    # Keep track of which
    feed_pars.update(data.columns.unique("feed_par"))
    # prod_system already in data attribute, hence not here.
    herd_dfs[(herd.species, herd.breed, herd.sub_system)] = data.T.stack(
        "region"
    )

if len(herd_dfs) == 0:
    row_idx = pd.MultiIndex.from_tuples(
        [],
        names=[
            "feed_par",
            "animal",
            "species",
            "breed",
            "prod_system",
            "sub_system",
            "region",
        ],
    )
    raise Exception("No herds found")

herds_df = (
    pd.concat(herd_dfs, names=["species", "breed", "sub_system"])
    .to_frame(name="feed_req_of_DM")
)

# We base our row-idx on x_fds, but without the 'feed' level
_base_row_idx = self.x_idx["fds"].droplevel("feed")
# ... and then multiply in each feed_par that we want to look at
row_idx = cm.utils.misc.extend_index(
    levels=[feed_pars], names=["feed_par"], index=_base_row_idx, mode="prepend"
).sort_values()

# Get all feed
losses_factors = self._get_losses_factors(shape="long").sort_index()
feed_compositions = self._get_feed_compositions(shape="long").sort_index()
#feed_compositions = feed_compositions.replace({0: np.nan}).dropna()
#factors = losses_factors.join(feed_compositions)
#factors = (factors["losses_factor"] * factors["feed_to_par_factor"]).fillna(0).to_frame(name="feed_to_par_factor")
#factors

row_idx_df = pd.DataFrame(range(len(row_idx)), index=row_idx, columns=["row_i"])
col_idx_df = pd.DataFrame(range(len(col_idx)), index=col_idx, columns=["col_i"])

joined = (
    herds_df
    .merge(row_idx_df, on=row_idx.names)
    .merge(col_idx_df, on=[
        "animal",
        "species",
        "breed",
        "prod_system",
        "sub_system",
        "region",
    ])
    .join(feed_compositions)
    .join(losses_factors)
    .fillna(int(0))
)
#joined["values"] = (
#    joined["feed_to_par_factor"] * joined["losses_factor"]
#    - joined["feed_req_of_DM"]
#)
joined
#joined = joined[["row_i", "col_i", "values"]].reset_index(drop=True)


In [None]:
# Show the level names of the column index built from self.x_idx["fds"].
col_idx.names

In [None]:
%%time
t0 = time.time()
rel_type="min"
col_idx = self.x_idx["fds"].sort_values()

feed_pars = set()
herd_dfs = {}

for herd in self.herds:
    data = herd.data_attr.get(f"feed_req_of_DM_{rel_type}")

    # Where there are no 'constraints' for this parameter- and herd combo, we
    # do not need to add anything to the df.
    if data.empty:
        continue
    # Keep track of which
    feed_pars.update(data.columns.unique("feed_par"))
    # prod_system already in data attribute, hence not here.
    herd_dfs[(herd.species, herd.breed, herd.sub_system)] = data.T.stack(
        "region"
    )

if len(herd_dfs) == 0:
    row_idx = pd.MultiIndex.from_uniquetuples(
        [],
        names=[
            "feed_par",
            "animal",
            "species",
            "breed",
            "prod_system",
            "sub_system",
            "region",
        ],
    )
    raise Exception("No herds found")

herds_df = (
    pd.concat(herd_dfs, names=["species", "breed", "sub_system"])
    .to_frame(name="feed_req_of_DM")
    .reset_index()
)

# We base our row-idx on x_fds, but without the 'feed' level
_base_row_idx = self.x_idx["fds"].droplevel("feed")
# ... and then multiply in each feed_par that we want to look at
row_idx = cm.utils.misc.extend_index(
    levels=[feed_pars], names=["feed_par"], index=_base_row_idx, mode="prepend"
).sort_values()

# Get all feeds
losses_factors = self._get_losses_factors(shape="long").reset_index()
feed_compositions = self._get_feed_compositions(shape="long").reset_index()

row_idx_df = row_idx.to_frame(index=False).reset_index(names="row_i")
col_idx_df = col_idx.to_frame(index=False).reset_index(names="col_i")


merged = (
    herds_df
    # Merge on row_idx to get a full index matching (feed_par, sp, br, ps, ss, ani, region) -> feed_req_of_dm
    .merge(row_idx_df, on=row_idx.names)
    # merge with col_idx to add feed to the rows0.0
    .merge(
        col_idx_df,
        on=[
            "animal",
            "species",
            "breed",
            "prod_system",
            "sub_system",
            "region",
        ],
    )
    # Now merge with feed_compositions to map feeds to feed_pars. how="left" to set a default value of 0
    .merge(
        feed_compositions,
        how="left",
        on=[
            "feed_par",
            "feed",
            "animal",
            "species",
            "breed",
            "prod_system",
            "sub_system",
        ],
    )
    .merge(
        losses_factors,
        how="left",
        on=["feed", "animal", "species", "breed", "prod_system", "sub_system"],
    )
    .fillna(0)
)

merged["values"] = (
    merged["feed_to_par_factor"] * merged["losses_factor"]
    - merged["feed_req_of_DM"]
)

In [None]:
# Sparse view of the merge-based result: one "values" entry per
# (row_i, col_i) pair, with zero coefficients dropped and rows sorted.
m = (
    merged.loc[:, ["row_i", "col_i", "values"]]
    .set_index(["row_i", "col_i"])
    .replace({0: np.nan})
    .dropna()
    .sort_index()
)
m

In [None]:
# Sparse view of the join-based result, in the same layout as `m`.
# `joined` only holds the raw columns, so the coefficient is computed here,
# and only row_i / col_i / values are kept. Otherwise dropna() would also
# discard rows in which one of the raw factor columns is 0, and the frame
# could never match `m`.
j = (
    joined
    .assign(
        values=lambda df: (
            df["feed_to_par_factor"] * df["losses_factor"] - df["feed_req_of_DM"]
        )
    )
    .loc[:, ["row_i", "col_i", "values"]]
    .set_index(["row_i", "col_i"])
    .replace({0: np.nan})
    .dropna()
    .sort_index()
)
j

In [None]:
# Check that the merge-based frame (m) and the join-based frame (j) are
# equal; raises an AssertionError describing the first difference otherwise.
pd.testing.assert_frame_equal(m, j)