In [None]:
# JBY: Set up env using "ipython --pylab" imports
# NOTE: %pylab populates the global namespace with numpy and matplotlib names. The bare
# names used throughout this notebook without an explicit import (figsize, rcParams,
# array, exp, log, plot, savefig, ...) rely on this cell having been run first.
%pylab

In [None]:
%autoreload 2

In [None]:
# Pick the matplotlib backend: the macOS windowed backend when `interactive` is True,
# otherwise figures are rendered inline in the notebook.
interactive = False
if interactive:
    %matplotlib osx
else:
    %matplotlib inline
# Notebook-wide defaults for figure size and font size; individual cells override figsize.
figsize(17,6)
rcParams['font.size'] = 18

In [None]:
import ast
import json
import os

import pandas as pd

#from pyextra import looser

# Misc functions

## Load some functions from helper.py

In [None]:
from helper import colors, primary_tags, primary_tag_set, primary_tags_carbonplan_pt_order, get_pt, get_clr
from helper import DuckStruct, lsprint

## Define some functions here

In [None]:
def display_full(df):
    '''Display `df` without truncating columns or cell contents.

    For the duration of the call, ``display.max_columns`` is raised to 2000 and
    the limit on ``display.max_colwidth`` is removed. Both options are restored
    when the call returns.
    '''
    # None is the documented "no limit" value for max_colwidth. The legacy -1
    # sentinel was deprecated in pandas 1.0 and is rejected by newer releases.
    with pd.option_context('display.max_columns', 2000,
                           'display.max_colwidth', None):
        display(df)

In [None]:
def savefigs(name):
    '''Save the current figure twice: as `<name>.png` and then as `<name>.pdf`.'''
    for template in ('%s.png', '%s.pdf'):
        savefig(template % name)

# Load data

In [None]:
# Inclusive range of projection years; the per-year columns are named like
# cost_<year>_min / cost_<year>_max / vol_<year>_min / vol_<year>_max.
first_year = 2020
last_year = 2035
n_years = last_year - first_year + 1       # 16 years from 2020 - 2035, inclusive

In [None]:
# Read the projections table from ../data relative to the working directory.
# The encoding is pinned because the data contains non-ASCII text (e.g. the subscript in
# the '$/tCO₂-year' cost unit checked further down); relying on the platform default
# would mis-decode it on systems whose locale encoding is not UTF-8.
csv_path = os.path.join(os.getcwd(), '..', 'data', 'Supply_Cost_Projections_df.csv')
with open(csv_path, 'r', encoding='utf-8') as ff:
    df = pd.read_csv(ff)

In [None]:
#df = pd.json_normalize(whole_json['projects'])

In [None]:
# Quick look at the (rows, columns) of the freshly loaded table
df.shape

In [None]:
#df.head()

Fix some CSV issues that were not present with the initial JSON version

In [None]:
# Convert from string like "['forests', 'reforestation']" to list of strings.
# ast.literal_eval parses the Python-literal text directly, so a tag containing an
# apostrophe or a double quote survives (swapping quotes and parsing as JSON breaks on it).
# Values that are already lists pass through unchanged, so re-running this cell is safe.
df['tags'] = df['tags'].map(lambda x: x if isinstance(x, list) else ast.literal_eval(x))

## Clean and standardize cost and volume data

In [None]:
# Just leave them as nans!
##df.fillna('', inplace=True)

In [None]:
# If people left "applicant" blank, fill it in with data from the ID column.
# Only the rows selected by `sel` (null applicant) are written; all others are untouched.
sel = df.applicant.isna()
df.loc[sel, 'applicant'] = df.loc[sel, 'id']

If cost is provided in cost_2020_min column, take it as valid.

If cost is provided in the cost_value column AND the cost_rating is not -9999, copy it to the cost_2020_min column and mark it as valid.

Note that this rules out two MS Forest projects that have a cost but where it is marked as -9999 (unless someone researches those options and enters their evaluation of the cost in cost_2020_min)

In [None]:
# Rows to back-fill: cost_value is truthy, cost_rating is not -9999, and cost_2020_min
# is still empty. For those rows cost_value is copied into cost_2020_min.
# NOTE(review): bool(NaN) is True, so a NaN cost_value also passes map(bool). That only
# copies NaN onto NaN (valid_cost below is based on notna), but a zero cost is excluded
# by the same test -- confirm that is intended.
selector_copy_from_cost_val = (df.cost_value.map(bool) & 
                               (df.cost_rating != -9999) & 
                               (df.cost_2020_min.isna()))
df.loc[selector_copy_from_cost_val, 'cost_2020_min'] = df[selector_copy_from_cost_val].cost_value
# A row has a valid cost exactly when cost_2020_min ended up non-null
df['valid_cost'] = df.cost_2020_min.notna()

In [None]:
#selector_copy_from_cost_val = (df.cost_value.map(bool) & 
#                               (df.cost_rating != -9999) & 
#                               (df.cost_2020_min == ''))
#df.loc[selector_copy_from_cost_val, 'cost_2020_min'] = df[selector_copy_from_cost_val].cost_value
#df['valid_cost'] = (df.cost_2020_min != '')

Ditto for volume

In [None]:
# Same back-fill as for cost: where volume_value is truthy, volume_rating is not -9999
# and vol_2020_min is empty, copy volume_value into vol_2020_min.
# NOTE(review): map(bool) is True for NaN and False for 0 -- confirm a zero volume
# should be treated as missing.
selector_copy_from_volume_val = (df.volume_value.map(bool) & 
                                (df.volume_rating != -9999) & 
                                (df.vol_2020_min.isna()))
df.loc[selector_copy_from_volume_val, 'vol_2020_min'] = df[selector_copy_from_volume_val].volume_value
# A row has a valid volume exactly when vol_2020_min ended up non-null
df['valid_volume'] = df.vol_2020_min.notna()

Mark those rows with valid volume and cost. Only these will be plotted later

In [None]:
# valid_vc is True only when both the 2020 volume and the 2020 cost are present
df['valid_vc'] = (df['valid_volume'] & df['valid_cost'])

If cost_2020_max and/or vol_2020_max are not filled in, copy from mins

In [None]:
# Where a 2020 min cost exists but the max is blank, use the min as the max.
# The right-hand side is the full column; pandas aligns it on the index, so only the
# selected rows are written.
selector_copy_cost_min_to_max = (df.cost_2020_min.notna() & df.cost_2020_max.isna())
df.loc[selector_copy_cost_min_to_max, 'cost_2020_max'] = df.cost_2020_min

In [None]:
# Where a 2020 min volume exists but the max is blank, use the min as the max.
# The right-hand side is the full column; pandas aligns it on the index, so only the
# selected rows are written.
selector_copy_vol_min_to_max = (df.vol_2020_min.notna() & df.vol_2020_max.isna())
df.loc[selector_copy_vol_min_to_max, 'vol_2020_max'] = df.vol_2020_min

In [None]:
# For every row that will be plotted, the 2020 max must not be below the 2020 min
# (checked for cost first, then for volume).
valid_rows = df[df.valid_vc]
for lo_col, hi_col in (('cost_2020_min', 'cost_2020_max'), ('vol_2020_min', 'vol_2020_max')):
    assert (valid_rows[hi_col] >= valid_rows[lo_col]).all(), 'Error: max < min'

In [None]:
def basic_checks(just_first_year=False):
    '''Assert that the valid_vc rows of the global `df` have sane yearly bounds.

    For each year from `first_year` to `last_year` (or only `first_year` when
    `just_first_year` is True), the min/max cost and volume columns must be
    non-null and each max must be >= its min. A failing check raises
    AssertionError carrying the offending year.
    '''
    final_year = last_year
    if just_first_year:
        final_year = first_year

    valid = df[df.valid_vc]
    bound_patterns = (('cost_%s_min', 'cost_%s_max'), ('vol_%s_min', 'vol_%s_max'))
    for year in range(first_year, final_year + 1):
        bounds = [(lo % year, hi % year) for lo, hi in bound_patterns]
        # First make sure nothing is missing ...
        for lo_col, hi_col in bounds:
            assert valid[lo_col].notnull().all(), year
            assert valid[hi_col].notnull().all(), year
        # ... then that every max is at least its min
        for lo_col, hi_col in bounds:
            assert (valid[hi_col] >= valid[lo_col]).all(), year

In [None]:
# Run basic checks after
# Only the first year can be checked at this point: the later-year columns are
# checked by the full basic_checks() call further down.
basic_checks(just_first_year=True)

# Cost adjustments for credit decay

In [None]:
# Costs quoted per ton-year are only handled when the permanence is exactly one year.
# (The message previously read "%/t/y"; the unit being checked is $/t/y.)
assert (df[df.cost_units == '$/tCO₂-year'].permanence_value == 1).all(), 'Need to deal with different units. Cost is given as $/t/y but permanence is not 1.0 years'

In [None]:
def cost_mult_with_decay(permanence, wacc=1.05, model='exponential'):
    '''Return the multiplier applied to a project's cost to account for credit decay.

    For the 'exponential' model (the only one implemented) the value is
    ``1 + (1 - exp(-1/permanence)) / (wacc - 1)``, evaluated in the operation
    order shown in the return statement.

    Parameters:
        permanence: permanence of the credit (the callers pass `permanence_value`).
        wacc: growth factor, e.g. 1.05 -- presumably 1 + the weighted average cost
              of capital; confirm against the write-up. Must not equal 1.
        model: name of the decay model.

    Raises:
        ValueError: if `model` is not recognized.
    '''
    if model != 'exponential':
        # ValueError is still an Exception, so existing broad handlers keep working
        raise ValueError('Unrecognized model: %s' % model)

    # Share that decays over one time unit when the decay constant is `permanence`
    decayed_share = 1 - exp(-1/permanence)
    return 1 + decayed_share * 1/(1 - 1/wacc) / wacc

In [None]:
wacc = 1.05

In [None]:
# Overrides the 1.05 assigned in the previous cell when both cells are run in order
wacc = 1.015

In [None]:
# Per-project cost multiplier for credit decay. Uses the notebook-level `wacc` so that
# changing it above actually changes the result (a literal 1.05 here would ignore it).
df['decay_cost_mult'] = df.apply(lambda x: cost_mult_with_decay(x['permanence_value'], wacc=wacc), axis=1)

## Orig

In [None]:
def get_array_all_years(project, pattern):
    '''Collect one attribute per year from `project` into an array.

    `pattern` is a %-format string such as "cost_%s_min"; it is filled with
    each year from first_year through last_year to build the attribute name.
    '''
    yearly_values = []
    for year in range(first_year, last_year + 1):
        yearly_values.append(getattr(project, pattern % year))
    return array(yearly_values)

def min_cost_array(project):
    '''Yearly minimum costs of `project` as an array.'''
    return get_array_all_years(project, 'cost_%s_min')


def max_cost_array(project):
    '''Yearly maximum costs of `project` as an array.'''
    return get_array_all_years(project, 'cost_%s_max')


def min_vol_array(project):
    '''Yearly minimum volumes of `project` as an array.'''
    return get_array_all_years(project, 'vol_%s_min')


def max_vol_array(project):
    '''Yearly maximum volumes of `project` as an array.'''
    return get_array_all_years(project, 'vol_%s_max')

def min_cost_df(project):
    '''Yearly minimum costs of `project` as a one-column DataFrame.'''
    yearly = get_array_all_years(project, 'cost_%s_min')
    return pd.DataFrame(yearly)


def max_cost_df(project):
    '''Yearly maximum costs of `project` as a one-column DataFrame.'''
    yearly = get_array_all_years(project, 'cost_%s_max')
    return pd.DataFrame(yearly)


def min_vol_df(project):
    '''Yearly minimum volumes of `project` as a one-column DataFrame.'''
    yearly = get_array_all_years(project, 'vol_%s_min')
    return pd.DataFrame(yearly)


def max_vol_df(project):
    '''Yearly maximum volumes of `project` as a one-column DataFrame.'''
    yearly = get_array_all_years(project, 'vol_%s_max')
    return pd.DataFrame(yearly)

def get_costs(project):
    '''Return, for every year, the midpoint between the min and max cost.'''
    lowest = min_cost_array(project)
    highest = max_cost_array(project)
    return (lowest + highest) / 2

def get_vols(project):
    '''Return, for every year, the midpoint between the min and max volume.'''
    lowest = min_vol_array(project)
    highest = max_vol_array(project)
    return (lowest + highest) / 2

In [None]:
def write_yearly_data_to_df(df, idx, yearly_data_array, pattern):
    '''Write one value per year back into row `idx` of `df`.

    Parameters:
        df: DataFrame modified in place.
        idx: index label of the row to write.
        yearly_data_array: n_years values ordered from first_year to last_year.
            Either a flat sequence or an (n_years, 1) column, such as
            DataFrame.to_numpy() returns for a one-column frame.
        pattern: %-format column-name template, e.g. "cost_%s_min".

    Raises:
        AssertionError: if the data does not contain exactly n_years values.
    '''
    import numpy as np  # local import: the notebook otherwise relies on %pylab's names

    # Flatten first so each cell receives a scalar instead of a 1-element array
    flat = np.asarray(yearly_data_array).reshape(-1)
    assert len(flat) == n_years, 'Wrong length data'
    for year, value in zip(range(first_year, last_year + 1), flat):
        df.at[idx, pattern % year] = value

Interpolate any missing data exponentially in time. Uses [pandas.DataFrame.interpolate](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.interpolate.html)

In [None]:
def exponential_interpolate(df):
    '''Fill gaps in the yearly cost/volume columns of `df`, in place.

    Only rows with valid_vc set are touched. For each of the four yearly series
    (min/max cost, min/max volume) the values are interpolated linearly in log
    space, which is exponential interpolation in time, and written back.
    '''
    series_to_fill = (
        (min_cost_df, 'cost_%s_min'),
        (max_cost_df, 'cost_%s_max'),
        (min_vol_df, 'vol_%s_min'),
        (max_vol_df, 'vol_%s_max'),
    )
    for project in df.itertuples():
        if project.valid_vc:
            for as_frame, pattern in series_to_fill:
                filled = exp(log(as_frame(project)).interpolate()).to_numpy()
                write_yearly_data_to_df(df, project.Index, filled, pattern)

In [None]:
# Fills the yearly cost/volume columns of df in place (rows with valid_vc only)
exponential_interpolate(df)

Raise non-null max volumes to at least 10 to avoid very low values on log-scale volume plots (min volumes are left as they are). Then make each max at least as large as its min.

In [None]:
for year in range(first_year, last_year + 1):
    # Step 1: floor the non-null max volume at 10 so log-scale volume plots
    # do not get dragged down by very small values.
    col = 'vol_%s_max' % year
    sel = df.valid_vc & df[col].notna() & (df[col] < 10.0)
    df.loc[sel, col] = 10.0

    # Step 2: for cost and then volume, overwrite the max with the min wherever the
    # max is not strictly greater (this also covers a missing max, since NaN > x is False).
    for min_pattern, max_pattern in (('cost_%s_min', 'cost_%s_max'), ('vol_%s_min', 'vol_%s_max')):
        min_col, col = min_pattern % year, max_pattern % year
        sel = df.valid_vc & (~(df[col] > df[min_col]))
        df.loc[sel, col] = df.loc[sel, min_col]

Run some basic checks

In [None]:
# Run basic checks after
# All years can be checked now that the yearly columns have been interpolated and clamped
basic_checks()

## Other cleaning and standardization

Flatten tags for easier filtering

In [None]:
# Every distinct tag used by any project, and the same tags in sorted order
tagset = {tag for tags in df.tags for tag in tags}
taglist = sorted(tagset)

In [None]:
# Create boolean field for each tag, e.g. t_dac and t_ocean
for tag in taglist:
    df['t_%s' % tag] = df.tags.map(lambda x: tag in x)
# n_tags
df['n_tags'] = df.tags.map(len)

In [None]:
#df.head()

If pt is missing, fill in from tags column

In [None]:
# get_pt (from helper.py) is applied to every row's tags, but the result is only
# written where pt is currently null; index alignment discards the rest.
sel = df.pt.isna()
df.loc[sel, 'pt'] = df.tags.map(get_pt)

Extract different aggregation groups

In [None]:
# Map each primary tag to the distinct projection_group values found among its projects
print('Available projection groups by primary tag:')
projection_groups = {
    pt: list(df[df.pt == pt].projection_group.unique())
    for pt in primary_tags
}
for pt, groups in projection_groups.items():
    print('%16s: %s' % (pt, groups))

In [None]:
# Get selector the prefers "aggregate" grouping but falls back to "company" if not available

# Build a row selector that, per primary tag, takes the "aggregate" projection group
# when that tag has one and otherwise falls back to the "company" group.
sel_prefer_aggregate = False & df.valid_vc    # Initially select nothing
for pt, groups in projection_groups.items():
    which_group = 'aggregate' if 'aggregate' in groups else 'company'
    sel_this = (df.pt == pt) & (df.projection_group == which_group)
    print('%16s: selected %3d entries from group %s' % (pt, sel_this.sum(), which_group))
    sel_prefer_aggregate |= sel_this

In [None]:
# The two orderings imported from helper.py must contain exactly the same tags
assert set(primary_tags_carbonplan_pt_order) == primary_tag_set, 'Set of primary tags must be the same!'
print('Primary tags in plot order:\n', primary_tags)
print('Primary tags in pt order:  \n', primary_tags_carbonplan_pt_order)

In [None]:
# Check that pt column is filled in correctly
assert (df.pt != df.tags.map(get_pt)).sum() == 0, 'Something went wrong'
# Add clr column
df['clr'] = df.tags.map(get_clr)

In [None]:
# Does any project not have a primary tag?
# Counts rows whose pt is the literal string 'none' or is null
print(((df.pt == 'none') | (df.pt.isna())).sum(), 'projects are missing a primary tag')

# Look at data

In [None]:
# First five rows of the cleaned table
df.head(5)

In [None]:
# Print all column names using the lsprint helper from helper.py
lsprint(df.columns.to_list())

In [None]:
#display_full(df.head(1))

In [None]:
# Number of projects carrying each tag (sum of the boolean t_<tag> column)
for tag in taglist:
    print('%4d: %s' % (df['t_%s' % tag].sum(), tag))

In [None]:
#display_full(df[df.t_dac & df.t_mineralization])

# Plots

## Reproduce Carbon Plan plots

In [None]:
# For these plots, filter just to the 'company' projection group

# df_ holds only the rows of the 'company' projection group; it feeds the four plots below
df_ = df[df.projection_group == 'company']

In [None]:
# One marker per company-level project (row position on y, volume on a log x axis),
# coloured by primary tag.
figsize(18,18)
# Legend handle per primary tag, filled in only for tags that are actually plotted.
# Pre-seeding every tag with None would pass None handles to legend() for unused tags.
clr_handles = {}
for ii, project in enumerate(df_.itertuples()):
    clr = project.clr
    volume = project.volume_value
    hh, = semilogx(volume, ii, 'o', mec=clr, mfc=clr, ms=15)
    clr_handles[project.pt] = hh
xlabel('Volume (tons)')
ylabel('Project ID')
# Keep the primary_tags ordering, followed by any other pt values that were plotted
legend_tags = [tag for tag in primary_tags if tag in clr_handles]
legend_tags += [tag for tag in clr_handles if tag not in primary_tags]
legend([clr_handles[tag] for tag in legend_tags], legend_tags)
savefigs('carbon_plan_type_vol_separate')

In [None]:
# Strip plot of project volume: one row per primary tag, log x axis
figsize(18,4)
for project in df_.itertuples():
    clr = project.clr
    if project.pt == 'none':
        # Untagged projects go on the row below all named tags
        y_coord = -len(primary_tags)
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.volume_value, y_coord, 'o', mec=clr, mfc=clr, ms=20)
xlabel('Volume (tons)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_vol')

In [None]:
# Strip plot of project permanence: one row per primary tag, log x axis
figsize(18,4)
for project in df_.itertuples():
    clr = project.clr
    if project.pt == 'none':
        # Untagged projects go on the row below all named tags
        y_coord = -len(primary_tags)
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.permanence_value, y_coord, 'o', mec=clr, mfc=clr, ms=20)
xlabel('Permanence (years)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_permanence')

In [None]:
# Strip plot of project cost: one row per primary tag, log x axis
figsize(18,4)
for project in df_.itertuples():
    clr = project.clr
    if project.pt == 'none':
        # Untagged projects go on the row below all named tags
        y_coord = -len(primary_tags)
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.cost_value, y_coord, 'o', mec=clr, mfc=clr, ms=20)
xlabel('Cost ($/ton)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_cost')

## Vol-vs-Cost

In [None]:
def nice_vc_axes(skip_x=False):
    '''Apply readable tick formatting to the current volume-vs-cost axes.

    The y axis gets dollar-formatted labels. Unless `skip_x` is True, the x
    axis gets ticks at every power of ten from 1 to 100G with short labels,
    and the axis limits in effect before the call are restored afterwards.
    '''
    gca().yaxis.set_major_formatter('${x:1.0f}')
    if not skip_x:
        # Setting ticks can widen the view, so remember the limits and put them back
        limits = axis()
        # Float base: with a 32-bit default integer, 10 ** arange(12) overflows from 10**10 on
        xticks(10.0 ** arange(3*4),
               ['1', '10', '100', '1k', '10k', '100k', '1M', '10M', '100M', '1G', '10G', '100G'])
        axis(limits)

In [None]:
def plot_vc_to_time(df, start=2020, end=2025, logx=True, logy=True, decay_cost=False, save_as=None):
    '''Plot each project's volume/cost path from `start` to `end`, emphasising `end`.

    Every row of `df` is drawn as a line with hollow markers through its yearly
    (volume, cost) midpoints, plus a larger filled marker and the applicant name
    at the `end` year. With `decay_cost` the costs are first multiplied by the
    row's decay_cost_mult. `logx` / `logy` switch the axes to log scale, and
    `save_as` (if given) is passed to savefigs.
    '''
    # Positions of the requested years inside the per-year arrays
    last_pos = end - first_year
    window = slice(start - first_year, last_pos + 1)

    for project in df.itertuples():
        colour = project.clr
        vols = get_vols(project)
        costs = get_costs(project) * (project.decay_cost_mult if decay_cost else 1.0)

        # Trajectory, then the highlighted final year with its label
        plot(vols[window], costs[window], '-o', c=colour, mec=colour, mfc='w', ms=14)
        plot(vols[last_pos], costs[last_pos], 'o', mec=colour, mfc=colour, ms=20)
        text(vols[last_pos], costs[last_pos], '   %s' % project.applicant)

    # Year stamp in the top-left corner of the axes
    annotate('%s' % end, xy=(.03, .93), xycoords='axes fraction', fontsize=32)
    xlabel('Volume (tons)')
    ylabel('Cost ($/ton)')
    if logx:
        gca().set_xscale('log')
    if logy:
        gca().set_yscale('log')
    nice_vc_axes(skip_x=not logx)
    tight_layout()
    if save_as:
        savefigs(save_as)

In [None]:
# 2020-only snapshot of the company-level projects, without decay adjustment; not saved
figsize(18,12)
sel = (df.valid_vc & (df.projection_group == 'company'))
#plot_vc_to_time(df[sel], save_as='carbon_plan_vol_cost_pt')
plot_vc_to_time(df[sel], end=2020)

In [None]:
# Same 2020 snapshot, with costs multiplied by each project's decay_cost_mult; not saved
figsize(18,12)
sel = (df.valid_vc & (df.projection_group == 'company'))
#plot_vc_to_time(df[sel], save_as='carbon_plan_vol_cost_pt')
plot_vc_to_time(df[sel], decay_cost=True, end=2020)

In [None]:
# Disabled cell: the body never runs.
# NOTE(review): the comment below says "linear", but the call passes logx=True, logy=True
# and saves under a *_logx_logy name -- confirm which was intended before re-enabling.
if False:
    # Linear versions for Jenny
    figsize(18,12)
    sel = (df.valid_vc & (df.projection_group == 'company'))
    #plot_vc_to_time(df[sel], save_as='carbon_plan_vol_cost_pt')
    plot_vc_to_time(df[sel], logx=True, logy=True, end=2035, save_as='carbon_plan_vol_cost_pt_logx_logy')

In [None]:
# Plot data to last year (company-level projects, no decay adjustment) and save as PNG + PDF
figsize(18,12)
sel = (df.valid_vc & (df.projection_group == 'company'))
year = last_year
plot_vc_to_time(df[sel], end=year, save_as='carbon_plan_vol_cost_pt_nodecay')

In [None]:
# Plot data to last year with the decay cost multiplier applied, and save as PNG + PDF.
# NOTE(review): the "1.015" in the file name is hard-coded; keep it in step with the wacc
# actually used to compute decay_cost_mult.
figsize(18,12)
sel = (df.valid_vc & (df.projection_group == 'company'))
year = last_year
plot_vc_to_time(df[sel], decay_cost=True, end=year, save_as='carbon_plan_vol_cost_pt_withdecay_1.015')

In [None]:
# Expensive: generate each frame for animation.
# One saved figure per year (vol_cost_pt_y<year>), each showing the path from 2020 up to
# that year; the figure is cleared before every frame.
figsize(18,12)
sel = (df.valid_vc & (df.projection_group == 'company'))
for year in range(first_year, last_year + 1):
    clf()
    plot_vc_to_time(df[sel], end=year, save_as='vol_cost_pt_y%d' % year)

In [None]:
# Plot data to last year. Aggregate version: uses the sel_prefer_aggregate row selector
# instead of the plain 'company' filter. Not saved.
figsize(18,12)
sel = (df.valid_vc & sel_prefer_aggregate)
year = last_year
plot_vc_to_time(df[sel], end=year)

In [None]:
# Expensive: generate each frame for animation. Aggregate version.
# One saved figure per year (vol_cost_pt_ag_y<year>); the figure is cleared before every frame.
figsize(18,12)
sel = (df.valid_vc & sel_prefer_aggregate)
for year in range(first_year, last_year + 1):
    clf()
    plot_vc_to_time(df[sel], end=year, save_as='vol_cost_pt_ag_y%d' % year)

## Plot Individual Supply curves

In [None]:
#print('Sequestration types:')
#df.pt.unique()

In [None]:
#print('Sequestration types that have some cost data:')
#df[(df.cost_rating != -9999)].pt.unique()

In [None]:
#display_full(df[(df.cost_rating != -9999) & (df.pt == 'mineralization')].sort_values(by='cost_value'))

In [None]:
# Add private columns to store temporary data. May be used by functions as they wish
#def zero_priv():
#    df['_v'] = 0.0
#    df['_c'] = 0.0
#zero_priv()

In [None]:
# NEW

def plot_vcc_at_time(df_in, year=2020, logx=False, logy=False, save_as=None, ax=None):
    '''Plot a supply curve (cost against cumulative volume) for a single year.

    Each project's volume and cost for `year` are the means of its min/max
    columns. Projects are ordered from cheapest to most expensive and drawn as
    filled steps: each step spans the project's share of the cumulative volume
    at the height of its cost, with a marker and the applicant name at its left
    edge.

    Parameters:
        df_in: projects to plot; not modified (a copy is used internally).
        year: year whose vol_<year>_* / cost_<year>_* columns are used.
        logx, logy: switch the corresponding axis to log scale.
        save_as: if given, base file name passed to savefigs.
        ax: optional axis limits, passed to axis() to force a common view.
    '''
    # Work on a copy so the caller's frame does not gain the temporary columns
    curve = df_in.copy()

    # Use average volume and cost for now
    curve['vv'] = curve.loc[:, ['vol_%d_min' % year, 'vol_%d_max' % year]].mean(axis=1)
    curve['cc'] = curve.loc[:, ['cost_%d_min' % year, 'cost_%d_max' % year]].mean(axis=1)

    # Sort from cheapest to most expensive, then accumulate volume in that order
    curve = curve.sort_values(by='cc')
    curve['vv_cumsum'] = curve.vv.cumsum()

    # The first step starts at 10 rather than 0 -- presumably so it stays drawable
    # on a log x axis.
    last_vv_cumsum = 10.0
    for proj in curve.itertuples():
        # Left point and label
        plot([last_vv_cumsum], [proj.cc], 'o', c=proj.clr, ms=16)
        text(last_vv_cumsum, proj.cc, '  %s  ' % proj.applicant)

        # Fill below
        fill_between([last_vv_cumsum, proj.vv_cumsum], [proj.cc, proj.cc], color=proj.clr)

        last_vv_cumsum = proj.vv_cumsum

    annotate('%s' % year, xy=(.02, .93), xycoords='axes fraction', fontsize=32, va='top')
    xlabel('Cumulative Volume (tons)')
    ylabel('Cost ($/ton)')
    if logx: gca().set_xscale('log')
    if logy: gca().set_yscale('log')
    nice_vc_axes(skip_x=True)
    if ax is not None:
        axis(ax)
    if save_as:
        savefigs(save_as)

In [None]:
# Plot a single sector supply curve for 2020
figsize(18,4)
sel = (df.valid_vc & (df.projection_group == 'company') & (df.pt == 'soil'))
plot_vcc_at_time(df[sel], 2020, logx=False, logy=True)

In [None]:
# Plot a single sector supply curve for the next three years
figsize(18,4)
sel = (df.valid_vc & (df.projection_group == 'company') & (df.pt == 'soil'))
plot_vcc_at_time(df[sel], 2022, logx=False, logy=True)
ax=axis(); clf()
print(ax)
plot_vcc_at_time(df[sel], 2020, logx=False, logy=True, ax=ax)
figure()
plot_vcc_at_time(df[sel], 2021, logx=False, logy=True, ax=ax)
figure()
plot_vcc_at_time(df[sel], 2022, logx=False, logy=True, ax=ax)

In [None]:
# Plot all sectors for 2020
figsize(18,10)
sel = (df.valid_vc & (df.projection_group == 'company'))
plot_vcc_at_time(df[sel], 2020, logx=False, logy=True)

In [None]:
# Plot all sectors for 2020 - 2035
# Expensive: generate each frame for animation
figsize(18,10)
sel = (df.valid_vc & (df.projection_group == 'company'))
# Plot and compute final axes
plot_vcc_at_time(df[sel], 2035, logx=False, logy=True)
ax = axis()
for year in range(first_year, last_year + 1):
    clf()
    plot_vcc_at_time(df[sel], year, logx=False, logy=True, save_as='vcc_y%d' % year, ax=ax)

In [None]:
# Plot all sectors for 2020 - 2035, Aggregate version
# Expensive: generate each frame for animation
figsize(18,10)
sel = (df.valid_vc & sel_prefer_aggregate)
# Plot and compute final axes
plot_vcc_at_time(df[sel], 2035, logx=False, logy=True)
ax = axis()
for year in range(first_year, last_year + 1):
    clf()
    plot_vcc_at_time(df[sel], year, logx=False, logy=True, save_as='vcc_ag_y%d' % year, ax=ax)

In [None]:
# FIXME: plot_single_vol_cost_curve is only defined in a later cell (under "Scratch"),
# so this cell raises NameError on a fresh top-to-bottom run.
figsize(18,4)
df_filt = df[(df.cost_rating != -9999) & (df.pt == 'mineralization')]
plot_single_vol_cost_curve(df_filt, save_as='vol_cost_mineralization')

In [None]:
# Repeats the 2020 company-level volume-vs-cost snapshot plotted in the Vol-vs-Cost section
figsize(18,12)
sel = (df.valid_vc & (df.projection_group == 'company'))
#plot_vc_to_time(df[sel], save_as='carbon_plan_vol_cost_pt')
plot_vc_to_time(df[sel], end=2020)

In [None]:
# One figure per primary tag (in primary_tags order) that has at least one row whose
# cost_rating is not -9999.
# FIXME: plot_single_vol_cost_curve is only defined in a later cell (under "Scratch"),
# so this cell raises NameError on a fresh top-to-bottom run.
for pt in primary_tags:
    if pt in df[(df.cost_rating != -9999)].pt.unique():
        df_filt = df[(df.cost_rating != -9999) & (df.pt == pt)]
        figure()
        plot_single_vol_cost_curve(df_filt, save_as='vol_cost_%s' % pt)

In [None]:
# Same per-tag figures as the previous cell, but iterating over the pt values in the order
# they appear in the data; files are saved under the same vol_cost_<pt> names.
# FIXME: plot_single_vol_cost_curve is only defined in a later cell (under "Scratch"),
# so this cell raises NameError on a fresh top-to-bottom run.
for pt in df[(df.cost_rating != -9999)].pt.unique():
    df_filt = df[(df.cost_rating != -9999) & (df.pt == pt)]
    figure()
    plot_single_vol_cost_curve(df_filt, save_as='vol_cost_%s' % pt)

**Plot Combined Supply curves**

In [None]:
def plot_vol_cost_curve(df, save_as=None, plot_legend=True):
    '''Build per-primary-tag step outlines of the combined supply curve.

    Projects are sorted by cost_value and stacked along the cumulative volume
    axis. For every project four points (a rectangle outline from zero up to
    its cost and back) are appended to the point list of its primary tag.

    NOTE(review): this function is still in a debugging state. Only the
    'mineralization' outline is plotted, and the function returns that tag's
    DuckStruct immediately afterwards, so the labelling/saving code at the end
    only runs when no project has that tag (and then returns None).

    Parameters:
        df: projects with cost_value, volume_value, pt and clr columns.
        save_as: if given, base file name passed to savefigs.
        plot_legend: whether to add a legend.
    '''
    # Creates a sorted copy
    df = df.sort_values(by='cost_value')

    df['volume_cumsum'] = df.volume_value.cumsum()

    # Each entry is [vol, cost].
    # Separate curve for each primary tag
    pt_dat = {pt: DuckStruct(vc_list=[[0, 0]], clr=None) for pt in df.pt.unique()}

    # Left edge of the current project = right edge of the previous one in sorted order.
    # Carried along explicitly: indexing volume_cumsum with [ii-1] would look up the
    # index *label* ii-1, which is a different row once the frame has been re-sorted.
    left_vol = 0
    for ii, project in enumerate(df.itertuples()):
        pt = project.pt
        duck = pt_dat[pt]
        duck.clr = project.clr
        vc = duck.vc_list

        right_vol = project.volume_cumsum
        cost = project.cost_value
        print('ii is', ii, 'and adding left_vol and right vol', left_vol, right_vol)
        # Four points
        vc.append([left_vol, 0])
        vc.append([left_vol, cost])
        vc.append([right_vol, cost])
        vc.append([right_vol, 0])

        left_vol = right_vol

    for pt, duck in pt_dat.items():
        duck.vc_arr = array(duck.vc_list)
        if pt == 'mineralization':
            #fill_between(duck.vc_arr[:,0], duck.vc_arr[:,1], color=duck.clr)
            print('plotting')
            plot(duck.vc_arr[:,0], duck.vc_arr[:,1], 'o-', color=duck.clr)
            display(duck.vc_arr)
            return(duck)
    xlabel('Volume (tons)')
    ylabel('Cost ($/ton)')
    if plot_legend:
        # `pt` here is whatever tag the loop above ended on
        legend((pt,), loc='upper left')
    tight_layout()
    ylim(bottom=0)
    if save_as:
        savefigs(save_as)
        

In [None]:
# Primary tags that have at least one row whose cost_rating is not -9999
df[(df.cost_rating != -9999)].pt.unique()

In [None]:
# Combined curve over all rows whose cost_rating is not -9999; `duck` receives whatever
# plot_vol_cost_curve returns.
figsize(18,6)
df_filt = df[(df.cost_rating != -9999)]
duck = plot_vol_cost_curve(df_filt)

# Scratch

In [None]:
# Scratch: show every column of the single row at position 91
display_full(df.iloc[91:92])


In [None]:
# OLD
# Disabled (never runs): earlier gap-filling approach that carried the previous year's
# value forward. It tests for blanks with == '', i.e. it predates the switch to leaving
# missing values as NaN.
if False:
    # Fill in missing data with copies of old data until new data is encountered.
    for year in range(2021, 2036):
        prev_min = 'cost_%s_min' % (year - 1)
        this_min = 'cost_%s_min' % year
        sel = df.valid_vc & (df[this_min] == '')
        df.loc[sel, this_min] = df[sel][prev_min]

        prev_max = 'cost_%s_max' % (year - 1)
        this_max = 'cost_%s_max' % year
        sel = df.valid_vc & (df[this_max] == '')
        df.loc[sel, this_max] = df[sel][[this_min, prev_max]].max(axis=1)

        prev_min = 'vol_%s_min' % (year - 1)
        this_min = 'vol_%s_min' % year
        sel = df.valid_vc & (df[this_min] == '')
        df.loc[sel, this_min] = df[sel][prev_min]

        prev_max = 'vol_%s_max' % (year - 1)
        this_max = 'vol_%s_max' % year
        sel = df.valid_vc & (df[this_max] == '')
        df.loc[sel, this_max] = df[sel][[this_min, prev_max]].max(axis=1)

In [None]:
#OLD
def plot_single_vol_cost_curve(df, save_as=None, plot_legend=True):
    '''Plot a filled supply curve (cost against cumulative volume) for one group.

    Projects are sorted by cost_value; each contributes a flat segment at its
    cost that spans its volume_value. The colour and the legend label are taken
    from the last (most expensive) project.

    Parameters:
        df: projects with cost_value, volume_value, clr and pt columns.
        save_as: if given, base file name passed to savefigs.
        plot_legend: whether to add a legend naming the primary tag.

    With an empty `df` nothing is drawn or saved.
    '''
    df = df.sort_values(by='cost_value')
    if len(df) == 0:
        # No rows means no colour or label to use; bail out instead of hitting
        # an unbound name (or silently picking up a stale notebook global).
        return

    cv_cost = []
    cv_vol = []
    for project in df.itertuples():
        # Left point: where the previous segment ended
        cv_vol.append(0 if len(cv_vol) == 0 else cv_vol[-1])
        cv_cost.append(project.cost_value)
        # Right point: one project volume further along
        cv_vol.append(cv_vol[-1] + project.volume_value)
        cv_cost.append(project.cost_value)
        clr = project.clr
        pt = project.pt
    cv_cost = array(cv_cost)
    cv_vol = array(cv_vol)
    plot(cv_vol, cv_cost, 'o', mfc='w', mec=clr, ms=12)
    fill_between(cv_vol, cv_cost, color=clr)
    xlabel('Volume (tons)')
    ylabel('Cost ($/ton)')
    if plot_legend:
        legend((pt,), loc='upper left')
    tight_layout()
    ylim(bottom=0)
    if save_as:
        savefigs(save_as)

# Misc static plots

In [None]:
# Time axis shared by the three example decay plots below: 0, 1, ..., 29
tt = arange(30)

In [None]:
# Example: exponential decay exp(-t/15), with the axes drawn in black and a dotted
# marker line at t = 15
plot(tt, exp(-(1/15) * tt), lw=3)
axhline(0, color='k')
axvline(0, color='k')
axvline(15, ls=':', color='k')
savefigs('example_decay_exponential')

In [None]:
# Example: linear decay from 1 at t = 0 down to 0 at t = 15, clipped at 0 afterwards
plot(tt, maximum(1 - (1/15 * tt), 0), lw=3)
axhline(0, color='k')
axvline(0, color='k')
axvline(15, ls=':', color='k')
savefigs('example_decay_linear')

In [None]:
# Example: step decay -- 1 while t < 15, 0 from t = 15 onwards
plot(tt, (tt < 15) * 1.0, lw=3)
axhline(0, color='k')
axvline(0, color='k')
axvline(15, ls=':', color='k')
savefigs('example_decay_step')