In [None]:
# JBY: Set up env using "ipython --pylab" imports
# NOTE: later cells use bare names such as array, plot, semilogx, savefig,
# figsize and rcParams without importing them, so this magic must run first.
%pylab

In [None]:
%autoreload 2

In [None]:
# Pick the matplotlib backend: the "osx" GUI backend when interactive,
# otherwise figures are rendered inline in the notebook.
interactive = False
if interactive:
    %matplotlib osx
else:
    %matplotlib inline
# Notebook-wide plot defaults (individual plot cells call figsize again).
figsize(17,6)
rcParams['font.size'] = 16

In [None]:
from datetime import datetime
import pandas as pd
import json
import os

#from pyextra import looser

# Misc functions

In [None]:
def display_full(df):
    '''Display df without truncating cell contents and with up to 2000 columns shown.

    df: the DataFrame (or other displayable object) to render.
    The pandas display options are changed only for the duration of the call.
    '''
    # max_colwidth=None means "no limit"; the older -1 spelling is deprecated
    # and rejected by current pandas.
    with pd.option_context('display.max_columns', 2000,
                           'display.max_colwidth', None):
        display(df)

In [None]:
def savefigs(name):
    '''Save the current figure twice: as "<name>.png" and then as "<name>.pdf".'''
    for pattern in ('%s.png', '%s.pdf'):
        savefig(pattern % name)

# Load data

In [None]:
# Read ../data/carbonplan_projects.json (relative to the current working directory).
# JSON text is UTF-8, so decode it explicitly rather than with the
# platform-dependent default encoding.
projects_path = os.path.join(os.getcwd(), '..', 'data', 'carbonplan_projects.json')
with open(projects_path, 'r', encoding='utf-8') as ff:
    whole_json = json.load(ff)

In [None]:
# Flatten the records under 'projects' into a DataFrame. The 'tags' and
# 'metrics' columns still hold nested values per project; later cells
# expand them into flat columns.
df = pd.json_normalize(whole_json['projects'])

In [None]:
#df = pd.json_normalize(whole_json['projects'], 'metrics', record_prefix='metrics_')

In [None]:
df.head()

Flatten tags for easier filtering

In [None]:
# Every distinct tag used by any project, plus a sorted list of them
tagset = {tag for tags in df.tags for tag in tags}
taglist = sorted(tagset)

In [None]:
# One boolean column per tag (t_<tag>, e.g. t_dac and t_ocean):
# True where the project's tag list contains that tag.
for tag in taglist:
    has_tag = df['tags'].map(lambda project_tags: tag in project_tags)
    df['t_%s' % tag] = has_tag

In [None]:
# n_tags: how many tags each project carries
df['n_tags'] = df['tags'].map(len)

In [None]:
#df.head()

Flatten metrics

In [None]:
# Names of the per-project metrics to expand into flat columns
metrics = [
    'mechanism',
    'volume',
    'negativity',
    'permanence',
    'additionality',
    'cost',
    'specificity',
]
# Fields read from each metric entry
metric_fields = [
    'value',
    'units',
    'rating',
    'notes',
    'comment',
]

In [None]:
# Expand each project's nested metrics into its own small DataFrame
# (one entry in metrics_dfs per project, in row order).
metrics_dfs = [pd.json_normalize(project_metrics) for project_metrics in df.metrics]

In [None]:
# Add a flat column "<metric>_<field>" (e.g. cost_value) for every
# metric/field pair, taking the first entry of each project's metrics
# table whose 'name' matches the metric.
for metric in metrics:
    for metric_field in metric_fields:
        colname = '%s_%s' % (metric, metric_field)
        df[colname] = [
            dfx.loc[dfx['name'] == metric, metric_field].iloc[0]
            for dfx in metrics_dfs
        ]

In [None]:
# Clean up: drop the columns that are no longer needed, in place;
# columns that are already gone are silently skipped.
df.drop(columns=['type', 'metrics'], errors='ignore', inplace=True)

# Look at data

In [None]:
df.head(5)

In [None]:
#for col in df.columns:
#    print(col)

In [None]:
df.columns

In [None]:
display_full(df.head(1))

In [None]:
# Print the number of projects carrying each tag
for tag in taglist:
    n_projects = df['t_%s' % tag].sum()
    print('%4d: %s' % (n_projects, tag))

# Plots

In [None]:
df[df.t_dac].shape

In [None]:
_colors = {
    'forests': (49.0, 70.0, 42.0),
    'soil': (92.0, 59.0, 33.0),
    'biomass': (83.0, 75.0, 37.0),
    'ocean': (39.0, 73.0, 77.0),
    'mineralization': (66.0, 71.0, 77.0),
    'dac': (74.0, 52.0, 85.0),
}
colors = {k: array(v)/100.0 for k, v in _colors.items()}

In [None]:
# The primary tags are the tags that have a palette entry, in palette order
primary_tags = list(colors)
primary_tag_set = set(primary_tags)

In [None]:
# Preview the palette: one large marker per primary tag, left to right
for ii, (key, swatch) in enumerate(colors.items()):
    plot(ii, 0, 'o', ms=20, mfc=swatch, mec=swatch)

In [None]:
def get_pt(tags):
    '''Return the project's primary tag: the first entry of primary_tags that occurs in tags.

    Returns the string 'none' when tags contains no primary tag.
    '''
    matches = (candidate for candidate in primary_tags if candidate in tags)
    return next(matches, 'none')

In [None]:
def get_clr(tags, default_clr=(.7, .7, .7)):
    '''Returns the color of the first primary tag found in tags, or default_clr if there is none.

    tags: the tag names of one project.
    default_clr: color returned when the project has no primary tag.
    '''
    pt = get_pt(tags)
    # 'none' is get_pt's marker for "no primary tag" and has no entry in
    # colors, so the check must be made on pt itself.
    return default_clr if pt == 'none' else colors[pt]

In [None]:
# Derive two per-project columns from the tag list:
#   pt  - the primary tag (or 'none')
#   clr - the plotting color for that primary tag
for column, derive in (('pt', get_pt), ('clr', get_clr)):
    df[column] = df['tags'].map(derive)

In [None]:
# Sanity check: how many projects ended up without a primary tag?
n_missing = (df['pt'] == 'none').sum()
print(n_missing, 'projects are missing a primary tag')

In [None]:
# One marker per project on its own row: x = volume (log axis),
# y = the project's position in df, colored by primary tag.
figsize(18,18)
for row, project in enumerate(df.itertuples()):
    semilogx(project.volume_value, row, 'o', mec=project.clr, mfc=project.clr, ms=15)
xlabel('Volume (tons)')
ylabel('Project ID')
savefigs('carbon_plan_type_vol_separate')

In [None]:
# Strip plot of volume (log axis): one row per primary tag, in primary_tags
# order from the top down; projects without a primary tag go on an extra
# row below the last one.
figsize(18,4)
untagged_row = -len(primary_tags)
for project in df.itertuples():
    if project.pt == 'none':
        y_coord = untagged_row
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.volume_value, y_coord, 'o', mec=project.clr, mfc=project.clr, ms=20)
xlabel('Volume (tons)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_vol')

In [None]:
# Strip plot of permanence (log axis): one row per primary tag, in
# primary_tags order from the top down; projects without a primary tag go
# on an extra row below the last one.
figsize(18,4)
untagged_row = -len(primary_tags)
for project in df.itertuples():
    if project.pt == 'none':
        y_coord = untagged_row
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.permanence_value, y_coord, 'o', mec=project.clr, mfc=project.clr, ms=20)
xlabel('Permanence (years)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_permanence')

In [None]:
# Strip plot of cost (log axis): one row per primary tag, in primary_tags
# order from the top down; projects without a primary tag go on an extra
# row below the last one.
figsize(18,4)
untagged_row = -len(primary_tags)
for project in df.itertuples():
    if project.pt == 'none':
        y_coord = untagged_row
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.cost_value, y_coord, 'o', mec=project.clr, mfc=project.clr, ms=20)
xlabel('Cost ($/ton)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_cost')

Supply curves

In [None]:
# Projects ordered by cost, leaving out rows whose cost rating is -9999
# (presumably a "missing" sentinel - TODO confirm against the data source).
df.loc[df['cost_rating'] != -9999].sort_values(by='cost_value')

# HERE!

In [None]:
# NOTE(review): this repeats the earlier permanence strip plot at a taller
# figure size and saves under the same base name, so it overwrites the
# carbon_plan_type_permanence.png/.pdf written above - confirm that is intended.
figsize(18,8)
untagged_row = -len(primary_tags)
for project in df.itertuples():
    if project.pt == 'none':
        y_coord = untagged_row
    else:
        y_coord = -primary_tags.index(project.pt)
    semilogx(project.permanence_value, y_coord, 'o', mec=project.clr, mfc=project.clr, ms=20)
xlabel('Permanence (years)')
yticks([])
tight_layout()
savefigs('carbon_plan_type_permanence')