# Atmospheric, oceanic and land data handling

In this notebook we discuss the subtleties of how NetCDF-SCM handles different data 'realms' and why these choices are made. The realms of interest to date are atmosphere, ocean and land, and the distinction between the realms follows the [CMIP6 realm controlled vocabulary](https://github.com/WCRP-CMIP/CMIP6_CVs/blob/master/CMIP6_realm.json).

TODO: make this notebook follow something like the following outline

1. Oceanic data handling
    - choices
        - areacello and sftof weighted means
        - assume everything is ocean (i.e. all land weights are zero)
    - why
        - there is no land box relevant data in ocean output
        - area as otherwise your output is nonsense
        - sftof so you're not overweighting cells which aren't really representative of the ocean
        - show plots to show difference made by these choices
1. Land data handling
    - choices
        - areacella and sftlf weighted means
        - assume everything is land (i.e. all ocean weights are zero)
    - why
        - there is no ocean box relevant data in land output
        - area as otherwise your output is nonsense
        - sftlf so you're not overweighting cells which aren't really representative of the land
        - show plots to show difference made by these choices
1. Atmospheric data handling
    - choices
        - areacella weighted means
            - land box means are further weighted by sftlf
            - ocean box means are further weighted by (100 - sftlf)
    - why
        - contribution of data to SCM boxes depends on land fraction
        - area as otherwise your output is nonsense
        - sftlf so you split land/ocean data appropriately
        - show plots to show difference made by these choices

In [1]:
from os.path import join

import iris
import iris.quickplot as qplt
import matplotlib.pyplot as plt
import numpy as np

from netcdf_scm.iris_cube_wrappers import CMIP6OutputCube
from netcdf_scm.utils import broadcast_onto_lat_lon_grid

<IPython.core.display.Javascript object>

In [2]:
from pandas.plotting import register_matplotlib_converters
# Register pandas' converters with matplotlib before any plotting is done.
# NOTE(review): presumably needed so the time axes of the plots below render
# without warnings - confirm against the pandas version in use.
register_matplotlib_converters()

In [3]:
import logging

# Surface warnings (and anything more severe) from the libraries used below.
root_logger = logging.getLogger()
root_logger.setLevel(logging.WARNING)

# Only attach a stream handler if the root logger does not already have one.
# Without this guard every re-run of the cell stacks another handler and each
# log message is printed once per handler.
if not any(
    type(handler) is logging.StreamHandler for handler in root_logger.handlers
):
    root_logger.addHandler(logging.StreamHandler())

In [4]:
# Root of the test data used throughout this notebook, expressed relative to
# the working directory (".." steps up one level first).
_test_data_parts = ("..", "tests", "test-data")
DATA_PATH_TEST = join(*_test_data_parts)

In [13]:
def _cmip6_test_file(table_id, variable_id, grid_label):
    """
    Build the path to one of the CNRM-ESM2-1 historical test files.

    The path is assembled component by component rather than by running
    ``str.replace`` over a complete path, so substrings such as ``"gr"`` or
    ``"tas"`` appearing elsewhere in the path (e.g. in ``DATA_PATH_TEST``) can
    never be rewritten by accident.

    Parameters
    ----------
    table_id : str
        Table identifier used in both the directory and the filename,
        e.g. ``"Amon"``.

    variable_id : str
        Variable identifier used in both the directory and the filename,
        e.g. ``"tas"``.

    grid_label : str
        Grid label used in both the directory and the filename, e.g. ``"gr"``.

    Returns
    -------
    str
        Path to the requested file underneath ``DATA_PATH_TEST``.
    """
    filename = "{}_{}_CNRM-ESM2-1_historical_r2i1p1f2_{}_199701-199912.nc".format(
        variable_id, table_id, grid_label
    )
    return join(
        DATA_PATH_TEST,
        "cmip6output",
        "CMIP6",
        "CMIP",
        "CNRM-CERFACS",
        "CNRM-ESM2-1",
        "historical",
        "r2i1p1f2",
        table_id,
        variable_id,
        grid_label,
        "v20190125",
        filename,
    )


tas_file = _cmip6_test_file("Amon", "tas", "gr")
gpp_file = _cmip6_test_file("Lmon", "gpp", "gr")
csoilfast_file = _cmip6_test_file("Lmon", "cSoilFast", "gr")
hfds_file = _cmip6_test_file("Omon", "hfds", "gn")

## Oceans

Ocean model output is the simplest to deal with. 

In [12]:
def _load_cmip6_cube(path):
    """Return a ``CMIP6OutputCube`` holding the data loaded from ``path``."""
    loaded = CMIP6OutputCube()
    loaded.load_data_from_path(path)
    return loaded


tas = _load_cmip6_cube(tas_file)
gpp = _load_cmip6_cube(gpp_file)
csoilfast = _load_cmip6_cube(csoilfast_file)
# NOTE(review): this cube is loaded from ``hfds_file``; the ``fast`` suffix in
# its name looks like a copy-paste from ``csoilfast`` - confirm before renaming.
hfdsfast = _load_cmip6_cube(hfds_file)

Missing CF-netCDF measure variable 'areacello', referenced by netCDF variable 'hfds'


In [None]:
# Display the cube loaded from ``tas_file``.
tas.cube

In [None]:
# Display the cube loaded from ``gpp_file``.
gpp.cube

In [None]:
# Display the cube loaded from ``csoilfast_file``.
csoilfast.cube

In [None]:
# Fetch the "sftlf" metadata cube that goes with the tas data and display it.
# It is used below as a land-fraction weighting.
sftlf = tas.get_metadata_cube("sftlf")
sftlf.cube

In [None]:
# Fetch the "areacella" metadata cube that goes with the tas data and display
# it. It is used below as the cell-area weighting.
areacella = tas.get_metadata_cube("areacella")
areacella.cube

In [None]:
# Ask only for the land region, then pull that single cube out of the result.
_land_region = "World|Land"
tas_land = tas.get_scm_cubes(masks=[_land_region])[_land_region]

In [None]:
# Filled contours of the first slice along the leading dimension of the
# land-masked cube.
_tas_land_first_slice = tas_land.cube[0, :, :]
plt.figure(figsize=(16, 9))
qplt.contourf(_tas_land_first_slice);

In [None]:
# Plain mean over the horizontal grid: every cell counts equally.
tas_land_unweighted_mean = tas_land.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN
)

# Mean weighted by the areacella cell areas.
area_weights = broadcast_onto_lat_lon_grid(tas_land, areacella.cube.data)
tas_land_area_weighted_mean = tas_land.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN,
    weights=area_weights
)

# Make sure both horizontal coordinates have bounds before asking iris for its
# own area weights. Each coordinate is handled on its own: wrapping both
# ``guess_bounds`` calls in a single try/except meant that a latitude
# coordinate which already had bounds caused the longitude guess to be skipped.
for coord_name in ("latitude", "longitude"):
    horizontal_coord = tas_land.cube.coord(coord_name)
    if not horizontal_coord.has_bounds():
        horizontal_coord.guess_bounds()

# Mean weighted by the area weights iris derives from the cube's coordinates.
tas_land_default_area_weighted_mean = tas_land.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN,
    weights=iris.analysis.cartography.area_weights(tas_land.cube)
)

# Mean weighted by cell area multiplied by sftlf.
area_sftlf_weights = broadcast_onto_lat_lon_grid(tas_land, areacella.cube.data * sftlf.cube.data)
tas_land_area_sftlf_weighted_mean = tas_land.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN,
    weights=area_sftlf_weights
)

In [None]:
plt.figure(figsize=(16, 9))

# Each entry pairs a collapsed cube with the keyword arguments for its line.
_tas_land_lines = [
    (tas_land_unweighted_mean, {"label": "unweighted"}),
    (tas_land_area_weighted_mean, {"label": "areacella weighted"}),
    (
        tas_land_default_area_weighted_mean,
        {
            "label": "area weighted assuming regular grid",
            "linestyle": "--",
            "dashes": (7, 15),
        },
    ),
    (
        tas_land_area_sftlf_weighted_mean,
        {"label": "areacella-sftlf weighted", "linestyle": ":"},
    ),
]
for _mean_cube, _line_kwargs in _tas_land_lines:
    qplt.plot(_mean_cube, **_line_kwargs)

plt.legend();

In [None]:
# Map of sftlf with coastlines drawn on top for orientation.
_figure_size = (16, 9)
plt.figure(figsize=_figure_size)
qplt.contourf(sftlf.cube)
_sftlf_axes = plt.gca()
_sftlf_axes.coastlines();

In [None]:
def _to_fractions(raw_weights):
    """Rescale ``raw_weights`` so that they sum to one."""
    return raw_weights / raw_weights.sum()


# Each weighting is stored in a copy of the areacella cube so that it can be
# plotted on the same grid.

# Uniform weights: every cell is one (left unnormalised).
no_weights = areacella.cube.copy()
no_weights.data = np.ones_like(no_weights.data)

# Area weights computed by iris itself, taking the first slice along the
# leading dimension.
default_weights = areacella.cube.copy()
default_weights.data = _to_fractions(
    iris.analysis.cartography.area_weights(tas_land.cube)[0, :, :]
)

# Cell areas as supplied by areacella.
areacella_weights = areacella.cube.copy()
areacella_weights.data = _to_fractions(areacella_weights.data)

# Cell areas multiplied by sftlf, again taking the first slice.
areacella_sftlf_weights = areacella.cube.copy()
areacella_sftlf_weights.data = _to_fractions(area_sftlf_weights[0, :, :])

In [None]:
plt.figure(figsize=(16, 9))

# One panel per weighting, laid out on a 2x2 grid.
_weight_panels = (
    (221, no_weights),
    (222, areacella_weights),
    (223, default_weights),
    (224, areacella_sftlf_weights),
)
for _panel_position, _weights_cube in _weight_panels:
    plt.subplot(_panel_position)
    qplt.contourf(_weights_cube)
    plt.gca().coastlines()

In [None]:
# Full-size view of the areacella-sftlf weights.
_figure_size = (16, 9)
plt.figure(figsize=_figure_size)
qplt.contourf(areacella_sftlf_weights)
_weights_axes = plt.gca()
_weights_axes.coastlines();

In [None]:
# Check whether the mask on the first slice of the land cube matches, cell for
# cell, the places where sftlf is below 50.
_land_cube_mask = tas_land.cube[0, :, :].data.mask
_sftlf_below_50 = (sftlf.cube.data < 50).data
(_land_cube_mask == _sftlf_below_50).all()

In [None]:
# Restrict the plots to a latitude/longitude window.
lat_con = iris.Constraint(latitude=lambda cell: -45 < cell < -20)
lon_con = iris.Constraint(longitude=lambda cell: 140 < cell < 160)
_window = lat_con & lon_con

_tas_first_slice = tas.cube[0, :, :]
_tas_land_first_slice = tas_land.cube[0, :, :]

plt.figure(figsize=(16, 9))

# Dividing a slice by itself is done before extracting the window; the
# quotient is what gets plotted in the first two panels.
plt.subplot(131)
qplt.contourf((_tas_first_slice / _tas_first_slice).extract(_window))
plt.gca().coastlines()
plt.title("raw data")

plt.subplot(132)
qplt.contourf((_tas_land_first_slice / _tas_land_first_slice).extract(_window))
plt.gca().coastlines()
plt.title("masked data")

# Contour lines of sftlf at fixed levels.
plt.subplot(133)
qplt.contour(sftlf.cube.extract(_window), levels=[0, 40, 45, 50, 55, 60, 100])
plt.gca().coastlines()
plt.title("sftlf data");

In [None]:
plt.figure(figsize=(16, 9))

# (subplot position, cube to plot, title, extra contourf keyword arguments)
_window = lat_con & lon_con
_region_panels = [
    (221, tas_land.cube[0, :, :], "masked data", {"levels": np.arange(290, 314, 2)}),
    (222, sftlf.cube, "sftlf", {}),
    (223, areacella_sftlf_weights, "areacella-sftlf weighting", {}),
    (224, areacella_weights, "areacella weighting", {}),
]
for _position, _panel_cube, _panel_title, _contour_kwargs in _region_panels:
    plt.subplot(_position)
    qplt.contourf(_panel_cube.extract(_window), **_contour_kwargs)
    plt.gca().coastlines()
    plt.title(_panel_title)

In [None]:
# Plain mean over the horizontal grid: every cell counts equally.
csoilfast_unweighted_mean = csoilfast.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN
)

# Mean weighted by the areacella cell areas.
area_weights = broadcast_onto_lat_lon_grid(csoilfast, areacella.cube.data)
csoilfast_area_weighted_mean = csoilfast.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN,
    weights=area_weights
)

# Make sure both horizontal coordinates have bounds before asking iris for its
# own area weights. Each coordinate is handled on its own: wrapping both
# ``guess_bounds`` calls in a single try/except meant that a latitude
# coordinate which already had bounds caused the longitude guess to be skipped.
for coord_name in ("latitude", "longitude"):
    horizontal_coord = csoilfast.cube.coord(coord_name)
    if not horizontal_coord.has_bounds():
        horizontal_coord.guess_bounds()

# Mean weighted by the area weights iris derives from the cube's coordinates.
csoilfast_default_area_weighted_mean = csoilfast.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN,
    weights=iris.analysis.cartography.area_weights(csoilfast.cube)
)

# Mean weighted by cell area multiplied by sftlf.
area_sftlf_weights = broadcast_onto_lat_lon_grid(csoilfast, areacella.cube.data * sftlf.cube.data)
csoilfast_area_sftlf_weighted_mean = csoilfast.cube.collapsed(
    ["latitude", "longitude"],
    iris.analysis.MEAN,
    weights=area_sftlf_weights
)

# Compare the four means on a single set of axes.
plt.figure(figsize=(16, 9))
qplt.plot(csoilfast_unweighted_mean, label="unweighted")
qplt.plot(csoilfast_area_weighted_mean, label="areacella weighted")
qplt.plot(
    csoilfast_default_area_weighted_mean,
    label="area weighted assuming regular grid",
    linestyle="--",
    dashes=(7, 15)
)
qplt.plot(
    csoilfast_area_sftlf_weighted_mean,
    label="areacella-sftlf weighted",
    linestyle=":",
)

plt.legend();

In [None]:
# Restrict the plot to a latitude/longitude window.
lat_con = iris.Constraint(latitude=lambda cell: -45 < cell < -20)
lon_con = iris.Constraint(longitude=lambda cell: 140 < cell < 160)
plt.figure(figsize=(16, 9))

# Contour lines of the first slice of the csoilfast cube within the window.
qplt.contour(csoilfast.cube[0, :, :].extract(lat_con & lon_con))
plt.gca().coastlines()
# The trailing semicolon stops the title's Text repr from being echoed as the
# cell output, matching the other plotting cells in this notebook.
plt.title("raw data");

In [None]:
# Display the csoilfast cube again.
csoilfast.cube

In [None]:
# Show the raw values of the first slice of the csoilfast cube.
# NOTE(review): this echoes the whole array as cell output - consider showing
# a summary (shape, min/max, mask count) instead.
csoilfast.cube[0, :, :].data