In [1]:
import xarray as xr
import numpy as np
import warnings
import matplotlib.pyplot as plt
import pandas as pd
warnings.filterwarnings('ignore')
plt.style.use('default')
import os
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from shapely.geometry import box, mapping
import geopandas as gpd
from matplotlib import cm
import texttable
from xclim import ensembles
import global_land_mask
import logging
from CMIP6_ridgeplot import CMIP6_ridgeplot
import CMIP6_area_calculations

# Only let ERROR-level records through for the 'xclim' logger and the logger
# looked up under the name 'root'.
for _logger_name in ('xclim', 'root'):
    logging.getLogger(_logger_name).setLevel('ERROR')

# Notebook-wide seaborn defaults: white grid background, fonts scaled by 1.5.
# (The configuration cell further down calls sns.set again with other values.)
sns.set(style="whitegrid", font_scale=1.5)

In [2]:
def convert_to_180(ds):
    """Re-express the ``lon`` coordinate on [-180, 180) and sort the data by it.

    Each longitude is mapped through ``((lon + 180) % 360) - 180``, so values
    of 180 and above wrap around to negative longitudes, and the result is
    sorted along ``lon``.
    """
    wrapped_lon = ((ds.lon + 180) % 360) - 180
    rewrapped = ds.assign_coords(lon=wrapped_lon)
    return rewrapped.sortby('lon')

def convert_time(ds):
    """Return ``ds`` with a ``datetime64[ns]`` time coordinate.

    If the ``time`` index already has dtype ``datetime64[ns]`` the dataset is
    returned unchanged. Otherwise the index is converted with its
    ``to_datetimeindex()`` method, re-attached as the ``time`` coordinate, and
    the dataset is passed through ``xr.decode_cf``.
    """
    time_index = ds.indexes["time"]
    if time_index.dtype in ["datetime64[ns]"]:
        # Nothing to convert.
        return ds

    converted = ds.assign_coords({"time": time_index.to_datetimeindex()})
    return xr.decode_cf(converted)

def create_land_ocean_mask(cmip6_grid: xr.Dataset) -> xr.DataArray:
    """Build a (lat, lon) integer mask for the grid of ``cmip6_grid``.

    Cells are 1 where ``global_land_mask.globe.is_ocean`` reports ocean and 0
    elsewhere. Longitudes above 180 are shifted by -360 for the lookup only;
    the returned DataArray keeps the grid's original ``lat``/``lon`` values as
    coordinates.
    """
    print("[create_land_ocean_mask] Running create_land_ocean_mask")
    lat_values = cmip6_grid.lat.values
    lon_values = cmip6_grid.lon.values

    # The lookup is done on longitudes wrapped below/at 180.
    lookup_lon = xr.where(lon_values > 180, lon_values - 360, lon_values)
    lon_2d, lat_2d = np.meshgrid(lookup_lon, lat_values)
    ocean_flags = global_land_mask.globe.is_ocean(lat_2d, lon_2d).astype(int)

    return xr.DataArray(
        ocean_flags,
        dims=['lat', 'lon'],
        coords={'lat': lat_values, 'lon': lon_values},
    )


def get_area_averaged_ds(fname, model, scenario, var_name, var_name_std, LME,frequency, models_dict):
    """Load one NetCDF file, clip it to an LME and reduce it to annual means.

    Parameters
    ----------
    fname : str
        Path of the NetCDF file to open.
    model, scenario : str
        Labels written into the returned dataframe and the model summary.
    var_name, var_name_std : str
        Names of the mean and standard-deviation variables in the file.
    LME : str
        Name of the Large Marine Ecosystem polygon passed to
        ``get_data_within_LME``.
    frequency : str
        Accepted for interface compatibility but not used: resampling is fixed
        to annual (``"A"``) because the rolling window is expressed in years.
    models_dict : dict
        Registry of per-model summaries. A summary for this model/scenario is
        stored under the key ``"<model>_<scenario>"`` and the same dict is
        returned.

    Returns
    -------
    tuple
        ``(df, models_dict, ds_lme, total_area_lme)`` where ``df`` is the
        annually resampled dataframe with rolling statistics, ``ds_lme`` the
        clipped and masked dataset, and ``total_area_lme`` the nan-sum of the
        time-mean ``areacello`` field. When ``fname`` does not exist the result
        is ``(None, models_dict, None, None)``.

    Notes
    -----
    The time window is taken from the module-level names ``start_time`` and
    ``end_time``, which must be defined before this function is called.
    """
    if not os.path.exists(fname):
        return None, models_dict, None, None

    with xr.open_dataset(fname) as ds:
        ds = convert_to_180(ds)
        ds = ds.sel(time=slice(start_time,end_time))
        ds = convert_time(ds)

        ds_lme = get_data_within_LME(ds, var_name, LME, False)
        ds_lme[var_name] = CMIP6_area_calculations.xr_add_cyclic_point(ds_lme[var_name])
        ds_lme[var_name].attrs["long_name"]=var_name
        ds_lme[var_name_std] = CMIP6_area_calculations.xr_add_cyclic_point(ds_lme[var_name_std])

        # Keep only the cells flagged 1 (ocean) by create_land_ocean_mask.
        ds_lme["mask"] = create_land_ocean_mask(ds_lme)
        ds_lme = ds_lme.where(ds_lme.mask == 1)

        ds_lme["areacello"] = CMIP6_area_calculations.calculate_areacello(ds_lme, var_name)
        # Clip the area to the polygon - this also clips to the time varying var_name which results in
        # a time varying areacello variable that we can use to sum up the size of open water by
        # looking at the annual mean.
        # First we set all areas outside of the polygon to nan - which also now includes
        # areacello which as part of the calculations actually covered the whole region (and
        # not just the polygon).
        ds_lme = xr.where(np.isnan(ds_lme[var_name]), np.nan, ds_lme)

        # Then we remove the areas where par or uvb is less than a minimum value to remove zeroes etc.
        if var_name in ["par_mean"]:
            ds_lme = xr.where(ds_lme[var_name]<1, np.nan, ds_lme)
        elif var_name in ["uvb_mean"]:
            ds_lme = xr.where(ds_lme[var_name]<0.001, np.nan, ds_lme)
        total_area_lme = np.nansum(ds_lme["areacello"].mean(dim="time").values)
        print("[calculate_areacello] Average total area {:,.2f} km2".format(total_area_lme))

        ds_lme = ds_lme.assign(openwater = (xr.where(np.isnan(ds_lme[var_name]),np.nan, ds_lme["areacello"])))

        # Average data within the LME as a function of time
        ds = ds_lme.mean({"lat","lon"})

        # To get the average sum of the open water areas we sum prior to creating the
        # dataframe. Then when running rolling mean we just get the summed value back smoothed.
        ds = ds.assign(areacello = ds_lme["areacello"].sum(dim={"lat","lon"}, skipna=True))
        ds = ds.assign(openwater = ds_lme["openwater"].sum(dim={"lat","lon"}, skipna=True))

        df = ds.to_dataframe().dropna().reset_index()

    roll_years=5
    df=df.resample("A", on="time").mean()

    df["model_name"]=model
    df["roll_mean"]=df[var_name].rolling(roll_years).mean()
    # Rolling mean of the standard-deviation variable (not a rolling std of the mean).
    df["roll_std"]=df[var_name_std].rolling(roll_years).mean()
    df["roll_mean_area"]=df["areacello"].rolling(roll_years).mean()

    df["roll_median"]=df[var_name].rolling(roll_years).median()
    df["model_scenario"]=scenario
    df["model_name_scenario"]=model+"_"+scenario
    unique="{}_{}".format(model, scenario)
    df["unique"]=unique

    formatter="{:.2f}"
    model_info={
        "model_name": model,
        "model_scenario": scenario,
        "model_var": var_name,
        "model_min": formatter.format(np.nanmin(df[var_name])),
        "model_max": formatter.format(np.nanmax(df[var_name])),
    }
    # Register the summary so the returned registry actually carries it;
    # previously the record was built and then discarded.
    models_dict[unique]=model_info

    return df, models_dict, ds_lme, total_area_lme

In [3]:
def get_LME_records():
    """Read the LME66 shapefile with geopandas and return the result."""
    return gpd.read_file('../oceanography/Shapefiles/LME66/LMEs66.shp')

def get_LME_records_plot():
    """Read the LME66_180 shapefile with geopandas and return the result."""
    return gpd.read_file('../oceanography/Shapefiles/LME66_180/LME66_180.shp')

def create_colors(N):
    """Return a list of ``N`` colours sampled evenly from the ``tab20b`` colormap."""
    sampled = cm.tab20b(np.linspace(0, 1, N))
    # One list entry per sampled row of the colormap output.
    return list(sampled)


def get_data_within_LME(ds,var_name,LME,create_maps):
    """Clip ``ds`` to the polygon of the Large Marine Ecosystem named ``LME``.

    Parameters
    ----------
    ds : xarray object with ``lon`` and ``lat`` dimensions
        Data to clip.
    var_name : str
        Not used by this function; kept for interface compatibility.
    LME : str
        Value to look up in the ``LME_NAME`` column of the LME shapefile.
    create_maps : bool
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    The clipped data with its dimensions named ``lon`` and ``lat`` again.

    Raises
    ------
    ValueError
        If no record in the shapefile has ``LME_NAME == LME``.

    Notes
    -----
    NOTE(review): the ``.rio`` accessor used below is presumably registered by
    rioxarray being imported somewhere; this notebook's import cell does not
    import it explicitly - confirm.
    """
    print("Working on LME: {}".format(LME))

    # Extract the polygon defining the boundaries of the LME
    shdf = get_LME_records()
    shdf_sel = shdf[ shdf['LME_NAME']==LME ]
    if shdf_sel.empty:
        # Fail early with a readable message instead of handing an empty
        # geometry list to the clipping routine.
        raise ValueError("LME '{}' was not found in the LME_NAME column of the LME shapefile".format(LME))

    # Rioxarray requires x and y dimensions - we convert these back to lon and lat later.
    # We also add the projection (lat-lon, EPSG 4326) so that rioxarray can clip the data
    # according to the shapefile.
    tos=ds.rename({'lon': 'x','lat': 'y'})
    tos=tos.rio.write_crs(4326)

    # Clip the data within the LME. The polygon geometries are converted with
    # `shapely.geometry.mapping` and passed to rioxarray's clip function.
    clipped = tos.rio.clip(geometries=shdf_sel.geometry.apply(mapping), crs=tos.rio.crs)

    return clipped.rename({'x': 'lon','y': 'lat'})

In [4]:
def create_summary_table(dict_of_models, LME):
    """Print a text table with one row per entry of ``dict_of_models``.

    Parameters
    ----------
    dict_of_models : dict
        Maps an arbitrary key to a summary dict with the keys ``model_name``,
        ``model_scenario``, ``model_var``, ``model_min`` and ``model_max``.
        ``model_ensemble_id`` is optional: the summaries built by
        ``get_area_averaged_ds`` do not set it, so a missing id is rendered as
        an empty cell instead of raising ``KeyError``.
    LME : str
        Name written into the first column of every row.
    """
    table = texttable.Texttable()
    table.set_cols_align(["c","c", "c","c","c","c","c"])
    table.set_cols_valign(["t","t", "m","m","m","m", "b"])

    table.header(["LME","Model", "Scenario", "ID", "Var", "CMIP6 min", "CMIP6 max"])
    for model in dict_of_models.values():
        table.add_row([LME,
                       model["model_name"],
                       model["model_scenario"],
                       model.get("model_ensemble_id", ""),
                       str(model["model_var"]),
                       str(model["model_min"]),
                       str(model["model_max"])])

    table.set_cols_width([30,30,20,20,10,10,10])
    print(table.draw() + "\n")

In [None]:
# Plot, for every variable and LME, the 5-year rolling ensemble mean (+/- the
# rolling mean of the ensemble standard deviation) for each scenario, with the
# change relative to the 1980-2000 mean on a secondary axis. For PAR a second
# figure shows the rolling-mean area as a percentage of the total LME area.
import matplotlib.dates as mdates
import matplotlib.ticker as mticker

# --- Run configuration ------------------------------------------------------
scenarios = ["ssp245", "ssp585"]
member_range = 12
frequency = "A"
period = "1979-01-01-2099-12-16"
# start_time / end_time are read as globals inside get_area_averaged_ds.
start_time = "1979-01-01"
end_time = "2099-12-16"

models = ["ensemble"]
ds_var_names = ["par"] #, "uvb", "uv"]
write_stats_to_file = False
# Not used in this cell; kept defined in case other cells rely on them.
simulation = "osa"
create_maps = False
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)}, font_scale=1.5)

LMES = ['California Current', 'East Bering Sea', 'Gulf of Alaska',
        'Northern Bering - Chukchi Seas', 'West Bering Sea', 'Sea of Japan',
        'Oyashio Current', 'Kuroshio Current', 'East China Sea',
        'South China Sea', 'Sea of Okhotsk', 'Yellow Sea',
        'Aleutian Islands']

# The list above is overridden: only these LMEs are processed.
LMES = ['Barents Sea', 'Northern Bering - Chukchi Seas']


def _percent_tick(value, _pos):
    """Format a tick value that is already expressed in percent (12.0 -> '12%')."""
    return '{:,.0%}'.format(value / 100.)


os.makedirs("Figures", exist_ok=True)

for var_name in ds_var_names:
    var_name_mean=f"{var_name}_mean"
    var_name_std=f"{var_name}_stdev"
    # The open-water figure is only produced for PAR.
    plot_open_water = var_name_mean in ["par_mean"]

    for LME in LMES:
        models_dict = {}
        saved_total_area_lme = None
        sns.set_palette("tab10")
        palette_tab10 = sns.color_palette("tab10", 10)
        colors = [palette_tab10[0], palette_tab10[1], palette_tab10[3]]

        # Explicit figure/axes handles: every plot and save below addresses its
        # own figure instead of whichever figure pyplot considers current.
        f, ax = plt.subplots(figsize=(10, 10))
        ax2 = ax.twinx()
        f2 = ax3 = None
        if plot_open_water:
            f2, ax3 = plt.subplots(figsize=(16, 16))

        n_plotted = 0
        for loop, scenario in enumerate(scenarios):
            fname = "../oceanography/light/ncfiles/ensemble/{}_{}_{}.nc".format(var_name,
                                                                                         "ensemble",
                                                                                         scenario)
            label = "SSP5-8.5" if scenario=="ssp585" else "SSP2-4.5"
            color = colors[loop % len(colors)]

            df, models_dict, ds_lme, total_area_lme = get_area_averaged_ds(fname,  "ensemble",
                                                                           scenario,var_name_mean,var_name_std,
                                                                           LME, frequency, models_dict)
            if df is None:
                # get_area_averaged_ds returns None when the input file is missing.
                print("Skipping scenario {}: input file {} not found".format(scenario, fname))
                continue

            saved_total_area_lme = total_area_lme
            # Not used further down in this cell; the masked dataset is kept in
            # the notebook namespace as before.
            ds_lme = xr.where(((ds_lme < 1.e-20) | (ds_lme > 1e20)), np.nan, ds_lme)

            if var_name_mean in ["par_mean","uvb_mean", "uv_mean"]:
                # Non-positive light values are treated as missing.
                df["roll_mean"] = df["roll_mean"].where(df["roll_mean"] > 0)

            clim = (df["roll_mean"].loc['1980-01-01':'2000-01-01']).mean()
            df["rel_change"] = ((df["roll_mean"] - float(clim)) / float(clim)) * 100.

            ax.fill_between(df["roll_mean"].index,
                df["roll_mean"]+df["roll_std"], df["roll_mean"]-df["roll_std"],
                alpha=0.2,
                color=color,
                label=None,
            )

            ax.plot(
                df["roll_mean"].index,
                df["roll_mean"],
                linewidth=5,
                color=color,
                label=f"{label}",
            )

            ax2.plot(
                df["rel_change"].index,
                df["rel_change"],
                linewidth=3,
                linestyle="--",
                color=color,
                label=None,
            )

            if plot_open_water:
                # Rolling-mean area as a percentage of the total LME area.
                df["change_open_water"] = (df["roll_mean_area"] / float(saved_total_area_lme)) * 100.

                print(f"Calculating for scenario: {scenario} {df.head(2)} loop {loop}")

                ax3.plot(
                    df["roll_mean_area"].index,
                    df["change_open_water"],
                    linewidth=5,
                    color=color,
                    label=f"{label}",
                )

            n_plotted += 1

        if n_plotted == 0:
            print("No input data found for {} in {}; no figures written".format(var_name_mean, LME))
            plt.close(f)
            if f2 is not None:
                plt.close(f2)
            continue

        # --- Light figure: style once, after all scenarios are drawn ----------
        for axis in (ax, ax2):
            axis.tick_params(labelsize=22)
            axis.set_xlabel("", fontsize=20)
            axis.set_ylabel("", fontsize=20)
        # A formatter (instead of fixed label strings) keeps the labels in sync
        # with the ticks even when the axis is rescaled.
        ax2.yaxis.set_major_formatter(mticker.FuncFormatter(_percent_tick))
        ax.xaxis.set_major_locator(mdates.YearLocator(base=10))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=-90)
        ax.legend(loc="upper left", frameon=False, fontsize=32)

        plotfile = "Figures/CMIP6_light_{}_{}.png".format(var_name_mean, LME)
        # Save through the figure handle: plt.savefig would write the most
        # recently created figure (the open-water one) under this name.
        f.savefig(plotfile, dpi=200,
                  bbox_inches='tight')
        print("Created figure {}".format(plotfile))

        # --- Open-water figure ------------------------------------------------
        if plot_open_water:
            ax3.tick_params(labelsize=22)
            ax3.set_xlabel("", fontsize=20)
            ax3.set_ylabel("", fontsize=20)
            ax3.yaxis.set_major_formatter(mticker.FuncFormatter(_percent_tick))
            ax3.xaxis.set_major_locator(mdates.YearLocator(base=10))
            ax3.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
            plt.setp(ax3.xaxis.get_majorticklabels(), rotation=-90)
            ax3.legend(loc="upper left", frameon=False, fontsize=32)

            plotfile = "Figures/CMIP6_light_{}_{}.png".format("area_open_water", LME)
            # Saved before plt.show(), which may release the figure.
            f2.savefig(plotfile, dpi=300,bbox_inches='tight')
            print("Created figure {}".format(plotfile))

        plt.show()
        # Release the figures so looping over many LMEs does not accumulate them.
        plt.close(f)
        if f2 is not None:
            plt.close(f2)
