### Combining projected datasets
Let's combine the datasets for all models in a projected year range into a single object, then output two tables for one decade: one with streamflow and one with stream temperature.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from pathlib import Path

# Root directory that is searched for the NetCDF (*.nc) data files.
# NOTE: hard-coded absolute path - change it to wherever the data lives on your system.
data_dir = Path("/beegfs/CMIP6/arctic-cmip6/Arctic_Rivers_Data/")

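Create a function that finds all streamflow or stream temperature datasets in a projected year range (2050-2060 in this notebook), concatenates each model's files along the time dimension, and stacks the models along a new `model` dimension.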

In [2]:
def concat_future_files(data_dir, variable, start, end):
    """Combine the future-scenario NetCDF files of every model into one dataset.

    The year and the model name are read from the first two
    underscore-separated tokens of each file name
    (``<year>_<model>_...``).

    Parameters
    ----------
    data_dir : pathlib.Path
        Directory searched (non-recursively) for NetCDF files.
    variable : str
        Variable tag that must appear in the file name, e.g. "Q" or "WT".
    start : int
        First file year to include (inclusive).
    end : int
        File year at which to stop (exclusive).

    Returns
    -------
    xarray.Dataset
        Each model's files concatenated along ``time`` in ascending
        file-year order, with the models stacked along a new ``model``
        dimension labelled by model name (sorted alphabetically).

    Raises
    ------
    FileNotFoundError
        If no matching file has a year in ``[start, end)``.
    IndexError, ValueError
        If a matched file name does not follow the ``<year>_<model>_...``
        pattern.
    """
    # "f" gets only future datasets
    var_fps = list(data_dir.glob(f"*f*{variable}*.nc"))

    # Group the in-range files by their exact model token. Grouping on the
    # token (rather than a substring test against the whole file name) keeps
    # a model whose name is a prefix of another from picking up its files.
    files_by_model = {}
    for fp in var_fps:
        name_parts = fp.name.split("_")
        model = name_parts[1]
        fp_year = int(name_parts[0])
        if start <= fp_year < end:
            files_by_model.setdefault(model, []).append((fp_year, fp))

    if not files_by_model:
        raise FileNotFoundError(
            f"No '{variable}' files with a year in [{start}, {end}) "
            f"found in {data_dir}"
        )

    # Only models that actually have files in range are kept; a stable,
    # sorted order makes the resulting "model" coordinate reproducible.
    models = sorted(files_by_model)

    datasets = []
    for model in models:
        # combine="nested" concatenates in the order given and glob() returns
        # files in arbitrary order, so sort by year to keep time ascending.
        model_fps = [fp for _, fp in sorted(files_by_model[model])]
        ds = xr.open_mfdataset(
            model_fps, combine="nested", concat_dim="time", parallel=True
        )
        datasets.append(ds)

    combined_ds = xr.concat(datasets, pd.Index(models, name="model"))

    return combined_ds

Run the function for the "Q" (streamflow) and "WT" (stream temperature) variables.

In [3]:
# Streamflow files with a file year from 2050 up to (not including) 2060
Q = concat_future_files(data_dir, variable="Q", start=2050, end=2060)

In [4]:
# Stream temperature files with a file year from 2050 up to (not including) 2060
WT = concat_future_files(data_dir, variable="WT", start=2050, end=2060)

For a given stream segment, make tables with daily streamflow and daily stream temp values.

In [5]:
# ID of the stream segment to extract; the same ID is used for both datasets.
segment_id = 81000004

# Streamflow is selected along `seg`; stream temperature along `hru` and `no_seg`.
Q_sub = Q.sel(seg=segment_id)
WT_sub = WT.sel(hru=segment_id, no_seg=1)

In [6]:
# Flatten each selection into a long table, turning the index levels produced
# by to_dataframe() back into ordinary columns.
Q_df = Q_sub.to_dataframe().reset_index()
WT_df = WT_sub.to_dataframe().reset_index()

In [7]:
# Reshape both long tables to wide form: one row per timestamp and one column
# per model. `time` is then moved from the index back into a regular column.
wide_layout = {"index": "time", "columns": "model"}

Q_df_pivot = Q_df.pivot(values="IRFroutedRunoff", **wide_layout).reset_index()
WT_df_pivot = WT_df.pivot(values="T_stream", **wide_layout).reset_index()

Stream Flow

In [8]:
# Drop the "model" label left on the column axis by the pivot.
Q_df_pivot.columns.name = None
# Remove time of day from the datetime values in the time column. Going through
# pd.to_datetime keeps this cell safe to re-run: once the column already holds
# plain dates, a bare `.dt` access would raise AttributeError.
Q_df_pivot["time"] = pd.to_datetime(Q_df_pivot["time"]).dt.date
Q_df_pivot.to_csv("streamflow_2050_2060.csv", index=False)
Q_df_pivot

Unnamed: 0,time,fC2LE2,fC2LE4,fC2LE7,fC2LE9,fPGWh,fPGWm
0,2050-01-01,,,,,0.626869,0.739737
1,2050-01-02,,,,,0.626069,0.738859
2,2050-01-03,,,,,0.625306,0.737986
3,2050-01-04,,,,,0.624579,0.737128
4,2050-01-05,0.831886,1.510535,0.943906,0.622640,0.623892,0.736297
...,...,...,...,...,...,...,...
3655,2060-01-02,0.330644,0.333896,0.704730,0.281809,,
3656,2060-01-03,0.329932,0.332323,0.704232,0.279126,,
3657,2060-01-04,0.329222,0.330862,0.703723,0.276894,,
3658,2060-01-05,0.328491,0.329394,0.703205,0.275009,,


Stream Temperature

In [9]:
# Drop the "model" label left on the column axis by the pivot.
WT_df_pivot.columns = WT_df_pivot.columns.rename(None)
# Write the wide table to disk without the integer row index.
WT_df_pivot.to_csv(path_or_buf="streamtemp_2050_2060.csv", index=False)
WT_df_pivot

Unnamed: 0,time,fC2LE2,fC2LE4,fC2LE7,fC2LE9,fPGWh,fPGWm
0,2050-01-01,,,,,2.45,1.89
1,2050-01-02,,,,,1.59,1.06
2,2050-01-03,,,,,1.30,0.20
3,2050-01-04,,,,,0.47,0.33
4,2050-01-05,0.1,2.10,1.05,0.37,0.13,0.10
...,...,...,...,...,...,...,...
3653,2060-01-02,0.1,0.97,0.62,0.10,,
3654,2060-01-03,0.1,1.36,0.64,0.10,,
3655,2060-01-04,0.1,0.62,0.10,0.10,,
3656,2060-01-05,0.1,0.40,0.10,0.10,,
