In [5]:
import os
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import MonthLocator, DayLocator, DateFormatter
from matplotlib.dates import date2num

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.mpl.ticker as cticker
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.colors import TwoSlopeNorm
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import ListedColormap

In [None]:
# Directory containing the yearly input files named T2_hourly-<year>-1deg.nc.
data_path = '../ERA5_T2/'

# One daily-maximum Dataset per year, appended in chronological order.
t2m_max_rp = []
for year in range(1973, 2023):
    file_path = os.path.join(data_path, f'T2_hourly-{year}-1deg.nc')
    # Open each file in a context manager so its handle is released before the
    # next year is read (the loop previously left all 50 files open).
    # .load() forces the daily maxima into memory while the file is still open,
    # so the result stays usable after the file has been closed.
    with xr.open_dataset(file_path) as t2m:
        temp_max = t2m.resample(time='D').max().load()
    t2m_max_rp.append(temp_max)

In [6]:
# Land-sea mask as a plain NumPy array; it is broadcast against the data by
# shape, not by coordinate labels.
# NOTE(review): assumes the mask grid matches the temperature grid - confirm.
lsm = xr.open_dataset('land_sea_mask.nc')
mask = lsm['lsm'].values

# Join the per-year daily maxima along time, then replace every grid cell whose
# mask value is exactly 0 with NaN.
ds = xr.concat(t2m_max_rp, dim='time').where(mask != 0, np.nan)

In [8]:
# Subtract 273.15 from every value (presumably Kelvin -> degrees Celsius;
# the input units are not visible here - confirm).
celsius = ds - 273.15

# Keep every time step that is not 29 February, so each year has 365 days.
not_feb29 = (celsius['time.month'] != 2) | (celsius['time.day'] != 29)
Tmax_RP = celsius.where(not_feb29, drop=True)

In [9]:
# Area average over the box latitude 30..20, longitude 70..80, leaving a
# time-only series.
# NOTE(review): slice(30, 20) only selects data if latitude is stored in
# descending order - confirm against the input files.
nci_box = Tmax_RP.sel(latitude=slice(30, 20), longitude=slice(70, 80))
Tmax_RP_NCI = nci_box.mean(dim=['latitude', 'longitude'])

In [10]:
# Plain alias (not a copy): the 'dayofyear' variable added below is also
# visible through Tmax_RP_NCI.
tmax_subset = Tmax_RP_NCI
tmax_subset['dayofyear'] = tmax_subset['time.dayofyear']

# Gregorian leap-year rule, evaluated per time step.
years = tmax_subset['time.year']
leap_years = (years % 4 == 0) & ((years % 100 != 0) | (years % 400 == 0))

# 29 February has already been removed from the series, so in leap years every
# date from March onwards carries a day-of-year that is one too large. Shift
# those back by one so that all years share the same 1..365 numbering.
after_february_in_leap_year = leap_years & (tmax_subset['time.month'] > 2)
tmax_subset['dayofyear'] = xr.where(
    after_february_in_leap_year,
    tmax_subset['dayofyear'] - 1,
    tmax_subset['dayofyear'],
)

In [11]:
# Daily anomalies: subtract from each value the mean, over all years, of the
# values sharing its (adjusted) day-of-year.
doy_groups = tmax_subset.groupby('dayofyear')
Tmax_RP_NCI_ds = doy_groups - doy_groups.mean(dim='time')

In [13]:
# 90th-percentile threshold of the daily anomalies for each calendar day.
# For every day-of-year, the values of all years in Tmax_RP_NCI_ds that fall
# inside a 15-day window centred on that day are pooled before taking the
# percentile. (An earlier note here mentioned a 1941-1970 base period; this
# notebook only loads 1973-2022, so the whole loaded record is the reference.)
WINDOW_DAYS = 15
DAYS_PER_YEAR = 365  # 29 February is removed earlier, so every year has 365 days

tmax_grouped = Tmax_RP_NCI_ds.groupby('dayofyear')

# Look each calendar day's group up once rather than 15 times inside the loop.
groups_by_day = {day: tmax_grouped[day] for day in range(1, DAYS_PER_YEAR + 1)}

half_window = WINDOW_DAYS // 2
Tmax_90P = []
for day in range(1, DAYS_PER_YEAR + 1):
    # Day numbers inside the window, wrapped into 1..365 so that early January
    # borrows from late December and vice versa.
    window_indices = [
        (day + offset - 1) % DAYS_PER_YEAR + 1
        for offset in range(-half_window, half_window + 1)
    ]

    # Pool all years of every day in the window along the time axis.
    selected_groups = [groups_by_day[idx] for idx in window_indices]
    concatenated_data = xr.concat(selected_groups, dim='time')
    percentile_90p = np.percentile(concatenated_data.t2m, 90, axis=0)

    Tmax_90P.append(percentile_90p)

# Position k holds the threshold for day-of-year k + 1.
Tmax_90Pctle = np.array(Tmax_90P)

In [85]:
#data_path = ('../ERA5_T2/')
#t2m_max =[]
#for year in range(1973, 2023):
   # print(year)
    #file_path = os.path.join(data_path,f'T2_hourly-{year}-1deg.nc')
    #t2m = xr.open_dataset(file_path)    
    #temp_max = t2m.resample(time='D').max()
    #t2m_max.append(temp_max)

#ds1 = xr.concat(t2m_max,dim='time')
#ds1 = ds1.where(mask != 0, float('nan'))

#Tmax = ds1 - 273.15
#Tmax = Tmax.where(~((Tmax['time.month'] == 2) & (Tmax['time.day'] == 29)), drop=True)

In [16]:
# Detect hot spells: runs of at least MIN_EVENT_DAYS consecutive March-May days
# on which the anomaly exceeds that calendar day's 90th-percentile threshold.
# Each qualifying run is stored as its own DataFrame (time index, 'hd' column
# holding the exceedance) in date_dataframe, in chronological order.
MIN_EVENT_DAYS = 3

Tmax = Tmax_RP_NCI_ds

date_dataframe = []
for year in range(1973, 2023):
    Tmax_year = Tmax.sel(time=f'{year}')

    # Tmax_90Pctle is positional (one entry per calendar day), so it is given
    # this year's time axis; the year must have exactly that many time steps.
    Tmax_90Pct = xr.DataArray(Tmax_90Pctle, dims=('time'), coords={'time': Tmax_year.time}, name='t2m')

    # Exceedance of the threshold; positive values are hot days.
    hot_days = Tmax_year.t2m - Tmax_90Pct
    hot_days.name = 'hd'

    hot_days_MAM = hot_days.sel(time=hot_days['time.season'] == 'MAM')
    hot_days_only = hot_days_MAM.where(hot_days_MAM > 0, drop=True).squeeze()
    if hot_days_only.size < MIN_EVENT_DAYS:
        # Too few hot days this season for any qualifying run.
        continue

    df_hd = hot_days_only.to_dataframe()

    # A new run starts wherever the gap to the previous hot day is not exactly
    # one day; the cumulative sum turns those breaks into run labels.
    run_id = (df_hd.index.to_series().diff() != pd.Timedelta(days=1)).cumsum()

    # Group by the label array directly instead of adding and then dropping a
    # temporary column; keep only runs that are long enough.
    date_dataframe.extend(
        run for _, run in df_hd.groupby(run_id.to_numpy())
        if len(run) >= MIN_EVENT_DAYS
    )

In [17]:
def get_start_end_dates(df):
    """Return the first and last index labels of ``df`` as a ``(start, end)`` tuple.

    The labels are taken by position, so ``df`` must have at least one row
    (an empty frame raises ``IndexError``).
    """
    index = df.index
    return index[0], index[-1]

In [22]:
# Build one "start,end" line per detected event, then write them all to
# start_end_dates.txt (overwriting any existing file).
event_lines = []
for df in date_dataframe:
    start_date, end_date = get_start_end_dates(df)
    event_lines.append(f"{start_date},{end_date}\n")

with open('start_end_dates.txt', 'w') as file:
    file.writelines(event_lines)

In [18]:
def _hot_days_to_dataarray(frame):
    """Wrap the 'hd' column of ``frame`` as a DataArray indexed by its time index."""
    return xr.DataArray(frame['hd'], coords={'time': frame.index}, dims=['time'])


# All detected events stacked into one time-indexed frame.
date_dataframe_combined = pd.concat(date_dataframe)

# Split the record into two 25-year periods by label-based year slicing.
df_1973_1997 = date_dataframe_combined.loc['1973':'1997']
df_1998_2022 = date_dataframe_combined.loc['1998':'2022']

Regime1_hot_days = _hot_days_to_dataarray(df_1973_1997)
Regime2_hot_days = _hot_days_to_dataarray(df_1998_2022)

In [21]:
# Save each regime's hot-day series to its own NetCDF file in the working
# directory (second regime first, as before).
for regime_series, out_name in (
    (Regime2_hot_days, 'Regime2_hot_days_1973_2022_NM.nc'),
    (Regime1_hot_days, 'Regime1_hot_days_1973_2022_NM.nc'),
):
    regime_series.to_netcdf(out_name)

In [28]:
# Display the dates of the hot days in the 1998-2022 period.
Regime2_hot_days['time']

In [29]:
# Display the dates of the hot days in the 1973-1997 period.
Regime1_hot_days['time']