In [8]:
import sys
sys.path.append('../')
from utils import paths
from utils import utils

import os
import warnings
from tqdm import tqdm
import pandas as pd
import numpy as np
import datetime
import xarray as xr
import time
from pyTMD import compute_tide_corrections

In [2]:

def geo_coord_borders(lat, lon):
    """
    Compute the latitude/longitude bounds of a window centred on a location,
    sized for the CMIP6 N96 native grid. The result is meant to be handed to
    extract_window.
    ---
    lat (np.float): latitude of location
    lon (np.float): longitude of location; values above 180 are first folded
                    down by 360
    ---
    Returns:
        ((lat_lw_bord,lat_up_bord),(lon_lw_bord,lon_up_bord)): (tuple(tuple(np.float)))
        Negative longitude bounds are shifted up by 360, so a window that
        straddles 0deg comes back with lon_lw_bord > lon_up_bord.
    """
    ### CMIP6 N96 native grid: 144 latitude cells (1.25deg each) and
    ###                        192 longitude cells (1.875deg each).
    ### The window reaches 2.5 cells out from the location on every side.
    half_height = 1.25 * 2.5
    half_width = 1.875* 2.5

    ### Work with the longitude expressed in [-180,180] rather than [0,360]:
    centre_lon = lon - 360 if (lon - 180 > 0) else lon

    south, north = lat - half_height, lat + half_height
    west, east = centre_lon - half_width, centre_lon + half_width

    ### Window entirely east of 0deg: bounds are already non-negative.
    if (west >= 0) and (east > 0):
        return ((south, north), (west, east))

    ### Window entirely west of 0deg: shift both bounds up by 360.
    if (west < 0) and (east <= 0):
        return ((south, north), (west + 360, east + 360))

    ### Window straddling 0deg: only the western bound is shifted.
    if (west < 0) and (east > 0):
        return ((south, north), (west + 360, east))

    ### Only reached when none of the comparisons hold (e.g. NaN input).
    print("Error: Something is wrong with the Longitude.")
    return ((south, north), (west, east))

#--------------------------------------------------------------------------------------------------
def extract_window(data,geo_coord_tuple,data_type = 'CMIP6'):
    """
    Crop a dataset to a latitude/longitude window.
    ---
    data: object exposing an xarray-style .sel(...) method, with coordinates
          named lat/lon (CMIP6) or latitude/longitude (ERA5)
    geo_coord_tuple: ((lat_lw,lat_up),(lon_lw,lon_up)) in the layout returned
          by geo_coord_borders, i.e. longitudes expressed in [0,360] with
          lon_lw > lon_up when the window straddles 0deg
    data_type (string): 'CMIP6' or 'ERA5'
    ---
    Returns:
        The cropped data. For any other data_type a message is printed and
        data is returned unchanged.

    KNOWN LIMITATION: for 'ERA5' a window that crosses +/-180deg (bounds that
    end up above 180 on one side only) is still passed through as a single
    slice and is not wrapped around the date line.
    """
    if data_type not in ('CMIP6', 'ERA5'):
        print('data_type not recognized.')
        return data

    lat_lw, lat_up = geo_coord_tuple[0][0], geo_coord_tuple[0][1]
    lon_lw, lon_up = geo_coord_tuple[1][0], geo_coord_tuple[1][1]

    ### Window straddles 0deg: western bound was shifted by +360, eastern was not.
    crosses_zero = (180 < lon_lw < 360) and (0 < lon_up < 180)

    if data_type == 'CMIP6':
        data = data.sel(lat = slice(lat_lw,lat_up))

        if crosses_zero:
            ### Longitude axis runs over [0,360]: stitch the piece west of
            ### 0deg (end of the axis) onto the piece east of it (start).
            crop1 = data.sel(lon = slice(lon_lw,360))
            crop2 = data.sel(lon = slice(0,lon_up))
            return xr.concat([crop1,crop2],dim = 'lon')
        return data.sel(lon = slice(lon_lw,lon_up))

    ### ERA5 data has longitude in [-180,180]
    ### Latitude bounds are passed upper-first; presumably the ERA5 latitude
    ### coordinate is stored north-to-south -- TODO confirm against the files.
    data = data.sel(latitude = slice(lat_up,lat_lw))

    if crosses_zero:
        ### On a [-180,180] axis the window around 0deg is contiguous, so one
        ### slice is enough. The previous two-piece version took
        ### slice(lon_lw-360, 360) as its first piece, which ran to the end
        ### of the axis and duplicated the second piece.
        return data.sel(longitude = slice(lon_lw-360,lon_up))
    if (180 < lon_lw < 360) and (180 < lon_up < 360):
        ### Window entirely west of 0deg: bring both bounds back to negatives.
        return data.sel(longitude = slice(lon_lw-360,lon_up-360))
    return data.sel(longitude = slice(lon_lw,lon_up))
#--------------------------------------------------------------------------------------------------

def geo_coord_borders_1GRID(lat, lon, Dataset_type):
    """
    Get the borders of a window one grid cell wide (half a cell on each side
    of the location) for the chosen dataset's native grid;
    To be passed down into extract_window function.
    ---
    lat (np.float): latitude of location
    lon (np.float): longitude of location; values above 180 are first folded
                    down by 360
    Dataset_type (string): 'GCM_N216' or 'ERA5'; selects the grid-cell size
    ---
    Returns:
        ((lat_lw_bord,lat_up_bord),(lon_lw_bord,lon_up_bord)): (tuple(tuple(np.float)))
        Negative longitude bounds are shifted up by 360, so a window that
        straddles 0deg comes back with lon_lw_bord > lon_up_bord.
    Raises:
        ValueError: if Dataset_type is not one of the supported names.
    """
    ### The CMIP6 N96 Native grid has   144 gridcells of latitude, i.e. 1.25deg/cell, and
    ###                                 192 gridcells of longitude, i.e. 1.875deg/cell.
    ###                                 (not selectable here; listed for reference)

    ### The CMIP6 N216 Native grid has  324 gridcells of latitude, i.e. 0.5555deg/cell, and
    ###                                 432 gridcells of longitude, i.e. 0.8333deg/cell.

    ### The ERA5 Native grid has        721 (720) gridcells of latitude, i.e. 0.25deg/cell, and
    ###                                 1440 gridcells of longitude, i.e. 0.25deg/cell.

    ### Half a grid cell in each direction. An unknown dataset used to leave
    ### the deltas unbound and crash further down; fail with a clear message.
    if Dataset_type == 'GCM_N216':
        delta_lat = 0.56 * .5
        delta_lon = 0.8333 * .5
    elif Dataset_type == 'ERA5':
        delta_lat = 0.25 * .5
        delta_lon = 0.25 * .5
    else:
        raise ValueError(
            "Dataset_type must be 'GCM_N216' or 'ERA5', got " + repr(Dataset_type)
        )

    ### Make sure the longitude is in [-180,180], not [0,360]:
    if (lon - 180 >0):
        lon = lon - 360

    lat_lw_bord = lat - delta_lat
    lat_up_bord = lat + delta_lat

    lon_lw_bord = lon - delta_lon
    lon_up_bord = lon + delta_lon

    if (lon_lw_bord >= 0) and (lon_up_bord > 0):
        ### Window entirely east of 0deg: nothing to shift.
        pass
    elif (lon_lw_bord < 0) and (lon_up_bord <= 0):
        ### Window entirely west of 0deg: express both bounds in [0,360].
        lon_lw_bord = lon_lw_bord + 360
        lon_up_bord = lon_up_bord + 360
    elif (lon_lw_bord < 0) and (lon_up_bord > 0):
        ### Window straddling 0deg: only the western bound is shifted.
        lon_lw_bord = lon_lw_bord + 360
    else:
        ### Only reached when none of the comparisons hold (e.g. NaN input).
        print("Error: Something is wrong with the Longitude.")

    return ((lat_lw_bord,lat_up_bord),(lon_lw_bord,lon_up_bord))


In [5]:
# Station metadata is read with pandas' default CSV layout.
uk_haigh_meta = pd.read_csv(paths.uk_haigh_meta_path)

# The storm and water-level files share one layout: the same skiprows/header
# settings are applied to both, storms first and water levels second.
storms, water_level = (
    pd.read_csv(csv_path, skiprows=2, header=3)
    for csv_path in (paths.storms_path, paths.water_levels_path)
)

In [3]:
# Site this notebook works on. Note that the later cells pass the literal
# 'aberdeen' to GetFormattedData and to_csv rather than reading this variable.
location = 'aberdeen'
# Disabled exploratory calls into utils, kept for reference:
# lat, lon = utils.GetCoordinates(location)
# flood_records = utils.ExtractFloods(location)
# geo_borders = utils.GetGeoBorders(lat,lon)

In [5]:
def GetFormattedData(location):
    """
    Run the 13-step utils pipeline for one location and return the merged frame.

    The descriptions below are taken from the progress messages the steps
    print when run; the step implementations live in utils and are not
    visible here:
        1  open datasets                8  create grid-cell-wise features
        2  remove leap years            9  calculate vertical vorticity
        3  extract window              10  calculate cumulative precipitation
        4  convert to dataframe        11  add NAO data
        5  convert to fractional years 12  add tides
        6  drop time column            13  add floods
        7  merge dataframes
    ---
    location: passed unchanged to Step3, Step12 and Step13 (the notebook
              calls this function with the string 'aberdeen')
    ---
    Returns:
        df_merged: whatever utils.Step13 returns.

    Prints the total wall-clock time in minutes before returning.
    """
    start = time.time()
    # Silence warnings only while the pipeline runs. A bare
    # warnings.filterwarnings("ignore") would stay active for the rest of the
    # kernel session and hide warnings raised by unrelated cells.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Five separate fields are carried through steps 1-6
        # (ps, u, v, t, pr).
        ps_data, u_data, v_data, t_data, pr_data = utils.Step1()
        ps_data, u_data, v_data, t_data, pr_data = utils.Step2(ps_data, u_data, v_data, t_data, pr_data)
        ps_window, u_window, v_window, t_window, pr_window = utils.Step3(ps_data, u_data, v_data, t_data, pr_data, location)
        df_ps, df_u, df_v, df_t, df_pr = utils.Step4(ps_window, u_window, v_window, t_window, pr_window)
        df_ps, df_u, df_v, df_t, df_pr = utils.Step5(df_ps, df_u, df_v, df_t, df_pr)
        df_ps, df_u, df_v, df_t, df_pr = utils.Step6(df_ps, df_u, df_v, df_t, df_pr)
        # From step 7 on a single merged frame is threaded through.
        df_merged = utils.Step7(df_ps, df_u, df_v, df_t, df_pr)
        df_merged = utils.Step8(df_merged)
        df_merged = utils.Step9(df_merged)
        df_merged = utils.Step10(df_merged)
        df_merged = utils.Step11(df_merged)
        df_merged = utils.Step12(df_merged, location)
        df_merged = utils.Step13(df_merged, location)
    end = time.time()
    print("Time taken to get the data: ", (end-start)/60, ' mins.')
    return df_merged



In [6]:
# Run the full pipeline for Aberdeen. The recorded run below took about
# nine minutes, most of it in steps 4, 5, 7 and 8.
final_df_aberdeen = GetFormattedData('aberdeen')

Time taken to open datasets:  0.03733563423156738  ; Step 1/13
Time taken to remove leap years:  0.030103206634521484  ; Step 2/13
Time taken to extract window:  0.0028951168060302734  ; Step 3/13
Time taken to convert to dataframe:  96.62711191177368  ; Step 4/13
Time taken to convert to fractional years:  82.65430235862732  ; Step 5/13
Time taken to drop time column:  2.238919973373413  ; Step 6/13
Time taken to merge dataframes:  105.1542980670929  ; Step 7/13


100%|██████████| 25/25 [03:59<00:00,  9.56s/it]


Time taken to create the grid-cell-wise features:  241.87705397605896  ; Step 8/13


100%|██████████| 24/24 [00:00<00:00, 100.15it/s]


Time taken to calculate vertical vorticity:  0.24085116386413574  ; Step 9/13


100%|██████████| 25/25 [00:16<00:00,  1.53it/s]


Time taken to calculate cumulative precipitation:  16.338603019714355  ; Step 10/13
Time taken to add NAO data:  1.3988449573516846  ; Step 11/13
Time taken to add tides:  0.07927703857421875  ; Step 12/13
Time taken to add floods:  0.7438199520111084 Step 13/13
Time taken to get the data:  9.126602919896444  mins.


In [7]:
# Show the assembled frame; the recorded output is the abbreviated view
# (first and last rows, with middle columns elided).
final_df_aberdeen

Unnamed: 0,u_1_1,t,v_1_1,T_1_1,P_1_1,pr_1_1,u_1_2,v_1_2,T_1_2,P_1_2,...,pr_cum5_25_23,pr_cum3_25_24,pr_cum5_25_24,pr_cum3_25_25,pr_cum5_25_25,nao,time,tide,floods,floods_x4
0,-4.287253,1979.00000,-6.412828,272.940887,101650.906250,2.506189e-05,-4.466002,-6.803702,272.587158,101359.156250,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.874547,1979-01-01 00:00:00,0.141355,0.0,0.0
1,-2.441757,1979.00034,-5.705153,273.148224,101902.937500,1.115724e-06,-2.774396,-5.909136,272.749207,101616.664062,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.859784,1979-01-01 03:00:00,1.938312,0.0,0.0
2,0.916595,1979.00068,-4.984941,273.543762,102043.328125,1.115724e-06,0.338916,-5.134226,273.022766,101759.109375,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.772936,1979-01-01 06:00:00,0.031803,0.0,0.0
3,5.835155,1979.00103,-0.179356,274.211121,102134.421875,2.172031e-05,5.710859,-0.468808,274.435883,101840.609375,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.701229,1979-01-01 09:00:00,-1.895495,0.0,0.0
4,9.897849,1979.00137,0.128329,274.754761,102215.234375,5.587935e-07,10.333476,-0.077934,275.145081,101920.054688,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.538234,1979-01-01 12:00:00,0.110635,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116795,6.373770,2018.99829,5.895715,282.783936,103324.718750,3.285892e-05,7.295926,5.949275,282.698547,103061.726562,...,0.006905,0.004801,0.006398,0.004726,0.005789,0.791289,2018-12-31 09:00:00,-0.168054,0.0,0.0
116796,6.770332,2018.99863,4.397175,282.925079,103280.882812,4.455447e-05,7.543334,4.508853,282.785675,103017.890625,...,0.007051,0.004131,0.006581,0.004317,0.006006,1.361615,2018-12-31 12:00:00,1.570374,0.0,0.0
116797,7.294742,2018.99897,3.880948,283.198639,103248.695312,4.678033e-05,8.143503,3.962997,283.031372,102978.859375,...,0.006658,0.004452,0.006388,0.004548,0.006085,1.880161,2018-12-31 15:00:00,0.384296,0.0,0.0
116798,7.764698,2018.99932,3.455886,283.313660,103246.640625,6.685033e-06,8.889278,3.495771,283.172516,102969.265625,...,0.006172,0.004452,0.005975,0.004548,0.005762,2.217394,2018-12-31 18:00:00,-1.496255,0.0,0.0


In [11]:
# Save the frame as ./final_df_aberdeen.csv, relative to the notebook's
# working directory. The row index is written too (to_csv default).
final_df_aberdeen.to_csv('./final_df_' + 'aberdeen' + '.csv')
