In [1]:
# general tools
import os
import sys
import time
import h5py
import random
from glob import glob

import numpy as np
from datetime import datetime, timedelta
from random import shuffle

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend
from keras_unet_collection import utils as k_utils

2022-11-23 16:16:27.569768: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
sys.path.insert(0, '/glade/u/home/ksha/NCAR/')
sys.path.insert(0, '/glade/u/home/ksha/NCAR/libs/')

from namelist import *
import data_utils as du

In [4]:
import re

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
def feature_extract(filenames, lon_80km, lon_minmax, lat_80km, lat_minmax, elev_80km, elev_max, lead_minmax):
    """Build normalized per-sample features from batch file names.

    Every file name is scanned for integer runs; the last four of them are
    read, right to left, as forecast lead, second grid index, first grid
    index and day offset. The day offset is looked up in a daily calendar
    that starts on 2020-12-03 when the name contains 'v4' and on 2018-07-15
    otherwise.

    Parameters
    ----------
    filenames : iterable of str
        File names (or paths) to decode.
    lon_80km, lat_80km, elev_80km : 2-D arrays
        Grids indexed as ``grid[indx, indy]``.
    lon_minmax, lat_minmax, lead_minmax : sequences of two numbers
        ``[min, max]`` pairs used for min-max scaling.
    elev_max : number
        Divisor applied to the elevation value.

    Returns
    -------
    tuple of five 1-D numpy arrays
        Scaled longitude, latitude, elevation, lead and month, one entry
        per file name. The month is mapped with ``(month - 1) / 11``.
    """
    # Daily calendars used to turn a day offset into a date
    first_day_v3 = datetime(2018, 7, 15)
    first_day_v4 = datetime(2020, 12, 3)

    calendar_v3 = [first_day_v3 + timedelta(days=offset) for offset in range(365+365+142)]
    calendar_v4 = [first_day_v4 + timedelta(days=offset) for offset in range(365+180-151)]

    def _decode(name):
        # One file name -> (lon, lat, elev, lead, month), all scaled
        calendar = calendar_v4 if 'v4' in name else calendar_v3

        digits = re.findall(r'\d+', name)
        lead_raw = int(digits[-1])
        iy = int(digits[-2])
        ix = int(digits[-3])
        when = calendar[int(digits[-4])]

        month_scaled = (when.month - 1)/(12-1)

        lon_scaled = (lon_80km[ix, iy] - lon_minmax[0])/(lon_minmax[1] - lon_minmax[0])
        lat_scaled = (lat_80km[ix, iy] - lat_minmax[0])/(lat_minmax[1] - lat_minmax[0])
        elev_scaled = elev_80km[ix, iy] / elev_max
        lead_scaled = (lead_raw - lead_minmax[0])/(lead_minmax[1] - lead_minmax[0])

        return lon_scaled, lat_scaled, elev_scaled, lead_scaled, month_scaled

    records = [_decode(name) for name in filenames]

    # Split the per-file records into one array per feature
    return tuple(np.array([record[col] for record in records]) for col in range(5))

def create_model():
    """Assemble the dense classifier applied to the vector and station inputs.

    Two inputs are declared: a 128-wide vector and a 4-wide station/lead
    feature vector. They are concatenated (station features first) and passed
    through four Dense -> BatchNormalization -> GELU stages of width 1024,
    512, 256 and 128, with Dropout(0.33) between consecutive stages. A single
    sigmoid unit whose bias starts at -10 produces the output.

    Returns
    -------
    keras.models.Model
        Uncompiled model taking ``[station_features, vector]`` as inputs.
    """
    # The 128-wide input is declared first, as in the original layer ordering
    vec_in = keras.Input((128,))
    stn_in = keras.Input((4,))

    # Station features go into the concatenation unprocessed
    hidden = keras.layers.Concatenate()([stn_in, vec_in])

    for stage, units in enumerate((1024, 512, 256, 128)):
        if stage > 0:
            # Dropout sits between stages only; none after the last stage
            hidden = keras.layers.Dropout(0.33)(hidden)
        hidden = keras.layers.Dense(units, activity_regularizer=keras.regularizers.L2(1e-2))(hidden)
        hidden = keras.layers.BatchNormalization()(hidden)
        hidden = keras.layers.Activation("gelu")(hidden)

    prob = keras.layers.Dense(1, activation='sigmoid', bias_initializer=keras.initializers.Constant(-10))(hidden)

    return keras.models.Model(inputs=[stn_in, vec_in], outputs=prob)

In [7]:
# Read the 3-km and 80-km domain fields from the domain file in one pass;
# the datasets are fetched in the same order as the names on the left.
with h5py.File(save_dir+'HRRR_domain.hdf', 'r') as h5io:
    (lon_3km, lat_3km,
     lon_80km, lat_80km,
     elev_3km, land_mask_80km) = [
        h5io[key][...]
        for key in ('lon_3km', 'lat_3km', 'lon_80km', 'lat_80km', 'elev_3km', 'land_mask_80km')
    ]

# Shape of the 80-km grid, taken from the land mask
grid_shape = land_mask_80km.shape

In [8]:
# Interpolate the 3-km elevation onto the 80-km grid points
elev_80km = du.interp2d_wraper(lon_3km, lat_3km, elev_3km, lon_80km, lat_80km, method='linear')

# NaNs are zeroed first, then negative values; the maximum is taken afterwards
elev_80km[np.isnan(elev_80km)] = 0
elev_80km[elev_80km < 0] = 0
elev_max = elev_80km.max()

# Coordinates restricted to the points selected by the land mask
lon_80km_mask = lon_80km[land_mask_80km]
lat_80km_mask = lat_80km[land_mask_80km]

# [min, max] pairs used later for min-max scaling of lon/lat
lon_minmax = [lon_80km_mask.min(), lon_80km_mask.max()]
lat_minmax = [lat_80km_mask.min(), lat_80km_mask.max()]

In [9]:
# Directory locations used by the inference loop below.
# filepath_train is not referenced in the visible cells.
filepath_train = "/glade/scratch/ksha/DATA/NCAR_batch/"
# Globbed for the VALID* batch files.
filepath_valid = "/glade/campaign/cisl/aiml/ksha/NCAR_batch/"
# Globbed for the test batch files (path contains 'v4').
filepath_test = "/glade/campaign/cisl/aiml/ksha/NCAR_batch_v4/"
# Holds the stored prediction/vector files that are loaded, and receives the saved results.
filepath_vec = "/glade/work/ksha/NCAR/"

In [10]:
# For each forecast lead: rebuild the station features from the batch file
# names, load the stored vectors, run the saved classifier on both the
# validation and test sets, and write the predictions with their labels.
for lead in range(2, 3, 1):
    print('Process lead{}'.format(lead))

    # ---- validation set ----
    # File order is negatives (sorted) followed by positives (sorted).
    # NOTE(review): the stored vectors below are assumed to follow this same order - confirm.
    filename_valid = sorted(glob("{}VALID*neg_neg_neg*lead{}.npy".format(filepath_valid, lead))) + \
                     sorted(glob("{}VALID*pos*lead{}.npy".format(filepath_valid, lead)))

    lon_valid, lat_valid, elev_valid, lead_valid, mon_valid = feature_extract(
        filename_valid, lon_80km, lon_minmax, lat_80km, lat_minmax, elev_80km, elev_max, [lead, lead+2])

    # Columns: lon, lat, elev, lead (the month feature is not fed to the model)
    VALID_stn = np.concatenate((lon_valid[:, None],
                                lat_valid[:, None],
                                elev_valid[:, None],
                                lead_valid[:, None]), axis=1)

    VALID_merge = VALID_stn

    # The file holds a pickled dict inside a 0-d object array, hence
    # allow_pickle=True and the [()] unwrap. Only load trusted files this way.
    data_p_valid = np.load('{}TEST_pp15_pred_lead{}_vec2.npy'.format(filepath_vec, lead), allow_pickle=True)[()]

    VALID_256 = data_p_valid['y_vector']
    VALID_pred = data_p_valid['y_pred']
    VALID_Y = data_p_valid['y_true']

    # ---- test set ----
    filename_neg_test = sorted(glob("{}*neg_neg_neg*lead{}.npy".format(filepath_test, lead)))
    filename_pos_test = sorted(glob("{}*pos*lead{}.npy".format(filepath_test, lead)))

    filename_test = filename_neg_test + filename_pos_test

    lon_test, lat_test, elev_test, lead_test, mon_test = feature_extract(
        filename_test, lon_80km, lon_minmax, lat_80km, lat_minmax, elev_80km, elev_max, [lead, lead+2])

    data_p_test = np.load("{}TEST_pp15_pred_lead{}_v4_vec2.npy".format(filepath_vec, lead), allow_pickle=True)[()]

    TEST_256 = data_p_test['y_vector']
    TEST_pred = data_p_test['y_pred']
    TEST_Y = data_p_test['y_true']

    TEST_stn = np.concatenate((lon_test[:, None],
                               lat_test[:, None],
                               elev_test[:, None],
                               lead_test[:, None]), axis=1)

    TEST_merge = TEST_stn

    # The two model inputs must describe the same samples; fail early with a
    # readable message if the file count and the stored vectors disagree.
    if len(VALID_merge) != len(VALID_256):
        raise ValueError('lead{}: {} validation files but {} stored vectors'.format(
            lead, len(VALID_merge), len(VALID_256)))
    if len(TEST_merge) != len(TEST_256):
        raise ValueError('lead{}: {} test files but {} stored vectors'.format(
            lead, len(TEST_merge), len(TEST_256)))

    # ---- model ----
    model = create_model()

    # Only predict() is called below; `learning_rate` replaces the deprecated
    # `lr` alias, which newer Keras releases reject.
    model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),
                  optimizer=keras.optimizers.Adam(learning_rate=0))

    # NOTE(review): dummy_loader presumably returns the weight list of the saved model - confirm.
    W_old = k_utils.dummy_loader('/glade/work/ksha/NCAR/Keras_models/VALID_Lead{}/'.format(lead))
    model.set_weights(W_old)

    Y_pred_test = model.predict([TEST_merge, TEST_256])
    Y_pred_valid = model.predict([VALID_merge, VALID_256])

    # ---- save predictions next to their labels ----
    save_dict = {}
    save_dict['test'] = Y_pred_test
    save_dict['valid'] = Y_pred_valid
    save_dict['label_test'] = TEST_Y
    save_dict['label_valid'] = VALID_Y

    np.save('{}RESULT2_pp15_lead{}.npy'.format(filepath_vec, lead), save_dict)

Process lead2


2022-11-23 16:17:00.648395: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-11-23 16:17:00.649887: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-11-23 16:17:00.705135: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:62:00.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0
coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2022-11-23 16:17:00.705176: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-11-23 16:17:00.707751: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-11-23 16:17:00.707814: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2