In [1]:
# general tools
import os
import re
import sys
import time
import random
from glob import glob
from random import shuffle

import h5py
import numpy as np

In [22]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend
from keras_unet_collection import utils as k_utils

2022-10-31 11:17:18.796765: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
sys.path.insert(0, '/glade/u/home/ksha/NCAR/')
sys.path.insert(0, '/glade/u/home/ksha/NCAR/libs/')

from namelist import *
import data_utils as du
import graph_utils as gu

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from sklearn.metrics import classification_report, auc, roc_curve
from sklearn.metrics import confusion_matrix

In [4]:
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression

In [5]:
def pos_mixer(TRAIN, L, a0=0, a1=0.2):
    """
    Synthesize L rows as random weighted blends of row pairs taken from TRAIN.

    Every output row is w*TRAIN[p] + (1-w)*TRAIN[q]. The two row indices are
    drawn with np.random.choice (default settings, so p == q can happen) and
    the weight w is drawn with np.random.uniform(a0, a1).

    Args:
        TRAIN: array whose rows are the samples to blend (indexed as TRAIN[i, :]).
        L: number of blended rows to generate.
        a0, a1: lower / upper bound passed to np.random.uniform for the weight.

    Returns:
        Float array of shape (L, TRAIN.shape[-1]).
    """
    n_rows, n_feat = TRAIN.shape[0], TRAIN.shape[-1]
    out = np.empty((L, n_feat))

    # Each `row` is a view into `out`, so assigning to row[:] fills it in place.
    for row in out:
        # draw order (index pair first, weight second) is kept so that a
        # seeded global RNG reproduces the same samples
        first, second = np.random.choice(n_rows, 2)
        weight = np.random.uniform(a0, a1)
        row[:] = weight*TRAIN[first, :] + (1-weight)*TRAIN[second, :]
    return out

In [6]:
def create_model():
    """
    Build the fully connected classifier applied to 768-element input vectors.

    Architecture: four Dense -> BatchNormalization -> GELU stages of widths
    1024, 512, 128 and 64 (every Dense carries an L2(1e-2) activity
    regularizer), Dropout(0.3) after each of the first three stages, and a
    final single-unit sigmoid layer whose bias is initialized to -10.

    Returns:
        An uncompiled keras Model with input shape (768,) and one output unit.
    """
    stage_widths = (1024, 512, 128, 64)
    last_stage = len(stage_widths) - 1

    IN = keras.Input((768,))
    X = IN

    # Layers are instantiated in a fixed order (Dense, BatchNorm, Activation,
    # Dropout per stage); model.set_weights() assigns weights positionally,
    # so this order must not change.
    for stage, width in enumerate(stage_widths):
        X = keras.layers.Dense(width, activity_regularizer=keras.regularizers.L2(1e-2))(X)
        X = keras.layers.BatchNormalization()(X)
        X = keras.layers.Activation("gelu")(X)

        # no dropout between the last hidden stage and the output layer
        if stage != last_stage:
            X = keras.layers.Dropout(0.3)(X)

    # a strongly negative initial bias pushes the initial sigmoid output toward 0
    OUT = keras.layers.Dense(1, activation='sigmoid', bias_initializer=keras.initializers.Constant(-10))(X)

    return keras.models.Model(inputs=IN, outputs=OUT)

In [7]:
def verif_metric(VALID_target, Y_pred, ref):
    """
    Mean squared error of the predictions, scaled by a reference value.

    Both arrays are flattened before they are compared, so an (N, 1)
    prediction array can be scored against an (N,) target array.

    Args:
        VALID_target: array of target values.
        Y_pred: array of predictions with the same number of elements.
        ref: scalar the mean squared error is divided by.

    Returns:
        mean((target - prediction)**2) / ref
    """
    residual = VALID_target.ravel() - Y_pred.ravel()
    brier_score = np.mean(np.square(residual))
    return brier_score / ref

In [8]:
# directory of the per-sample batch files (same location the glob patterns below point at)
batch_dir = '/glade/scratch/ksha/DATA/NCAR_batch/'
# directory that the stored model weights are read from
temp_dir = '/glade/work/ksha/NCAR/Keras_models/'

# experiment tag; becomes the prefix of model_name ('<key>_ix<ix>_iy<iy>')
key = 'HEAD_Lead2'

In [9]:
# NOTE(review): save_dir is not assigned in any visible cell; presumably it is
# provided by `from namelist import *` -- confirm.
with h5py.File(save_dir+'HRRR_domain.hdf', 'r') as h5io:
    # read the complete 'land_mask_80km' dataset into memory
    land_mask_80km = h5io['land_mask_80km'][...]
    
# shape of the land-mask array
grid_shape = land_mask_80km.shape

### Data prep

In [45]:
# Training batch files for leads 2, 3 and 4. Within each lead the
# 'neg_neg_neg' files come first and the 'pos' files second, each group in
# sorted order; the lead blocks are appended in increasing lead order.
filename_train = []
for lead in (2, 3, 4):
    for tag in ('neg_neg_neg', 'pos'):
        pattern = "/glade/scratch/ksha/DATA/NCAR_batch/TRAIN*{}*lead{}.npy".format(tag, lead)
        filename_train += sorted(glob(pattern))

In [46]:
def _load_saved_dict(path):
    # Indexing the loaded array with () unwraps the single stored object
    # (presumably a dict; it is later indexed with string keys).
    # NOTE: allow_pickle=True unpickles the file content -- only load trusted files.
    return np.load(path, allow_pickle=True)[()]

# one file per (lead, part) combination
_train_pred = '/glade/work/ksha/NCAR/TRAIN_pred_lead{}_part{}_vec2.npy'

data_lead1_p0 = _load_saved_dict(_train_pred.format(1, 0))
data_lead1_p1 = _load_saved_dict(_train_pred.format(1, 1))
data_lead1_p2 = _load_saved_dict(_train_pred.format(1, 2))

data_lead2_p0 = _load_saved_dict(_train_pred.format(2, 0))
data_lead2_p1 = _load_saved_dict(_train_pred.format(2, 1))
data_lead2_p2 = _load_saved_dict(_train_pred.format(2, 2))

data_lead3_p0 = _load_saved_dict(_train_pred.format(3, 0))
data_lead3_p1 = _load_saved_dict(_train_pred.format(3, 1))
data_lead3_p2 = _load_saved_dict(_train_pred.format(3, 2))

data_lead4_p0 = _load_saved_dict(_train_pred.format(4, 0))
data_lead4_p1 = _load_saved_dict(_train_pred.format(4, 1))
data_lead4_p2 = _load_saved_dict(_train_pred.format(4, 2))

# The nine lead-2/3/4 parts, stacked in a fixed order (lead first, then part).
# The lead-1 dictionaries are not included in the training arrays.
_train_parts = (data_lead2_p0, data_lead2_p1, data_lead2_p2,
                data_lead3_p0, data_lead3_p1, data_lead3_p2,
                data_lead4_p0, data_lead4_p1, data_lead4_p2)

# Each array concatenates one key of every part along the first axis, so
# rows of the three arrays stay aligned with one another.
TRAIN_256 = np.concatenate([part['y_vector'] for part in _train_parts], axis=0)
TRAIN_pred = np.concatenate([part['y_pred'] for part in _train_parts], axis=0)
TRAIN_Y = np.concatenate([part['y_true'] for part in _train_parts], axis=0)

# No subsetting is applied here: the *_pick names refer to the full arrays.
TRAIN_256_pick = TRAIN_256
TRAIN_pred_pick = TRAIN_pred
TRAIN_Y_pick = TRAIN_Y

# Feature vectors split by label value (1 vs 0).
TRAIN_256_pos = TRAIN_256[TRAIN_Y==1]
TRAIN_256_neg = TRAIN_256[TRAIN_Y==0]

# Lead-2 validation batch files: 'neg_neg_neg' files first, then 'pos' files.
_valid_fmt = "/glade/scratch/ksha/DATA/NCAR_batch/VALID*{}*lead2.npy"
filename_valid = sorted(glob(_valid_fmt.format('neg_neg_neg')))
filename_valid = filename_valid + sorted(glob(_valid_fmt.format('pos')))

# NOTE(review): the predictions paired with the VALID file list are read from
# a file named TEST_pred_* -- confirm that this pairing is intended.
data_p_valid = np.load('/glade/work/ksha/NCAR/TEST_pred_lead2_vec2.npy', allow_pickle=True)[()]

# The test list is the validation list followed by the lead-2 files of the
# v4 batch directory (again 'neg_neg_neg' before 'pos').
_v4_fmt = "/glade/scratch/ksha/DATA/NCAR_batch_v4/*{}*lead{}.npy"
_v4_neg = sorted(glob(_v4_fmt.format('neg_neg_neg', 2)))
_v4_pos = sorted(glob(_v4_fmt.format('pos', 2)))
filename_test = filename_valid + _v4_neg + _v4_pos

data_p_test = np.load('/glade/work/ksha/NCAR/TEST_pred_lead2_v4_vec2.npy', allow_pickle=True)[()]

# Stack validation rows first and v4 rows second, matching filename_test.
_test_parts = (data_p_valid, data_p_test)
TEST_256 = np.concatenate([part['y_vector'] for part in _test_parts], axis=0)
TEST_pred = np.concatenate([part['y_pred'] for part in _test_parts], axis=0)
TEST_Y = np.concatenate([part['y_true'] for part in _test_parts], axis=0)

In [82]:
# indices inserted into the 'indx{}_indy{}' file-name patterns and the model name
ix = 30
iy = 55

In [83]:
# file-name pattern of the (ix, iy) location itself
name_self = ['indx{}_indy{}'.format(ix, iy)]

# patterns of the 5x5 block of locations centred on (ix, iy);
# the x index varies slowest, the y index fastest
name_block5 = ['indx{}_indy{}'.format(bx, by)
               for bx in range(ix-2, ix+3)
               for by in range(iy-2, iy+3)]

In [93]:
def _pick_by_index_pattern(filenames, patterns):
    """
    Flag the file names that contain one of the 'indx{}_indy{}' patterns.

    A pattern only counts as found when it is NOT directly followed by another
    digit. A plain substring test would let 'indx3_indy5' also match
    'indx3_indy55', silently selecting samples from the wrong location whenever
    an index is a prefix of a longer one.
    Assumes the y index in a file name is followed by a non-digit character
    (or the end of the name) -- TODO confirm against the batch file naming.

    Args:
        filenames: list of file names (order is preserved).
        patterns: list of pattern strings to look for.

    Returns:
        (flags, picked): `flags` is a list of bool with one entry per file
        name; `picked` is the list of flagged file names in their original order.
    """
    if not patterns:
        # nothing to look for: no file is selected
        return [False] * len(filenames), []

    # one alternation of all (escaped) patterns plus a negative lookahead for a digit
    alternatives = '|'.join(re.escape(p) for p in patterns)
    matcher = re.compile('(?:' + alternatives + ')(?![0-9])')

    flags = [matcher.search(name) is not None for name in filenames]
    picked = [name for name, hit in zip(filenames, flags) if hit]
    return flags, picked


# training samples: every file inside the 5x5 block of patterns
L_train = len(filename_train)
flag_pick_train, filename_pick_train = _pick_by_index_pattern(filename_train, name_block5)

# test samples: only files of the central location
L_test = len(filename_test)
flag_pick_test, filename_pick_test = _pick_by_index_pattern(filename_test, name_self)

In [94]:
# Keep only the rows flagged in flag_pick_train (one bool per entry of filename_train).
# NOTE(review): this relies on the TRAIN_* rows being in the same order as
# filename_train; that alignment cannot be verified from this cell -- confirm.
TRAIN_256_pick = TRAIN_256[flag_pick_train, :]
TRAIN_pred_pick = TRAIN_pred[flag_pick_train, :]
TRAIN_Y_pick = TRAIN_Y[flag_pick_train]

In [95]:
# Split the feature vectors by label value.
# NOTE(review): the split uses the full TRAIN_256 / TRAIN_Y arrays, not the
# *_pick subsets -- confirm that this is intended.
TRAIN_256_pos = TRAIN_256[TRAIN_Y==1]
TRAIN_256_neg = TRAIN_256[TRAIN_Y==0]

In [96]:
# Keep only the rows flagged in flag_pick_test (one bool per entry of filename_test).
# NOTE(review): assumes the TEST_* rows follow the order of filename_test -- confirm.
TEST_256_pick = TEST_256[flag_pick_test, :]
TEST_pred_pick = TEST_pred[flag_pick_test, :]
TEST_Y_pick = TEST_Y[flag_pick_test]

In [97]:
model = create_model()

# The optimizer is given a learning rate of zero; no training call appears in
# the cells around this one, where the model is only used with loaded weights
# for prediction.
# `learning_rate` is the supported keyword of keras.optimizers.SGD; the former
# `lr` spelling is a deprecated alias that newer Keras versions reject.
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=keras.optimizers.SGD(learning_rate=0.0))

In [98]:
# model name / path built from the experiment tag and the two indices
model_name = '{}_ix{}_iy{}'.format(key, ix, iy)
model_path = temp_dir+model_name

# Fetch stored weights (k_utils.dummy_loader presumably returns the weight list
# of the model saved at the given path -- confirm) and copy them into the model.
# NOTE(review): the path hard-codes 'HEAD_Lead2' instead of using model_path;
# the two are equal only while key == 'HEAD_Lead2' -- confirm this is intended.
W_old = k_utils.dummy_loader(temp_dir+'HEAD_Lead2_ix{}_iy{}'.format(ix, iy))
model.set_weights(W_old)

In [99]:
# predictions of the loaded model on the picked test vectors
Y_pred = model.predict([TEST_256_pick])
# mean of the picked test labels (the positive fraction if labels are 0/1)
ref = np.sum(TEST_Y_pick) / len(TEST_Y_pick)
# mean squared error of the predictions divided by ref
record = verif_metric(TEST_Y_pick, Y_pred, ref)
print("Initial record: {}".format(record))

Initial record: 0.8810942110618152


In [100]:
# score the test-set predictions computed by the previous cell
y_true = TEST_Y_pick
y_pred = Y_pred

# ROC curve points, used below for the area under the curve
fpr, tpr, thres = roc_curve(y_true, y_pred)
# mean of the labels
o_bar = np.mean(y_true)

bs = brier_score_loss(y_true, y_pred)
auc_score = auc(fpr, tpr)
# skill score with o_bar as the reference score, i.e. 1 - bs/o_bar.
# NOTE(review): for 0/1 labels o_bar equals the Brier score of a constant-zero
# forecast; a climatological reference would be o_bar*(1-o_bar) -- confirm
# which reference is intended.
bss = (o_bar-bs)/(o_bar)

print('auc: {}'.format(auc_score))
print('o_bar: {}'.format(o_bar))
print('bs: {}'.format(bs))
print('bss: {}'.format(bss))

auc: 0.9701650238429562
o_bar: 0.0015639097744360902
bs: 0.0013779518488786283
bss: 0.11890578893818476


In [92]:
# Same scaled mean-squared-error metric, evaluated on the picked training vectors.
# Note that `ref` and `record` are overwritten here, so they no longer hold the
# test-set values, and the printed label is the same "Initial record" text.
Y_pred_train = model.predict([TRAIN_256_pick])
ref = np.sum(TRAIN_Y_pick) / len(TRAIN_Y_pick)
record = verif_metric(TRAIN_Y_pick, Y_pred_train, ref)
print("Initial record: {}".format(record))

Initial record: 0.9035836278159305
