# Mask out unqualified data

In [11]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os.path
import pickle
import time
from matplotlib.pyplot import figure
import scipy.io as sio
from datetime import date, timedelta
# Folder the per-day, per-variable input pickles are read from.
IN_DIRECTORY = "/projectnb/urbanclimate/yueqin/idaho_2020/basic_processed_data/"
# Second input folder; not referenced in the visible part of this notebook.
IN_DIRECTORY2 = "/projectnb/urbanclimate/yueqin/idaho_2020/processeddata/"
# Folder the resulting mask pickles are written to.
OUT_DIRECTORY = "/projectnb/urbanclimate/yueqin/idaho_2020/mask/"

# Names of the variables loaded for every day (one pickle file per name).
in_var1 = [
    'H',
    'u_star',
    'L_H2',
    'rot_ang_v',
]
in_var2 = [
    'u_std',
    'u_avg',
    'v_std',
    'w_std',
    'q_std',
    'T_std',
]
# Names of the mask arrays produced by the processing loop.
out_var = [
    'mask_INL_all_1d',
    'mask_neutral',
    'mask_INL_all',
    'mask_INL_all_11',
    'mask_INL_all_12',
    'mask_INL_all_3',
    'mask_INL_all_4',
    'mask_INL_all_5',
]

In [2]:
def date_list(sdate,edate):
    """Return every day from sdate through edate, both endpoints included.

    The result is a list built by adding 0, 1, 2, ... whole days to sdate.
    It is empty when edate lies before sdate.
    """
    span_days = (edate - sdate).days
    days = []
    for offset in range(span_days + 1):
        days.append(sdate + timedelta(days=offset))
    return days

In [12]:
# Quality-control switches: a check is applied only when its flag equals 1.
flux_magnitude_control = 1      # reject small |H| and small u_star
taylor_assumption_control = 1   # reject large u_std/u_avg or v_std/u_avg
variance_control = 0            # reject very small standard deviations
angle_control = 1               # reject rot_ang_v inside (min_wnd, max_wnd)
w_std_control = 0               # reject w_std/u_star far from w_std_MOST

# Bounds of the rejected rot_ang_v window (presumably degrees -- TODO confirm).
min_wnd = 120
max_wnd = 240

# Tower / sampling constants.
sonum = 12  # number of sonic
# height of sonic above ground, m
z = [
    1.2, 2, 3.5, 6, 8.2, 12.8,
    15.8, 23, 30.3, 40.2, 50.6, 60.5,
]
frequency = 10   # sampling frequency, Hz
time_avg = 3600  # average time, s
rpat = time_avg * frequency  # number of lines for a loop

# When True, the processing loop writes its mask to OUT_DIRECTORY.
do_saving = True

# compute and save masks day by day

In [13]:
%%time
# Build quality-control masks for every day in [Sdate, Edate] that has input
# data, and (optionally) save the per-row mask to OUT_DIRECTORY.
#
# Mask convention: 1 = sample kept, NaN = sample rejected.
#   mask_INL_all      combined mask, one column per sonic
#   mask_INL_all_1d   per-row mask, NaN if ANY sonic in that row is rejected
#   mask_INL_all_XX   masks of the individual checks (see comments below)
Sdate = date(2020,9,20)
Edate = date(2020,10,22)
ds = date_list(Sdate,Edate)
t_id = 0
for day in ds:
    # Zero-padded date pieces; file names carry a YYYYMMDD stamp.
    cyr, cmonth, cdys = day.strftime('%Y-%m-%d').split('-')
    stamp = cyr + cmonth + cdys
    fp_stats = IN_DIRECTORY + 'u_ins_' + stamp + '.pkl'
    if not os.path.isfile(fp_stats):
        # No processed data for this day: skip it silently.
        continue
    print('start loading:' + stamp)
    # Bind each input array to a module-level name (H, u_star, L_H2, ...)
    # so the expressions below can refer to them directly.
    # NOTE: pickle.load can execute arbitrary code -- only read trusted files.
    for var in in_var1 + in_var2:
        with open(IN_DIRECTORY + var + '_' + stamp + '.pkl', "rb") as a_file:
            globals()[var] = pickle.load(a_file)
    # calculate stability parameter z/L, one column per sonic
    z_o_L = np.full((len(L_H2), sonum), np.nan)
    for i in range(sonum):
        z_o_L[:, i] = z[i] / L_H2[:, i]
    ## create masks (all ones to start with; rejected entries become NaN)
    mask_shape = np.shape(z_o_L)
    mask_INL_all = np.ones(mask_shape)
    mask_INL_all_1d = np.ones(mask_shape[0])
    mask_INL_all_11 = np.ones(mask_shape)  # H magnitude control
    mask_INL_all_12 = np.ones(mask_shape)  # ustar magnitude control
    mask_INL_all_13 = np.ones(mask_shape)  # LE magnitude control (never filled below)
    mask_INL_all_3 = np.ones(mask_shape)   # variance control
    mask_INL_all_4 = np.ones(mask_shape)   # angle control
    mask_INL_all_5 = np.ones(mask_shape)   # w_std control
    w_std_MOST = np.full(mask_shape, np.nan)
    # create mask for neutral condition: True where |z/L| < 0.1 at every
    # sonic with a non-NaN value
    mask_neutral = np.nanmax(np.abs(z_o_L), 1) < 0.1
    if flux_magnitude_control == 1:
        msk1 = np.abs(H) < 10
        mask_INL_all[msk1] = np.nan
        mask_INL_all_11[msk1] = np.nan
        msk2 = np.abs(u_star) < 0.05
        mask_INL_all[msk2] = np.nan
        mask_INL_all_12[msk2] = np.nan
    if taylor_assumption_control == 1:
        msk3 = np.logical_or(u_std/u_avg > 0.5, v_std/u_avg > 0.5)
        mask_INL_all[msk3] = np.nan
        # NOTE(review): this check is recorded in mask_INL_all_12, the same
        # array used for the u_star check above -- confirm this is intended.
        mask_INL_all_12[msk3] = np.nan
    if variance_control == 1:
        # np.logical_or only accepts two inputs, so OR the five conditions
        # together with the ufunc's reduce().
        msk4 = np.logical_or.reduce([
            T_std < 0.01,
            u_std < 0.005,
            v_std < 0.005,
            w_std < 0.005,
            q_std < 2.35e-5,
        ])
        mask_INL_all[msk4] = np.nan
        mask_INL_all_3[msk4] = np.nan
    if angle_control == 1:
        msk5 = np.logical_and(rot_ang_v > min_wnd, rot_ang_v < max_wnd)
        mask_INL_all[msk5] = np.nan
        mask_INL_all_4[msk5] = np.nan
    if w_std_control == 1:
        # Reference value is only defined where z/L < 0; elsewhere it stays
        # NaN, the comparison below is False, and nothing is rejected.
        msk = z_o_L < 0
        # '**' is the power operator ('^' is bitwise XOR and fails on floats).
        w_std_MOST[msk] = 1*(1 - 3*z_o_L[msk])**(1/3)
        msk6 = np.abs(w_std/u_star - w_std_MOST) > 0.2*w_std_MOST
        # Apply this check's own mask (msk6), not the angle mask (msk5).
        mask_INL_all[msk6] = np.nan
        mask_INL_all_5[msk6] = np.nan
    # Collapse to one value per row: NaN as soon as any sonic is rejected.
    test = np.isnan(mask_INL_all)
    mask_INL_all_1d[np.any(test, 1)] = np.nan
    if do_saving:
        # Only the first entry of out_var (the per-row mask) is written.
        for var in [out_var[0]]:
            with open(OUT_DIRECTORY + var + '_' + stamp + '.pkl', "wb") as a_file:
                pickle.dump(globals()[var], a_file)
    print('finish processing:' + stamp)
    

start loading:20200920
finish processing:20200920
start loading:20200921
finish processing:20200921
start loading:20200922
finish processing:20200922
start loading:20200923
finish processing:20200923
start loading:20200924
finish processing:20200924
start loading:20200925
finish processing:20200925
start loading:20200926
finish processing:20200926
start loading:20200927
finish processing:20200927
start loading:20200928
finish processing:20200928
start loading:20200929
finish processing:20200929
start loading:20200930
finish processing:20200930
start loading:20201001
finish processing:20201001
start loading:20201002
finish processing:20201002
start loading:20201003
finish processing:20201003
start loading:20201004
finish processing:20201004
start loading:20201005
finish processing:20201005
start loading:20201006
finish processing:20201006
start loading:20201007
finish processing:20201007
start loading:20201008
finish processing:20201008
start loading:20201009
finish processing:20201009


In [5]:
# Flag the NaN (rejected) entries of the combined mask, then display the
# per-row mask. Both arrays hold the values left by the last day the
# processing loop handled.
test=np.isnan(mask_INL_all)
mask_INL_all_1d

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan,  1.,  1.,  1., nan, nan, nan, nan, nan, nan])

In [76]:
# Wrap the 2-D combined mask in a DataFrame to get a tabular display.
pd.DataFrame(mask_INL_all)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,,,,,,,,,,,,
1,,,,,,,,,,,,
2,,,,,,,,,,,,
3,,,,,,,,,,,,
4,,,,,,,,,,,,
5,,,,,,,,,,,,
6,,,,,,,,,,,,
7,,,,,,,,,,,,
8,,,,,,,,,,,,
9,,,,,,,,,,,,


In [55]:
# True for every row in which at least one column of `test` is flagged.
np.any(test, axis=1)

array([ True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [58]:
# Number of flagged columns in each row of `test`.
test.sum(axis=1)

array([ 9, 11, 11, 11, 11, 11, 11,  6,  0,  1,  2,  1,  6, 11, 11, 11,  1,
       11, 11,  9, 11, 11, 11,  9])

In [66]:
# Display the H-magnitude mask (NaN where abs(H) < 10, 1 elsewhere).
mask_INL_all_11

array([[nan, nan, nan,  1.,  1.,  1.,  1., nan,  1.,  1.,  1.,  1.],
       [nan, nan, nan, nan,  1., nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan,  1., nan, nan, nan, nan, nan, nan, nan],
       [ 1., nan, nan, nan,  1., nan, nan, nan, nan, nan, nan, nan],
       [ 1., nan, nan, nan,  1., nan, nan, nan,  1., nan, nan, nan],
       [ 1., nan, nan, nan,  1., nan, nan, nan,  1., nan, nan,  1.],
       [nan, nan, nan, nan,  1., nan, nan, nan,  1., nan, nan, nan],
       [ 1.,  1.,  1., nan,  1., nan, nan, nan, nan, nan,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., nan,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.,  