# 'Basic_statistics.ipynb' was created by Yue on Feb 8, 2024 to compute basic statistics.

Workflow:
1. Load the original instantaneous ux, uy, uz data produced by data_processing, and load the hourly and qc files.
2. Combine the daily wind_angle, qc, and datetime arrays into one file per variable and get the total length (number of hours).
3. Compute air density.
4. Compute avg, std, fluxes.
5. Compute stability parameters.
6. Save results.

Notes:
1. Input directory: /save_processed_data.
2. Output directory: /save_statistical_data.
3. P and T have no values at levels 3, 5, 7, and 11.
4. Each statistical result is saved separately in a single file whose length equals the total number of hours.
5. The 'basic_statistics' dataframe contains the hourly statistical variables.
6. The last 9 hours have no values for any variable.
7. datetime_all files contain hours from 0 to 23.

=========== Disable de-spike on Sep 14, 2024 and save data to /save_statistical_data_091424 =====

=========== Disable de-spike and do planar fit on Sep 19, 2024 and save data to /save_statistical_data_planarfit =====

=========== Disable de-spike and do planar fit on Sep 19, 2024 and save data to /save_statistical_data_092024 =====

# Set up environment

In [7]:
# import packages
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os.path
import pickle
import time
from matplotlib.pyplot import figure
import scipy.io as sio
from datetime import date, timedelta
from math import *
from scipy.stats import gmean
from scipy import ndimage
from scipy import stats
from scipy import signal
import seaborn as sns
import time
import json

# Define parameters

In [17]:
# Input (processed data) and output (statistics) folders
IN_DIR = "/projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_processed_data_092024/"
OUT_DIR = "/projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_statistical_data_092024/"

# Folders that are checked before the rest of the notebook runs
directories = [IN_DIR, OUT_DIR]

# Report every folder that is already present; try to create the missing ones
# and report the outcome (a failure is printed, it does not stop the notebook).
for dir_path in directories:
    if os.path.exists(dir_path):
        print(f"Directory exists: {dir_path}")
        continue
    try:
        os.makedirs(dir_path)
    except Exception as e:
        print(f"An error occurred while creating directory {dir_path}: {e}")
    else:
        print(f"Created directory: {dir_path}")

# physical constants (or values that are assumed constant)
Rw = 461.5        # ideal gas constant for water vapor, J/kg*K
Rd = 287.05       # ideal gas constant for dry air, J/kg*K
Lv = 1000 * 2257  # latent heat of vaporization (water), J/kg
Cp = 1005         # approximate constant pressure specific heat of air, J/kg*K
k = 0.4           # Von Karman constant
g = 9.81          # acceleration of gravity, m/s^2

# global constants
sonum = 12  # number of sonic
# height of each sonic above ground (units are not stated here; presumably m - confirm)
z = np.array([1.2, 2, 3.5, 6, 9, 12.5, 16.5, 23, 30, 40, 50, 60])
frequency = 10               # sampling rate, Hz
time_avg = 3600              # average time, s
rpat = time_avg * frequency  # number of lines for a loop (samples per averaging period)

# input variables
# Each name is listed once: the hourly loader reads one file per entry, so a
# repeated name would read the same file twice. 'T_dspk_ldtr' is used both for
# the temperature statistics and for the air-density computation.
in_tur = [
    'u_dspk_2rot_ldtr', 'v_dspk_2rot_ldtr', 'w_dspk_2rot_ldtr', 'T_dspk_ldtr',
    'u_dspk_2rot_filt', 'v_dspk_2rot_filt', 'w_dspk_2rot_filt', 'T_dspk_filt',
    'q_ins_rnan', 'P_ins_rnan',
]
in_other = ['ts_dspk_wind_ang', 'u_filt_size']
in_qf = [
    'qc_ux_nan', 'qc_uy_nan', 'qc_uz_nan', 'qc_T_nan', 'qc_q_nan', 'qc_P_nan',
    'qc_ux_dspk', 'qc_uy_dspk', 'qc_uz_dspk', 'qc_T_dspk', 'qc_wdir_dspk',
]
in_nspikes = ['u_nspikes', 'v_nspikes', 'w_nspikes', 'T_nspikes']

# output variables
out_other = ['wind_ang_all', 'filter_size_all', 'n_hours', 'datetime_all']
out_qf = [
    'qc_ux_nan_all', 'qc_uy_nan_all', 'qc_uz_nan_all', 'qc_T_nan_all', 'qc_q_nan_all', 'qc_P_nan_all',
    'qc_ux_dspk_all', 'qc_uy_dspk_all', 'qc_uz_dspk_all', 'qc_T_dspk_all', 'qc_wdir_dspk_all',
]
out_nspikes = ['u_nspikes_all', 'v_nspikes_all', 'w_nspikes_all', 'T_nspikes_all']

out_tur = [
    'u_tur_ldtr', 'v_tur_ldtr', 'w_tur_ldtr', 'T_tur_ldtr',
    'u_tur_filt', 'v_tur_filt', 'w_tur_filt', 'T_tur_filt',
]
out_avg = [
    'P_avg', 'T_avg', 'q_avg', 'Rho_air',
    'u_avg_ldtr', 'v_avg_ldtr', 'w_avg_ldtr', 'T_avg_ldtr',
    'u_avg_filt', 'v_avg_filt', 'w_avg_filt', 'T_avg_filt',
]
out_std = [
    'u_std_ldtr', 'v_std_ldtr', 'w_std_ldtr', 'T_std_ldtr',
    'u_std_filt', 'v_std_filt', 'w_std_filt', 'T_std_filt',
]
out_flux = [
    'uw_ldtr', 'vw_ldtr', 'wT_ldtr', 'u_star_ldtr', 'H_ldtr',
    'uw_filt', 'vw_filt', 'wT_filt', 'u_star_filt', 'H_filt',
]
out_stability = ['L_H2_ldtr', 'stability_ldtr', 'L_H2_filt', 'stability_filt']

# controls
# webb_corr = 2 # do webb-corr on q and C only

# labels (0-based indices into the sonic levels)
list_bot = np.array([0, 1, 2, 3, 4])         # bottom five levels
list_sel_m2 = np.array([5, 6, 7, 8, 9, 10])  # from level 6 to level 11

Directory exists: /projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_processed_data_092024/
Directory exists: /projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_statistical_data_092024/


# Define functions

In [9]:
def date_list(sdate,edate):
    """Return the consecutive days from sdate through edate, both ends included.

    The result is empty when edate lies before sdate.
    """
    n_days = (edate - sdate).days + 1
    days = []
    for offset in range(n_days):
        days.append(sdate + timedelta(days=offset))
    return days

# Do the statistical analysis

In [18]:
# Analysis period (first and last day are both included) and output switch
Sdate, Edate = date(2020, 9, 25), date(2021, 4, 23)
# alternative values kept for reference: Sdate = date(2020,10,17), Edate = date(2020,9,26)
ds = date_list(sdate=Sdate, edate=Edate)
write_results = True  # the cells below only save their arrays when this is True
OUT_DIR  # echo the output directory in the cell output

'/projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_statistical_data_092024/'

# Combine some arrays into one

In [19]:
%%time
# Combine the daily arrays (wind angle, filter size, QC flags, spike counts)
# into one vertically stacked array per variable covering the whole period.
#
# Every daily input name is paired with the notebook-level name of its stacked
# result: the two entries of in_other get dedicated names, all QC and spike
# arrays simply gain an '_all' suffix.
stacked_name = {'ts_dspk_wind_ang': 'wind_ang_all', 'u_filt_size': 'filter_size_all'}
stacked_name.update({name: f"{name}_all" for name in in_qf + in_nspikes})

daily_parts = {name: [] for name in stacked_name}  # input name -> list of daily arrays
datetime_all = []                                  # one [day, hour] pair per hour

for day in ds:
    strday = day.strftime("%Y%m%d")
    # the daily wind-angle file decides whether a day is available at all
    fp_stats = f"{IN_DIR}ts_dspk_wind_ang_{strday}.npy"
    if not os.path.isfile(fp_stats):
        print(strday + ' do not exist')
        continue
    for name, parts in daily_parts.items():
        parts.append(np.load(f"{IN_DIR}{name}_{strday}.npy"))
    # 24 entries per available day (hours 0..23), matching the rows stacked above
    datetime_all.extend([day, ih] for ih in range(24))

if not datetime_all:
    # np.vstack would otherwise fail on the empty lists with a less helpful message
    raise FileNotFoundError(
        f"No daily 'ts_dspk_wind_ang' files found in {IN_DIR} for the selected period"
    )

# Reframe lists to nparrays (vertical stacking keeps the days in chronological order).
combined = {stacked_name[name]: np.vstack(parts) for name, parts in daily_parts.items()}
combined['datetime_all'] = np.array(datetime_all)
combined['n_hours'] = len(combined['wind_ang_all'])

# Publish the results under their usual notebook names (wind_ang_all, qc_*_all,
# *_nspikes_all, datetime_all, n_hours) so other cells can keep using them.
globals().update(combined)
print(combined['n_hours'])

# save combined arrays, one .npy file per variable
if write_results:
    for var_name in out_other + out_qf + out_nspikes:
        np.save(OUT_DIR + f"{var_name}.npy", combined[var_name])

20201216 do not exist
5040
CPU times: user 1.49 s, sys: 363 ms, total: 1.85 s
Wall time: 3.96 s


# Compute statistics

In [20]:
# Analysis period and number of hourly rows (read back from the saved n_hours file)
Sdate, Edate = date(2020, 9, 25), date(2021, 4, 23)
# alternative values kept for reference: Sdate = date(2020,10,17), Edate = date(2020,9,26)
ds = date_list(Sdate, Edate)
n_hours = np.load(OUT_DIR + "n_hours.npy")

# Every hourly table starts out all-NaN with one row per hour and one column
# per sonic; each name below receives its own independent array.
hourly_shape = (n_hours, sonum)

## avg variables
P_avg, T_avg, q_avg, Rho_air = (np.full(hourly_shape, np.nan) for _ in range(4))
u_avg_ldtr, v_avg_ldtr, w_avg_ldtr, T_avg_ldtr = (np.full(hourly_shape, np.nan) for _ in range(4))
u_avg_filt, v_avg_filt, w_avg_filt, T_avg_filt = (np.full(hourly_shape, np.nan) for _ in range(4))

## std
u_std_ldtr, v_std_ldtr, w_std_ldtr, T_std_ldtr = (np.full(hourly_shape, np.nan) for _ in range(4))
u_std_filt, v_std_filt, w_std_filt, T_std_filt = (np.full(hourly_shape, np.nan) for _ in range(4))

## fluxes
uw_ldtr, vw_ldtr, wT_ldtr, u_star_ldtr, H_ldtr = (np.full(hourly_shape, np.nan) for _ in range(5))
uw_filt, vw_filt, wT_filt, u_star_filt, H_filt = (np.full(hourly_shape, np.nan) for _ in range(5))

## other variables (L_H2_* is the Obukhov length)
L_H2_ldtr, stability_ldtr = (np.full(hourly_shape, np.nan) for _ in range(2))
L_H2_filt, stability_filt = (np.full(hourly_shape, np.nan) for _ in range(2))

write_results = True  # the statistics cell only saves its results when this is True
OUT_DIR  # echo the output directory in the cell output

'/projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_statistical_data_092024/'

In [21]:
%%time
# Hourly statistics. For every available day and each of its 24 hours: load the
# hourly series, compute means, fluctuations, standard deviations, covariances,
# fluxes and stability parameters, and write them into row (count_hr + ih) of
# the pre-allocated hourly tables. Fluctuation series and a per-hour CSV are
# saved inside the loop; the full tables are saved once at the end.

# Names of all hourly tables, in the column order used for the per-hour CSV.
stat_names = out_avg + out_std + out_flux + out_stability
# The tables are only ever filled in place below, so these references stay valid.
stat_tables = {name: globals()[name] for name in stat_names}

count_hr = 0  # row offset of the current day within the hourly tables
for day in ds:
    strday = day.strftime("%Y%m%d")
    fp_stats = f"{IN_DIR}ts_dspk_wind_ang_{strday}.npy"
    if not os.path.isfile(fp_stats):
        # a day without its daily wind-angle file is skipped and occupies no rows
        print(strday + ' do not exist')
        continue

    ##-------------------------------------------------
    print(f"start processing:{strday}")
    # start the loop of hours
    for ih in range(24):
        row = count_hr + ih  # row of this hour in every hourly table

        # load tur data; dict.fromkeys drops repeated names so each file is read once
        for var_name in dict.fromkeys(in_tur):
            globals()[var_name] = np.load(f"{IN_DIR}{var_name}_{strday}_{ih:02}00.npy")

        # calculate air density by ideal gas law
        P_avg[row, :] = np.nanmean(P_ins_rnan, axis=0)
        T_avg[row, :] = np.nanmean(T_dspk_ldtr, axis=0)
        q_avg[row, :] = np.nanmean(q_ins_rnan, axis=0)
        # NOTE(review): 287.04 is hard-coded here although Rd = 287.05 is defined with the
        # physical constants; the literal is kept so existing results do not change -
        # confirm which value is intended. The 0.61*q term presumably assumes q is a
        # water-vapour density - TODO confirm the units of P, T and q.
        Rho_air[row, :] = P_avg[row, :]/(287.04*T_avg[row, :]) - 0.61*q_avg[row, :]
        # fill in missing data at 3rd, 5th, 7th, 11th levels with the mean of the two neighbours
        Rho_air[row, 2] = 0.5*(Rho_air[row, 1] + Rho_air[row, 3])
        Rho_air[row, 4] = 0.5*(Rho_air[row, 3] + Rho_air[row, 5])
        Rho_air[row, 6] = 0.5*(Rho_air[row, 5] + Rho_air[row, 7])
        Rho_air[row, 10] = 0.5*(Rho_air[row, 9] + Rho_air[row, 11])

        # Calculate statistics
        ## Calculate mean variables (one value per sonic)
        u_avg_ldtr[row, :] = np.nanmean(u_dspk_2rot_ldtr, axis=0)
        v_avg_ldtr[row, :] = np.nanmean(v_dspk_2rot_ldtr, axis=0)
        w_avg_ldtr[row, :] = np.nanmean(w_dspk_2rot_ldtr, axis=0)
        T_avg_ldtr[row, :] = np.nanmean(T_dspk_ldtr, axis=0)

        u_avg_filt[row, :] = np.nanmean(u_dspk_2rot_filt, axis=0)
        v_avg_filt[row, :] = np.nanmean(v_dspk_2rot_filt, axis=0)
        w_avg_filt[row, :] = np.nanmean(w_dspk_2rot_filt, axis=0)
        T_avg_filt[row, :] = np.nanmean(T_dspk_filt, axis=0)

        ## calculate turbulent variables for 1 hr (series minus its hourly mean);
        ## no pre-allocation is needed because each assignment creates a new array
        u_tur_ldtr = u_dspk_2rot_ldtr - u_avg_ldtr[row, :]
        v_tur_ldtr = v_dspk_2rot_ldtr - v_avg_ldtr[row, :]
        w_tur_ldtr = w_dspk_2rot_ldtr - w_avg_ldtr[row, :]
        T_tur_ldtr = T_dspk_ldtr - T_avg_ldtr[row, :]

        u_tur_filt = u_dspk_2rot_filt - u_avg_filt[row, :]
        v_tur_filt = v_dspk_2rot_filt - v_avg_filt[row, :]
        w_tur_filt = w_dspk_2rot_filt - w_avg_filt[row, :]
        T_tur_filt = T_dspk_filt - T_avg_filt[row, :]

        ## calculate standard deviations
        u_std_ldtr[row, :] = np.nanstd(u_tur_ldtr, axis=0)
        v_std_ldtr[row, :] = np.nanstd(v_tur_ldtr, axis=0)
        w_std_ldtr[row, :] = np.nanstd(w_tur_ldtr, axis=0)
        T_std_ldtr[row, :] = np.nanstd(T_tur_ldtr, axis=0)

        u_std_filt[row, :] = np.nanstd(u_tur_filt, axis=0)
        v_std_filt[row, :] = np.nanstd(v_tur_filt, axis=0)
        w_std_filt[row, :] = np.nanstd(w_tur_filt, axis=0)
        T_std_filt[row, :] = np.nanstd(T_tur_filt, axis=0)

        ## calculate covariance and fluxes
        uw_ldtr[row, :] = np.nanmean(u_tur_ldtr*w_tur_ldtr, axis=0)
        vw_ldtr[row, :] = np.nanmean(v_tur_ldtr*w_tur_ldtr, axis=0)
        wT_ldtr[row, :] = np.nanmean(w_tur_ldtr*T_tur_ldtr, axis=0)  # sensible heat flux
        H_ldtr[row, :] = Cp*Rho_air[row, :]*wT_ldtr[row, :]

        uw_filt[row, :] = np.nanmean(u_tur_filt*w_tur_filt, axis=0)
        vw_filt[row, :] = np.nanmean(v_tur_filt*w_tur_filt, axis=0)
        wT_filt[row, :] = np.nanmean(w_tur_filt*T_tur_filt, axis=0)  # sensible heat flux
        H_filt[row, :] = Cp*Rho_air[row, :]*wT_filt[row, :]

        # u_star combines both covariance components: (uw^2 + vw^2)^(1/4)
        u_star_ldtr[row, :] = np.maximum(0, (uw_ldtr[row, :]**2 + vw_ldtr[row, :]**2)**0.25)
        u_star_filt[row, :] = np.maximum(0, (uw_filt[row, :]**2 + vw_filt[row, :]**2)**0.25)

        # calculate other variables
        ## Obukhov length (m) without Evaporation term
        ## results are very similar between _ldtr and _filt so only keep one
        L_H2_ldtr[row, :] = -u_star_ldtr[row, :]**3*T_avg_ldtr[row, :]/(k*g*wT_ldtr[row, :])
        stability_ldtr[row, :] = z/L_H2_ldtr[row, :]
        L_H2_filt[row, :] = -u_star_filt[row, :]**3*T_avg_filt[row, :]/(k*g*wT_filt[row, :])
        stability_filt[row, :] = z/L_H2_filt[row, :]

        # one column per hourly table, one row per sonic, for this hour only
        df = pd.DataFrame({name: table[row, :] for name, table in stat_tables.items()})

        # save tur and statistical data by hours
        if write_results:
            # write output by hours
            for var_name in out_tur:
                var_value = globals()[var_name]
                filename = f"{var_name}_{strday}_{ih:02}00.npy"
                np.save(OUT_DIR + filename, var_value)
            # save dataframe by hours as .csv files
            dfname = f"basic_statistics_{strday}_{ih:02}00.csv"
            df.to_csv(OUT_DIR + dfname, index=False)
        # end of the hour loop
    # end of the day loop: the next available day starts 24 rows further down
    count_hr += 24

# Done and save results: one .npy file per hourly table
if write_results:
    for var_name, var_value in stat_tables.items():
        np.save(OUT_DIR + f"{var_name}.npy", var_value)

start processing:20200925
start processing:20200926
start processing:20200927
start processing:20200928
start processing:20200929
start processing:20200930
start processing:20201001
start processing:20201002
start processing:20201003
start processing:20201004
start processing:20201005
start processing:20201006
start processing:20201007
start processing:20201008
start processing:20201009
start processing:20201010
start processing:20201011
start processing:20201012
start processing:20201013
start processing:20201014
start processing:20201015
start processing:20201016
start processing:20201017
start processing:20201018
start processing:20201019
start processing:20201020
start processing:20201021
start processing:20201022
start processing:20201023
start processing:20201024
start processing:20201025
start processing:20201026
start processing:20201027
start processing:20201028
start processing:20201029
start processing:20201030
start processing:20201031
start processing:20201101
start proces

# Check results

In [22]:
# echo the output directory so the cell output shows where the results were saved
OUT_DIR

'/projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_statistical_data_092024/'

In [25]:
# Compare the u_avg_filt column of one hourly CSV (2021-04-10, 16:00) between
# the current output folder and the 091424 output folder.
OUT_DIR2 = "/projectnb/urbanclimate/yueqin/idaho_ec_jupyter/save_statistical_data_091424/"
dfname = "basic_statistics" + "_20210410_1600.csv"
stat1, stat2 = (pd.read_csv(f"{folder}{dfname}") for folder in (OUT_DIR, OUT_DIR2))
tuple(stat['u_avg_filt'] for stat in (stat1, stat2))

(0     11.201016
 1     12.851772
 2     14.985092
 3     16.581162
 4     18.074254
 5     19.877515
 6     20.444682
 7     22.016416
 8     22.549294
 9     23.318245
 10    23.578300
 11    25.030723
 Name: u_avg_filt, dtype: float64,
 0     11.204163
 1     12.851771
 2     14.988614
 3     16.581032
 4     18.085432
 5     19.877515
 6     20.445700
 7     22.019322
 8     22.571357
 9     23.362946
 10    23.595304
 11    25.050651
 Name: u_avg_filt, dtype: float64)