# Cn2 Predictions via FLAML #

### Sukanta Basu (Delft University of Technology) ###

**Last updated: November 3, 2022**

Notes: 

1. 

**Load the necessary packages**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path

import pickle
from pickle import dump, load
import time

from flaml import AutoML

# For reproducibility: seed NumPy's legacy global random generator with a fixed
# value, then draw one integer in [0, 1000) from it to serve as a derived seed.
# NOTE(review): randSeed is not referenced in the cells shown here — confirm
# whether it is still needed.
from numpy.random import seed
seed(20)
randSeed = np.random.randint(1000)

  from pandas import MultiIndex, Int64Index


**User input**

In [2]:
# ---- Ensemble -------------------------------------------------------------
# Number of ensemble members (one pickled FLAML run is loaded per member).
nEns = 25

# ---- Estimator ------------------------------------------------------------
# Candidate estimator names; modOpt is the position of the chosen one:
#   0: 'lgbm', 1: 'xgboost', 2: 'xgb_limitdepth',
#   3: 'catboost', 4: 'rf', 5: 'extra_tree'
mod = [
    'lgbm',
    'xgboost',
    'xgb_limitdepth',
    'catboost',
    'rf',
    'extra_tree',
]
modOpt = 0

# ---- Time budget (seconds) ------------------------------------------------
maxTime = 60

# ---- Measurement level ----------------------------------------------------
# Cn2 is measured at 6 m, 15 m, and 25 m.
zLev = 6

# ---- Input data set -------------------------------------------------------
# 1: ERA5, 2: MERRA2, 3: COMBO
inpOpt = 3

# ---- Sampling option ------------------------------------------------------
# 0: hourly, 1: 5 min
sOpt = 1

# ---- Train-validation ratio -----------------------------------------------
split_ratio = 0.2

**Input & output directories**

In [3]:
# Project root; every other location is derived from it.
# Paths are kept as plain strings (with a trailing slash) because later cells
# build file names by string concatenation.
ROOT_DIR = "/Users/sukantabasu/Dropbox/Priority/Works_Ongoing/AIML/2022_HawaiiCn2Reanalysis/"

# Input and output currently resolve to the same folder.
INPUT_DIR = f"{ROOT_DIR}ExtractedDATA/v02/Tuning/"
OUTPUT_DIR = f"{ROOT_DIR}ExtractedDATA/v02/Tuning/"

In [4]:
# Sub-folder (inside OUTPUT_DIR) that holds the pickled FLAML runs for each
# estimator choice. Keys are the supported values of modOpt.
_TUNING_SUBDIRS = {
    0: 'LGBM/',
    1: 'XGB/',
    2: 'XGBL/',
    3: 'CAT/',
    4: 'RF/',
    5: 'EXT/',
}

# Fail here with a clear message rather than leaving TUNING_DIR undefined,
# which would only surface later as a NameError in the prediction cell.
if modOpt not in _TUNING_SUBDIRS:
    raise ValueError(
        f"Unsupported modOpt={modOpt!r}; expected one of {sorted(_TUNING_SUBDIRS)}"
    )

TUNING_DIR = OUTPUT_DIR + _TUNING_SUBDIRS[modOpt]

**Test data**

In [5]:
# Predictor columns, grouped by origin. The COMBO input is the time
# features followed by the ERA5 block and then the MERRA2 block, so each
# list is written once and the three input sets are composed from them.
TIME_FEATURES = ['sinDY', 'cosDY', 'sinHR', 'cosHR']

ERA5_FEATURES = [
    'ERA5_WSPD_10m', 'ERA5_WSPD_100m', 'ERA5_GUST_10m',
    'ERA5_alpha', 'ERA5_beta',
    'ERA5_T_2m', 'ERA5_TSK', 'ERA5_TSL', 'ERA5_Td_2m',
    'ERA5_dT1', 'ERA5_dT2', 'ERA5_dT3',
    'ERA5_UST', 'ERA5_SHFX', 'ERA5_LH', 'ERA5_PMSL', 'ERA5_PBLH',
    'ERA5_TCC', 'ERA5_LCC', 'ERA5_EDR', 'ERA5_CAPE',
]

MERRA2_FEATURES = [
    'MRA2_WSPD_2m', 'MRA2_WSPD_10m', 'MRA2_WSPD_50m', 'MRA2_GUST',
    'MRA2_alpha1', 'MRA2_alpha2', 'MRA2_beta1', 'MRA2_beta2',
    'MRA2_T_2m', 'MRA2_T_10m', 'MRA2_TSK', 'MRA2_dT1', 'MRA2_dT2',
    'MRA2_Q_2m', 'MRA2_Q_10m', 'MRA2_dQ',
    'MRA2_UST', 'MRA2_SHFX', 'MRA2_LH', 'MRA2_PMSL', 'MRA2_PBLH',
    'MRA2_RHO', 'MRA2_RIB',
]

# inpOpt -> (tag used in the CSV file name, ordered predictor columns)
_INPUT_SETS = {
    1: ('ERA5', TIME_FEATURES + ERA5_FEATURES),
    2: ('MERRA2', TIME_FEATURES + MERRA2_FEATURES),
    3: ('COMBO', TIME_FEATURES + ERA5_FEATURES + MERRA2_FEATURES),
}

# zLev (measurement height in m) -> target column in the test CSV
_TARGET_COLUMNS = {6: 'LCn2_06m', 15: 'LCn2_15m', 25: 'LCn2_25m'}

# Reject unsupported options up front; otherwise XTst / yTst_true would be
# left undefined and the failure would only show up in a later cell.
if inpOpt not in _INPUT_SETS:
    raise ValueError(
        f"Unsupported inpOpt={inpOpt!r}; expected one of {sorted(_INPUT_SETS)}"
    )
if zLev not in _TARGET_COLUMNS:
    raise ValueError(
        f"Unsupported zLev={zLev!r}; expected one of {sorted(_TARGET_COLUMNS)}"
    )

inpTag, featureCols = _INPUT_SETS[inpOpt]
# sOpt == 0 selects the hourly file; any other value selects the 5-min file.
sampTag = '60min' if sOpt == 0 else '5min'

# The test CSV is an input, so it is read via INPUT_DIR (which holds the same
# path as OUTPUT_DIR in the directories cell).
df_F_Tst = pd.read_csv(INPUT_DIR + 'Tst_' + inpTag + '_' + sampTag + '.csv')

# Feature matrix (rows = test samples, columns = featureCols, in that order).
XTst = df_F_Tst[featureCols].values

# Double brackets keep the target as a single-column 2-D array.
yTst_true = df_F_Tst[[_TARGET_COLUMNS[zLev]]].values

FileNotFoundError: [Errno 2] No such file or directory: '/Users/sukantabasu/Dropbox/Priority/Works_Ongoing/AIML/2022_HawaiiCn2Reanalysis/ExtractedDATA/v02/Tuning/Tst_COMBO_5min.csv'

#### FLAML-based predictions

In [None]:
nSamples, nFeatures = np.shape(XTst)

# One column of test-set predictions per ensemble member.
yTst = np.zeros((nSamples, nEns))
for n in range(nEns):

    # File name encodes every user option plus the ensemble member index.
    fSTR = (
        f"{TUNING_DIR}FLAML_zLev{zLev}_inpOpt{inpOpt}_sOpt{sOpt}"
        f"_modOpt{modOpt}_maxT{maxTime}_Ens{n}.pkl"
    )

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only load pickles produced by your own tuning runs.
    with open(fSTR, "rb") as f:
        automl = pickle.load(f)

    modFLAML = automl.best_model_for_estimator(mod[modOpt])
    # Guard against a run in which no model is available for this estimator;
    # without it the failure is an opaque AttributeError on `.predict`.
    if modFLAML is None:
        raise RuntimeError(
            f"No trained '{mod[modOpt]}' model found in {fSTR}"
        )

    yTst[:, n] = modFLAML.predict(XTst)

#### Compute median and percentile predictions

In [None]:
# 10th, 50th (median) and 90th percentiles taken across the ensemble axis
# (columns of yTst), giving one value per test sample for each percentile.
yTst_p10, yTst_p50, yTst_p90 = np.percentile(yTst, [10, 50, 90], axis=1)

In [None]:
# Full test series: ensemble median (black) against the observations (magenta).
plt.plot(yTst_p50, '-k', label='Predicted Cn2 (ensemble median)')
plt.plot(yTst_true, '-m', label='Observed Cn2')
plt.xlabel('Test sample index')
plt.ylabel('Cn2')
plt.legend()
# Render the figure explicitly so the cell does not echo a Line2D repr.
plt.show()

In [None]:
# Zoom on a short window of the test series.
i = 3000     # index of the first sample in the window
nWin = 200   # window length, in samples

# Plot against the true sample indices so the x-axis reads i .. i+nWin-1
# rather than 0 .. nWin-1. Slicing the index vector the same way as the data
# keeps the lengths matched even if the window runs past the end.
xWin = np.arange(yTst_p50.shape[0])[i:i + nWin]

plt.plot(xWin, yTst_p50[i:i + nWin], '-k', label='Predicted Cn2 (ensemble median)')
plt.plot(xWin, yTst_true[i:i + nWin], '-m', label='Observed Cn2')
plt.xlabel('Test sample index')
plt.ylabel('Cn2')
plt.legend()
plt.show()

In [None]:
# Observed vs. predicted (ensemble median) scatter, drawn on explicit axes
# with identical limits on both axes.
fig, ax = plt.subplots()
ax.plot(yTst_true, yTst_p50, '.k')
ax.set_xlim((-16.5, -11.5))
ax.set_ylim((-16.5, -11.5))
ax.set_xlabel('Observed Cn2')
ax.set_ylabel('Predicted Cn2')