In [5]:
#  Load the "autoreload" extension so that code can change
%load_ext autoreload
%reload_ext autoreload
from pathlib import Path

#  always reload modules so that as you change code in src, it gets loaded
%autoreload 2
%matplotlib inline

import sys
sys.path.append('../')
from src.imports import *
from src.data.download_data import *
from src.data.fire_data import *
from src.data.read_data import *
from src.gen_functions import *
from src.features.dataset import Dataset
from src.features.build_features import *
from src.models.train_model import *
import seaborn as sns
output_notebook()
# set font size 
from src.visualization.visualize import *
from src.models.train_model import *


from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Dense, Conv1D, SeparableConv1D, Dropout,BatchNormalization
import keras.backend as K
import tensorflow as tf
from keras.optimizers import Adam
from keras.models import load_model

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Using TensorFlow backend.


In [7]:
# Ask TensorFlow which physical GPU devices it can see; the resulting list is
# displayed as this cell's output.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [9]:
def get_model(input_shape,output_shape,num_layer,nn_size,act_fun,drop,lr,momentum):
    """Build and compile a fully connected Keras regression network.

    Architecture, in order: one input Dense layer, `num_layer` hidden blocks
    (Dense followed by BatchNormalization), a single Dropout layer, and a
    linear Dense output layer. The model is compiled with MSE loss and an
    Adam optimizer.

    Args:
        input_shape: passed as `input_dim` of the first Dense layer.
        output_shape: number of units in the linear output layer.
        num_layer: number of hidden Dense + BatchNormalization blocks;
            values <= 0 add no hidden block.
        nn_size: number of units in the input and hidden Dense layers.
        act_fun: activation used by the input and hidden Dense layers.
        drop: rate given to the Dropout layer placed before the output layer.
        lr: learning rate of the Adam optimizer.
        momentum: momentum given to every BatchNormalization layer.

    Returns:
        The compiled `Sequential` model.
    """
    network = Sequential()

    # Input layer.
    network.add(Dense(nn_size, activation=act_fun, input_dim=input_shape))

    # Hidden blocks, named layer_dense1 .. layer_dense<num_layer>.
    hidden_positions = range(num_layer) if num_layer > 0 else ()
    for position in hidden_positions:
        hidden_dense = Dense(nn_size, activation=act_fun, name=f'layer_dense{position+1}')
        network.add(hidden_dense)
        network.add(BatchNormalization(momentum=momentum))

    # One dropout layer, then the linear output layer.
    network.add(Dropout(drop))
    network.add(Dense(output_shape, activation='linear'))

    # r2_pred is not defined in this notebook cell; it is expected to come
    # from the project imports at the top of the notebook.
    network.compile(loss='mse', optimizer=Adam(learning_rate=lr), metrics=[r2_pred])
    return network

In [3]:
# Create the project Dataset object for 'Chiang Mai', load its data, and
# select PM2.5 as the pollutant of interest.
data = Dataset('Chiang Mai')
data.load_()
data.pollutant = 'PM2.5'
# build the first dataset
# NOTE(review): presumably the feature table without fire features - confirm
# in src.features.dataset.
data.feature_no_fire()
# NOTE(review): presumably adds differenced columns - confirm.
data.make_diff_col()
# use default fire feature
data.merge_fire()
# Candidate input columns: every column of data.data except the pollutant
# column and the column named by data.monitor.
x_cols = data.data.columns.drop([data.pollutant, data.monitor ] )
print(x_cols)

data no fire has shape (77746, 15)
use default fire feature
Index(['Temperature(C)', 'Humidity(%)', 'Wind Speed(kmph)', 'wind_CALM',
       'wind_E', 'wind_N', 'wind_S', 'wind_W', 'is_rain', 'is_holiday',
       'is_weekend', 'day_of_week', 'time_of_day', 'fire_0_100',
       'fire_100_400', 'fire_400_700', 'fire_700_1000'],
      dtype='object')


In [4]:
# Split the dataset using four ratios (0.4 / 0.2 / 0.2 / 0.2); the resulting
# index groups are read back from data.split_list below.
data.split_data(split_ratio=[0.4, 0.2, 0.2, 0.2])
# split_list[0] supplies the training matrices and split_list[1] the
# validation matrices. x_cols is rebound to the value returned by the first
# call and then reused for the second call.
xtrn, ytrn, x_cols = data.get_data_matrix(use_index=data.split_list[0], x_cols=x_cols)
xval, yval, _ = data.get_data_matrix(use_index=data.split_list[1], x_cols=x_cols)
# Store the feature columns on the dataset object.
data.x_cols = x_cols

In [8]:
# nn search parameters
num_layers = Integer(low=0, high=3, name='num_layer')
nn_sizes = Integer(low=8, high=1024, name='nn_size')
act_funs = Categorical(categories=['relu','softplus'],
                             name='act_fun')
drops = Real(low=0, high=0.2,
                         name='drop')
lrs = Real(low=1e-4, high=1e-2, prior='log-uniform',
                         name='lr')
momentums = Real(low=0.7, high=0.99, 
                         name='momentum')
dimensions = [num_layers,nn_sizes,act_funs,drops,lrs,momentums]
default_params = [0,512,'relu',0.01,1E-4,0.99]

In [None]:
@use_named_args(dimensions)
def fit_with(num_layer,nn_size,act_fun,drop,lr,momentum):
    """Objective for the hyperparameter search: train one network and score it.

    Builds a model with `get_model` from the given hyperparameters, fits it on
    the notebook-level training matrices (`xtrn`, `ytrn`) with early stopping,
    and evaluates it on the notebook-level validation matrices (`xval`, `yval`).

    Args:
        num_layer, nn_size, act_fun, drop, lr, momentum: hyperparameters
            forwarded unchanged to `get_model`.

    Returns:
        Mean squared error between `yval` and the model's predictions on
        `xval` (smaller is better).

    Note:
        `ytrn.shape[1]` is read, so `ytrn` must be two-dimensional.
    """
    model = get_model(
        input_shape=xtrn.shape[1],
        output_shape=ytrn.shape[1],
        num_layer=num_layer,
        nn_size=nn_size,
        act_fun=act_fun,
        drop=drop,
        lr=lr,
        momentum=momentum,
    )
    # Early stopping: give up after 8 epochs without improvement and restore
    # the best weights seen. It watches the internal validation split below.
    esm = EarlyStopping(patience=8,verbose=0,restore_best_weights=True)
    # Train on the training matrices. The original referenced `x_trn`/`y_trn`,
    # which are never defined in this notebook (the split cell creates `xtrn`
    # and `ytrn`), so every evaluation failed with a NameError.
    model.fit(xtrn, ytrn,validation_split=0.2,verbose=0,epochs=1000,callbacks=[esm])

    # Score on the separate validation set, not the internal validation split.
    y_pred = model.predict(xval)

    return mean_squared_error(yval,y_pred)

# Run the Gaussian-process based minimisation of the objective over the
# search space; random_state is fixed for repeatability.
gp_result = gp_minimize(
    func=fit_with,
    dimensions=dimensions,
    n_jobs=-1,
    random_state=30,
)

# Best parameter vector found (ordered like `dimensions`) and its objective value.
print(gp_result.x)
score = gp_result.fun