# Bayesian Neural Networks to Predict Hard Landing with DASHlink Data
Authors: Dr. Yingxiao Kong, Vanderbilt University

Email: yingxiao.kong@vanderbilt.edu

## Overview of Research
In this work, we use an open-source dataset - [NASA's DASHlink data](https://c3.ndc.nasa.gov/dashlink/) - to isolate landing data that include both hard-landing and normal-landing occurrences. The objective is to use [this sample data](https://c3.ndc.nasa.gov/dashlink/projects/85/resources/?type=ds) to train a Bayesian Neural Network model that predicts the touchdown vertical speed of a landing aircraft, with the intent of using it as a screening tool to identify hard landing events before they occur.

This series of Jupyter notebook demonstrations is divided into 3 modules. The presented module is in **bold**:
- Module 1 - Download DASHlink Data
- Module 2 - DASHlink Data Pre-Processing and Feature Selection with Maximum Relevance and Minimum Redundancy (MRMR)
- **Module 3 - Bayesian Neural Network Model Training**

## Module 3: Bayesian Neural Network Model Training

## Step 3a: Get Processed Data and Ordered Features from Module 2

In [None]:
import pandas as pd
# Load the two CSV files named in the Step 3a heading as Module 2 outputs:
# the processed landing data and the ordered feature table.
df_landing =  pd.read_csv('processed_data_landing_at_msp.csv')
df_features=pd.read_csv('ordered_features.csv')

In [None]:
# Render the landing DataFrame as the cell output for a quick visual check.
df_landing

In [None]:
# First column of the feature table as a NumPy array
# (presumably the feature names; confirm against Module 2's output).
inputs_list = df_features.iloc[:, 0].values
# Keep the bare expression last: a notebook cell only renders the value of its
# final expression, so placing it before the assignment displayed nothing.
df_features

In [None]:
# Name of the target column in the landing data.
OUTPUT = 'TD_ALTR'
# First column of the ordered-feature table, as a NumPy array; used below to
# select and order the input columns of df_landing.
INPUTS_LIST = df_features.iloc[:,0].values
# Display the array as the cell output.
INPUTS_LIST

In [None]:
# Restrict the landing data to the listed inputs, followed by the target
# column and the 'heights' column, in exactly that order.
df_landing = df_landing[[*INPUTS_LIST, OUTPUT, 'heights']]

### Step 3b: Define Training Inputs and Output Function

In [None]:
def get_train_data(df,output='TD_ALTR',drop='heights'):
    """Build scaled training inputs and several target transforms from ``df``.

    The caller's DataFrame is never modified; all work is done on copies.

    Parameters
    ----------
    df : pandas.DataFrame
        Input columns plus the ``output`` column (and optionally ``drop``).
    output : str
        Name of the target column. Its values are multiplied by 0.3048/60
        (presumably ft/min -> m/s; confirm the units of the source data).
    drop : str or None
        Column removed from the inputs when it is present. ``None`` disables
        the removal.

    Returns
    -------
    x_train : pandas.DataFrame
        Inputs min-max scaled per column. A column whose values are all
        equal becomes NaN (0/0).
    y_train : numpy.ndarray
        Converted target values.
    y_train_scale : numpy.ndarray
        ``log(y_train - min(y_train) + 1)``.
    delta : float
        The value -2 pushed through the same log transform. NaN when
        ``min(y_train) > -1`` and ``-inf`` when it equals -1.
    z_train : numpy.ndarray
        ``y_train`` standardised with its own mean and standard deviation.
    diff : float
        ``mean(y_train)`` minus the mean of the log transform mapped back to
        the original scale (a round-trip error check).
    """
    # DataFrame.drop without inplace returns a new frame, so the caller's
    # object (often a groupby slice) is left untouched.
    if drop is not None and drop in df.columns:
        df = df.drop(labels=drop,axis=1)

    df_in = df.drop(labels=output,axis=1)
    df_out = df[output]
    x_train = (df_in-df_in.min())/(df_in.max()-df_in.min())

    y_train = df_out.values*0.3048/60
    y_min = np.min(y_train)

    # Shift so the smallest target maps to log(1) = 0; the log compresses the
    # range, which the original author notes makes convergence easier.
    y_train_scale = np.log(y_train - y_min + 1)
    delta = np.log(-2 - y_min + 1)

    z_train = (y_train - np.mean(y_train))/np.std(y_train)

    # Invert the log transform to measure the round-trip error.
    y_train_scale_reverse = np.exp(y_train_scale) - 1 + y_min
    diff = np.mean(y_train) - np.mean(y_train_scale_reverse)

    return x_train,y_train,y_train_scale,delta,z_train,diff

### Step 3c: Define RNN Models

In [None]:
from sklearn.model_selection import train_test_split
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Dropout,LSTM,Activation
from keras.optimizers import Adam
from keras.regularizers import l1,l2
from keras.models import Sequential

def get_RNN_model(in_shape, idrop=0.25, odrop=0.25, rdrop=0.25, weight_decay=1e-4, lr=1e-3,num_unit=100):
    """Build and compile a stacked two-layer LSTM regressor with L2 penalties.

    Parameters
    ----------
    in_shape : int
        Number of features per time step; the sequence length is left open
        (``input_shape=(None, in_shape)``).
    idrop, rdrop : float
        ``dropout`` and ``recurrent_dropout`` rates passed to both LSTM layers.
    odrop : float
        Rate of the Dropout layer placed before the output; a falsy value
        omits that layer.
    weight_decay : float
        L2 factor applied to kernel, recurrent and bias weights.
    lr : float
        Learning rate handed to Adam.
    num_unit : int
        Units in each LSTM layer.

    Returns
    -------
    A compiled ``Sequential`` model with a single linear output, MSE loss and
    MSE metric.
    """
    net = Sequential()

    # First recurrent layer: returns the whole sequence so that the second
    # LSTM receives one vector per time step.
    net.add(
        LSTM(
            num_unit,
            input_shape=(None, in_shape),
            return_sequences=True,
            dropout=idrop,
            recurrent_dropout=rdrop,
            kernel_initializer='random_uniform',
            kernel_regularizer=l2(weight_decay),
            recurrent_regularizer=l2(weight_decay),
            bias_regularizer=l2(weight_decay),
        )
    )
    net.add(Activation('relu'))

    # Second recurrent layer: only the last time step's output is kept.
    net.add(
        LSTM(
            num_unit,
            return_sequences=False,
            dropout=idrop,
            recurrent_dropout=rdrop,
            kernel_initializer='random_uniform',
            kernel_regularizer=l2(weight_decay),
            recurrent_regularizer=l2(weight_decay),
            bias_regularizer=l2(weight_decay),
        )
    )
    net.add(Activation('relu'))

    if odrop:
        net.add(Dropout(odrop))

    net.add(
        Dense(
            1,
            activation='linear',
            kernel_regularizer=l2(weight_decay),
            bias_regularizer=l2(weight_decay),
        )
    )

    # Author's note: the variational-dropout paper does not consider the
    # learning rate; here it is set explicitly through Adam.
    adam = Adam(lr)
    net.compile(loss='mse', metrics=['mse'], optimizer=adam)
    return net

def get_RNN_model_2(in_shape, idrop=0.25, odrop=0.25, rdrop=0.25, weight_decay=1e-4, lr=1e-3,num_unit=100):
    """Build and compile a stacked two-layer LSTM regressor with L1 penalties.

    Same layout as ``get_RNN_model`` but every regularizer is L1, and the
    second LSTM does not set ``kernel_initializer`` (only the first layer uses
    ``'random_uniform'``).

    Parameters
    ----------
    in_shape : int
        Number of features per time step; the sequence length is left open
        (``input_shape=(None, in_shape)``).
    idrop, rdrop : float
        ``dropout`` and ``recurrent_dropout`` rates passed to both LSTM layers.
    odrop : float
        Rate of the Dropout layer placed before the output; a falsy value
        omits that layer.
    weight_decay : float
        L1 factor applied to kernel, recurrent and bias weights.
    lr : float
        Learning rate handed to Adam.
    num_unit : int
        Units in each LSTM layer.

    Returns
    -------
    A compiled ``Sequential`` model with a single linear output, MSE loss and
    MSE metric.
    """
    net = Sequential()

    # First recurrent layer: returns the whole sequence so that the second
    # LSTM receives one vector per time step.
    net.add(
        LSTM(
            num_unit,
            input_shape=(None, in_shape),
            return_sequences=True,
            dropout=idrop,
            recurrent_dropout=rdrop,
            kernel_initializer='random_uniform',
            kernel_regularizer=l1(weight_decay),
            recurrent_regularizer=l1(weight_decay),
            bias_regularizer=l1(weight_decay),
        )
    )
    net.add(Activation('relu'))

    # Second recurrent layer: only the last time step's output is kept.
    net.add(
        LSTM(
            num_unit,
            return_sequences=False,
            dropout=idrop,
            recurrent_dropout=rdrop,
            kernel_regularizer=l1(weight_decay),
            recurrent_regularizer=l1(weight_decay),
            bias_regularizer=l1(weight_decay),
        )
    )
    net.add(Activation('relu'))

    if odrop:
        net.add(Dropout(odrop))

    net.add(
        Dense(
            1,
            activation='linear',
            kernel_regularizer=l1(weight_decay),
            bias_regularizer=l1(weight_decay),
        )
    )

    # Author's note: the variational-dropout paper does not consider the
    # learning rate; here it is set explicitly through Adam.
    adam = Adam(lr)
    net.compile(loss='mse', metrics=['mse'], optimizer=adam)
    return net
    
    

class KerasDPprediction(object):
    """Run repeated forward passes of a Keras model with the learning phase on.

    The constructor wraps the model in a backend function that takes the
    first layer's input together with the learning-phase flag and returns the
    last layer's output.
    """

    def __init__(self,model):
        graph_inputs = [model.layers[0].input, K.learning_phase()]
        graph_outputs = [model.layers[-1].output]
        self.f = K.function(graph_inputs, graph_outputs)

    def predict(self,x,n_iter=1000):
        """Return ``n_iter`` forward passes over ``x`` stacked along axis 0.

        Each pass calls the backend function with learning-phase flag 1
        (training phase in the Keras backend, so dropout layers stay active)
        and squeezes singleton dimensions from the output.
        """
        draws = [np.squeeze(self.f([x, 1])) for _ in range(n_iter)]
        return np.array(draws)

### Step 3c: Train RNNs for Given MRMR Result

In [None]:
# Select the feature-table column(s) whose header equals the chosen weight
# written as a string (e.g. '0.5'), then print the selection.
MIN_REDUNDANCY_WEIGHT = 0.5
input_order = df_features.iloc[:, df_features.columns == str(MIN_REDUNDANCY_WEIGHT)]
print(input_order)

In [None]:
import numpy as np
# Group the landing data by the 'heights' column.
grpby = df_landing.groupby('heights')
# Candidate input counts: 4, 8, 12, 16, 20 (the stop value 24 is excluded).
NINPUTS = np.arange(start=4, stop=24, step=4)
print(NINPUTS)

In [None]:
# Preview one training subset: the group with heights == 0, restricted to
# its first n columns plus the target column.
df = grpby.get_group(0)
n = 4
df[[*df.columns[:n], OUTPUT]]

In [None]:
# Sweep every 'heights' group and every candidate input count, preparing the
# training arrays for each combination.
grpby = df_landing.groupby(by='heights')
NINPUTS = np.arange(4,24,4)
# Iterating a GroupBy yields (key, sub-frame) pairs; unpack them directly
# instead of going through an unused enumerate index.
for height, df in grpby:
    for n in NINPUTS:
        # First n columns of the group (df_landing's column order follows
        # INPUTS_LIST) plus the target column.
        df_sub = df[list(df.columns[:n])+[OUTPUT]]
        x_train,y_train,y_train_scale,delta,z_train,diff = get_train_data(df_sub)

        ## TRAIN RNN Codes ###

In [None]:
# for j in range(1):
#     print('j: '+str(j))
#     sort_index = []
#     sele_col = 2
#     ### sele_col: 0-4
#     for i in range(all_para.shape[0]):
#         if all_para[i,sele_col] in sele_key_list:
#             sort_index.append(sele_key_list.index(all_para[i,sele_col]))
# #     sort_index = np.insert(sort_index,0,23)

#     final_RNN_x = RNN_x[:,:,sort_index]
#     all_result=[]
#     all_RMSE=[]
#     all_MAE = []
#     for i in np.arange(4,28,4):
#         print('i: '+str(i))
#         ### i is the number of parameters taken to construct the training model
#         partial_data = train_data(final_RNN_x,i)
#         #### the model is trained based on RNN_y_scale_reverse
#         RNN_x_train,RNN_x_test,RNN_y_train,RNN_y_test=train_test_split(partial_data[:,:,:], RNN_y_scale_reverse, test_size=0.2, random_state=40)
#     #     RNN_x_train,RNN_x_test,RNN_y_train,RNN_y_test=train_test_split(partial_data[:,:,:],RNN_y, test_size=0.2, random_state=40)
#         RNN_model = get_RNN_model(RNN_x_train.shape[2])
#         RNN_model.fit(RNN_x_train,RNN_y_train,batch_size=30,verbose=False,epochs=200)
#         kdp = KerasDPprediction(RNN_model)
#         y_test_pred=kdp.predict(RNN_x_test,1000)
#         all_result.append(y_test_pred)
#         mean_y_test_pred = np.mean(y_test_pred,axis=0)
#         RMSE =np.sqrt(np.mean((mean_y_test_pred-RNN_y_test)**2)) 
#         MAE = np.mean(np.abs(mean_y_test_pred-RNN_y_test))

#         all_RMSE.append(RMSE)
#         all_MAE.append(MAE)
#     all_all_result.append(all_result)
#     all_all_RMSE.append(all_RMSE)
#     all_all_MAE.append(all_MAE)

### Step 3d: Assessment of Model Results

In [None]:
import matplotlib.pyplot as plt
# Plot the first entry of all_all_MAE.
# NOTE(review): `all_all_MAE` appears only in the commented-out training code
# above and is never initialised in the active cells shown here, so this line
# raises NameError unless that code is restored — confirm before running.
plt.plot(all_all_MAE[0])