## Load input and output variables

In [None]:
from __future__ import print_function
import numpy as np
from sklearn import datasets, metrics, cross_validation
import mord
import pickle
import pandas as pd
import category_encoders as ce

# Load the pickled feature table (X) and target values (Y).
# NOTE(review): unpickling can execute arbitrary code -- only load these
# files if they come from a trusted source.
X = pd.read_pickle("./X_org.pkl")
Y = pd.read_pickle("./Y_org.pkl")

# Columns of X that are ordinal-encoded and used as model inputs.
columns = [
    'Locale_Reference',
    'State_Reference',
    'Flight_Conditions',
    'Weather_Elements_Visibility',
    'Work_Environment_Factor',
    'Light',
    'ATC_Advisory',
    'Aircraft_Operator',
    'Make_Model_Name',
    'Crew_Size',
    'Flight_Plan',
    'Mission',
    'Flight_Phase1',
    'Route_In_Use',
    'Airspace',
    'Aircraft_Component',
    'Manufacturer',
    'Location_Of_Person',
    'Location_In_Aircraft',
    'Reporter_Organization',
    'Function',
    'Qualification',
    'Human_Factors',
    'Anomaly',
    'Detector',
    'When_Detected',
    'Were_Passengers_Involved_In_Event',
    'Contributing_Factors_Situations',
    'Primary_Problem',
]

features = X[columns]

# Fit the ordinal encoder on the selected columns and encode them in a single
# step; `encoder` stays available for transforming further data later on.
encoder = ce.OrdinalEncoder(cols=columns)

# Keep the encoded features and the targets under dedicated names.
X_org = encoder.fit_transform(features)
Y_org = Y

## Building ordinal logistic regression model

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn import linear_model, metrics, preprocessing
import mord

# Fixed seed so the shuffled splits are reproducible between runs.
test_random_state = 112
# 10 stratified random splits, each holding out 10% of the rows for testing.
cv = StratifiedShuffleSplit(n_splits=10, test_size=0.1, random_state=test_random_state)

# Display names for the report rows: '1' .. '5'.
# NOTE(review): assumes the targets take exactly five distinct values whose
# sorted order matches these names -- confirm against Y_org.
target_names = [str(i) for i in range(1, 6)]

for k, (data_index, test_index) in enumerate(cv.split(X_org, Y_org)):
    print ('current fold: ', k+1)

    ### Split the data into two parts for this fold:
    ### X, Y: rows used to fit the models
    ### X_test, Y_test: held-out rows used to report the performance metrics
    X = X_org.iloc[data_index]
    Y = Y_org.iloc[data_index]

    X_test = X_org.iloc[test_index]
    Y_test = Y_org.iloc[test_index]

    # One fresh, unfitted estimator of each kind per fold: a multinomial
    # logistic regression and the three mord ordinal logistic models.
    clf1 = linear_model.LogisticRegression(solver='lbfgs', multi_class='multinomial')
    clf2 = mord.LogisticAT(alpha=1.)
    clf3 = mord.LogisticIT(alpha=1.)
    clf4 = mord.LogisticSE(alpha=1.)

    # NOTE: the headers say "Mean Absolute Error", but what is printed below
    # them is a per-class precision/recall/F1 classification report. The
    # strings are kept unchanged so the printed output stays the same.
    reports = [
        ('Mean Absolute Error of LogisticRegression: \n', clf1),
        ('Mean Absolute Error of LogisticAT  \n', clf2),
        ('Mean Absolute Error of LogisticIT  \n', clf3),
        ('Mean Absolute Error of LogisticSE  \n', clf4),
    ]

    for header, clf in reports:
        clf.fit(X, Y)
        # target_names must be passed by keyword: the third positional
        # parameter of classification_report is `labels`, not `target_names`.
        print(header,
              metrics.classification_report(Y_test, clf.predict(X_test),
                                            target_names=target_names))

current fold:  1


  mask &= (ar1 != a)


Mean Absolute Error of LogisticRegression: 
              precision    recall  f1-score   support

          1       0.43      0.48      0.46      1651
          2       0.53      0.50      0.52      1885
          3       0.31      0.28      0.30      1884
          4       0.36      0.39      0.38      1884
          5       0.37      0.38      0.37      1884

avg / total       0.40      0.40      0.40      9188

Mean Absolute Error of LogisticAT  
              precision    recall  f1-score   support

          1       0.21      0.01      0.03      1651
          2       0.26      0.11      0.15      1885
          3       0.21      0.64      0.32      1884
          4       0.31      0.41      0.35      1884
          5       0.24      0.00      0.01      1884

avg / total       0.25      0.24      0.18      9188

Mean Absolute Error of LogisticIT  
              precision    recall  f1-score   support

          1       0.19      0.01      0.03      1651
          2       0.32    

Mean Absolute Error of LogisticRegression: 
              precision    recall  f1-score   support

          1       0.43      0.47      0.45      1651
          2       0.56      0.53      0.54      1884
          3       0.30      0.27      0.28      1885
          4       0.35      0.40      0.37      1884
          5       0.39      0.38      0.38      1884

avg / total       0.41      0.41      0.41      9188

Mean Absolute Error of LogisticAT  
              precision    recall  f1-score   support

          1       0.38      0.03      0.06      1651
          2       0.23      0.08      0.12      1884
          3       0.21      0.62      0.32      1885
          4       0.31      0.47      0.37      1884
          5       0.41      0.01      0.01      1884

avg / total       0.31      0.25      0.18      9188

Mean Absolute Error of LogisticIT  
              precision    recall  f1-score   support

          1       0.37      0.16      0.22      1651
          2       0.19    

  'precision', 'predicted', average, warn_for)


Mean Absolute Error of LogisticIT  
              precision    recall  f1-score   support

          1       0.40      0.35      0.38      1651
          2       0.23      0.40      0.29      1884
          3       0.00      0.00      0.00      1884
          4       0.22      0.38      0.28      1884
          5       0.24      0.16      0.19      1885

avg / total       0.22      0.26      0.23      9188



## Compare the performance of all four algorithms