# Human Factors Module (HFM)

In [2]:
from preprocess.PREPROCESS import Preprocess
from hfm.HFM import step_aic

#### Edit this dictionary to map each column name to its data type

In [3]:
# Maps each CSV column name to the name of the type it should be cast to
# ("int", "float" or "str"). The mapping is handed to
# preproc.columnDataType(...) further down; edit it to match your data set.
columns_data_type = {
    "Ss": "int",
    "condtn": "str",
    "baseline": "int",
    "highworkload": "int",
    "highworkload offnominal": "int",
    "at_sec": "int",
    "traffic_density": "int",
    "los_freq": "int",
    "los_duration_over5min": "int",
    "query": "str",
    "ready_latency": "float",
    "ready_latency_adj": "float",
    "query_latency": "float",
    "response_index": "int",
    "ready_timed_out": "int",
    "query_timed_out": "int",
    "stimuli": "str",
    "response_text": "str",
    "sa_correct": "float",
    "wl_rating": "int",
    "interbeat_interval": "float",
    "condtn_num": "int",
    "face_conf": "str",
    "rx": "float",
    "ry": "float",
    "rz": "float",
    "eyeblink": "float",
    "positive": "float",
    "neutral": "float",
    "negative": "float",
    "emo_conf": "int",
    "los_severity": "float",
    "CLCD": "int",
    "In_transmission (binary)": "int",
    "transmission started (count)": "int",
    "transmission ended (count)": "int",
    "words per transmission (syn_complexity)": "int",
    "length of transmission(in_sec)": "float",
    "Words_sec": "float",
    "time filled in previous interval (up to 5 seconds)": "float",
    "time since last transmissions": "float",
    "True Pilot or ATC": "str",
    "Interval-Pilot (p) OR ATC (a) (ap for shared intervals)": "str",
    "pilot communication time": "float",
    "air traffic communication time in previous interval": "float",
    "ratio of comms at interval P:A": "str",
}


## Code Starts Here

#### Edit the field names and the `file_name` of the CSV data you are passing

In [4]:
# Preprocessing configuration passed to Preprocess(config=cfg):
#   "fields"    - the CSV columns to use; each name is listed exactly once
#                 (the earlier list repeated 'traffic_density' and
#                 'ready_latency', which was redundant and risks duplicate
#                 columns depending on how Preprocess selects them).
#   "file_name" - the CSV file to read.
cfg = {
    "fields": [
        'traffic_density', 'ready_latency', 'query_latency', 'query_timed_out',
        'sa_correct', 'interbeat_interval', 'rx', 'ry', 'rz', 'eyeblink',
        'positive', 'neutral', 'negative', 'CLCD', 'Words_sec',
        'los_severity', 'los_freq',
    ],
    "file_name": "data.csv",
}

In [5]:
# Initialize the preprocessor from cfg, which carries the CSV file name and
# the list of columns to use.
# NOTE(review): Preprocess is defined in preprocess/PREPROCESS.py, not shown here;
# presumably the CSV is read at construction time — confirm.
preproc = Preprocess(config=cfg)

In [6]:
# Drop rows in which a field value is missing (NA).
# NOTE(review): the return value is not captured, so this presumably updates the
# preprocessor's internal data in place — confirm against Preprocess.dropna.
preproc.dropna()

In [7]:
# Update the column types (e.g. convert an object column to int) according to
# the mapping given in columns_data_type.
preproc.columnDataType(data_type_for_each_column=columns_data_type) 

In [8]:
# Get the final pandas DataFrame after preprocessing; it is the `data` argument
# passed to step_aic below.
data = preproc.get_data() 

In [9]:
# Optional: you can also export the cleaned data as a CSV; pass the file name without the extension.
# preproc.write_csv("cleaned_data")

#### Edit these variables

In [10]:
# Candidate predictors offered to step_aic.
list_of_independent_variables = [
    'traffic_density',
    'ready_latency',
    'query_latency',
    'query_timed_out',
    'sa_correct',
    'interbeat_interval',
    'rx',
    'ry',
    'rz',
    'eyeblink',
    'positive',
    'neutral',
    'negative',
    'CLCD',
    'Words_sec',
]

# Response variable to model; 'los_severity' can be used here instead.
list_of_dependent_variables = [
    'los_freq',
]

In [11]:
# Fit the model via step_aic (presumably AIC-driven stepwise selection — see
# hfm/HFM.py). It returns the fitted model and the list of predictors it kept.
model, selected_columns = step_aic(
    independent_variables=list_of_independent_variables,
    dependent_variables=list_of_dependent_variables,
    data=data,
)

AIC: 3114.908, formula: los_freq ~ 1


In [12]:
# Show the predictors that step_aic kept, below a separator line.
print("*****************************************************")
print(selected_columns)

*****************************************************
['traffic_density', 'interbeat_interval', 'rz', 'negative', 'positive', 'Words_sec', 'query_latency', 'sa_correct', 'neutral', 'rx']


In [13]:
# Print the regression summary table of the fitted model, below a separator line.
print("*****************************************************")
print(model.summary())

*****************************************************
                            OLS Regression Results                            
Dep. Variable:               los_freq   R-squared:                       0.439
Model:                            OLS   Adj. R-squared:                  0.434
Method:                 Least Squares   F-statistic:                     87.55
Date:                Tue, 03 Aug 2021   Prob (F-statistic):          5.64e-133
Time:                        15:39:18   Log-Likelihood:                -1229.9
No. Observations:                1130   AIC:                             2482.
Df Residuals:                    1119   BIC:                             2537.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------

In [14]:
# Report the AIC of the fitted model, below a separator line.
print("*****************************************************")
print(f'AIC = {model.aic}')
# `model` is a statsmodels RegressionResults object; see
# https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.RegressionResults.html
# Any attribute listed on that page can be read from `model`.
# Some examples follow in the next cell.

*****************************************************
AIC = 2481.795130480846


In [15]:
# print(F"*****************************************************")
# print(f'P Values = \n{model.pvalues}')
# print(F"*****************************************************")
# print(f'T Values = \n{model.tvalues}')
# print(F"*****************************************************")
# print(f'R Square = {model.rsquared}')
# print(F"*****************************************************")
# print(f'F-Statistic = {model.fvalue}')
# print(F"*****************************************************")