In [None]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
import matplotlib.pyplot as plt   # plotting
from statsmodels.formula.api import logit
from scipy.stats import norm
from sklearn.metrics import confusion_matrix

import seaborn as sns

In [None]:
# The original weather data needed its weather-condition field to be decoded.
# Decoding was done with https://gist.github.com/kfish610/29d0e2874824732ec2d2922f979e4acb
# Because the decoding is slow, the result was saved as a CSV.

# The CSV was then further minimized using https://gist.github.com/kfish610/48605253428c856bbb1f461c10b6d6ac

In [None]:
# The original ASPM data (ASPM.zip) contains the hourly count of departures, as well as the percentage of delays.
# We have to expand this into individual flights for the logistic regression.
# Because we don't have the original flight data, we only have precision down to the hour.
# The expansion was done using https://gist.github.com/kfish610/b63bbf488d91dcf6877925f7ddfe618b
# That script also merges the weather data with the ASPM data, because doing the merge in Python is too memory inefficient.

# Read every member of the transformed ASPM archive and stack the results into
# one frame. Each member is parsed in 10,000-row chunks. The context managers
# close the archive and each member handle once reading finishes (the handles
# were previously left open).
with ZipFile('data/ASPM_Transformed.zip') as zipFile:
    member_frames = []
    for member_name in zipFile.namelist():
        with zipFile.open(member_name) as member_file:
            member_frames.append(
                pd.concat(pd.read_csv(member_file, chunksize=10000))
            )

df = (
    pd.concat(member_frames)
    # Lowercase for consistency
    .rename(columns=str.lower)
    # Drop unused datetime
    .drop(columns='datetime')
)

print(df.shape)
df.head()

In [None]:
# Logistic regression of `delayed` on the precipitation and visibility columns.
formula1 = 'delayed ~ 1 + precipitation + visibility'
model1 = logit(formula1, df).fit()
# Last expression of the cell: render the fitted model's summary table.
model1.summary()

In [None]:
# Logistic regression of `delayed` on the thunder, snow and hail columns.
formula2 = 'delayed ~ 1 + thunder + snow + hail'
model2 = logit(formula2, df).fit()
# Last expression of the cell: render the fitted model's summary table.
model2.summary()

In [None]:
def f1_score(df, predictions):
    """Compute the F1 score of binary predictions against ``df['delayed']``.

    Parameters
    ----------
    df : DataFrame-like
        Must provide a ``'delayed'`` column with the true labels
        (expected to be coded 0/1).
    predictions : array-like
        Predicted 0/1 labels, one per row of ``df``.

    Returns
    -------
    float
        ``tp / (tp + 0.5 * (fp + fn))``, or ``0.0`` when there are no true
        positives, false positives or false negatives at all (the score is
        undefined in that case).
    """
    y_true = df['delayed']
    # Pin the label order so the matrix is always 2x2. Without `labels`, data
    # in which only one class occurs yields a 1x1 matrix and the four-way
    # unpack below raises.
    _, fp, fn, tp = confusion_matrix(y_true, predictions, labels=[0, 1]).ravel()
    denominator = tp + 0.5 * (fp + fn)
    if denominator == 0:
        # Nothing is positive in either the labels or the predictions:
        # report 0.0 rather than dividing zero by zero.
        return 0.0
    return tp / denominator

In [None]:
# Score every fitted model against the `delayed` column of the frame it was
# fitted on.
models = [model1, model2]

for model in models:
    # predict() without arguments evaluates the model on its own design matrix.
    probabilities = np.asarray(model.predict())
    # Classify as delayed when the predicted probability is strictly above 0.5;
    # the vectorised comparison avoids a Python-level loop over every row.
    predictions = (probabilities > 0.5).astype(int)
    # Identify the model by its formula; the default repr of a results object
    # is only a class name and memory address. Fall back to that repr if the
    # model was not built from a formula.
    label = getattr(model.model, 'formula', str(model))
    print('model: ' + str(label) + ' f1_score is %f' % f1_score(df, predictions))