In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import os
from os import listdir, getenv
import sqlalchemy as sa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Import the querying module
from flight_querying import query_flights
import pandas as pd

# Instantiate the project's query helper; the cells below call its methods to
# pull per-flight data.
# NOTE(review): whether a database connection is opened here or lazily on the
# first query is not visible from this notebook — confirm in flight_querying.
db_connect = query_flights()

In [None]:
# Flights that make up the labelled corpus used for training and evaluation.
flight_ids = [4620, 4929, 4940, 5019, 5021, 5034]

# One labelled frame per flight, fetched in the order listed above.
data_frames = list(map(db_connect.connect_flight_for_ml_data_label, flight_ids))

# Stack the per-flight frames row-wise, then shuffle every row with a fixed
# seed so the resulting order is reproducible.
all_data = (
    pd.concat(data_frames, axis=0)
    .sample(frac=1, random_state=42)
)

In [None]:

# Integer-encode the string 'exercise' labels into a new 'y' target column.
# The fitted encoder is kept in `le` so the class <-> code mapping can be inspected.
le = LabelEncoder()
encoded_exercise = le.fit_transform(all_data['exercise'])
all_data['y'] = encoded_exercise
print(all_data.head())

In [None]:
# Classes in the order the encoder assigned integer codes
# (code i corresponds to le.classes_[i]).
print("Encoded classes:", le.classes_)

# Label set the rest of the notebook expects. The labelling section further
# down decodes predictions by indexing a hard-coded copy of this list, so the
# encoder's classes must match it exactly, including order.
labels = ['NA', 'climb', 'cruise', 'descent', 'landing', 'power off stall',
          'power on stall', 'slow flight', 'steep turns', 'takeoff']

# le.transform(labels) alone only fails when an expected label is missing from
# the data; an unexpected extra class would pass silently and shift the codes.
# Compare both directions so a mismatch is reported here, with a clear message.
if list(le.classes_) != labels:
    raise ValueError(
        f"Label encoder classes {list(le.classes_)} do not match "
        f"the expected labels {labels}"
    )

print("Encoded labels:", le.transform(labels))

In [None]:
# Hold out half of the rows for evaluation; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(all_data, random_state=42, test_size=0.5)

# Feature matrices: everything except the raw label, the 'id' column and the encoded target.
train_x = train_data.drop(["exercise", "id", "y"], axis=1)
test_x = test_data.drop(["exercise", "id", "y"], axis=1)

# Target vectors: the encoded label as plain NumPy arrays.
train_y = train_data["y"].to_numpy()
test_y = test_data["y"].to_numpy()

In [None]:
# Sanity check: within each split, the feature frame and the target array
# should report the same number of rows.
print(f"Length train_x = {len(train_x)} \n Length train_y = {len(train_y)}")
print(f"Length test_x = {len(test_x)} \n Length test_y = {len(test_y)}")

In [None]:
# Display the training feature frame (rendered as the cell's last expression).
train_x

In [None]:
# Display the encoded training targets.
train_y

In [None]:
# Display the held-out feature frame.
test_x

In [None]:
# Display the encoded held-out targets.
test_y

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Multinomial (softmax) logistic regression over the encoded exercise classes.
# `multi_class` is intentionally not passed: it has been deprecated since
# scikit-learn 1.5 and is scheduled for removal, and with the lbfgs solver and
# more than two classes the default already fits a multinomial model.
model = LogisticRegression(solver='lbfgs')
model.fit(train_x, train_y)

In [None]:
# Predict encoded labels for the held-out rows.
predictions = model.predict(test_x)

# Overall fraction of held-out rows that were classified correctly.
accuracy = accuracy_score(y_true=test_y, y_pred=predictions)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 breakdown, printed under its heading.
print(
    "Classification Report:",
    classification_report(y_true=test_y, y_pred=predictions),
    sep="\n",
)

## Saving & Loading The Model

In [None]:
import joblib

# Persist the fitted classifier held in `model` so it can be reloaded later
# without retraining.

# Save the model to a file
model_filename = 'multinomial_logreg_model.joblib'
joblib.dump(model, model_filename)

print(f"Model saved to {model_filename}")


# Labelling all flights with the model created above

In [None]:
# Import the querying module
from flight_querying import query_flights
import pandas as pd

# Re-create the project's query helper so this labelling section can be run
# without the training cells above.
# NOTE(review): whether a database connection is opened here or lazily on the
# first query is not visible from this notebook — confirm in flight_querying.
db_connect = query_flights()

In [None]:
# import model to label new data
# NOTE(review): joblib files are pickle-based, so only load a file that was
# produced by this notebook or comes from a trusted source.
import joblib
model_filename = 'multinomial_logreg_model.joblib'
model = joblib.load(model_filename)

In [None]:
# Flights that already carry manual labels; the model must not relabel them.
manually_labelled_ids = [4620, 4929, 4940, 5019, 5021, 5034]

# Every flight id returned by the query helper, as a plain Python list.
flight_ids = db_connect.get_flight_ids()['id'].to_list()

# Keep only the flights that still need labels.
flight_ids = [fid for fid in flight_ids if fid not in manually_labelled_ids]

In [None]:
# Pull the feature frame of every remaining flight
# (original author's note: the first id in the list is 5367).
data_frames = list(map(db_connect.get_flightdata_for_ml_data_label, flight_ids))

# Stack all flights row-wise into a single frame to run predictions on.
x = pd.concat(data_frames, axis=0)

In [None]:
# Set the 'id' values aside as a one-column frame, then remove the column so
# that `x` holds only what is passed to the model.
# Note: this rebinds `x`, so re-running the cell fails once 'id' is gone.
id_column = x.loc[:, ['id']]
x = x.drop('id', axis=1)

In [None]:
# Predict an encoded activity label for every row of the unlabelled flights
predictions = model.predict(x)

In [None]:
# insert the predicted values back into the x dataframe
# Note: both assignments mutate `x` in place, so after this cell `x` contains
# non-feature columns and must not be passed to model.predict again.
x['activity'] = predictions
# id_column is the one-column frame set aside before prediction; it shares x's index.
x['flight_id'] = id_column

In [None]:
# Decode the integer predictions back to activity names: code i maps to labels[i].
# NOTE(review): this list must match the class order of the encoder used at
# training time — confirm whenever the training labels change.
labels = [
    'NA',
    'climb',
    'cruise',
    'descent',
    'landing',
    'power off stall',
    'power on stall',
    'slow flight',
    'steep turns',
    'takeoff',
]
x['activity'] = x['activity'].map(lambda code: labels[code])

In [None]:
# Keep only the columns stored in the flight_activities table and rename
# 'time' to the 'time_min' name used for the upload.
flight_activities_data = x[['flight_id', 'time', 'activity']]
flight_activities_data = flight_activities_data.rename(columns={"time": "time_min"})

# Preview what is about to be written. A bare expression in the middle of a
# cell is not rendered, so print it explicitly.
print(flight_activities_data.head())

# Push the rows to the flight_activities table.
# Note: if_exists='append' inserts the rows again every time this cell is run.
engine = db_connect.connect()
try:
    flight_activities_data.to_sql('flight_activities', engine, if_exists='append', index=False)
finally:
    # Always release the engine's resources, even when the insert fails.
    # NOTE(review): presumably a SQLAlchemy engine — confirm in flight_querying.
    engine.dispose()