In [13]:
import pandas as pd
from datetime import datetime
import joblib

In [15]:
def get_user_input():
    """Interactively collect the flight details needed for a price prediction.

    Each answer is stripped of surrounding whitespace and validated as soon as
    it is typed; an invalid answer prints a short message and the same question
    is asked again, so a typo on a late question does not discard the earlier
    answers.

    Returns
    -------
    dict
        Keys: 'Airline', 'Source', 'Destination', 'Number of Stops', 'Class',
        'Date' (the YYYY-MM-DD string as typed), 'Total_Stopover_Time',
        'days_left', 'Departure_24hr', 'Arrival_24hr' and 'Arrival_Day_Offset'.

    Raises
    ------
    EOFError
        Propagated from ``input`` if the input stream is closed.
    """
    def _ask(prompt, parse):
        # Keep asking until `parse` accepts the answer. Only ValueError is
        # treated as "bad answer"; EOFError/KeyboardInterrupt still escape.
        while True:
            raw = input(prompt).strip()
            try:
                return parse(raw)
            except ValueError as exc:
                print(f"Invalid value {raw!r}: {exc}. Please try again.")

    def _text(raw):
        if not raw:
            raise ValueError("a value is required")
        return raw

    def _non_negative_int(raw):
        value = int(raw)
        if value < 0:
            raise ValueError("must be zero or greater")
        return value

    def _iso_date(raw):
        # Parsed only to validate; the original string is what gets returned.
        datetime.strptime(raw, "%Y-%m-%d")
        return raw

    def _clock_time(raw):
        # Rejects anything that is not a 24-hour HH:MM time (e.g. "25:00").
        datetime.strptime(raw, "%H:%M")
        return raw

    print("Please enter the following details for your flight:")
    airline = _ask("Airline: ", _text)
    source = _ask("Source airport: ", _text)
    destination = _ask("Destination airport: ", _text)
    stops = _ask("Number of stops: ", _non_negative_int)
    travel_class = _ask("Class (Economy/Business): ", _text)
    date_str = _ask("Date of travel (YYYY-MM-DD): ", _iso_date)
    stopover_time = _ask("Total stopover time (in minutes): ", _non_negative_int)
    departure_time = _ask("Departure time (HH:MM): ", _clock_time)
    arrival_time = _ask("Arrival time (HH:MM): ", _clock_time)
    # The offset is only required to be an integer; its sign is not restricted.
    arrival_day_offset = _ask(
        "Arrival day offset (0 if same day, 1 if next day, etc.): ", int
    )

    # Calculate days left (assume booking is made today).
    # Compare calendar dates, not timestamps: subtracting datetime.now() from
    # midnight of the travel date would floor the result one day too low.
    travel_date = datetime.strptime(date_str, "%Y-%m-%d").date()
    days_left = (travel_date - datetime.now().date()).days

    return {
        "Airline": airline,
        "Source": source,
        "Destination": destination,
        "Number of Stops": stops,
        "Class": travel_class,
        "Date": date_str,
        "Total_Stopover_Time": stopover_time,
        "days_left": days_left,
        "Departure_24hr": departure_time,
        "Arrival_24hr": arrival_time,
        "Arrival_Day_Offset": arrival_day_offset
    }

# Confirmation message only: get_user_input() is defined at this point but is
# not called here, so no prompts appear when this cell runs.
print("User input function created. Ready to use in the next step.")

User input function created. Ready to use in the next step.


In [7]:
# Load the saved label encoders.
# preprocess_user_input() looks this object up by column name ('Airline',
# 'Source', 'Destination', 'Class') and calls .transform() on each entry.
# The path is relative, so it is resolved against the current working directory.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load artefacts from a trusted source.
le_dict = joblib.load('label_encoders.joblib')

In [31]:
# Function to preprocess user input
def preprocess_user_input(user_input, encoders=None):
    """Convert one raw flight description into the feature row used for prediction.

    Parameters
    ----------
    user_input : mapping
        Must provide 'Airline', 'Source', 'Destination', 'Class', 'Date',
        'Departure_24hr', 'Arrival_24hr', 'Number of Stops',
        'Total_Stopover_Time', 'days_left' and 'Arrival_Day_Offset'.
        The two time fields are 24-hour 'HH:MM' strings.
    encoders : mapping, optional
        Column name -> fitted encoder exposing ``transform`` for 'Airline',
        'Source', 'Destination' and 'Class'. When omitted, the notebook-level
        ``le_dict`` is used, which keeps existing one-argument calls working.

    Returns
    -------
    pandas.DataFrame
        A single row whose columns are, in order: 'Number of Stops',
        'Total_Stopover_Time', 'days_left', 'Arrival_Day_Offset', 'Month',
        'DayOfWeek', 'Departure_Minutes', 'Arrival_Minutes', 'Airline_encoded',
        'Source_encoded', 'Destination_encoded', 'Class_encoded'.

    Raises
    ------
    KeyError
        If a required field is missing from ``user_input``.
    ValueError
        If a time is not a valid 'HH:MM' value or a categorical value cannot
        be encoded.
    """
    if encoders is None:
        # Looked up at call time so the function can be defined before the
        # encoders are loaded.
        encoders = le_dict

    categorical_cols = ['Airline', 'Source', 'Destination', 'Class']
    required = categorical_cols + [
        'Date', 'Departure_24hr', 'Arrival_24hr', 'Number of Stops',
        'Total_Stopover_Time', 'days_left', 'Arrival_Day_Offset',
    ]
    # Fail up front with the full list instead of on the first missing column.
    missing = [key for key in required if key not in user_input]
    if missing:
        raise KeyError(f"user_input is missing required field(s): {missing}")

    def time_to_minutes(time_str):
        """Return minutes since midnight for an 'HH:MM' string."""
        try:
            hours, minutes = map(int, str(time_str).split(':'))
        except ValueError as exc:
            raise ValueError(f"Time {time_str!r} is not in HH:MM format") from exc
        # Without this check '25:00' would silently become 1500 minutes.
        if not (0 <= hours <= 23 and 0 <= minutes <= 59):
            raise ValueError(f"Time {time_str!r} is outside the 00:00-23:59 range")
        return hours * 60 + minutes

    user_df = pd.DataFrame([user_input])
    user_df['Date'] = pd.to_datetime(user_df['Date'])
    user_df['Month'] = user_df['Date'].dt.month
    user_df['DayOfWeek'] = user_df['Date'].dt.dayofweek  # Monday=0 ... Sunday=6

    user_df['Departure_Minutes'] = user_df['Departure_24hr'].apply(time_to_minutes)
    user_df['Arrival_Minutes'] = user_df['Arrival_24hr'].apply(time_to_minutes)

    # Encode categorical variables
    for col in categorical_cols:
        try:
            user_df[col + '_encoded'] = encoders[col].transform(user_df[col])
        except ValueError as exc:
            # Name the offending field; the encoder's own message does not.
            raise ValueError(
                f"Could not encode {col}={user_input[col]!r}: {exc}"
            ) from exc

    # Select features for prediction
    features = ['Number of Stops', 'Total_Stopover_Time', 'days_left', 'Arrival_Day_Offset',
                'Month', 'DayOfWeek', 'Departure_Minutes', 'Arrival_Minutes',
                'Airline_encoded', 'Source_encoded', 'Destination_encoded', 'Class_encoded']

    return user_df[features]

In [49]:
# Hard-coded sample itinerary standing in for interactive input.
# For a real run, replace this dict with the result of get_user_input().
user_input = {
    "Airline": "British Airways",
    "Source": "Toronto Pearson Intl",
    "Destination": "Bengaluru Intl",
    "Number of Stops": 1,
    "Class": "Business Class",
    "Date": "2024-08-11",
    "Total_Stopover_Time": 120,
    "Departure_24hr": "10:10",
    "Arrival_24hr": "03:05",
    "Arrival_Day_Offset": 1,
    "days_left": 25
}

# Turn the raw itinerary into the single feature row used for prediction.
processed_input = preprocess_user_input(user_input)

# One print call with a newline separator: heading first, then the frame.
print("Preprocessed user input:", processed_input, sep="\n")

Preprocessed user input:
   Number of Stops  Total_Stopover_Time  days_left  Arrival_Day_Offset  Month  \
0                1                  120         25                   1      8   

   DayOfWeek  Departure_Minutes  Arrival_Minutes  Airline_encoded  \
0          6                610              185               10   

   Source_encoded  Destination_encoded  Class_encoded  
0              13                    0              0  


In [51]:
import joblib
import pandas as pd

# Load the trained model.
# Later cells call model.predict(...) and read model.feature_importances_.
# The file name suggests an extra-trees estimator — TODO confirm against the
# training notebook. The path is relative to the current working directory.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load artefacts from a trusted source.
model = joblib.load('extratrees_model.joblib')



In [52]:
# Relies on `processed_input` and `model` from the earlier cells; run those
# first if the kernel was restarted.

# Score the single prepared row and take its (only) prediction.
predicted_price = model.predict(processed_input)[0]

print(f"Predicted price for the flight: {predicted_price:.2f} CAD")

# Pair each input column with the model's importance score by position,
# then rank from most to least important with a fresh 0..n-1 index.
feature_importance = (
    pd.DataFrame({
        'feature': processed_input.columns,
        'importance': model.feature_importances_,
    })
    .sort_values('importance', ascending=False)
    .reset_index(drop=True)
)

print("\nTop 5 most important features:")
print(feature_importance.head())

Predicted price for the flight: 4016.63 CAD

Top 5 most important features:
               feature  importance
0        Class_encoded    0.376116
1    Departure_Minutes    0.088292
2      Arrival_Minutes    0.085761
3       Source_encoded    0.084061
4  Destination_encoded    0.077445
