In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import logging

# Root logging setup: INFO and above, timestamped, written to both the
# 'post_analysis.log' file and the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler('post_analysis.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-scoped logger used by the steps below.
logger = logging.getLogger(__name__)
# Use seaborn's 'whitegrid' style for the plots drawn below.
sns.set_style('whitegrid')
# NOTE: the next line is an IPython magic, not Python syntax; it only works
# when this cell runs inside a notebook/IPython session.
%matplotlib inline

# ## Step 2: Load Difficulty Scores
# Single source of truth for the input path, so the log messages always name
# the file that is actually read (they previously referred to a different
# file name than the one passed to read_csv).
input_csv = 'test_deliverability.csv'
try:
    # Only the read itself can raise FileNotFoundError; keep the try minimal.
    df = pd.read_csv(input_csv)
except FileNotFoundError:
    logger.error("%s not found", input_csv)
    # Re-raise: nothing below can run without the data.
    raise
else:
    # Parse the 'date' column into datetime values once the load succeeded.
    df['date'] = pd.to_datetime(df['date'])
    logger.info("Loaded %s", input_csv)

# ## Step 3: Difficult Destinations
# Subset of rows whose category is 'Difficult'; reused by the later steps.
difficult_flights = df.loc[df['category'] == 'Difficult']

# Mean difficulty score per arrival station, ordered from highest to lowest.
dest_avg = (
    difficult_flights
    .groupby('scheduled_arrival_station_code')['difficulty_score']
    .mean()
    .sort_values(ascending=False)
)
print("Top Difficult Destinations:\n", dest_avg.round(2))

# Bar chart of the per-destination averages.
plt.figure(figsize=(8, 5))
sns.barplot(x=dest_avg.index, y=dest_avg.values)
plt.gca().set(
    title='Average Difficulty Score by Destination (Difficult Flights)',
    ylabel='Average Difficulty Score',
)
plt.xticks(rotation=45)
plt.show()

# ## Step 4: Common Drivers
# Per-destination mean of each driver metric among the Difficult flights,
# rounded to two decimals.
drivers = (
    difficult_flights
    .groupby('scheduled_arrival_station_code')[
        ['ground_tight', 'bag_ratio', 'load_factor', 'ssr_per_pax']
    ]
    .mean()
    .round(2)
)
print("\nDrivers for Difficult Destinations:\n", drivers)

# Annotated heatmap: one row per destination, one column per driver metric.
plt.figure(figsize=(10, 6))
sns.heatmap(data=drivers, cmap='YlOrRd', annot=True)
plt.gca().set_title('Driver Metrics by Destination')
plt.show()

# ## Step 5: Recommendations
# Markdown-formatted recommendation text. The empty first and last entries
# give the joined string its leading and trailing newline.
recommendations = "\n".join((
    "",
    "**Recommendations for Operational Efficiency**:",
    "1. **Resource Allocation**: Assign extra ground crew (+20%) to Difficult flights (top 33% daily rank), especially for tight ground times (e.g., DFW: 26% tight).",
    "2. **Proactive Planning**: Integrate scores into daily scheduling; add 10-min buffers for Difficult destinations (ATL, JFK).",
    "3. **Mitigate Drivers**: Fast-track bag sorting for high bag_ratio routes (MIA); pre-position wheelchairs for SSR-heavy JFK/DFW.",
    "4. **Fleet Optimization**: Prioritize newer aircraft for ATL/JFK (high B777 scores); monitor fleet age as per paper.",
    "5. **Monitoring**: Retrain model monthly, add weather data (per paper) to improve accuracy (~70% target).",
    "",
))
print(recommendations)

# ## Step 6: Save Insights
# Join the per-destination average score with the per-destination driver
# means on the station code, then attach the recommendation text to every row.
insights = pd.merge(
    dest_avg.reset_index(),
    drivers.reset_index(),
    on='scheduled_arrival_station_code',
).assign(recommendations=recommendations)

# Persist the combined table without the positional index.
insights.to_csv('insights.csv', index=False)
logger.info("Insights saved to insights.csv")


2025-10-04 17:19:22,529 - ERROR - test_johndoe.csv not found


FileNotFoundError: [Errno 2] No such file or directory: 'test_deliverability.csv'