In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import sys
sys.path.append('../src')

from preprocessing import preprocess_data
from feature_engineering import engineer_features

# Notebook-wide plotting defaults: seaborn's 'whitegrid' theme and a
# 12x6 inch default figure size (individual cells may override the size).
sns.set_style('whitegrid')
plt.rc('figure', figsize=(12, 6))

## Load and Explore Data

In [None]:
# Load data
# Read the flights CSV through the project's preprocess_data helper and bind
# the result to `df`, which the exploration cells below read from.
# NOTE(review): the cleaning steps applied by preprocess_data are not visible
# in this notebook — confirm them in the preprocessing module.
df = preprocess_data('../data/european_flights.csv')
# Last expression of the cell, so the first rows are rendered as its output.
df.head()

In [None]:
# Basic statistics
# Render pandas' summary table for `df` (by default: count, mean, std, min,
# quartiles and max of the numeric columns) as the cell output.
df.describe()

In [None]:
# Data info
# Print the frame's structure: column names, non-null counts, dtypes and
# memory usage. Useful for spotting missing values before plotting.
df.info()

## Target Variable Distribution

In [None]:
# Distribution of FLT_TOT_1
# Two panels on one row: a 50-bin histogram (left) for the overall shape and
# a box plot (right) for spread and outliers of the target column.
target_values = df['FLT_TOT_1']
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(12, 5))

ax_hist.hist(target_values, bins=50, edgecolor='black')
ax_hist.set_xlabel('Total IFR Movements')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Distribution of FLT_TOT_1')

ax_box.boxplot(target_values)
ax_box.set_ylabel('Total IFR Movements')
ax_box.set_title('Boxplot of FLT_TOT_1')

fig.tight_layout()
plt.show()

## Temporal Patterns

In [None]:
# Average traffic per year
# Mean of FLT_TOT_1 within each YEAR group, drawn as a marked line so the
# year-over-year trend is easy to follow.
yearly_avg = df.groupby('YEAR')['FLT_TOT_1'].mean()

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(yearly_avg.index, yearly_avg.values, marker='o', linewidth=2)
ax.set_xlabel('Year')
ax.set_ylabel('Average IFR Movements')
ax.set_title('Average Traffic per Year')
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Seasonal patterns
# Mean of FLT_TOT_1 within each MONTH_NUM group, one bar per month; ticks are
# pinned to 1..12 so every month is labelled on the x-axis.
monthly_avg = df.groupby('MONTH_NUM')['FLT_TOT_1'].mean()

fig, ax = plt.subplots(figsize=(12, 5))
ax.bar(monthly_avg.index, monthly_avg.values, color='steelblue', edgecolor='black')
ax.set_xlabel('Month')
ax.set_ylabel('Average IFR Movements')
ax.set_title('Seasonal Pattern - Average Traffic per Month')
ax.set_xticks(range(1, 13))
ax.grid(True, alpha=0.3)
plt.show()

## Top Airports

In [None]:
# Top 20 airports by total traffic
# Sum FLT_TOT_1 per APT_ICAO code, then keep the 20 largest totals.
airport_totals = df.groupby('APT_ICAO')['FLT_TOT_1'].sum()
top_airports = airport_totals.sort_values(ascending=False).head(20)

fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(top_airports.index, top_airports.values, color='orange', edgecolor='black')
ax.set_xlabel('Total IFR Movements')
ax.set_ylabel('Airport (ICAO Code)')
ax.set_title('Top 20 Airports by Total Traffic')
# barh draws the first entry at the bottom; flip so the largest is on top.
ax.invert_yaxis()
fig.tight_layout()
plt.show()

## Country Analysis

In [None]:
# Top 15 countries by total traffic
# Sum FLT_TOT_1 per STATE_NAME, then keep the 15 largest totals.
country_totals = df.groupby('STATE_NAME')['FLT_TOT_1'].sum()
top_countries = country_totals.sort_values(ascending=False).head(15)

fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(top_countries.index, top_countries.values, color='green', edgecolor='black')
ax.set_xlabel('Total IFR Movements')
ax.set_ylabel('Country')
ax.set_title('Top 15 Countries by Total Traffic')
# barh draws the first entry at the bottom; flip so the largest is on top.
ax.invert_yaxis()
fig.tight_layout()
plt.show()

## Feature Engineering Preview

In [None]:
# Apply feature engineering
# Run the project's engineer_features helper on the preprocessed frame. It
# returns a pair: the feature-augmented frame (`df_feat`) and `encoders`.
# NOTE(review): create_lags=True presumably adds lagged traffic features and
# `encoders` presumably holds fitted categorical encoders — confirm both in
# the feature_engineering module.
df_feat, encoders = engineer_features(df, create_lags=True)
# Last expression of the cell, so the first rows are rendered as its output.
df_feat.head()

In [None]:
# Correlation with target
# Rank the engineered features by their correlation with FLT_TOT_1 and plot
# the 20 most positively correlated ones.
from feature_engineering import get_feature_columns

target_col = 'FLT_TOT_1'
feature_cols = get_feature_columns(df_feat)

# Exclude the target by label. Relying on it being at position 0 after the
# sort (the old `[1:21]` slice) breaks when a feature ties at 1.0, and
# selecting it twice would produce duplicate columns.
predictor_cols = [col for col in feature_cols if col != target_col]

# corrwith correlates each predictor with the target directly, so the full
# feature-by-feature correlation matrix is never built and the target's
# self-correlation never enters the ranking.
correlations = (
    df_feat[predictor_cols]
    .corrwith(df_feat[target_col])
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(10, 8))
correlations.head(20).plot(kind='barh', color='purple', ax=ax)
ax.set_xlabel('Correlation with FLT_TOT_1')
ax.set_title('Top 20 Feature Correlations with Target')
# barh draws the first entry at the bottom; flip so the strongest is on top.
ax.invert_yaxis()
fig.tight_layout()
plt.show()

## Next Steps

1. Train LightGBM model: `python ../src/train_lightgbm.py`
2. Train MLP model: `python ../src/train_mlp.py`
3. Apply pruning: `python ../src/pruning.py`
4. Apply quantization: `python ../src/quantization.py`
5. Evaluate all models: `python ../src/evaluate.py`