### Wildfire Prediction: Forecast Feature Generator & Inference
 This script generates wildfire prediction input features for a given Greek city and forecast date using:
- WeatherAPI for 3-day forecast data
- Meteostat for 7-day historical weather statistics
- Open-Elevation API for terrain elevation features
- GADM shapefile for Greek region assignment

 The final output is a dataframe with 33 engineered features,
 ready to be used by a pre-trained XGBoost wildfire classification model.
 Just provide a city name and forecast date — the script handles all preprocessing automatically.

In [None]:
import geopandas as gpd
import pandas as pd
import requests
from shapely.geometry import Point as GeoPoint
from datetime import datetime, timedelta
from meteostat import Point as MeteoPoint, Daily
from geopy.geocoders import Nominatim
import joblib
import warnings

This script allows wildfire predictions to be made for any Greek city based on short-term weather forecasts. It takes a city name and a date, geolocates the city to get its coordinates, and then assigns it to the correct administrative region using a spatial join with official Greek region boundaries. The script collects forecast data for that city from the WeatherAPI and supplements it with elevation information from the Open-Elevation API and historical weather data from Meteostat. It processes all inputs to generate the same set of features that were used to train the wildfire prediction model, including engineered variables like wind direction encoding, fire risk index, and seasonal indicators.

In [9]:
def get_elevation_open(lat, lon, timeout=10):
    """
    Fetch the elevation of a point from the Open-Elevation API.

    The API is queried for a single coordinate, so only one elevation value
    is actually looked up. The max/min values are derived from it with fixed
    offsets:
        - max_elevation ≈ avg + 300m
        - min_elevation ≈ avg - 100m
    This is a simplified estimate, sufficient for modeling wildfire risk.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        Tuple ``(avg_elevation, max_elevation, min_elevation)``, each rounded
        to 2 decimals.

    Raises:
        ValueError: If the API answers with a non-200 status or returns no
            results.
        requests.exceptions.RequestException: If the request itself fails
            (connection error or timeout).
    """
    url = f'https://api.open-elevation.com/api/v1/lookup?locations={lat},{lon}'
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("Open-Elevation API request failed.")
    results = response.json().get('results')
    if not results:
        raise ValueError("No elevation data returned.")
    elevation = results[0]['elevation']
    return round(elevation, 2), round(elevation + 300, 2), round(elevation - 100, 2)

# --- Helper: Convert average wind direction to 7-class one-hot encoding (wd_0 to wd_6) ---
def get_wd_columns(wind_dir):
    """
    One-hot encode a wind direction (degrees) into seven columns wd_0..wd_6.

    The direction is divided by a sector width of 360/7 degrees and rounded
    to the nearest sector index; the modulo wraps an index of 7 back to 0.

    Returns:
        dict with keys 'wd_0' .. 'wd_6'; the selected sector maps to 1 and
        every other sector to 0.
    """
    sector_width = 360 / 7
    active_sector = round(wind_dir / sector_width) % 7
    return {f'wd_{sector}': int(sector == active_sector) for sector in range(7)}

def prepare_forecast_data(city_name, forecast_date, regions_path=None, api_key=None, timeout=10):
    """
    Build the model-ready feature table for a Greek city.

    Pipeline: load the GADM region polygons, geocode the city with Nominatim,
    download a 3-day WeatherAPI forecast, attach the administrative region
    through a spatial join, then add calendar features, 7-day Meteostat
    history, elevation and the engineered interaction features, and finally
    select the model columns.

    NOTE: ``forecast_date`` only positions the 7-day historical window (the
    seven days ending the day before it). The forecast rows are whatever days
    WeatherAPI returns for ``days=3``; they are NOT filtered to
    ``forecast_date``.

    Args:
        city_name: City name; ", Greece" is appended for geocoding.
        forecast_date: ``datetime`` whose preceding seven days form the
            historical weather window.
        regions_path: Optional path to the GADM regions file
            (gadm41_GRC_1.json). Defaults to the original hard-coded location.
        api_key: Optional WeatherAPI key. When omitted, the WEATHERAPI_KEY
            environment variable is used, then the legacy embedded key.
        timeout: Seconds to wait for the WeatherAPI response.

    Returns:
        pandas.DataFrame holding the 33 model feature columns, one row per
        forecast day returned by WeatherAPI.

    Raises:
        ValueError: If the city cannot be geocoded, the WeatherAPI request
            fails or returns no forecast days, or the elevation lookup fails.
    """
    import os  # local import: only needed for the optional WEATHERAPI_KEY lookup

    # Step 1: Load Greek regions
    if regions_path is None:
        regions_path = "/Users/Thomas/Desktop/Skole/Business Analytics/Advanced Business Analytics/Wildfire Prediction/data/raw/gadm41_GRC_1.json"
    regions_gdf = gpd.read_file(regions_path)

    # Step 2: Geocode city
    geolocator = Nominatim(user_agent="wildfire_forecast_app")
    location = geolocator.geocode(city_name + ", Greece")
    if location is None:
        raise ValueError(f"Could not find coordinates for city: {city_name}")
    lat, lon = location.latitude, location.longitude

    # Step 3: Fetch weather forecast
    if api_key is None:
        # SECURITY: a literal API key is committed in this file. It is kept
        # only as a last-resort fallback so existing calls keep working;
        # prefer the WEATHERAPI_KEY environment variable and rotate this key.
        api_key = os.environ.get("WEATHERAPI_KEY", "54b7379e9d1d49c9989210618252903")
    # Query by the geocoded coordinates rather than the bare city name, so the
    # forecast refers to the same place that is used for region, elevation and
    # history (a bare name may resolve to a same-named city elsewhere).
    # `params=` also takes care of URL-encoding.
    response = requests.get(
        "https://api.weatherapi.com/v1/forecast.json",
        params={
            "key": api_key,
            "q": f"{lat},{lon}",
            "days": 3,
            "aqi": "no",
            "alerts": "no",
        },
        timeout=timeout,
    )
    if response.status_code != 200:
        raise ValueError("WeatherAPI forecast request failed.")
    data = response.json()
    forecast_days = data.get('forecast', {}).get('forecastday')
    if not forecast_days:
        raise ValueError("No forecast data returned.")

    forecast_rows = []
    for day in forecast_days:
        daily = day['day']
        wind_dirs = [hour['wind_degree'] for hour in day['hour']]
        # NOTE(review): plain arithmetic mean of the hourly degrees (e.g. 350
        # and 10 average to 180) - confirm this matches how the training data
        # derived wind_dir before changing it.
        avg_wind_dir = round(sum(wind_dirs) / len(wind_dirs))

        forecast_rows.append({
            "date": day['date'],
            "lat": lat,
            "lon": lon,
            "temp_min": daily['mintemp_c'],
            "temp_max": daily['maxtemp_c'],
            "precip": daily['totalprecip_mm'],
            # kph -> m/s
            "wind_speed": round(daily['maxwind_kph'] / 3.6, 2),
            "wind_dir": avg_wind_dir,
            "is_dry": int(daily['daily_chance_of_rain'] == 0),
        })

    df = pd.DataFrame(forecast_rows)

    # Step 4: Assign region
    # The points are tagged with the regions' CRS; this presumes the regions
    # file is in lon/lat degrees like the geocoder output - TODO confirm.
    df['geometry'] = df.apply(lambda row: GeoPoint(row['lon'], row['lat']), axis=1)
    forecast_geo = gpd.GeoDataFrame(df, geometry='geometry', crs=regions_gdf.crs)
    # how='left': a point that falls inside no region polygon keeps its row
    # with a missing region.
    joined = gpd.sjoin(forecast_geo, regions_gdf[['NAME_1', 'geometry']], how='left', predicate='within')
    df = joined.rename(columns={'NAME_1': 'region'}).drop(columns='geometry')

    # Step 5: Date-based features
    df['date'] = pd.to_datetime(df['date'])
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month
    df['day_of_year'] = df['date'].dt.dayofyear
    df['is_weekend'] = df['date'].dt.weekday.isin([5, 6]).astype(int)
    df['quarter'] = df['date'].dt.quarter
    df['is_wildfire_season'] = df['month'].isin([6, 7, 8]).astype(int)

    # Step 6: Encode region
    region_mapping = {
        'Aegean': 0,
        'Attica': 1,
        'Crete': 2,
        'EpirusandWesternMacedonia': 3,
        'MacedoniaandThrace': 4,
        'Peloponnese,WesternGreeceand': 5,
        'ThessalyandCentralGreece': 6
    }
    # Region names missing from the mapping (or a missing region) become NaN.
    df['region_encoded'] = df['region'].map(region_mapping)

    # Step 7: Historical weather (the 7 days ending the day before forecast_date)
    location_meteostat = MeteoPoint(lat, lon)
    end_date = forecast_date - timedelta(days=1)
    start_date = end_date - timedelta(days=6)
    historical_df = Daily(location_meteostat, start_date, end_date).fetch()

    df['precip_7d_sum'] = round(historical_df['prcp'].sum(), 2)
    df['temp_max_7d_avg'] = round(historical_df['tmax'].mean(), 2)

    # Step 8: Elevation (dynamic using Open-Elevation)
    avg_el, max_el, min_el = get_elevation_open(lat, lon)
    df['avg_elevation'] = avg_el
    df['max_elevation'] = max_el
    df['min_elevation'] = min_el

    # Step 9: Wind direction one-hot encoding (wd_0 to wd_6)
    wd_encoded = df['wind_dir'].apply(get_wd_columns).apply(pd.Series)
    df = pd.concat([df, wd_encoded], axis=1)

    # Step 10: Feature engineering
    df['elevation_temp_interaction'] = df['avg_elevation'] * df['temp_max']
    # NOTE(review): max/min elevation are avg + 300 and avg - 100, so this
    # range is always 400 and is_mountainous (> 500) below is always 0.
    df['elevation_range'] = df['max_elevation'] - df['min_elevation']
    df['rugged_wind_interaction'] = df['elevation_range'] * df['wind_speed']
    df['temp_range_elevation'] = (df['temp_max'] - df['temp_min']) * df['avg_elevation']
    df['is_mountainous'] = (df['elevation_range'] > 500).astype(int)
    df['temp_month_interaction'] = df['temp_max'] * df['month']
    df['temp_wind_interaction'] = df['temp_max'] * df['wind_speed']
    df['fire_risk_index'] = 0.4 * df['temp_max'] + 0.4 * df['wind_speed'] - 0.2 * df['precip']
    df['is_windy'] = (df['wind_speed'] > 6).astype(int)

    # Step 11: Final model feature selection (column order is kept as listed)
    model_features = [
        'temp_max', 'temp_min', 'precip', 'wind_speed',
        'avg_elevation', 'max_elevation', 'min_elevation', 'year',
        'elevation_temp_interaction', 'elevation_range', 'rugged_wind_interaction',
        'temp_range_elevation', 'is_mountainous', 'month', 'day_of_year',
        'is_weekend', 'quarter', 'is_wildfire_season', 'temp_month_interaction',
        'precip_7d_sum', 'temp_max_7d_avg', 'is_dry', 'fire_risk_index',
        'wd_0', 'wd_1', 'wd_2', 'wd_3', 'wd_4', 'wd_5', 'wd_6',
        'temp_wind_interaction', 'is_windy', 'region_encoded'
    ]

    return df[model_features].copy()

In [10]:
# Restore the fitted wildfire classifier from disk
model_path = "/Users/Thomas/Desktop/Skole/Business Analytics/Advanced Business Analytics/Wildfire Prediction/src/models/xgboost_model.pkl"
model = joblib.load(model_path)

# Build the feature table for the chosen city; today's date anchors the
# 7-day historical weather window
features = prepare_forecast_data(city_name="Athens", forecast_date=datetime.today())

# Probability of the positive class (class = 1) for the first feature row
class_probabilities = model.predict_proba(features)
probability = class_probabilities[0][1]

# Hard 0/1 label for the same row (default threshold = 0.5)
predicted_labels = model.predict(features)
binary_prediction = predicted_labels[0]

# Report both results
print(f"Wildfire probability: {probability:.2f}")
print(f"Binary prediction (0 = no fire, 1 = fire): {binary_prediction}")



Wildfire probability: 0.00
Binary prediction (0 = no fire, 1 = fire): 0


This notebook integrates real-time forecast data, historical weather, terrain elevation, and regional encoding to generate the complete set of features required by a wildfire prediction model. It automates the entire preprocessing pipeline, allowing users to simply input a city and date to obtain a wildfire risk prediction using a pre-trained XGBoost model.

In [None]:

# Keep the cell output clean by silencing all warnings
warnings.filterwarnings("ignore")

# Forecast date (intended to be today or within the next 2–3 days, depending on WeatherAPI limits).
# NOTE(review): prepare_forecast_data uses this date only for the 7-day
# historical window; the forecast rows come from WeatherAPI's 3-day forecast,
# so the date printed below is not necessarily the day being scored.
forecast_date = datetime(year=2024, month=5, day=10)

# Restore the fitted wildfire classifier from disk
model_path = "/Users/Thomas/Desktop/Skole/Business Analytics/Advanced Business Analytics/Wildfire Prediction/src/models/xgboost_model.pkl"
model = joblib.load(model_path)

# Build the feature table; the city is geocoded and assigned to its region automatically
features = prepare_forecast_data(city_name="Athens", forecast_date=forecast_date)

# Wildfire probability (positive class) for the first feature row
class_probabilities = model.predict_proba(features)
probability = class_probabilities[0][1]

# Binary wildfire alert (0 or 1) for the same row
predicted_labels = model.predict(features)
binary_prediction = predicted_labels[0]

# Show the results
print(f"Forecast date: {forecast_date.strftime('%Y-%m-%d')}")
print(f"Wildfire probability: {probability:.2f}")
print(f"Binary prediction (0 = no fire, 1 = fire): {binary_prediction}")

Forecast date: 2024-05-10
Wildfire probability: 0.00
Binary prediction (0 = no fire, 1 = fire): 0


The cell below feeds hand-crafted dummy feature values directly into the model, to show that it actually works :)

In [18]:
# Restore the fitted classifier
model_path = "/Users/Thomas/Desktop/Skole/Business Analytics/Advanced Business Analytics/Wildfire Prediction/src/models/xgboost_model.pkl"
model = joblib.load(model_path)

# Hand-written feature values (tweak as needed); keys are listed in the same
# order as the feature list selected by prepare_forecast_data
dummy_data = {
    'temp_max': 35,
    'temp_min': 20,
    'precip': 0,
    'wind_speed': 5,
    'avg_elevation': 250,
    'max_elevation': 400,
    'min_elevation': 100,
    'year': 2025,
    'elevation_temp_interaction': 8750,
    'elevation_range': 300,
    'rugged_wind_interaction': 1500,
    'temp_range_elevation': (35 - 20) * 250,
    'is_mountainous': 0,
    'month': 8,
    'day_of_year': 230,
    'is_weekend': 0,
    'quarter': 3,
    'is_wildfire_season': 1,
    'temp_month_interaction': 280,
    'precip_7d_sum': 0,
    'temp_max_7d_avg': 34,
    'is_dry': 1,
    'fire_risk_index': 0.4 * 35 + 0.4 * 5 - 0.2 * 0,
    # one-hot wind direction: only sector 2 is active
    **{f'wd_{sector}': int(sector == 2) for sector in range(7)},
    'temp_wind_interaction': 35 * 5,
    'is_windy': 0,
    'region_encoded': 1,  # e.g. Attica
}

# Reverse lookup: encoded value -> region name (list position is the code)
region_labels = dict(enumerate([
    'Aegean',
    'Attica',
    'Crete',
    'EpirusandWesternMacedonia',
    'MacedoniaandThrace',
    'Peloponnese,WesternGreeceand',
    'ThessalyandCentralGreece',
]))
region_code = dummy_data['region_encoded']
region_name = region_labels.get(region_code, "Unknown Region")

# Single-row DataFrame for the model
X_dummy = pd.DataFrame([dummy_data])

# Score the dummy row
dummy_probabilities = model.predict_proba(X_dummy)
prob = dummy_probabilities[0][1]
dummy_labels = model.predict(X_dummy)
binary = dummy_labels[0]

print(f"Region: {region_name}")
print(f"Dummy Wildfire Probability: {prob:.2f}")
print(f"Binary Prediction (0 = no fire, 1 = fire): {binary}")

Region: Attica
Dummy Wildfire Probability: 0.09
Binary Prediction (0 = no fire, 1 = fire): 0
