In [8]:
import os
import pandas as pd
import geopandas as gpd
import xarray as xr
import math
import rasterio
from rasterio.merge import merge
from rasterio.io import MemoryFile

**Build ML models to predict fire ignition for a pixel**

Features: FWI, lat, lon, elevation, landcover (2010), distance, month
Label: ignited or not ignited
Model: XGBoost
Optimization Method: Bayesian optimization using Optuna
Data range: EU

Steps:
1. Merge positive and negative labels of fire intensity and FWI data
    1.1 Load left join data (fire intensity left join FWI, resolution 1km) and count how many data points it has
    1.2 Assign positive labels (ignited) to the data points with distance <= 31 * sqrt(2) / 2 and only keep rows with positive labels
    1.3 Load right join data (fire intensity right join FWI, resolution 31km) and count how many data points it has
    1.4 Downsample the data to match the number of left join data points
    1.5 Assign negative labels (not ignited) to the data points with distance > 31 * sqrt(2) / 2 and only keep rows with negative labels
    1.6 Merge data and save the geo dataframe and save the dataframe
2. Add land cover (aggregated) data to pixel according to lon and lat
    2.1 Load land cover shapefile and convert to geopandas dataframe
    2.2 For each pixel (1x1 for left and 31x31 for right), calculate the average probability of fire ignition according to the CLIMADA Master thesis, Table 2.1
3. Add elevation data to pixel according to lon and lat
    3.1 Load elevation shapefile and convert to geopandas dataframe
    3.2 for each pixel (1x1 for left and 31x31 for right), calculate the average elevation
4. Build ML models to predict whether a pixel is ignited
5. Tune hyperparameters

# 1. Merge positive and negative labels of fire intensity and FWI data

In [24]:
from geopy.distance import great_circle

# Define a function to calculate the distance
def calculate_distance(row):
    """Return the great-circle distance, in kilometres, between the two
    coordinate pairs stored on a row.

    Parameters
    ----------
    row : mapping-like
        Must expose 'latitude_left', 'longitude_left', 'latitude_right'
        and 'longitude_right'.

    Returns
    -------
    The `.kilometers` value of the `great_circle` distance between the
    (lat, lon) "left" point and the (lat, lon) "right" point.
    """
    origin = row['latitude_left'], row['longitude_left']
    target = row['latitude_right'], row['longitude_right']
    separation = great_circle(origin, target)
    return separation.kilometers

# Directory (relative to the notebook) that the cells below read the joined
# 2013 datasets from and write the derived GeoPackages to.
folder = '../../climada_petals/data/wildfire/output/2013/'

## 1.1 Load left join data (fire intensity left join FWI, resolution 1km)

In [78]:
# Read the left-join dataset from `folder` into a GeoDataFrame and show its
# (rows, columns) shape.
merged_eu_2013_left_gdf_filename = 'merged_eu_2013_left_gdf'
df_left_join = gpd.read_file(os.path.join(folder, merged_eu_2013_left_gdf_filename))
df_left_join.shape

(78382, 17)

In [79]:
# Apply calculate_distance row by row (axis=1) and store the result, in
# kilometres, in a new column 'distance_km'.
df_left_join['distance_km'] = df_left_join.apply(calculate_distance, axis=1)

In [80]:
# Summary statistics of the left/right point separation in kilometres.
df_left_join['distance_km'].describe()

count    78382.000000
mean         9.123286
std          3.588911
min          0.035163
25%          6.526173
50%          9.238830
75%         12.049334
max         17.695499
Name: distance_km, dtype: float64

In [81]:
# Display the left-join GeoDataFrame, now including the distance_km column.
df_left_join

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,index,latitude_right,longitude_right,surface,fwi,distance,date,geometry,distance_km
0,35.8073,-0.2538,310.2,Aqua,MODIS,79,277.6,25.0,N,313919,35.75,-0.25,0.0,6.847656,0.057426,2013-01-01,POINT (-0.25380 35.80730),6.380693
1,47.8587,33.4466,308.0,Terra,MODIS,63,272.6,27.4,D,243494,47.75,33.50,0.0,0.214844,0.121108,2013-01-01,POINT (33.44660 47.85870),12.727889
2,49.6728,18.6611,307.4,Terra,MODIS,57,276.5,12.8,D,231915,49.75,18.75,0.0,1.417969,0.117741,2013-01-01,POINT (18.66110 49.67280),10.702774
3,37.3934,39.4902,303.1,Aqua,MODIS,55,287.0,14.4,D,302558,37.50,39.50,0.0,3.445312,0.107050,2013-01-01,POINT (39.49020 37.39340),11.884926
4,36.8729,6.9397,320.8,Terra,MODIS,75,286.9,32.9,D,306748,36.75,7.00,0.0,3.714844,0.136896,2013-01-01,POINT (6.93970 36.87290),14.682411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78377,52.1556,10.4051,307.4,Aqua,MODIS,65,276.4,10.9,D,378136842,52.25,10.50,0.0,0.230469,0.133856,2013-12-31,POINT (10.40510 52.15560),12.329164
78378,52.1535,10.3907,300.4,Aqua,MODIS,19,276.3,6.7,D,378136842,52.25,10.50,0.0,0.230469,0.145804,2013-12-31,POINT (10.39070 52.15350),13.062298
78379,35.8027,-0.2461,310.6,Aqua,MODIS,29,283.8,43.8,D,378233279,35.75,-0.25,0.0,0.609375,0.052844,2013-12-31,POINT (-0.24610 35.80270),5.870533
78380,54.5777,-1.1428,310.4,Terra,MODIS,71,275.8,15.6,N,378125275,54.50,-1.25,0.0,0.085938,0.132398,2013-12-31,POINT (-1.14280 54.57770),11.066649


In [82]:
# Keep only the relevant columns (this is a column selection, not a row
# filter); the coordinates retained are the *_left pair.
df_left_join = df_left_join[['latitude_left', 'longitude_left', 'brightness', 'confidence', 'bright_t31', 'fwi', 'distance_km', 'date', 'geometry']]

## 1.2 Assign positive labels (ignited) to the data points with distance <= 31 * sqrt(2) / 2 and only keep rows with positive labels

In [83]:
# Keep only rows that satisfy both conditions:
#   * distance_km is at most 31 * sqrt(2) / 2 (about 21.9 km, i.e. half the
#     diagonal of a square with 31 km sides);
#   * confidence is at least 30.
df_left_join = df_left_join[
    (df_left_join['distance_km'] <= 31 * math.sqrt(2) / 2)
    & (df_left_join['confidence'] >= 30)
]

In [84]:
# (rows, columns) remaining after the distance and confidence filters.
df_left_join.shape

(73505, 9)

In [85]:
# Label every remaining left-join row as a positive (ignited) example.
# df_left_join is the result of boolean filtering, so assigning a column in
# place can raise pandas' SettingWithCopyWarning; `.assign` returns a new
# frame instead and makes the cell safe to re-run.
df_left_join = df_left_join.assign(ignited=True)

## 1.3 Load and downsample right join data (fire intensity right join FWI, resolution 31km)

In [13]:
# NOTE(review): `folder` is re-assigned here with the same value it was given
# in the earlier cell.
folder = '../../climada_petals/data/wildfire/output/2013/'
# only NetCDF works for interpolated data
# NOTE(review): the remark above does not obviously relate to the read
# below -- confirm or remove.
file = 'merged_eu_2013_right_gdf'
# Read the full right-join dataset from `folder` into a GeoDataFrame.
df = gpd.read_file(os.path.join(folder, file))

# (rows, columns) of the full right-join table.
df.shape

(15210699, 17)

## 1.4 Downsample the data to match the number of left join data points

In [86]:
# Randomly sample 100,000 rows without replacement; random_state=42 makes the
# draw repeatable.
# NOTE(review): the sample size is a fixed 100,000 rather than the left-join
# row count mentioned in the section heading -- confirm this is intended.
df_right_join = df.sample(n=100000, replace=False, random_state=42)

In [87]:
# Apply calculate_distance row by row (axis=1) to the sampled rows and store
# the result, in kilometres, in a new column 'distance_km'.
df_right_join['distance_km'] = df_right_join.apply(calculate_distance, axis=1)

In [88]:
# Summary statistics of the left/right point separation in kilometres.
# NOTE(review): the recorded mean is about 1,829 km, so most sampled rows pair
# points that are far apart -- confirm this is the intended join behaviour.
df_right_join['distance_km'].describe()

count    100000.000000
mean       1828.623649
std        1067.024811
min           0.536269
25%         907.827912
50%        1827.439411
75%        2695.080328
max        4834.285241
Name: distance_km, dtype: float64

In [89]:
# Display the sampled right-join GeoDataFrame, including distance_km.
df_right_join

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,index,latitude_right,longitude_right,surface,fwi,distance,date,geometry,distance_km
7531953,51.2774,-61.3320,311.1,Aqua,MODIS,0,297.7,5.3,D,199579440,49.00,-60.00,0.0,,2.638328,2013-07-12,POINT (-60.00000 49.00000),270.431866
1662745,52.1884,-115.6310,314.3,Terra,MODIS,57,267.0,68.5,D,52149232,49.00,-92.00,0.0,3.906252e-03,23.845126,2013-02-20,POINT (-92.00000 49.00000),1697.227237
9672095,54.4132,-61.9265,303.3,Aqua,MODIS,29,291.3,5.1,D,245214407,57.25,-78.25,0.0,1.835329e-09,16.568165,2013-08-25,POINT (-78.25000 57.25000),1064.087916
2598626,56.6373,-121.8868,305.3,Terra,MODIS,52,276.0,40.9,D,86241059,78.50,-135.25,0.0,,25.623286,2013-03-25,POINT (-135.25000 78.50000),2481.318638
14862155,52.3129,-115.8773,319.6,Aqua,MODIS,67,269.7,23.3,D,359443841,53.25,-79.75,0.0,1.760846e-09,36.139452,2013-12-13,POINT (-79.75000 53.25000),2406.091708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2373679,50.5055,-120.3918,316.2,Aqua,MODIS,72,278.4,52.4,D,75936739,65.00,-55.25,0.0,,66.734883,2013-03-15,POINT (-55.25000 65.00000),3966.472953
4494868,50.0545,-97.6040,303.3,Terra,MODIS,48,288.4,6.6,D,135189754,52.25,-81.50,0.0,2.117188e+00,16.252970,2013-05-11,POINT (-81.50000 52.25000),1146.862831
13947378,51.8614,-124.7880,387.1,Terra,MODIS,75,272.9,190.8,D,336464178,77.25,-115.50,0.0,2.656250e-01,27.034200,2013-11-21,POINT (-115.50000 77.25000),2849.523999
3811036,54.0293,-124.0738,311.3,Terra,MODIS,53,286.0,9.6,D,120658537,51.50,-125.75,0.0,1.808982e-09,3.034305,2013-04-27,POINT (-125.75000 51.50000),302.993555


In [90]:
# Keep only the relevant columns (a column selection, not a row filter).
# Unlike the left-join selection, the coordinates retained here are the
# *_right pair, while brightness / confidence / bright_t31 are carried over
# from the same row.
# NOTE(review): confirm those three columns are meaningful for the *_right
# location.
df_right_join = df_right_join[['latitude_right', 'longitude_right', 'brightness', 'confidence', 'bright_t31', 'fwi', 'distance_km', 'date', 'geometry']]

In [91]:
# Save the down-sampled right-join data to `folder` as a GeoPackage; the next
# cell reloads it from the same path. The file name is a plain string (the
# original used an f-string with no placeholders).
df_right_join.to_file(os.path.join(folder, 'merged_eu_2013_right_downsampled_gdf'), driver='GPKG')

In [ ]:
# Reload the down-sampled right-join data from the GeoPackage written by the
# previous cell.
df_right_join = gpd.read_file(os.path.join(folder, 'merged_eu_2013_right_downsampled_gdf'))

## 1.5 Assign negative labels (not ignited) to the data points with distance > 31 * sqrt(2) / 2 and only keep rows with negative labels

In [92]:
# Keep only rows that satisfy both conditions:
#   * distance_km is strictly greater than 31 * sqrt(2) / 2 (about 21.9 km),
#     i.e. rows at or below that distance are dropped;
#   * confidence is at least 30.
df_right_join = df_right_join[
    (df_right_join['distance_km'] > 31 * math.sqrt(2) / 2)
    & (df_right_join['confidence'] >= 30)
]

In [93]:
# (rows, columns) remaining after the distance and confidence filters.
df_right_join.shape

(77296, 9)

In [94]:
# Label every remaining right-join row as a negative (not ignited) example.
# df_right_join is the result of boolean filtering, so assigning a column in
# place can raise pandas' SettingWithCopyWarning; `.assign` returns a new
# frame instead and makes the cell safe to re-run.
df_right_join = df_right_join.assign(ignited=False)

## 1.6 Merge data and save the geo dataframe and save the dataframe

In [96]:
# Display the labelled positive examples before merging.
df_left_join

Unnamed: 0,latitude_left,longitude_left,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited
0,35.8073,-0.2538,310.2,79,277.6,6.847656,6.380693,2013-01-01,POINT (-0.25380 35.80730),True
1,47.8587,33.4466,308.0,63,272.6,0.214844,12.727889,2013-01-01,POINT (33.44660 47.85870),True
2,49.6728,18.6611,307.4,57,276.5,1.417969,10.702774,2013-01-01,POINT (18.66110 49.67280),True
3,37.3934,39.4902,303.1,55,287.0,3.445312,11.884926,2013-01-01,POINT (39.49020 37.39340),True
4,36.8729,6.9397,320.8,75,286.9,3.714844,14.682411,2013-01-01,POINT (6.93970 36.87290),True
...,...,...,...,...,...,...,...,...,...,...
78375,49.3557,6.7315,300.3,35,273.4,0.082031,11.829615,2013-12-31,POINT (6.73150 49.35570),True
78376,48.2725,14.3292,306.9,61,278.5,0.308594,6.374402,2013-12-31,POINT (14.32920 48.27250),True
78377,52.1556,10.4051,307.4,65,276.4,0.230469,12.329164,2013-12-31,POINT (10.40510 52.15560),True
78380,54.5777,-1.1428,310.4,71,275.8,0.085938,11.066649,2013-12-31,POINT (-1.14280 54.57770),True


In [97]:
# Rename the columns to match: the positives contribute their *_left
# coordinates and the negatives their *_right coordinates under the shared
# names 'latitude' / 'longitude'.
df_left_join = df_left_join.rename(columns={'latitude_left': 'latitude', 'longitude_left': 'longitude'})
df_right_join = df_right_join.rename(columns={'latitude_right': 'latitude', 'longitude_right': 'longitude'})

# Concatenate the GeoDataFrames (positives first, then negatives) and
# renumber the rows from 0.
gdf_concat = pd.concat([df_left_join, df_right_join], ignore_index=True)

# Ensure the concatenated DataFrame is still a GeoDataFrame
gdf_concat = gpd.GeoDataFrame(gdf_concat, geometry='geometry')
# Display the combined, labelled dataset.
gdf_concat

Unnamed: 0,latitude,longitude,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited
0,35.8073,-0.2538,310.2,79,277.6,6.847656e+00,6.380693,2013-01-01,POINT (-0.25380 35.80730),True
1,47.8587,33.4466,308.0,63,272.6,2.148438e-01,12.727889,2013-01-01,POINT (33.44660 47.85870),True
2,49.6728,18.6611,307.4,57,276.5,1.417969e+00,10.702774,2013-01-01,POINT (18.66110 49.67280),True
3,37.3934,39.4902,303.1,55,287.0,3.445312e+00,11.884926,2013-01-01,POINT (39.49020 37.39340),True
4,36.8729,6.9397,320.8,75,286.9,3.714844e+00,14.682411,2013-01-01,POINT (6.93970 36.87290),True
...,...,...,...,...,...,...,...,...,...,...
150796,65.0000,-55.2500,316.2,72,278.4,,3966.472953,2013-03-15,POINT (-55.25000 65.00000),False
150797,52.2500,-81.5000,303.3,48,288.4,2.117188e+00,1146.862831,2013-05-11,POINT (-81.50000 52.25000),False
150798,77.2500,-115.5000,387.1,75,272.9,2.656250e-01,2849.523999,2013-11-21,POINT (-115.50000 77.25000),False
150799,51.5000,-125.7500,311.3,53,286.0,1.808982e-09,302.993555,2013-04-27,POINT (-125.75000 51.50000),False


# 2. Add land cover (aggregated) data to pixel according to lon and lat

## 2.1 Load land cover shapefile and convert to geopandas dataframe

In [118]:
# Display the combined dataset before land-cover values are attached.
gdf_concat

Unnamed: 0,latitude,longitude,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited
0,35.8073,-0.2538,310.2,79,277.6,6.847656e+00,6.380693,2013-01-01,POINT (-0.25380 35.80730),True
1,47.8587,33.4466,308.0,63,272.6,2.148438e-01,12.727889,2013-01-01,POINT (33.44660 47.85870),True
2,49.6728,18.6611,307.4,57,276.5,1.417969e+00,10.702774,2013-01-01,POINT (18.66110 49.67280),True
3,37.3934,39.4902,303.1,55,287.0,3.445312e+00,11.884926,2013-01-01,POINT (39.49020 37.39340),True
4,36.8729,6.9397,320.8,75,286.9,3.714844e+00,14.682411,2013-01-01,POINT (6.93970 36.87290),True
...,...,...,...,...,...,...,...,...,...,...
150796,65.0000,-55.2500,316.2,72,278.4,,3966.472953,2013-03-15,POINT (-55.25000 65.00000),False
150797,52.2500,-81.5000,303.3,48,288.4,2.117188e+00,1146.862831,2013-05-11,POINT (-81.50000 52.25000),False
150798,77.2500,-115.5000,387.1,75,272.9,2.656250e-01,2849.523999,2013-11-21,POINT (-115.50000 77.25000),False
150799,51.5000,-125.7500,311.3,53,286.0,1.808982e-09,302.993555,2013-04-27,POINT (-125.75000 51.50000),False


In [115]:
# Check that the land-cover raster is present. This tests the same relative
# location the next cell opens, instead of a machine-specific absolute
# Windows path whose backslashes formed invalid escape sequences in a
# non-raw string literal.
os.path.exists(os.path.join('../../climada_petals/data/wildfire/land_cover/', 'land_cover2015.tif'))

True

In [119]:
# Folder holding the land-cover raster. It has its own name so it is not
# confused with `elevation_folder`, which section 3 assigns to a different
# directory.
land_cover_folder = '../../climada_petals/data/wildfire/land_cover/'
# Open the land-cover GeoTIFF; the dataset is left open because it is sampled
# further down in this cell.
land_cover_raster = rasterio.open(os.path.join(land_cover_folder, 'land_cover2015.tif'))

import numpy as np

# Define a function to get raster values at given points
def get_raster_values(raster, points):
    """Sample `raster` at every point in `points` and return the first value
    of each sample.

    Parameters
    ----------
    raster : object with a ``sample(xy_pairs)`` method
        Receives a list of (x, y) tuples and yields one indexable sample per
        pair.
    points : object with a ``geometry`` iterable
        Each geometry must expose ``.x`` and ``.y``.

    Returns
    -------
    list
        Element 0 of each sample, in the same order as ``points.geometry``.

    Notes
    -----
    The coordinates are passed to the raster exactly as stored; no
    reprojection is performed here.
    """
    xy_pairs = []
    for geom in points.geometry:
        xy_pairs.append((geom.x, geom.y))

    first_values = []
    for pixel in raster.sample(xy_pairs):
        first_values.append(pixel[0])
    return first_values

# Sample the land-cover raster at every point of gdf_concat and store the
# first value of each sample in a new 'land_cover' column.
gdf_concat['land_cover'] = get_raster_values(land_cover_raster, gdf_concat)


In [121]:
# Distinct land-cover codes found among the sampled points.
gdf_concat.land_cover.unique()

array([ 50,  40, 126, 115, 114, 200,  30,  20, 121, 116,  60,  80,  90,
       124, 125, 111, 112, 255,   0,  70, 100, 113], dtype=uint8)

In [99]:
# # Load the land cover shapefile
# elevation_folder = '../../climada_petals/data/wildfire/land_cover/'
# land_cover_gdf = gpd.read_file(os.path.join(elevation_folder, 'ProbaV_UTM_LC100_biome_clusters_V3_global.shp'))
# land_cover_gdf

Unnamed: 0,bc_id,id,geometry
0,ZA01,1,"MULTIPOLYGON (((-170.81288 61.33151, -170.8128..."
1,ZA02,2,"MULTIPOLYGON (((-32.80088 63.12851, -32.80088 ..."
2,ZA03,3,"POLYGON ((-126.94237 69.54026, -126.94237 69.5..."
3,ZA04,4,"POLYGON ((-137.49938 65.89151, -137.49938 65.8..."
4,ZA05,5,"MULTIPOLYGON (((177.14963 53.24651, 177.14963 ..."
...,...,...,...
68,ZB47,69,"POLYGON ((137.87812 -14.58199, 137.87812 -14.6..."
69,ZB48,70,"POLYGON ((121.13363 -19.70899, 121.13363 -19.7..."
70,ZB49,71,"POLYGON ((119.26762 -18.97774, 119.30362 -18.9..."
71,ZB50,72,"POLYGON ((149.73413 -25.26649, 149.73413 -25.2..."


In [100]:
# # Check CRS
# print("CRS for land cover GDF:", land_cover_gdf.crs)
# print("CRS for merged EU GDF:", gdf_concat.crs)

CRS for land cover GDF: EPSG:4326
CRS for merged EU GDF: EPSG:4326


In [106]:
# gdf_concat.shape

(150801, 10)

In [101]:
# result_gdf = gpd.sjoin_nearest(gdf_concat, land_cover_gdf, how='left', distance_col='distance')
# result_gdf 




Unnamed: 0,latitude,longitude,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited,index_right,bc_id,id,distance
0,35.8073,-0.2538,310.2,79,277.6,6.847656e+00,6.380693,2013-01-01,POINT (-0.25380 35.80730),True,41,ZB22,44,0.000000
1,47.8587,33.4466,308.0,63,272.6,2.148438e-01,12.727889,2013-01-01,POINT (33.44660 47.85870),True,28,ZB09,31,0.000000
2,49.6728,18.6611,307.4,57,276.5,1.417969e+00,10.702774,2013-01-01,POINT (18.66110 49.67280),True,28,ZB09,31,0.000000
3,37.3934,39.4902,303.1,55,287.0,3.445312e+00,11.884926,2013-01-01,POINT (39.49020 37.39340),True,55,ZB19,41,0.000000
4,36.8729,6.9397,320.8,75,286.9,3.714844e+00,14.682411,2013-01-01,POINT (6.93970 36.87290),True,41,ZB22,44,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150796,65.0000,-55.2500,316.2,72,278.4,,3966.472953,2013-03-15,POINT (-55.25000 65.00000),False,1,ZA02,2,0.369872
150797,52.2500,-81.5000,303.3,48,288.4,2.117188e+00,1146.862831,2013-05-11,POINT (-81.50000 52.25000),False,5,ZA06,6,0.000000
150798,77.2500,-115.5000,387.1,75,272.9,2.656250e-01,2849.523999,2013-11-21,POINT (-115.50000 77.25000),False,1,ZA02,2,0.000000
150799,51.5000,-125.7500,311.3,53,286.0,1.808982e-09,302.993555,2013-04-27,POINT (-125.75000 51.50000),False,11,ZA12,12,0.000000


In [105]:
# result_gdf['distance'].describe()

count    150801.000000
mean          0.295478
std           1.064557
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max          11.305619
Name: distance, dtype: float64

## 2.2 For each pixel (1x1 for left and 31x31 for right), calculate the average probability of fire ignition according to the CLIMADA Master thesis, Table 2.1


In [ ]:
# from shapely.geometry import Point, box
# 
# # Create a new column 'geometry' that converts each point to a square polygon
# # Assuming the points are in a projected coordinate system where units are meters
# merged_eu_2013_gdf['geometry'] = merged_eu_2013_gdf.apply(
#     lambda row: box(
#         row['longitude'] - 500,  # Subtract 500 meters from the longitude
#         row['latitude'] - 500,   # Subtract 500 meters from the latitude
#         row['longitude'] + 500,  # Add 500 meters to the longitude
#         row['latitude'] + 500    # Add 500 meters to the latitude
#     ), axis=1
# )
# merged_eu_2013_gdf = gpd.GeoDataFrame(merged_eu_2013_gdf, geometry='geometry')
# 
# import geopandas as gpd
# 
# # Load the land cover shapefile
# land_cover_gdf = gpd.read_file('/path/to/ProbaV_UTM_LC100_biome_clusters_V3_global.shp')
# 
# # Ensure CRS match, reproject if necessary
# if merged_eu_2013_gdf.crs != land_cover_gdf.crs:
#     merged_eu_2013_gdf = merged_eu_2013_gdf.to_crs(land_cover_gdf.crs)
# 
# # Perform spatial join
# joined_gdf = gpd.sjoin(merged_eu_2013_gdf, land_cover_gdf, how='inner', op='intersects')
# 
# import pandas as pd
# 
# # Group by the index of the merged_eu_2013_gdf and find the most common land cover type
# most_common_land_cover = joined_gdf.groupby('index_merged_eu_2013').apply(
#     lambda g: g['bc_id'].value_counts().idxmax()
# )
# merged_eu_2013_gdf['most_frequent_land_cover'] = most_common_land_cover
# 
# # Save the updated GeoDataFrame
# merged_eu_2013_gdf.to_file('/path/to/save/updated_merged_eu_2013.gpkg', driver='GPKG')


# 3. Add elevation data to pixel according to lon and lat
## 3.1 Load elevation shapefile and convert to geopandas dataframe

Create a Virtual Raster (VRT)
You can use the GDAL command-line tools or rasterio's virtual raster capabilities in Python to combine your GeoTIFF files into a VRT.
Using GDAL Command Line:
gdalbuildvrt combined_elevation.vrt 50N000E_20101117_gmted_mea075.tif 50N030W_20101117_gmted_mea075.tif 50N030E_20101117_gmted_mea075.tif 30N030W_20101117_gmted_mea075.tif 30N000E_20101117_gmted_mea075.tif 30N030E_20101117_gmted_mea075.tif

In [123]:


elevation_folder = '../../climada_petals/data/wildfire/elevation/'
# List of your GeoTIFF files
files = ['50N000E_20101117_gmted_mea075.tif', '50N030W_20101117_gmted_mea075.tif', '50N030E_20101117_gmted_mea075.tif', '30N030W_20101117_gmted_mea075.tif', '30N000E_20101117_gmted_mea075.tif', '30N030E_20101117_gmted_mea075.tif']


# Open all files
src_files = [rasterio.open(os.path.join(elevation_folder, f)) for f in files]

try:
    # Merge the tiles into a single in-memory array plus its affine transform
    # (no VRT file is written).
    mosaic, out_transform = merge(src_files)

    # Write band 0 of the mosaic into an in-memory GeoTIFF, taking the CRS
    # from the first source tile.
    with MemoryFile() as memfile:
        with memfile.open(driver='GTiff', height=mosaic.shape[1], width=mosaic.shape[2],
                          count=1, dtype=mosaic.dtype, transform=out_transform,
                          crs=src_files[0].crs) as dataset:
            dataset.write(mosaic[0], 1)
            # NOTE(review): this name refers to the in-memory dataset, which is
            # expected to be released when the enclosing `with` blocks exit, so
            # it should not be opened afterwards. The sampling step later in
            # the notebook reads 'combined_elevation.vrt' from disk instead.
            virtual_raster = dataset.name  # Path to the in-memory dataset
finally:
    # Always release the source tile handles, even if merging or writing fails.
    for src in src_files:
        src.close()

## 3.2 for each pixel (1x1 for left and 31x31 for right), calculate the average elevation

In [125]:
from rasterio.vrt import WarpedVRT

# Load the virtual raster
elevation_raster = rasterio.open(os.path.join(elevation_folder, 'combined_elevation.vrt'))
# Ensure CRS alignment. rasterio datasets have no `to_crs` method, so when the
# raster CRS differs from the points' CRS the dataset is wrapped in a
# WarpedVRT that exposes it in the points' CRS. The wrapper is bound to
# `elevation_raster` because that is the name the sampling cell uses.
if gdf_concat.crs is None:
    # Without a CRS on the points there is nothing to compare against.
    print('gdf_concat has no CRS; assuming its coordinates are already in the raster CRS:', elevation_raster.crs)
elif elevation_raster.crs != gdf_concat.crs:
    elevation_raster = WarpedVRT(elevation_raster, crs=gdf_concat.crs.to_wkt())

In [126]:
# Function to get elevation data from raster
def get_elevation(lon, lat, raster):
    """Return the first value of the raster sample taken at (lon, lat).

    Parameters
    ----------
    lon, lat : coordinates passed to the raster as the single pair (lon, lat).
    raster : object with a ``sample(xy_pairs)`` method that yields one
        indexable sample per pair.

    Returns
    -------
    Element 0 of the first sample yielded, or None if nothing is yielded.
    """
    samples = iter(raster.sample([(lon, lat)]))
    try:
        first_sample = next(samples)
    except StopIteration:
        return None
    return first_sample[0]

# Sample the elevation raster once for all points, reusing the batched
# get_raster_values helper that the land-cover step already uses, instead of
# issuing one raster.sample call per row through DataFrame.apply.
# NOTE(review): the recorded output shows -32768 for some points, which looks
# like the raster's nodata value -- confirm and decide how downstream steps
# should treat those rows.
gdf_concat['elevation'] = get_raster_values(elevation_raster, gdf_concat)

In [127]:
# Display the dataset with the added land_cover and elevation columns.
gdf_concat

Unnamed: 0,latitude,longitude,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited,land_cover,elevation
0,35.8073,-0.2538,310.2,79,277.6,6.847656e+00,6.380693,2013-01-01,POINT (-0.25380 35.80730),True,50,16
1,47.8587,33.4466,308.0,63,272.6,2.148438e-01,12.727889,2013-01-01,POINT (33.44660 47.85870),True,50,110
2,49.6728,18.6611,307.4,57,276.5,1.417969e+00,10.702774,2013-01-01,POINT (18.66110 49.67280),True,50,319
3,37.3934,39.4902,303.1,55,287.0,3.445312e+00,11.884926,2013-01-01,POINT (39.49020 37.39340),True,40,705
4,36.8729,6.9397,320.8,75,286.9,3.714844e+00,14.682411,2013-01-01,POINT (6.93970 36.87290),True,50,2
...,...,...,...,...,...,...,...,...,...,...,...,...
150796,65.0000,-55.2500,316.2,72,278.4,,3966.472953,2013-03-15,POINT (-55.25000 65.00000),False,200,-32768
150797,52.2500,-81.5000,303.3,48,288.4,2.117188e+00,1146.862831,2013-05-11,POINT (-81.50000 52.25000),False,126,-32768
150798,77.2500,-115.5000,387.1,75,272.9,2.656250e-01,2849.523999,2013-11-21,POINT (-115.50000 77.25000),False,200,-32768
150799,51.5000,-125.7500,311.3,53,286.0,1.808982e-09,302.993555,2013-04-27,POINT (-125.75000 51.50000),False,70,-32768


In [128]:
# Save the updated GeoDataFrame (labels plus land_cover and elevation) to
# `folder` as a GeoPackage.
gdf_concat.to_file(os.path.join(folder, 'ignited_eu_2013_gdf'), driver='GPKG')

# 4. Build ML models to predict whether a pixel is ignited