In [2]:
import os
import pandas as pd
import geopandas as gpd
import xarray as xr
import math
import rasterio
from rasterio.merge import merge
from rasterio.io import MemoryFile
import numpy as np
from rasterio.io import MemoryFile
from rasterio.windows import Window

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

**Build ML models to predict fire ignition for a pixel**

Features: FWI, lat, lon, elevation (2010), landcover (2015), distance, month
Label: ignited or not ignited
Model: XGBoost
Optimization Method: Bayesian optimization using Optuna
Data range: EU

Steps:
1. Merge positive and negative labels of fire intensity and FWI data
    1.1 Load left join data (fire intensity left join FWI, resolution 1km) and count how many data points it has
    1.2 Assign positive labels (ignited) to the data points with distance <= 31 * sqrt(2) / 2 and only keep rows with positive labels
    1.3 Load right join data (fire intensity right join FWI, resolution 31km) and count how many data points it has
    1.4 Downsample the data to match the number of left join data points
    1.5 Assign negative labels (not ignited) to the data points with distance > 31 * sqrt(2) / 2 and only keep rows with negative labels
    1.6 Merge the data, then save the GeoDataFrame and the DataFrame
2. Add land cover (aggregated) data to pixel according to lon and lat
    2.1 Load land cover shapefile and convert to geopandas dataframe
    2.2 For each pixel (1x1 for left and 31x31 for right), calculate the average probability of fire ignition according to Table 2.1 of the CLIMADA Master thesis
3. Add elevation data to pixel according to lon and lat
    3.1 Load elevation shapefile and convert to geopandas dataframe
    3.2 for each pixel (1x1 for left and 31x31 for right), calculate the average elevation
4. Build ML models to predict whether a pixel is ignited
5. Tune hyperparameters

Issue solved:

1. The longitude range in the right-joined dataset was not correct. It is now correct after rerunning the code.
2. If a lon/lat pair cannot be found in the elevation raster, it is converted to indices outside the raster. Such points are now assigned the nearest valid elevation.

# 1. Merge positive and negative labels of fire intensity and FWI data

In [3]:
from geopy.distance import great_circle

# Great-circle distance (km) between the "left" and "right" coordinates of a row
def calculate_distance(row):
    """Return the great-circle distance in kilometres between a row's two points.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide 'latitude_left', 'longitude_left', 'latitude_right'
        and 'longitude_right'.

    Returns
    -------
    The ``.kilometers`` attribute of ``great_circle`` evaluated on the
    (latitude, longitude) pairs of the left and right points.
    """
    left_point = row['latitude_left'], row['longitude_left']
    right_point = row['latitude_right'], row['longitude_right']
    return great_circle(left_point, right_point).kilometers

folder = '../../climada_petals/data/wildfire/output/2013/'

## 1.1 Load left join data (fire intensity left join FWI, resolution 1km)

In [4]:
# Read the 2013 EU left-join table (fire intensity left join FWI, per the section
# heading) from `folder` and display its (rows, columns) shape.
merged_eu_2013_left_gdf_filename = 'merged_eu_2013_left_gdf'
df_left_join = gpd.read_file(os.path.join(folder, merged_eu_2013_left_gdf_filename))
df_left_join.shape

(78382, 17)

In [5]:
# Apply the function to each row in the GeoDataFrame and create a new column 'distance_km'
df_left_join['distance_km'] = df_left_join.apply(calculate_distance, axis=1)

In [6]:
df_left_join['distance_km'].describe()

count    78382.000000
mean         9.123286
std          3.588911
min          0.035163
25%          6.526173
50%          9.238830
75%         12.049334
max         17.695499
Name: distance_km, dtype: float64

In [7]:
df_left_join['longitude_left'].describe()

count    78382.000000
mean        22.953229
std         14.650091
min        -29.313700
25%         13.488950
50%         27.629200
75%         35.057125
max         39.999500
Name: longitude_left, dtype: float64

In [8]:
df_left_join

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,index,latitude_right,longitude_right,surface,fwi,distance,date,geometry,distance_km
0,35.8073,-0.2538,310.2,Aqua,MODIS,79,277.6,25.0,N,313919,35.75,-0.25,0.0,6.847656,0.057426,2013-01-01,POINT (-0.25380 35.80730),6.380693
1,47.8587,33.4466,308.0,Terra,MODIS,63,272.6,27.4,D,243494,47.75,33.50,0.0,0.214844,0.121108,2013-01-01,POINT (33.44660 47.85870),12.727889
2,49.6728,18.6611,307.4,Terra,MODIS,57,276.5,12.8,D,231915,49.75,18.75,0.0,1.417969,0.117741,2013-01-01,POINT (18.66110 49.67280),10.702774
3,37.3934,39.4902,303.1,Aqua,MODIS,55,287.0,14.4,D,302558,37.50,39.50,0.0,3.445312,0.107050,2013-01-01,POINT (39.49020 37.39340),11.884926
4,36.8729,6.9397,320.8,Terra,MODIS,75,286.9,32.9,D,306748,36.75,7.00,0.0,3.714844,0.136896,2013-01-01,POINT (6.93970 36.87290),14.682411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78377,52.1556,10.4051,307.4,Aqua,MODIS,65,276.4,10.9,D,378136842,52.25,10.50,0.0,0.230469,0.133856,2013-12-31,POINT (10.40510 52.15560),12.329164
78378,52.1535,10.3907,300.4,Aqua,MODIS,19,276.3,6.7,D,378136842,52.25,10.50,0.0,0.230469,0.145804,2013-12-31,POINT (10.39070 52.15350),13.062298
78379,35.8027,-0.2461,310.6,Aqua,MODIS,29,283.8,43.8,D,378233279,35.75,-0.25,0.0,0.609375,0.052844,2013-12-31,POINT (-0.24610 35.80270),5.870533
78380,54.5777,-1.1428,310.4,Terra,MODIS,71,275.8,15.6,N,378125275,54.50,-1.25,0.0,0.085938,0.132398,2013-12-31,POINT (-1.14280 54.57770),11.066649


In [9]:
# Keep only the relevant columns (this is a column selection; no rows are dropped)
df_left_join = df_left_join[['latitude_left', 'longitude_left', 'brightness', 'confidence', 'bright_t31', 'fwi', 'distance_km', 'date', 'geometry']]

## 1.2 Assign positive labels (ignited) to the data points with distance <= 31 * sqrt(2) / 2 and only keep rows with positive labels

In [10]:
# Keep rows whose distance_km is at most 31 * sqrt(2) / 2 km
# (about 21.9 km, i.e. half the diagonal of a 31 km square), not 31 km.
left_within_radius = df_left_join['distance_km'] <= 31 * math.sqrt(2) / 2
# Keep rows with confidence >= 30 (drops detections with confidence < 30).
left_confident = df_left_join['confidence'] >= 30
# .copy() yields an independent frame, so adding the `ignited` column later
# does not write into a slice of the original frame (SettingWithCopyWarning).
df_left_join = df_left_join[left_within_radius & left_confident].copy()

In [11]:
df_left_join.shape

(73505, 9)

In [12]:
df_left_join['ignited'] = True

## 1.3 Load and down sampling right join data (fire intensity right join FWI, resolution 31km)

In [13]:
# Read the 2013 EU right-join table and display its (rows, columns) shape.
# `folder` is re-assigned here to the same path used for the left-join data.
folder = '../../climada_petals/data/wildfire/output/2013/'
# only NetCDF works for interpolated data
# NOTE(review): the file below is read with gpd.read_file, not as NetCDF —
# confirm whether the remark above is still relevant.
file = 'merged_eu_2013_right_gdf'
df = gpd.read_file(os.path.join(folder, file))

df.shape

(15915825, 17)

In [14]:
df['fwi'].isna().sum()

7846770

In [15]:
# Drop rows that have no FWI value; inplace=True mutates `df` itself.
df.dropna(subset=['fwi'], inplace=True)

In [16]:
df['fwi'].isna().sum()

0

In [17]:
df.head()

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,index,latitude_right,longitude_right,surface,fwi,distance,date,geometry
161,51.3698,6.7047,310.6,Terra,MODIS,81,275.0,15.6,N,104996,72.0,-31.0,0.0,2.660156,42.979641,2013-01-01,POINT (-31.00000 72.00000)
162,51.3698,6.7047,310.6,Terra,MODIS,81,275.0,15.6,N,104997,72.0,-30.75,0.0,2.871094,42.760492,2013-01-01,POINT (-30.75000 72.00000)
163,51.3698,6.7047,310.6,Terra,MODIS,81,275.0,15.6,N,104998,72.0,-30.5,0.0,2.761719,42.541684,2013-01-01,POINT (-30.50000 72.00000)
164,51.3698,6.7047,310.6,Terra,MODIS,81,275.0,15.6,N,104999,72.0,-30.25,0.0,2.644531,42.323221,2013-01-01,POINT (-30.25000 72.00000)
165,51.3698,6.7047,310.6,Terra,MODIS,81,275.0,15.6,N,105000,72.0,-30.0,0.0,2.027344,42.105108,2013-01-01,POINT (-30.00000 72.00000)


In [18]:
df['longitude_right'].describe()

count    8.069055e+06
mean     1.636234e+01
std      1.617003e+01
min     -3.100000e+01
25%      6.000000e+00
50%      1.875000e+01
75%      2.925000e+01
max      4.000000e+01
Name: longitude_right, dtype: float64

## 1.4 Down sampling the data to match the number of left join data

In [19]:
# Randomly sample 100,000 rows without replacement (random_state fixes the draw).
# NOTE(review): the section heading says the sample should match the number of
# left-join rows, but a fixed n=100000 is used here — confirm which is intended.
df_right_join = df.sample(n=100000, replace=False, random_state=42)

In [20]:
# Apply the function to each row in the GeoDataFrame and create a new column 'distance_km'
df_right_join['distance_km'] = df_right_join.apply(calculate_distance, axis=1)

In [21]:
df_right_join['distance_km'].describe()

count    100000.000000
mean        696.373660
std         680.180202
min           0.648141
25%         203.730283
50%         450.184479
75%         974.244231
max        4460.044308
Name: distance_km, dtype: float64

In [22]:
df_right_join

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,index,latitude_right,longitude_right,surface,fwi,distance,date,geometry,distance_km
9248119,67.9987,31.1807,303.0,Aqua,MODIS,53,281.3,15.4,N,220229434,68.75,38.50,0.0,3.363281e+00,7.357758,2013-08-01,POINT (38.50000 68.75000),311.149650
3826700,51.1718,3.8173,307.8,Aqua,MODIS,57,271.9,20.2,N,90597605,43.00,1.25,0.0,2.382812e-01,8.565591,2013-03-29,POINT (1.25000 43.00000),929.065539
6859465,64.6636,21.2850,307.5,Aqua,MODIS,54,284.4,12.3,N,163175125,60.25,21.25,0.0,8.203125e-01,4.413739,2013-06-07,POINT (21.25000 60.25000),490.773902
3386366,43.2701,3.0019,305.9,Aqua,MODIS,47,285.8,5.8,D,80193596,47.00,-1.00,0.0,5.078125e-02,5.470590,2013-03-19,POINT (-1.00000 47.00000),519.996506
10976317,45.0290,27.7902,321.8,Aqua,MODIS,65,307.5,10.4,D,260860432,44.50,28.00,0.0,2.052344e+01,0.569084,2013-09-09,POINT (28.00000 44.50000),61.109696
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8776895,60.1828,12.0131,304.7,Terra,MODIS,0,292.3,12.1,D,208851875,61.25,8.75,0.0,8.535156e+00,3.433182,2013-07-21,POINT (8.75000 61.25000),213.457403
11078400,42.8878,-9.1271,321.9,Aqua,MODIS,56,290.5,22.7,N,263830920,69.75,-30.00,0.0,1.850602e-09,34.018462,2013-09-12,POINT (-30.00000 69.75000),3213.688301
12042471,54.4075,21.7102,315.5,Aqua,MODIS,76,284.9,15.7,D,286695441,65.50,20.25,0.0,4.882812e-01,11.188196,2013-10-04,POINT (20.25000 65.50000),1236.024186
11331126,42.1679,24.6028,314.1,Aqua,MODIS,50,298.6,32.9,D,269196576,39.25,24.00,0.0,3.125000e+00,2.979515,2013-09-17,POINT (24.00000 39.25000),328.407676


In [23]:
# Keep only the relevant columns (this is a column selection; no rows are dropped).
# The right-side coordinates are kept here, whereas the left-join frame keeps the left-side ones.
df_right_join = df_right_join[['latitude_right', 'longitude_right', 'brightness', 'confidence', 'bright_t31', 'fwi', 'distance_km', 'date', 'geometry']]

In [24]:
# Write the down-sampled right-join frame to `folder` in GeoPackage format
downsampled_path = os.path.join(folder, 'merged_eu_2013_right_downsampled_gdf')
df_right_join.to_file(downsampled_path, driver='GPKG')

In [13]:
# Reload the down-sampled right-join frame from disk and summarise its longitudes.
# NOTE(review): this cell's execution count is out of sequence, and the statistics
# shown for the reloaded file differ noticeably from those of the frame sampled
# above; the file on disk may come from an earlier run (possibly before rows with
# missing fwi were dropped). Confirm with Restart Kernel -> Run All.
df_right_join = gpd.read_file(os.path.join(folder, 'merged_eu_2013_right_downsampled_gdf'))
df_right_join['longitude_right'].describe()

count    100000.000000
mean          4.474630
std          20.602765
min         -31.000000
25%         -13.500000
50%           4.500000
75%          22.250000
max          40.000000
Name: longitude_right, dtype: float64

## 1.5 Assign negative labels (not ignited) to the data points with distance > 31 * sqrt(2) / 2 and only keep rows with negative labels

In [25]:
# Keep rows whose distance_km is greater than 31 * sqrt(2) / 2 km
# (about 21.9 km); rows at or below that distance are dropped.
right_outside_radius = df_right_join['distance_km'] > 31 * math.sqrt(2) / 2
# Keep rows with confidence >= 30 (drops rows with confidence < 30).
right_confident = df_right_join['confidence'] >= 30
# .copy() yields an independent frame, so adding the `ignited` column later
# does not write into a slice of the original frame (SettingWithCopyWarning).
df_right_join = df_right_join[right_outside_radius & right_confident].copy()

In [26]:
df_right_join.shape

(85419, 9)

In [27]:
df_right_join['ignited'] = False

## 1.6 Merge data and save the geo dataframe and save the dataframe

In [29]:
df_right_join

Unnamed: 0,latitude_right,longitude_right,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited
0,40.75,-0.25,307.4,54,295.4,2.605859e+01,258.512515,2013-07-28,POINT (-0.25000 40.75000),False
1,42.75,23.75,301.6,47,283.5,1.855986e-09,961.118452,2013-01-27,POINT (23.75000 42.75000),False
2,55.75,-13.00,326.7,71,284.9,,350.343726,2013-04-08,POINT (-13.00000 55.75000),False
3,69.00,14.00,315.2,75,287.9,2.851562e-01,1034.104520,2013-05-03,POINT (14.00000 69.00000),False
4,39.25,12.00,326.0,100,281.0,,514.644749,2013-03-13,POINT (12.00000 39.25000),False
...,...,...,...,...,...,...,...,...,...,...
99995,48.50,-20.75,303.1,47,288.5,,1511.020307,2013-10-25,POINT (-20.75000 48.50000),False
99996,42.75,4.25,307.0,70,288.1,,93.131421,2013-10-05,POINT (4.25000 42.75000),False
99997,54.75,-20.25,305.0,60,287.2,,2241.524404,2013-01-25,POINT (-20.25000 54.75000),False
99998,51.50,-29.50,309.9,79,287.5,,1948.420714,2013-08-17,POINT (-29.50000 51.50000),False


In [28]:
# Rename the coordinate columns so both frames share 'latitude' / 'longitude'
# (left frame: fire-detection side, right frame: right-side coordinates).
df_left_join = df_left_join.rename(columns={'latitude_left': 'latitude', 'longitude_left': 'longitude'})
df_right_join = df_right_join.rename(columns={'latitude_right': 'latitude', 'longitude_right': 'longitude'})

# Concatenate the GeoDataFrames (positives first, then negatives);
# ignore_index=True renumbers the rows from 0.
gdf_concat = pd.concat([df_left_join, df_right_join], ignore_index=True)

# Ensure the concatenated DataFrame is still a GeoDataFrame
gdf_concat = gpd.GeoDataFrame(gdf_concat, geometry='geometry')
gdf_concat

Unnamed: 0,latitude,longitude,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited
0,35.8073,-0.2538,310.2,79,277.6,6.847656e+00,6.380693,2013-01-01,POINT (-0.25380 35.80730),True
1,47.8587,33.4466,308.0,63,272.6,2.148438e-01,12.727889,2013-01-01,POINT (33.44660 47.85870),True
2,49.6728,18.6611,307.4,57,276.5,1.417969e+00,10.702774,2013-01-01,POINT (18.66110 49.67280),True
3,37.3934,39.4902,303.1,55,287.0,3.445312e+00,11.884926,2013-01-01,POINT (39.49020 37.39340),True
4,36.8729,6.9397,320.8,75,286.9,3.714844e+00,14.682411,2013-01-01,POINT (6.93970 36.87290),True
...,...,...,...,...,...,...,...,...,...,...
158919,52.7500,16.7500,313.0,58,297.9,4.812500e+00,344.381719,2013-07-14,POINT (16.75000 52.75000),False
158920,69.7500,-30.0000,321.9,56,290.5,1.850602e-09,3213.688301,2013-09-12,POINT (-30.00000 69.75000),False
158921,65.5000,20.2500,315.5,76,284.9,4.882812e-01,1236.024186,2013-10-04,POINT (20.25000 65.50000),False
158922,39.2500,24.0000,314.1,50,298.6,3.125000e+00,328.407676,2013-09-17,POINT (24.00000 39.25000),False


# 2. Add land cover (aggregated) data to pixel according to lon and lat

## 2.1 Load land cover shapefile and convert to geopandas dataframe

In [29]:
# Open the 2015 land-cover GeoTIFF; the dataset handle is used for point sampling below.
land_cover_folder = '../../climada_petals/data/wildfire/land_cover/'
land_cover_raster = rasterio.open(os.path.join(land_cover_folder, 'land_cover2015.tif'))


# Look up the raster value underneath every point of a geodataframe
def get_raster_values(raster, points):
    """Sample `raster` at each point of `points.geometry`.

    Parameters
    ----------
    raster : object exposing ``sample(coords)``
        ``sample`` must yield one indexable item per coordinate pair.
    points : object with a ``geometry`` iterable
        Each geometry must expose ``x`` and ``y``. The coordinates are passed
        to the raster as-is; no reprojection is performed here.

    Returns
    -------
    list
        Element 0 of every sample, in the order of `points.geometry`.
    """
    xy_pairs = []
    for geom in points.geometry:
        xy_pairs.append((geom.x, geom.y))

    sampled = []
    for band_values in raster.sample(xy_pairs):
        sampled.append(band_values[0])
    return sampled

# Apply the function to the geodataframe, then release the raster file handle
# (it is not needed once the values have been sampled).
try:
    gdf_concat['land_cover'] = get_raster_values(land_cover_raster, gdf_concat)
finally:
    land_cover_raster.close()


In [30]:
gdf_concat.land_cover.unique()

array([ 50,  40, 126, 115, 114, 200,  30,  20, 121, 116,  60,  80,  90,
       124, 125, 111, 112,  70, 100], dtype=uint8)

## 2.2 For each pixel (1x1 for left and 31x31 for right), calculate the average probability of fire ignition according to Table 2.1 of the CLIMADA Master thesis


In [ ]:
# from shapely.geometry import Point, box
# 
# # Create a new column 'geometry' that converts each point to a square polygon
# # Assuming the points are in a projected coordinate system where units are meters
# merged_eu_2013_gdf['geometry'] = merged_eu_2013_gdf.apply(
#     lambda row: box(
#         row['longitude'] - 500,  # Subtract 500 meters from the longitude
#         row['latitude'] - 500,   # Subtract 500 meters from the latitude
#         row['longitude'] + 500,  # Add 500 meters to the longitude
#         row['latitude'] + 500    # Add 500 meters to the latitude
#     ), axis=1
# )
# merged_eu_2013_gdf = gpd.GeoDataFrame(merged_eu_2013_gdf, geometry='geometry')
# 
# import geopandas as gpd
# 
# # Load the land cover shapefile
# land_cover_gdf = gpd.read_file('/path/to/ProbaV_UTM_LC100_biome_clusters_V3_global.shp')
# 
# # Ensure CRS match, reproject if necessary
# if merged_eu_2013_gdf.crs != land_cover_gdf.crs:
#     merged_eu_2013_gdf = merged_eu_2013_gdf.to_crs(land_cover_gdf.crs)
# 
# # Perform spatial join
# joined_gdf = gpd.sjoin(merged_eu_2013_gdf, land_cover_gdf, how='inner', op='intersects')
# 
# import pandas as pd
# 
# # Group by the index of the merged_eu_2013_gdf and find the most common land cover type
# most_common_land_cover = joined_gdf.groupby('index_merged_eu_2013').apply(
#     lambda g: g['bc_id'].value_counts().idxmax()
# )
# merged_eu_2013_gdf['most_frequent_land_cover'] = most_common_land_cover
# 
# # Save the updated GeoDataFrame
# merged_eu_2013_gdf.to_file('/path/to/save/updated_merged_eu_2013.gpkg', driver='GPKG')


# 3. Add elevation data (2010, mean 7.5 arcsec) to pixel according to lon and lat
## 3.1 Load elevation shapefile and convert to geopandas dataframe

Create a Virtual Raster (VRT)
You can use the GDAL command-line tools or rasterio's virtual raster capabilities in Python to combine your GeoTIFF files into a VRT.
Using GDAL Command Line:
gdalbuildvrt combined_elevation.vrt 50N000E_20101117_gmted_mea075.tif 50N030W_20101117_gmted_mea075.tif 50N030E_20101117_gmted_mea075.tif 30N030W_20101117_gmted_mea075.tif 30N000E_20101117_gmted_mea075.tif 30N030E_20101117_gmted_mea075.tif

In [31]:
# Method 2: merge the elevation tiles in Python instead of building a VRT with GDAL

# Elevation folder path and the six GMTED (mea075) tile file names
elevation_folder = '../../climada_petals/data/wildfire/elevation/'
files = [
    '50N000E_20101117_gmted_mea075.tif',
    '50N030W_20101117_gmted_mea075.tif',
    '50N030E_20101117_gmted_mea075.tif',
    '30N030W_20101117_gmted_mea075.tif',
    '30N000E_20101117_gmted_mea075.tif',
    '30N030E_20101117_gmted_mea075.tif'
]

# Open all files; the datasets are kept in src_files because src_files[0].crs
# is read later when the mosaic is written to an in-memory GeoTIFF.
src_files = [rasterio.open(os.path.join(elevation_folder, f)) for f in files]

# Merge the tiles into a single in-memory array plus its affine transform
# (no VRT file is written). `mosaic` is indexed as (band, row, col) below.
mosaic, out_transform = merge(src_files)





def find_nearest_valid_elevation(dataset, x, y, max_distance=10):
    """Return the value of the valid raster cell closest to coordinate (x, y).

    The coordinate is converted to raster indices and clamped to the raster
    extent, so points outside the raster are snapped to the nearest edge cell.
    A square window of up to ``max_distance`` cells around that cell is then
    read, and the value of the valid cell with the smallest Euclidean distance
    (in index space) to the clamped cell is returned.

    Parameters
    ----------
    dataset : open rasterio dataset
        Must provide ``index``, ``height``, ``width``, ``nodata`` and ``read``.
    x, y : float
        Coordinates in the dataset's CRS.
    max_distance : int, optional
        Search radius in cells around the clamped cell (default 10, the value
        that was previously hard-coded).

    Returns
    -------
    scalar or None
        The nearest valid cell value, or None when every cell in the search
        window equals the dataset's nodata value.
    """
    row, col = dataset.index(x, y)  # Convert geographic coordinates to raster indices

    # Clamp to the raster so out-of-extent points snap to the nearest edge cell
    row = max(0, min(dataset.height - 1, row))
    col = max(0, min(dataset.width - 1, col))

    # Window bounds, clipped to the raster boundaries
    row_start = max(0, row - max_distance)
    row_stop = min(dataset.height, row + max_distance + 1)
    col_start = max(0, col - max_distance)
    col_stop = min(dataset.width, col + max_distance + 1)

    window = Window.from_slices((row_start, row_stop), (col_start, col_stop))
    data = dataset.read(1, window=window)

    # Build the validity mask; a dataset without a nodata value has no invalid cells
    nodata = dataset.nodata
    if nodata is None:
        valid = np.ones(data.shape, dtype=bool)
    elif np.isnan(nodata):
        # NaN never compares equal to itself, so it needs an explicit test
        valid = ~np.isnan(data)
    else:
        valid = data != nodata

    if not valid.any():
        return None  # No valid data found within the search radius

    # Pick the valid cell closest to the clamped target cell, rather than the
    # smallest value in the window (which would bias towards low elevations).
    valid_rows, valid_cols = np.nonzero(valid)
    squared_dist = (valid_rows + row_start - row) ** 2 + (valid_cols + col_start - col) ** 2
    nearest = np.argmin(squared_dist)  # ties resolve to the first cell in row-major order
    return data[valid_rows[nearest], valid_cols[nearest]]


# Write the merged mosaic to an in-memory GeoTIFF so coordinates can be mapped
# to raster indices, then look up one elevation value per point of gdf_concat.
try:
    with MemoryFile() as memfile:
        with memfile.open(driver='GTiff', height=mosaic.shape[1], width=mosaic.shape[2],
                          count=1, dtype=mosaic.dtype, transform=out_transform,
                          crs=src_files[0].crs) as dataset:
            dataset.write(mosaic[0], 1)

            # Ensure GeoDataFrame is in the same CRS as the dataset
            gdf_concat = gdf_concat.to_crs(dataset.crs)

            # Sample the raster at each point location in the GeoDataFrame
            elevation_values = []
            for point in gdf_concat.geometry:
                x, y = point.x, point.y
                # Raster indices get their own names so they do not shadow
                # anything else used in the loop
                px_row, px_col = dataset.index(x, y)

                value = None
                if 0 <= px_row < dataset.height and 0 <= px_col < dataset.width:
                    # The dataset was written from mosaic[0], so indexing the
                    # array directly gives the same value as a 1x1 windowed read
                    value = mosaic[0, px_row, px_col]
                    if dataset.nodata is not None and value == dataset.nodata:
                        value = None  # nodata cell: use the fallback search below
                if value is None:
                    # Point outside the raster or on a nodata cell
                    value = find_nearest_valid_elevation(dataset, x, y)

                elevation_values.append(value)

            # Add the elevation data to the GeoDataFrame
            gdf_concat['elevation'] = elevation_values

            # Print bounds and metadata if needed
            print('Bounds:', dataset.bounds)
            print('Metadata:', dataset.meta)
finally:
    # The source tiles are only needed for the merge and for the CRS above;
    # close them so their file handles are released.
    for src in src_files:
        src.close()

# Now 'gdf_concat' has an additional column 'elevation' with the elevation values



Bounds: BoundingBox(left=-30.000138888888888, bottom=29.999861111111116, right=59.999861111111116, top=69.99986111111112)
Metadata: {'driver': 'GTiff', 'dtype': 'int16', 'nodata': None, 'width': 43200, 'height': 19200, 'count': 1, 'crs': CRS.from_epsg(4326), 'transform': Affine(0.0020833333333333333, 0.0, -30.000138888888888,
       0.0, -0.0020833333333333333, 69.99986111111112)}


## 3.2 for each pixel (1x1 for left and 31x31 for right), calculate the average elevation

In [48]:
gdf_concat

Unnamed: 0,latitude,longitude,brightness,confidence,bright_t31,fwi,distance_km,date,geometry,ignited,land_cover,elevation
0,35.8073,-0.2538,310.2,79,277.6,6.847656,6.380693,2013-01-01,POINT (-0.25380 35.80730),True,50,16
1,47.8587,33.4466,308.0,63,272.6,0.214844,12.727889,2013-01-01,POINT (33.44660 47.85870),True,50,110
2,49.6728,18.6611,307.4,57,276.5,1.417969,10.702774,2013-01-01,POINT (18.66110 49.67280),True,50,319
3,37.3934,39.4902,303.1,55,287.0,3.445312,11.884926,2013-01-01,POINT (39.49020 37.39340),True,40,705
4,36.8729,6.9397,320.8,75,286.9,3.714844,14.682411,2013-01-01,POINT (6.93970 36.87290),True,50,2
...,...,...,...,...,...,...,...,...,...,...,...,...
159648,48.5000,-20.7500,303.1,47,288.5,,1511.020307,2013-10-25,POINT (-20.75000 48.50000),False,200,0
159649,42.7500,4.2500,307.0,70,288.1,,93.131421,2013-10-05,POINT (4.25000 42.75000),False,200,0
159650,54.7500,-20.2500,305.0,60,287.2,,2241.524404,2013-01-25,POINT (-20.25000 54.75000),False,200,0
159651,51.5000,-29.5000,309.9,79,287.5,,1948.420714,2013-08-17,POINT (-29.50000 51.50000),False,200,0


In [50]:
gdf_concat['longitude'].describe()

count    159653.000000
mean         12.993213
std          20.411212
min         -31.000000
25%          -4.000000
50%          17.724500
75%          31.260700
max          40.000000
Name: longitude, dtype: float64

In [32]:
# Save the updated GeoDataFrame (now including the land_cover and elevation
# columns) to `folder` in GeoPackage format.
gdf_concat.to_file(os.path.join(folder, 'ignited_eu_2013_gdf'), driver='GPKG')