In [2]:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import os

# Load data

In [3]:
# Directory the merged table is read from (relative to this notebook).
save_path = '../../climada_petals/data/wildfire/output/'
merged_file = os.path.join(save_path, 'merged_eu_gdf')
merged_gdf = gpd.read_file(merged_file)

# Caveat on the stored 'distance' column: it is a flat Euclidean distance in
# degrees between the (latitude_left, longitude_left) and
# (latitude_right, longitude_right) points, so it ignores the Earth's curvature
# and is NOT in km. For the first row it equals
# np.sqrt((merged_gdf['latitude_left'][0] - merged_gdf['latitude_right'][0])**2
#         + (merged_gdf['longitude_left'][0] - merged_gdf['longitude_right'][0])**2)
merged_gdf

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,values,surface,latitude_right,longitude_right,fwi,distance,date,geometry
0,51.2415,23.0096,301.8,Terra,MODIS,49,281.3,5.4,D,60528,0.0,51.288034,22.916667,8.585938,0.103933,2000-11-01,POINT (23.00960 51.24150)
1,50.6740,25.4001,302.9,Terra,MODIS,0,287.9,5.9,D,62262,0.0,50.725974,25.416667,9.941406,0.054550,2000-11-01,POINT (25.40010 50.67400)
2,49.8929,29.2202,309.6,Terra,MODIS,69,286.2,18.2,D,64863,0.0,49.882883,29.166667,11.226562,0.054462,2000-11-01,POINT (29.22020 49.89290)
3,50.5311,25.5214,304.6,Terra,MODIS,59,288.1,6.7,D,63126,0.0,50.444944,25.416667,11.703125,0.135617,2000-11-01,POINT (25.52140 50.53110)
4,50.6798,24.3879,307.5,Terra,MODIS,66,285.1,8.6,D,62260,0.0,50.725974,24.583333,12.222656,0.200814,2000-11-01,POINT (24.38790 50.67980)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409775,42.3003,-6.5988,366.0,Aqua,MODIS,100,286.8,139.9,D,89861,0.0,42.295066,0.000000,0.910156,6.598802,2023-01-31,POINT (-6.59880 42.30030)
1409776,44.5971,20.9679,304.1,Terra,MODIS,59,271.2,31.1,N,82225,0.0,44.543308,21.000000,0.277344,0.062641,2023-01-31,POINT (20.96790 44.59710)
1409777,44.5938,20.9739,307.4,Terra,MODIS,71,271.3,36.4,N,82225,0.0,44.543308,21.000000,0.277344,0.056839,2023-01-31,POINT (20.97390 44.59380)
1409778,45.1455,9.9422,306.4,Terra,MODIS,68,274.2,14.6,N,80276,0.0,45.105369,10.125000,0.449219,0.187153,2023-01-31,POINT (9.94220 45.14550)


In [4]:
# Quick sanity check: show (number of rows, number of columns) of the loaded table.
print(merged_gdf.shape)

(1409780, 17)


In [5]:
from geopy.distance import great_circle

def calculate_distance(row):
    """Return the great-circle distance in km between the two points of one row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide 'latitude_left', 'longitude_left', 'latitude_right' and
        'longitude_right' in decimal degrees.

    Returns
    -------
    float
        Distance in kilometres as computed by ``geopy.distance.great_circle``.
    """
    coords_1 = (row['latitude_left'], row['longitude_left'])
    coords_2 = (row['latitude_right'], row['longitude_right'])
    return great_circle(coords_1, coords_2).kilometers


def _great_circle_km(lat1, lon1, lat2, lon2, radius_km=6371.009):
    """Vectorised great-circle distance in km between two sets of points.

    Uses the spherical atan2 formulation, which stays accurate for very short
    distances. The default radius is the mean Earth radius and is intended to
    match geopy's default for ``great_circle``, so results should agree with
    ``calculate_distance`` up to floating-point rounding.

    Unlike geopy, inputs are not validated: out-of-range latitudes are not
    rejected and NaN coordinates yield NaN instead of raising.

    Parameters
    ----------
    lat1, lon1, lat2, lon2 : array-like of float
        Coordinates in decimal degrees; shapes must be broadcastable.
    radius_km : float, optional
        Sphere radius in kilometres.

    Returns
    -------
    numpy.ndarray
        Distances in kilometres, one per input pair.
    """
    lat1, lon1, lat2, lon2 = (
        np.radians(np.asarray(values, dtype=float))
        for values in (lat1, lon1, lat2, lon2)
    )
    sin_lat1, cos_lat1 = np.sin(lat1), np.cos(lat1)
    sin_lat2, cos_lat2 = np.sin(lat2), np.cos(lat2)
    delta_lon = lon2 - lon1
    sin_dlon, cos_dlon = np.sin(delta_lon), np.cos(delta_lon)

    # Numerator / denominator of the central angle's tangent.
    numerator = np.sqrt(
        (cos_lat2 * sin_dlon) ** 2
        + (cos_lat1 * sin_lat2 - sin_lat1 * cos_lat2 * cos_dlon) ** 2
    )
    denominator = sin_lat1 * sin_lat2 + cos_lat1 * cos_lat2 * cos_dlon
    return radius_km * np.arctan2(numerator, denominator)


# Compute 'distance_km' for all rows in one vectorised pass. A row-wise
# ``merged_gdf.apply(calculate_distance, axis=1)`` gives the same values but
# builds a Series and several geopy objects per row, which is very slow for
# more than a million rows.
merged_gdf['distance_km'] = _great_circle_km(
    merged_gdf['latitude_left'],
    merged_gdf['longitude_left'],
    merged_gdf['latitude_right'],
    merged_gdf['longitude_right'],
)
merged_gdf


Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,values,surface,latitude_right,longitude_right,fwi,distance,date,geometry,distance_km
0,51.2415,23.0096,301.8,Terra,MODIS,49,281.3,5.4,D,60528,0.0,51.288034,22.916667,8.585938,0.103933,2000-11-01,POINT (23.00960 51.24150),8.281555
1,50.6740,25.4001,302.9,Terra,MODIS,0,287.9,5.9,D,62262,0.0,50.725974,25.416667,9.941406,0.054550,2000-11-01,POINT (25.40010 50.67400),5.895853
2,49.8929,29.2202,309.6,Terra,MODIS,69,286.2,18.2,D,64863,0.0,49.882883,29.166667,11.226562,0.054462,2000-11-01,POINT (29.22020 49.89290),3.993662
3,50.5311,25.5214,304.6,Terra,MODIS,59,288.1,6.7,D,63126,0.0,50.444944,25.416667,11.703125,0.135617,2000-11-01,POINT (25.52140 50.53110),12.111174
4,50.6798,24.3879,307.5,Terra,MODIS,66,285.1,8.6,D,62260,0.0,50.725974,24.583333,12.222656,0.200814,2000-11-01,POINT (24.38790 50.67980),14.689771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409775,42.3003,-6.5988,366.0,Aqua,MODIS,100,286.8,139.9,D,89861,0.0,42.295066,0.000000,0.910156,6.598802,2023-01-31,POINT (-6.59880 42.30030),542.591703
1409776,44.5971,20.9679,304.1,Terra,MODIS,59,271.2,31.1,N,82225,0.0,44.543308,21.000000,0.277344,0.062641,2023-01-31,POINT (20.96790 44.59710),6.499423
1409777,44.5938,20.9739,307.4,Terra,MODIS,71,271.3,36.4,N,82225,0.0,44.543308,21.000000,0.277344,0.056839,2023-01-31,POINT (20.97390 44.59380),5.983020
1409778,45.1455,9.9422,306.4,Terra,MODIS,68,274.2,14.6,N,80276,0.0,45.105369,10.125000,0.449219,0.187153,2023-01-31,POINT (9.94220 45.14550),15.019678


In [6]:
# Thresholds used to keep a row.
MAX_DISTANCE_KM = 31   # rows with distance_km above this are dropped
MIN_CONFIDENCE = 30    # rows with confidence below this are dropped

# Build one boolean mask per criterion, then apply them together.
has_fwi_and_brightness = merged_gdf[['fwi', 'brightness']].notna().all(axis=1)
is_close_enough = merged_gdf['distance_km'] <= MAX_DISTANCE_KM
is_confident = merged_gdf['confidence'] >= MIN_CONFIDENCE

# NOTE(review): in the displayed rows, detections with a negative longitude are
# paired with longitude_right == 0.0 (one of them ~542 km away). The distance
# cut removes the far ones - confirm the upstream matching handles western
# longitudes as intended.
filtered_gdf = merged_gdf[has_fwi_and_brightness & is_close_enough & is_confident]

filtered_gdf

Unnamed: 0,latitude_left,longitude_left,brightness,satellite,instrument,confidence,bright_t31,frp,daynight,values,surface,latitude_right,longitude_right,fwi,distance,date,geometry,distance_km
0,51.2415,23.0096,301.8,Terra,MODIS,49,281.3,5.4,D,60528,0.0,51.288034,22.916667,8.585938,0.103933,2000-11-01,POINT (23.00960 51.24150),8.281555
2,49.8929,29.2202,309.6,Terra,MODIS,69,286.2,18.2,D,64863,0.0,49.882883,29.166667,11.226562,0.054462,2000-11-01,POINT (29.22020 49.89290),3.993662
3,50.5311,25.5214,304.6,Terra,MODIS,59,288.1,6.7,D,63126,0.0,50.444944,25.416667,11.703125,0.135617,2000-11-01,POINT (25.52140 50.53110),12.111174
4,50.6798,24.3879,307.5,Terra,MODIS,66,285.1,8.6,D,62260,0.0,50.725974,24.583333,12.222656,0.200814,2000-11-01,POINT (24.38790 50.67980),14.689771
5,51.0532,25.4886,353.8,Terra,MODIS,97,290.0,76.4,D,61398,0.0,51.007004,25.416667,9.933594,0.085490,2000-11-01,POINT (25.48860 51.05320),7.189676
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1409763,35.8092,-0.2537,324.5,Aqua,MODIS,63,290.6,20.9,D,113617,0.0,35.831369,0.000000,0.332031,0.254667,2023-01-31,POINT (-0.25370 35.80920),23.006860
1409776,44.5971,20.9679,304.1,Terra,MODIS,59,271.2,31.1,N,82225,0.0,44.543308,21.000000,0.277344,0.062641,2023-01-31,POINT (20.96790 44.59710),6.499423
1409777,44.5938,20.9739,307.4,Terra,MODIS,71,271.3,36.4,N,82225,0.0,44.543308,21.000000,0.277344,0.056839,2023-01-31,POINT (20.97390 44.59380),5.983020
1409778,45.1455,9.9422,306.4,Terra,MODIS,68,274.2,14.6,N,80276,0.0,45.105369,10.125000,0.449219,0.187153,2023-01-31,POINT (9.94220 45.14550),15.019678
