In [1]:
# started 9-24-2025

In [35]:
import pandas as pd
import xarray as xr
import glob
import time
from tqdm import tqdm
from pathlib import Path


In [36]:
fires = pd.read_csv("../attempt 2/pt2_full_fires_socal.csv") 

In [37]:
fires.head()

Unnamed: 0,OBJECTID,FIRE_YEAR,DISCOVERY_DATE,FIRE_SIZE,STAT_CAUSE_DESCR,LATITUDE,LONGITUDE,OBJECTID.1,temp_max_F,humidity_pct,precip_in,windspeed_mph
0,110,2005,2453540.5,10.0,Equipment Use,33.718889,-117.433611,110,73.04,89,0.062992,6.028589
1,155,2005,2453411.5,3.0,Debris Burning,34.748333,-119.410278,155,58.46,79,0.0,4.536979
2,178,2005,2453544.5,4.2,Equipment Use,34.466667,-119.828333,178,77.54,76,0.0,15.972654
3,1053,2005,2453559.5,3.0,Miscellaneous,34.479444,-118.768611,1053,82.76,86,0.0,6.028589
4,1282,2005,2453582.5,2.0,Lightning,33.110833,-116.847222,1282,87.62,88,0.0,8.763207


In [38]:
fires.shape

(2926, 12)

In [39]:
fires['time'] = pd.to_datetime(fires['DISCOVERY_DATE'])

In [40]:
ndvi_files = sorted(glob.glob("../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502/*.nc4"))

In [41]:
ndvi_files

['../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2000_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2000_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2001_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2001_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2002_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2002_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2003_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2003_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2004_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2004_0

In [48]:
def parse_ndvi_file_dates(ndvi_files):
    file_dates = []
    for f in ndvi_files:
        name = Path(f).name  # get filename safely
        parts = name.split("_")
        year = int(parts[-2])
        
        # Remove file extension before extracting day
        day_str = parts[-1].split(".")[0]  # '0106.nc4' -> '0106'
        month = int(day_str[:2])
        day = int(day_str[2:])
        
        start_date = pd.Timestamp(year=year, month=month, day=day)
        file_dates.append((start_date, f))
    
    # Sort by date just in case
    file_dates.sort(key=lambda x: x[0])
    return file_dates

In [49]:
def pick_file_for_date(fire_date, ndvi_file_dates):
    """
    fire_date: pd.Timestamp
    ndvi_file_dates: output of parse_ndvi_file_dates(ndvi_files)
    """
    fire_date = pd.Timestamp(fire_date)
    
    # iterate through files in order, pick the last file whose start_date <= fire_date
    chosen_file = ndvi_file_dates[0][1]  # default first file
    for start_date, f in ndvi_file_dates:
        if fire_date >= start_date:
            chosen_file = f
        else:
            break
    return chosen_file

In [54]:
def get_ndvi_for_point(lat, lon, date, file):
    ds = xr.open_dataset(file)
    
    # find nearest time
    time_index = ds['time'].sel(time=date, method="nearest")
    
    # get nearest lat/lon grid point
    ndvi_value = ds['ndvi'].sel(lat=lat, lon=lon, time=time_index, method="nearest").values.item()
    
    ds.close()  # free memory
    return ndvi_value

In [51]:
ndvi_file_dates = parse_ndvi_file_dates(ndvi_files)

In [55]:
ndvi_values = []

for _, row in tqdm(fires.iterrows(), total=len(fires)):
    # pick the NDVI file for this fire event
    ndvi_file = pick_file_for_date(row['time'], ndvi_file_dates)
    
    # get NDVI value
    ndvi = get_ndvi_for_point(
        row['LATITUDE'], row['LONGITUDE'], row['time'], ndvi_file
    )
    ndvi_values.append(ndvi)


100%|██████████| 2926/2926 [00:27<00:00, 108.32it/s]


In [56]:
# Add NDVI values to the dataframe
fires['ndvi'] = ndvi_values

In [57]:
fires

Unnamed: 0,OBJECTID,FIRE_YEAR,DISCOVERY_DATE,FIRE_SIZE,STAT_CAUSE_DESCR,LATITUDE,LONGITUDE,OBJECTID.1,temp_max_F,humidity_pct,precip_in,windspeed_mph,time,ndvi
0,110,2005,2453540.5,10.0,Equipment Use,33.718889,-117.433611,110,73.04,89,0.062992,6.028589,1970-01-01 00:00:00.002453540,5016.0
1,155,2005,2453411.5,3.0,Debris Burning,34.748333,-119.410278,155,58.46,79,0.000000,4.536979,1970-01-01 00:00:00.002453411,3357.0
2,178,2005,2453544.5,4.2,Equipment Use,34.466667,-119.828333,178,77.54,76,0.000000,15.972654,1970-01-01 00:00:00.002453544,4356.0
3,1053,2005,2453559.5,3.0,Miscellaneous,34.479444,-118.768611,1053,82.76,86,0.000000,6.028589,1970-01-01 00:00:00.002453559,3124.0
4,1282,2005,2453582.5,2.0,Lightning,33.110833,-116.847222,1282,87.62,88,0.000000,8.763207,1970-01-01 00:00:00.002453582,4180.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2921,1880367,2009,2455012.5,2.0,Equipment Use,33.297738,-117.003073,1880367,85.64,78,0.000000,7.271597,1970-01-01 00:00:00.002455012,5071.0
2922,1880400,2009,2454954.5,73.0,Missing/Undefined,33.892120,-117.079065,1880400,65.84,91,0.000000,6.774394,1970-01-01 00:00:00.002454954,2988.0
2923,1880420,2010,2455345.5,50.0,Missing/Undefined,32.906591,-116.942448,1880420,66.56,90,0.015748,11.249223,1970-01-01 00:00:00.002455345,3081.0
2924,1880428,2013,2456402.5,4.5,Children,34.633333,-118.966666,1880428,65.84,44,0.000000,7.147296,1970-01-01 00:00:00.002456402,4854.0


In [14]:
old_fires = pd.read_csv("../attempt 2/pt2_full_fires_socal.csv") 

In [15]:
old_fires

Unnamed: 0,OBJECTID,FIRE_YEAR,DISCOVERY_DATE,FIRE_SIZE,STAT_CAUSE_DESCR,LATITUDE,LONGITUDE,OBJECTID.1,temp_max_F,humidity_pct,precip_in,windspeed_mph
0,110,2005,2453540.5,10.0,Equipment Use,33.718889,-117.433611,110,73.04,89,0.062992,6.028589
1,155,2005,2453411.5,3.0,Debris Burning,34.748333,-119.410278,155,58.46,79,0.000000,4.536979
2,178,2005,2453544.5,4.2,Equipment Use,34.466667,-119.828333,178,77.54,76,0.000000,15.972654
3,1053,2005,2453559.5,3.0,Miscellaneous,34.479444,-118.768611,1053,82.76,86,0.000000,6.028589
4,1282,2005,2453582.5,2.0,Lightning,33.110833,-116.847222,1282,87.62,88,0.000000,8.763207
...,...,...,...,...,...,...,...,...,...,...,...,...
2921,1880367,2009,2455012.5,2.0,Equipment Use,33.297738,-117.003073,1880367,85.64,78,0.000000,7.271597
2922,1880400,2009,2454954.5,73.0,Missing/Undefined,33.892120,-117.079065,1880400,65.84,91,0.000000,6.774394
2923,1880420,2010,2455345.5,50.0,Missing/Undefined,32.906591,-116.942448,1880420,66.56,90,0.015748,11.249223
2924,1880428,2013,2456402.5,4.5,Children,34.633333,-118.966666,1880428,65.84,44,0.000000,7.147296


In [16]:
ndvi_files = sorted(glob.glob("../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502/*.nc4"))

In [17]:
ndvi_files

['../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2000_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2000_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2001_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2001_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2002_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2002_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2003_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2003_0712.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2004_0106.nc4',
 '../ndvi data/Global_Veg_Greenness_GIMMS_3G_2187_1-20250924_160502\\ndvi3g_geo_v1_1_2004_0

In [18]:
def get_ndvi_for_point(lat, lon, date, file):
    ds = xr.open_dataset(file)
    
    # find nearest time
    time_index = ds['time'].sel(time=date, method="nearest")
    
    # get nearest lat/lon grid point
    ndvi_value = ds['ndvi'].sel(lat=lat, lon=lon, time=time_index, method="nearest").values.item()
    
    ds.close()  # free memory
    return ndvi_value

In [19]:
ndvi_values = []
for _, row in tqdm(old_fires.iterrows(), total=len(old_fires)):
    # choose the right file based on row['time']
    ndvi_file = pick_file_for_date(row['time'], ndvi_files)
    
    ndvi = get_ndvi_for_point(
        row['latitude'], row['longitude'], row['time'], ndvi_file
    )
    ndvi_values.append(ndvi)

old_fires['ndvi'] = ndvi_values

  0%|          | 0/2926 [00:00<?, ?it/s]


NameError: name 'pick_file_for_date' is not defined