In [1]:
import rasterio
import pandas as pd
from datetime import datetime, timedelta

In [2]:

# NOTE: despite the "era5_" prefix on these names, the file opened here is the
# VIIRS fire time stack. The names are kept because a later cell reads `era5_array`.
era5_path = r"C:\Users\Ankit\Datasets_Forest_fire\VIIRS_fire_time_stack1.tif"
with rasterio.open(era5_path) as viirs_src:
    # Only the first band of the stack is loaded.
    first_band = 1
    era5_array = viirs_src.read(first_band)

In [9]:
import numpy as np
def check_normalization(arr, name="array"):
    """Print NaN-aware summary statistics of ``arr``.

    Reports the minimum, maximum, mean and standard deviation (NaN entries are
    ignored by every statistic), followed by a 30-character separator line.

    Parameters
    ----------
    arr : array-like
        Values to summarize.
    name : str, optional
        Label printed in the header line.
    """
    # (label, reducer) pairs, in the order they are reported.
    reducers = (
        ("  min:", np.nanmin),
        ("  max:", np.nanmax),
        ("  mean:", np.nanmean),
        ("  std:", np.nanstd),
    )

    print(f"{name} stats:")
    for label, reducer in reducers:
        print(label, reducer(arr))
    print("-" * 30)

# Example: print summary statistics for the array loaded into `era5_array`.
# NOTE(review): `era5_array` was read from VIIRS_fire_time_stack1.tif (band 1),
# so the "ERA5" label looks like a leftover — confirm which dataset was intended.
check_normalization(era5_array, "ERA5")

ERA5 stats:
  min: 0
  max: 0
  mean: 0.0
  std: 0.0
------------------------------


In [6]:
import os 

In [12]:
# Raw (un-normalized) inputs, already resampled to the VIIRS grid.
_ERA5_RAW_DIR = r"C:\Users\Ankit\Datasets_Forest_fire\ERA5_resampled_to_VIIRS"

# One multi-band stack per ERA5 variable, keyed by the variable's short name.
ERA5_FILES = {
    var: rf"{_ERA5_RAW_DIR}\ERA5_{var}_2015_2016_stack_toVIIRS.tif"
    for var in ("t2m", "d2m", "tp", "u10", "v10")
}
DEM_FILE = r"C:\Users\Ankit\Datasets_Forest_fire\DEM_resampled_to_VIIRS1.tif"


In [13]:
# Folder that receives the normalized rasters.
OUT_DIR = r"C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs"
# exist_ok=True makes this cell safe to re-run when the folder already exists.
os.makedirs(OUT_DIR, exist_ok=True)

In [1]:
# `profile` holds the raster's metadata (CRS, height, width, transform, ...).
# The output file is created from the same metadata by passing it as **profile.

In [14]:
def normalize_raster(in_file, out_file):
    """Min-max scale a raster to [0, 1] and save the result as float32.

    One global minimum and maximum are computed across ALL bands, so every band
    is scaled with the same constants. If the value range is (near) zero, the
    output is filled with 0.0 instead of dividing by zero.

    Pixels equal to the source's declared nodata value are excluded from the
    min/max computation and are written as NaN; the output then declares NaN as
    its nodata value. NaN pixels in the input stay NaN in the output.

    Parameters
    ----------
    in_file : str
        Path of the raster to read.
    out_file : str
        Path of the normalized raster to write. It reuses the source metadata
        (CRS, transform, size, band count) except for dtype and nodata.
    """
    with rasterio.open(in_file) as src:
        # Copy so the adjustments below stay local to this function.
        profile = src.profile.copy()
        nodata = src.nodata
        data = src.read().astype(np.float32)  # shape = (bands, height, width)

    # Sentinel nodata values (e.g. -9999) would otherwise dominate the min/max
    # and squash every valid pixel into a sliver of [0, 1].
    nodata_mask = None
    if nodata is not None and not np.isnan(nodata):
        nodata_mask = data == np.float32(nodata)
        data[nodata_mask] = np.nan

    # Compute min and max across ALL bands, ignoring NaN / nodata pixels
    min_val = np.nanmin(data)
    max_val = np.nanmax(data)

    # Avoid divide by zero
    if max_val - min_val > 1e-6:
        # In-place arithmetic avoids extra full-size copies of large stacks.
        data -= min_val
        data /= (max_val - min_val)
    else:
        data[:] = 0.0

    # The scaled values are fractional, so the output must be float32 no matter
    # what dtype the source raster used.
    profile.update(dtype="float32")
    if nodata_mask is not None:
        # Keep nodata pixels flagged as missing rather than as scaled values.
        data[nodata_mask] = np.nan
        profile.update(nodata=float("nan"))

    # Save normalized raster
    with rasterio.open(out_file, 'w', **profile) as dst:
        dst.write(data)

    print(f"✅ Normalized saved: {out_file}")

In [15]:
# Normalize every ERA5 stack: "<name>.tif" is written to OUT_DIR as "<name>_norm.tif".
# NOTE(review): ERA5_FILES is reassigned further down to the *normalized* paths, so
# re-running this cell after that one would normalize the outputs again — confirm
# the intended run order.
for path in ERA5_FILES.values():
    # Split off the extension so only the suffix changes, even if ".tif" also
    # appears elsewhere in the file name.
    stem, ext = os.path.splitext(os.path.basename(path))
    out_path = os.path.join(OUT_DIR, f"{stem}_norm{ext}")
    normalize_raster(path, out_path)

✅ Normalized saved: C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs\ERA5_t2m_2015_2016_stack_toVIIRS_norm.tif
✅ Normalized saved: C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs\ERA5_d2m_2015_2016_stack_toVIIRS_norm.tif
✅ Normalized saved: C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs\ERA5_tp_2015_2016_stack_toVIIRS_norm.tif
✅ Normalized saved: C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs\ERA5_u10_2015_2016_stack_toVIIRS_norm.tif
✅ Normalized saved: C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs\ERA5_v10_2015_2016_stack_toVIIRS_norm.tif


In [16]:
# Normalize the DEM raster as well and write it to OUT_DIR as DEM_norm.tif.
dem_out = os.path.join(OUT_DIR, "DEM_norm.tif")
normalize_raster(DEM_FILE, dem_out)

✅ Normalized saved: C:\Users\Ankit\Datasets_Forest_fire\normalized_inputs\DEM_norm.tif


In [25]:
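# The cells below build the sequence "recipe": an index listing, for each sample, the input/target band indices and the source files to read.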

In [4]:
# ---- Input files ----
# NOTE: ERA5_FILES and DEM_FILE are reassigned here and now point at the
# normalized rasters rather than the raw ones defined earlier in the notebook.
_DATA_ROOT = r"C:\Users\Ankit\Datasets_Forest_fire"
_NORM_DIR = rf"{_DATA_ROOT}\normalized_inputs"
_LULC_DIR = rf"{_DATA_ROOT}\LULC_MAPS"

# One normalized multi-band stack per ERA5 variable, keyed by short name.
ERA5_FILES = {
    var: rf"{_NORM_DIR}\ERA5_{var}_2015_2016_stack_toVIIRS_norm.tif"
    for var in ("t2m", "d2m", "tp", "u10", "v10")
}
VIIRS_FILE = rf"{_DATA_ROOT}\VIIRS_fire_time_stack1.tif"
DEM_FILE = rf"{_NORM_DIR}\DEM_norm.tif"
LULC_2015 = rf"{_LULC_DIR}\LULC_2015_resampled_to_VIIRS_onehot.tif"
LULC_2016 = rf"{_LULC_DIR}\LULC_2016_resampled_to_VIIRS_onehot.tif"

In [6]:
# ---- Config ----
# These settings drive the index-building cell below.
START_TIME  = datetime(2015, 1, 1, 0)   # reference time: index i is stamped START_TIME + i hours
SEQ_LEN     = 6                         # number of consecutive band indices in each input sequence
HORIZONS    = [1, 2, 3]                 # target offsets (in band steps) ahead of the center index
# Destination of the generated sequence index.
OUT_CSV     = r"C:\Users\Ankit\Datasets_Forest_fire\sequence_index_hourly_binary.csv"

In [7]:
# The t2m stack is used to discover how many timesteps (bands) are available.
with rasterio.open(ERA5_FILES["t2m"]) as era5_src:
    n_bands = era5_src.count
    height, width = era5_src.height, era5_src.width

print(f"ERA5 variables cover {n_bands} timesteps")

# File columns that are identical for every row of the index.
shared_files = {
    "era5_t2m_file": ERA5_FILES["t2m"],
    "era5_d2m_file": ERA5_FILES["d2m"],
    "era5_tp_file":  ERA5_FILES["tp"],
    "era5_u10_file": ERA5_FILES["u10"],
    "era5_v10_file": ERA5_FILES["v10"],
    "viirs_file": VIIRS_FILE,
    "dem_file": DEM_FILE,
}

# Leave room for the furthest target after the last center index.
max_horizon = max(HORIZONS)

# NOTE(review): center_time treats index i as START_TIME + i hours, while rasterio
# band numbers start at 1 — confirm which index base the consumer of this table
# expects for seq_band_idxs / target_band_idxs.
rows = []
for center_idx in range(SEQ_LEN, n_bands - max_horizon):
    center_time = START_TIME + timedelta(hours=center_idx)
    first_seq_idx = center_idx - SEQ_LEN + 1

    row = {
        "center_time": center_time,
        # SEQ_LEN consecutive indices ending at (and including) the center.
        "seq_band_idxs": list(range(first_seq_idx, center_idx + 1)),
        # One target index per forecast horizon.
        "target_band_idxs": [center_idx + h for h in HORIZONS],
    }
    row.update(shared_files)
    # Land-cover map is chosen by calendar year of the center time.
    row["lulc_file"] = LULC_2015 if center_time.year == 2015 else LULC_2016
    rows.append(row)

ERA5 variables cover 17544 timesteps


In [8]:
# Turn the collected rows into a table and persist it without the index column.
df = pd.DataFrame(rows)
df.to_csv(OUT_CSV, index=False)

# Report how many sequences were written and the time span they cover.
print(f"Saved {len(df)} sequences to {OUT_CSV}")
center_times = df["center_time"]
print(f"Time range: {center_times.min()} → {center_times.max()}")

Saved 17535 sequences to C:\Users\Ankit\Datasets_Forest_fire\sequence_index_hourly_binary.csv
Time range: 2015-01-01 06:00:00 → 2016-12-31 20:00:00
