In [1]:
from datetime import datetime
from pathlib import Path

import cdsapi
import numpy as np
import pandas as pd
import xarray as xr

# ---------------------------
# 1. Download ERA5-Land data
# ---------------------------
# Seven years of hourly fields (about 61,000) is far more than the CDS accepts
# in one ERA5-Land request, so the period is fetched one month at a time and
# the monthly files are merged into the single file the rest of the script
# reads. Files already on disk are reused, so re-running the cell does not
# download anything twice.
c = cdsapi.Client()

TARGET_FILE = Path("era5_land_allahabad_2019_2025.nc")
CHUNK_DIR = Path("era5_land_allahabad_chunks")

if not TARGET_FILE.exists():
    CHUNK_DIR.mkdir(parents=True, exist_ok=True)

    chunk_paths = []
    for year in range(2019, 2026):
        for month in range(1, 13):
            chunk_path = CHUNK_DIR / f"era5_land_allahabad_{year}_{month:02d}.nc"
            chunk_paths.append(chunk_path)
            if chunk_path.exists():
                continue

            # Download under a temporary name and rename on success, so an
            # interrupted transfer is never mistaken for a complete month.
            partial_path = chunk_path.with_name(chunk_path.name + ".part")
            c.retrieve(
                "reanalysis-era5-land",
                {
                    "data_format": "netcdf",
                    # Without this, ERA5-Land results are delivered as a zip
                    # archive even though the target name ends in ".nc".
                    "download_format": "unarchived",
                    "variable": ["total_precipitation"],
                    "year": [str(year)],
                    "month": [f"{month:02d}"],
                    "day": [f"{d:02d}" for d in range(1, 32)],
                    "time": [f"{h:02d}:00" for h in range(24)],
                    "area": [26.33, 80.85, 24.54, 82.85],
                    # North, West, South, East
                },
                str(partial_path),
            )
            partial_path.replace(chunk_path)

    # Concatenate the monthly files along the time axis into the target file.
    chunks = [xr.open_dataset(path) for path in chunk_paths]
    try:
        # The time axis is called "valid_time" in files from the current CDS
        # and "time" in older ones.
        time_dim = "valid_time" if "valid_time" in chunks[0].dims else "time"
        merged_partial = TARGET_FILE.with_name(TARGET_FILE.name + ".part")
        xr.concat(chunks, dim=time_dim).to_netcdf(merged_partial)
    finally:
        for chunk in chunks:
            chunk.close()
    merged_partial.replace(TARGET_FILE)

# ---------------------------
# 2. Load and preprocess data
# ---------------------------
ds = xr.open_dataset("era5_land_allahabad_2019_2025.nc")

# Files from the current CDS name the time axis "valid_time"; older files use
# "time". Normalise to "time" so the rest of the script can rely on it.
if "valid_time" in ds.dims:
    ds = ds.rename({"valid_time": "time"})

# CDS netCDF files store total precipitation under its short name "tp";
# fall back to the long name for files that use it.
tp_name = "tp" if "tp" in ds.data_vars else "total_precipitation"

# Convert to mm (ERA5-Land reports total precipitation in metres)
ds["precip_mm"] = ds[tp_name] * 1000.0

# Daily total
# ERA5-Land precipitation is NOT an hourly amount: each value is the
# accumulation since 00 UTC, and the value stamped 00:00 UTC holds the full
# total of the day that just ended. Summing the 24 hourly values would count
# the same rain many times, so take the 00:00 values and label each with the
# previous calendar day.
# NOTE: days are therefore UTC days, not local (IST) days.
hourly_mm = ds["precip_mm"]
ds_daily = hourly_mm.sel(time=hourly_mm["time"].dt.hour == 0)
ds_daily = ds_daily.assign_coords(
    time=ds_daily["time"].values - np.timedelta64(1, "D")
)

# The first 00:00 value belongs to the day before the downloaded period;
# drop it so the output starts on the first downloaded day. The last
# downloaded day has no total because its closing 00:00 value is in the
# next day, outside the file.
first_day = hourly_mm["time"].values.min().astype("datetime64[D]")
ds_daily = ds_daily.sel(time=ds_daily["time"] >= first_day)

# ---------------------------
# 3. Convert to table format
# ---------------------------
# One row per (time, latitude, longitude) combination.
df = ds_daily.to_dataframe().reset_index()

# Add grid indices (i,j)
# i numbers the distinct latitudes from the largest value down (ERA5 stores
# latitude descending); j numbers the distinct longitudes from the smallest up.
lat_vals = np.sort(df["latitude"].unique())
lat_vals = lat_vals[::-1]
lon_vals = np.sort(df["longitude"].unique())

lat_index = dict(zip(lat_vals, range(len(lat_vals))))
lon_index = dict(zip(lon_vals, range(len(lon_vals))))

df = df.assign(
    i=df["latitude"].map(lat_index),
    j=df["longitude"].map(lon_index),
)

# Keep only the output columns, in their final order, and expose the time
# column under the name "date".
column_order = ["time", "i", "j", "latitude", "longitude", "precip_mm"]
df = df[column_order].rename(columns={"time": "date"})

# ---------------------------
# 4. Save to Parquet
# ---------------------------
# Write the table without the DataFrame index, then report its dimensions.
parquet_path = "allahabad_precip_era5land_2019_2025.parquet"
df.to_parquet(parquet_path, index=False)

print("Saved parquet file with shape:", df.shape)


ModuleNotFoundError: No module named 'xarray'