In [2]:
import pandas as pd
import rasterio
from rasterio.sample import sample_gen
from rasterio.enums import Resampling
import numpy as np

# Input: gauge table holding one 'latitude'/'longitude' pair per row.
csv_path = "/Users/yubinbaaniya/Documents/WORLD BIAS/saber workdir/gauge_table_2nd_iteration_deDuplicated cleaned.csv"
# Input: classified climate raster that is sampled at every gauge location.
raster_path = "/Users/yubinbaaniya/Downloads/koppen_geiger_tif (1)/1991_2020/koppen_geiger_0p00833333.tif"
# Output: copy of the gauge table with the extra 'Climate' column.
output_csv_path = "/Users/yubinbaaniya/Documents/WORLD BIAS/saber workdir/gauge_table_2nd_iteration_deDuplicated_cleaned_with_climate.csv"

def extract_band_values(raster_path, csv_path, output_csv_path):
    """Add a 'Climate' column to a point table by sampling a classified raster.

    For every row of the CSV, the first band of the raster is sampled at the
    row's ('longitude', 'latitude') position. The sampled class id is then
    translated to a broad climate group and the table, including the new
    'Climate' column, is written to ``output_csv_path``.

    Points that fall on a nodata/NaN cell, whose class id is outside the
    mapped ranges, or that cannot be sampled at all get an empty 'Climate'
    value. Sampling failures are reported once via ``warnings.warn`` instead
    of being dropped silently.

    Args:
        raster_path: Path of the raster file opened with ``rasterio.open``.
        csv_path: Path of the input CSV; must contain 'latitude' and
            'longitude' columns.
        output_csv_path: Path the augmented CSV is written to (no index).

    Raises:
        ValueError: If the CSV lacks a 'latitude' or 'longitude' column.
    """
    import warnings

    df = pd.read_csv(csv_path)

    if 'latitude' not in df.columns or 'longitude' not in df.columns:
        raise ValueError("CSV file must contain 'latitude' and 'longitude' columns.")

    # Inclusive class-id ranges -> climate group.
    # NOTE(review): presumably the Koppen-Geiger legend shipped with the
    # raster (1-30) -- confirm against the dataset's legend file.
    climate_mapping = {
        (1, 3): "Tropical",
        (4, 7): "Arid",
        (8, 16): "Temperate",
        (17, 28): "Continental",
        (29, 30): "Polar"
    }

    def map_climate(value):
        """Return the climate group for a class id, or None if unmapped."""
        if value is None or np.isnan(value):
            return None
        for (low, high), climate in climate_mapping.items():
            if low <= value <= high:
                return climate
        return None

    band_values = []
    failed_points = 0
    first_error = None

    with rasterio.open(raster_path) as src:
        nodata = src.nodata

        # Raster sampling expects (x, y), i.e. (longitude, latitude).
        for coord in zip(df['longitude'], df['latitude']):
            try:
                # One point in -> one array out; [0][0] is band 1 of that point.
                value = list(src.sample([coord]))[0][0]
            except Exception as exc:
                # Best effort per point: one bad coordinate must not abort
                # the whole table, but the failure is counted and reported.
                failed_points += 1
                if first_error is None:
                    first_error = exc
                band_values.append(None)
                continue

            # sample() already returns the value of the cell containing the
            # point and has no resampling option, so a nodata hit cannot be
            # "re-sampled"; it is simply recorded as missing.
            is_missing = (nodata is not None and value == nodata) or np.isnan(value)
            band_values.append(None if is_missing else value)

    if failed_points:
        warnings.warn(
            f"{failed_points} point(s) could not be sampled and were left "
            f"without a climate class (first error: {first_error!r})"
        )

    df['Climate'] = [map_climate(value) for value in band_values]

    df.to_csv(output_csv_path, index=False)
    print(f"Updated CSV saved to: {output_csv_path}")

# Run the extraction for the configured raster and gauge table.
extract_band_values(
    raster_path=raster_path,
    csv_path=csv_path,
    output_csv_path=output_csv_path,
)



Updated CSV saved to: /Users/yubinbaaniya/Documents/WORLD BIAS/saber workdir/gauge_table_2nd_iteration_deDuplicated_cleaned_with_climate.csv
