In [None]:
import pandas as pd
import rasterio
from rasterio.sample import sample_gen
from rasterio.enums import Resampling
import numpy as np

In [None]:
# File locations used by extract_band_values().
# NOTE(review): these are absolute paths on one machine -- adjust before
# running the notebook anywhere else.

# Input table of point records; it must contain 'latitude' and 'longitude' columns.
csv_path = '/Users/yubin/Downloads/combined_file_with_metadata 1.csv'

# Raster that is sampled at every point (opened with rasterio).
raster_path = "/Users/yubin/Downloads/1991_2020/koppen_geiger_0p00833333.tif"

# Destination for the filtered table with the added 'Climate' column.
output_csv_path = '/Users/yubin/Downloads/combined_file_with_metadata_with climate_2.csv'

In [None]:
# Inclusive raster class-value ranges and the broad climate group each maps to.
# NOTE(review): the ranges presumably follow the Koppen-Geiger legend of the
# input raster -- confirm against the legend shipped with the data.
_CLIMATE_RANGES = (
    (1, 3, "Tropical"),
    (4, 7, "Arid"),
    (8, 16, "Temperate"),
    (17, 28, "Continental"),
    (29, 30, "Polar"),
)


def _climate_for_value(value):
    """Return the climate group whose range contains ``value``, else None."""
    if value is None or np.isnan(value):
        return None
    for low, high, climate in _CLIMATE_RANGES:
        if low <= value <= high:
            return climate
    return None


def _sample_first_band(src, coords):
    """Sample the first band of ``src`` at each (lon, lat) pair.

    Returns one entry per coordinate, in order. An entry is None when the
    sampled value equals the dataset's nodata value, is NaN, or when sampling
    that point raised (the error is printed and the remaining points are
    still processed).
    """
    nodata = src.nodata  # loop-invariant, read once
    values = []
    for coord in coords:
        try:
            # sample() yields one array of band values per point; keep band 1.
            value = list(src.sample([coord]))[0][0]
            if value == nodata or np.isnan(value):
                value = None
        except Exception as e:
            # Best effort: a failing point becomes a missing value.
            print(f"Error sampling raster at {coord}: {e}")
            value = None
        values.append(value)
    return values


def extract_band_values(raster_path, csv_path, output_csv_path):
    """Tag CSV points with the climate group sampled from a raster and save them.

    Steps:
      1. Read ``csv_path`` and require 'latitude' and 'longitude' columns.
      2. Drop rows with a missing latitude or longitude.
      3. Keep only rows whose coordinates lie within the raster bounds
         (inclusive on every edge).
      4. Sample the first raster band at each remaining point.
      5. Map each sampled value to a climate group and store it in a new
         'Climate' column.
      6. Drop rows without a climate group and write the rest to
         ``output_csv_path`` (no index column).

    Args:
        raster_path: Path of the raster, passed to ``rasterio.open``.
        csv_path: Path of the input CSV, passed to ``pandas.read_csv``.
        output_csv_path: Path the resulting CSV is written to.

    Returns:
        None. Progress is reported with ``print``. Nothing is written when no
        rows survive one of the filtering steps.

    Raises:
        Nothing. Any exception (including the ValueError for missing columns)
        is caught and reported as "An error occurred: ...".
    """
    try:
        # Step 1: Read the CSV file
        df = pd.read_csv(csv_path)
        print(f"Input CSV loaded: {df.shape[0]} rows, {df.shape[1]} columns")

        # Step 2: Check if required columns exist
        if 'latitude' not in df.columns or 'longitude' not in df.columns:
            raise ValueError("CSV file must contain 'latitude' and 'longitude' columns.")

        # Step 3: Drop rows where 'latitude' or 'longitude' are missing
        df = df.dropna(subset=['latitude', 'longitude'])
        print(f"After dropping missing latitude/longitude: {df.shape[0]} rows")

        if df.empty:
            print("No valid rows after filtering for latitude and longitude.")
            return

        # Step 4: Open the raster file
        with rasterio.open(raster_path) as src:
            print(f"Raster file loaded with bounds: {src.bounds}")

            # Step 5: Keep only coordinates within the raster bounds.
            # src.bounds unpacks as (left, bottom, right, top).
            lon_min, lat_min, lon_max, lat_max = src.bounds
            in_bounds = (
                df['longitude'].between(lon_min, lon_max)
                & df['latitude'].between(lat_min, lat_max)
            )
            # Explicit copy: a column is added below, and assigning to a
            # boolean-filtered slice triggers SettingWithCopyWarning.
            df = df[in_bounds].copy()
            print(f"After filtering for coordinates within raster bounds: {df.shape[0]} rows")

            if df.empty:
                print("No valid rows after filtering for raster bounds.")
                return

            # Step 6: Extract raster values at each point (x=lon, y=lat)
            coords = list(zip(df['longitude'], df['latitude']))
            band_values = _sample_first_band(src, coords)

        # Step 7: Map band values to climate categories
        df['Climate'] = [_climate_for_value(value) for value in band_values]

        # Step 8: Keep only rows that received a climate category
        valid_climate_rows = df['Climate'].notna()
        print(f"Rows with valid climate values: {valid_climate_rows.sum()} rows")
        df = df[valid_climate_rows]

        if df.empty:
            print("No valid climate data to save.")
            return

        # Step 9: Save updated CSV
        df.to_csv(output_csv_path, index=False)
        print(f"Updated CSV saved to: {output_csv_path}")

    except Exception as e:
        print(f"An error occurred: {e}")

# Run the extraction with the configured paths; progress is printed and the
# resulting table is written to output_csv_path.
extract_band_values(
    raster_path=raster_path,
    csv_path=csv_path,
    output_csv_path=output_csv_path,
)