# Vegetation Processer

In [12]:
import pyarrow.parquet as pq
import pandas as pd
from shapely.geometry import Point
import rasterio
import os
from tqdm import tqdm

# Path to the directory containing the Parquet files
HOME = os.path.expanduser('~')
parquet_dir = f"{HOME}/SWEMLv2.0/data/TrainingDFs/Northwest/300M_Resolution/Vegetation_Seasonality_PrecipVIIRSGeoObsDFs/20_fSCA_Thresh"
parquet_file = f"{HOME}/SWEMLv2.0/data/TrainingDFs/Northwest/300M_Resolution/Vegetation_Seasonality_PrecipVIIRSGeoObsDFs/20_fSCA_Thresh/Vegetation_Season_Precip_VIIRS_GeoObsDF_20160208.parquet"
geotiff_file = f"{HOME}/SWEMLv2.0/USA_NALCMS_landcover_2020_120m.tif"


In [13]:
# Function to get the values from GeoTIFF for a list of points
def get_values_from_geotiff(geotiff, lon_list, lat_list):
    with rasterio.open(geotiff) as src:
        values = []
        for lon, lat in tqdm(zip(lon_list, lat_list), total=len(lon_list), desc="Retrieving GeoTIFF values"):
            row, col = src.index(lon, lat)
            values.append(src.read(1)[row, col])
        return values

# Read Parquet file into DataFrame with progress bar
print("Reading Parquet file...")
with tqdm(total=1, desc="Reading Parquet file") as pbar:
    df = pq.read_table(parquet_file).to_pandas()
    pbar.update(1)

# Get GeoTIFF values at centroids
lon_list = df['cen_lon'].tolist()
lat_list = df['cen_lat'].tolist()

# Retrieving GeoTIFF values with progress bar
df['vegetation_class'] = get_values_from_geotiff(geotiff_file, lon_list, lat_list)

# Save updated DataFrame to new Parquet file with progress bar
updated_parquet_file = f"updated_{os.path.basename(parquet_file)}"
print("Writing updated Parquet file...")
with tqdm(total=1, desc="Writing updated Parquet file") as pbar:
    table = pq.Table.from_pandas(df)
    pq.write_table(table, updated_parquet_file)
    pbar.update(1)

print(f"Processed and saved: {updated_parquet_file}")
print("Processing complete.")


Reading Parquet file...


Reading Parquet file: 100%|██████████| 1/1 [00:00<00:00, 18.06it/s]
Retrieving GeoTIFF values:   0%|          | 216/85185 [01:30<9:54:27,  2.38it/s]


KeyboardInterrupt: 