In [None]:
import sys
import pandas as pd
#sys.path.append("//")
sys.path.append("../../micromet")
import micromet
#from micromet.volk import ffp_climatology as ffp
import micromet.volk as ffp
from micromet import AmerifluxDataProcessor

In [None]:
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import numpy as np

def multiply_geotiffs(input_a, input_b, output_path):
    """
    Multiply two single-band GeoTIFFs (A * B) on A's grid and save the product.

    Band 1 of raster B is reprojected and bilinearly resampled onto the CRS,
    transform and shape of raster A. The two bands are then multiplied cell by
    cell and the product is written to ``output_path`` using A's profile.

    Parameters
    ----------
    input_a : str or path-like
        Reference raster. Band 1 is read and its grid defines the output grid.
    input_b : str or path-like
        Raster whose band 1 is warped onto A's grid before multiplying.
    output_path : str or path-like
        Where the single-band product GeoTIFF is written.

    Returns
    -------
    numpy scalar
        Sum of the product over all valid cells.

    Notes
    -----
    A cell is treated as invalid when it is NoData in A, NoData in B, outside
    B's extent, or non-finite. Invalid cells are written as 0 (the output has
    no NoData value set) and therefore add nothing to the returned sum.
    The product is always floating point so that B is not truncated when A is
    an integer raster.
    """

    # --- Reference raster: everything needed from A is captured here ---
    with rasterio.open(input_a) as src_a:
        profile_out = src_a.profile.copy()
        data_a = src_a.read(1, masked=True)  # NoData cells of A arrive masked
        ref_crs = src_a.crs
        ref_transform = src_a.transform
        ref_shape = (src_a.height, src_a.width)

    # --- Warp B onto A's grid ---
    with rasterio.open(input_b) as src_b:
        # Float destination: keeps B's fractional values and lets NaN flag
        # cells that B does not cover or where B is NoData.
        work_dtype = np.result_type(data_a.dtype, np.dtype(src_b.dtypes[0]), np.float32)
        data_b_aligned = np.full(ref_shape, np.nan, dtype=work_dtype)

        reproject(
            source=rasterio.band(src_b, 1),
            destination=data_b_aligned,
            src_transform=src_b.transform,
            src_crs=src_b.crs,
            src_nodata=src_b.nodata,   # keep B's NoData out of the interpolation
            dst_transform=ref_transform,
            dst_crs=ref_crs,
            dst_nodata=np.nan,         # uncovered / NoData cells stay NaN
            resampling=Resampling.bilinear,
        )

    # --- Multiply; the masks of A and B are combined by the masked product ---
    data_b_masked = np.ma.masked_invalid(data_b_aligned)
    data_mult = np.ma.masked_invalid(data_a * data_b_masked)

    # --- Write the result on A's grid ---
    profile_out.update(dtype=str(data_mult.dtype), count=1, nodata=None)
    # Invalid cells are stored as 0 so they do not contribute to any later sum.
    product = data_mult.filled(0).astype(profile_out['dtype'])
    with rasterio.open(output_path, 'w', **profile_out) as dst:
        dst.write(product, 1)
    print(f"Output saved to: {output_path}")

    # Summing the array that was just written equals summing the file contents,
    # without re-reading the raster from disk.
    total_sum = product.sum()
    print("Sum of raster values:", total_sum)
    return total_sum



# Single-date check: weight one OpenET ensemble raster by one footprint raster.
# Raster A supplies the reference grid; raster B is warped onto it.
input_raster_A = "./output/usutw/2023-06-15_weighted.tif"
input_raster_B = "G:/My Drive/OpenET Exports/ensemble_et_2023_06_15_40011.tif"
output_raster = 'output_masked_resampled_3.tif'

tsum = multiply_geotiffs(input_raster_A, input_raster_B, output_raster)
print(tsum)




In [None]:
import os
import re
import rasterio
import numpy as np
import pathlib

def multiply_directories_rast(dir1=None, dir2=None, out_dir=None):
    """
    Multiply date-matched GeoTIFF pairs found in two directories.

    Files in `dir1` whose names end in "_weighted.tif" are paired with files in
    `dir2` whose names start with "ensemble_et_" by the date embedded in each
    filename. Every pair is passed to `multiply_geotiffs` and the product is
    written to `out_dir` as "weighted_ens_openet_<YYYY_MM_DD>.tif".

    Parameters
    ----------
    dir1 : pathlib.Path or str, optional
        Directory of "*_weighted.tif" rasters. The date may be written as
        "YYYY-MM-DD" or "YYYY_MM_DD". Defaults to "./output/usutw/".
    dir2 : pathlib.Path or str, optional
        Directory of "ensemble_et_*.tif" rasters with a "YYYY_MM_DD" date in
        the name. Defaults to "G:/My Drive/OpenET Exports/".
    out_dir : pathlib.Path or str, optional
        Directory for the product rasters; created (with parents) if missing.
        Defaults to "./output/usutw_mult/".

    Returns
    -------
    dict
        Maps each matched date (pandas.Timestamp) to the value returned by
        `multiply_geotiffs` for that date's pair.

    Notes
    -----
    - Files without a recognisable date, or without a partner in the other
      directory, are skipped silently.
    - Files are visited in sorted name order, so the result order is stable.
      If several `dir2` files share a date, the last one in that order is used.

    Example
    -------
    >>> from pathlib import Path
    >>> result = multiply_directories_rast(
    ...     dir1=Path("./output/usutw/"),
    ...     dir2=Path("G:/My Drive/OpenET Exports/"),
    ...     out_dir=Path("./output/usutw_mult/")
    ... )
    >>> print(result)  # Dictionary with dates and results of multiplication
    """

    # Apply the defaults, then coerce to Path so that plain strings work as
    # documented (.exists() / .glob() below need Path objects).
    dir1 = pathlib.Path("./output/usutw/" if dir1 is None else dir1)
    dir2 = pathlib.Path("G:/My Drive/OpenET Exports/" if dir2 is None else dir2)
    out_dir = pathlib.Path("./output/usutw_mult/" if out_dir is None else out_dir)

    if not out_dir.exists():
        # Create the directory (including any necessary parent directories)
        out_dir.mkdir(parents=True, exist_ok=True)
        print(f"Directory {out_dir} created.")
    else:
        print(f"Directory {out_dir} already exists.")

    # Date written as YYYY_MM_DD
    date_pattern = re.compile(r"\d{4}_\d{2}_\d{2}")

    # 1) Index the dir2 rasters by their date string
    date_to_file_dir2 = {}
    for filename in sorted(dir2.glob("ensemble_et_*.tif")):
        match = date_pattern.search(filename.stem)
        if match:
            date_to_file_dir2[match.group(0)] = filename

    tsum = {}

    # 2) Walk the dir1 rasters and multiply each one that has a dir2 partner
    for filename in sorted(dir1.glob("*_weighted.tif")):
        # Normalise "YYYY-MM-DD" to "YYYY_MM_DD" so both spellings match
        match = date_pattern.search(filename.stem.replace("-", "_"))
        if match is None:
            continue
        date_str = match.group(0)
        partner = date_to_file_dir2.get(date_str)
        if partner is None:
            continue
        date = pd.to_datetime(date_str, format="%Y_%m_%d")
        output_raster = out_dir / f'weighted_ens_openet_{date_str}.tif'
        tsum[date] = multiply_geotiffs(filename, partner, output_raster)
    return tsum



In [None]:
# Build the per-date footprint-weighted ET sums here so the cell works after
# Restart & Run All; otherwise `tsum` is only the scalar left over from the
# single-date check above.
# NOTE(review): uses the function's default directories — confirm they are
# the ones intended for this comparison.
tsum = multiply_directories_rast()
series = pd.Series(tsum)


station = "US-UTW"
metadata = micromet.load_configs(station,
                 config_path='../../station_config/',
                 secrets_path="../../secrets/config.ini")
df = micromet.fetch_and_preprocess_data(metadata["url"], station, startdate='2023-01-01')
s = df['et']
# Clamp negative ET to zero but leave gaps as NaN, so the dropna() and
# min_count below still see them as missing records.
s = s.clip(lower=0)
# Daily total; days with fewer than 20 valid records become NaN.
daily_stat_et = s.dropna().resample('D').sum(min_count=20)


In [None]:
# Align station totals and raster sums on their shared index, name the two
# columns while concatenating, and keep only rows where both are present.
combined = pd.concat(
    [daily_stat_et, series],
    axis=1,
    keys=['station_mm', 'eemetric_mm'],
).dropna()
combined

In [None]:
import matplotlib.pyplot as plt
# Station ET against raster-derived ET, with a dashed 1:1 reference line.
fig, ax = plt.subplots()
ax.scatter(combined['station_mm'], combined['eemetric_mm'])
ax.grid()
xy = np.arange(0,9,1)  # end points of the 1:1 line (0 to 8 mm)
ax.plot(xy, xy, color='red', linestyle='--')
ax.set_xlabel('Raw Station ET (mm)')
ax.set_ylabel('eeMetric ET (mm)')
ax.set_title(f"Wellington {combined.first_valid_index():%Y-%m-%d} to {combined.last_valid_index():%Y-%m-%d}")

In [None]:
# Removed a stray `pd.concat()` call: with no arguments it raises TypeError
# and stops Restart & Run All at this cell.

In [None]:
# Reload the same station from an earlier start date (2022-01-01).
# Note: this rebinds `df`, replacing the frame fetched from 2023-01-01 above.
station = "US-UTW"
metadata = micromet.load_configs(
    station,
    config_path='../../station_config/',
    secrets_path="../../secrets/config.ini",
)
df = micromet.fetch_and_preprocess_data(
    metadata["url"],
    station,
    startdate='2022-01-01',
)
# To inspect one day of the daily sums, append .loc['2023-06-15'] to the
# groupby(...).sum() expression in the next cell.

In [None]:
# Sum every column of the station frame into calendar-day bins.
daily_bins = pd.Grouper(freq='1D')
df.groupby(daily_bins).sum()