In [1]:
import xarray as xr
import rioxarray  # this registers the .rio accessor on xarray objects
from datetime import datetime

# Define file paths for input and output
input_file = '/Users/thowe/MinersAI Dropbox/Tyler Howe/KoboldData/5m/5m_output.nc'   # Path to your original NetCDF file
output_file = '/Users/thowe/MinersAI Dropbox/Tyler Howe/KoboldData/5m/5m_output_metadata.nc' # Path to save the new file with updated metadata


In [None]:

# Open the dataset using xarray
ds = xr.open_dataset(input_file)

# --------------------------------------------------------------------------------
# 1. Ensure the Dataset has a CRS defined
# --------------------------------------------------------------------------------
# Check if the dataset is already aware of its CRS. If not, assume it's in EPSG:4326 and define it.
if not ds.rio.crs:
    ds = ds.rio.write_crs("EPSG:4326")
    print("No CRS found in the dataset; setting default to EPSG:4326.")

# --------------------------------------------------------------------------------
# 2. Update Global Metadata Attributes
# --------------------------------------------------------------------------------
# Here we add or update the global attributes with the required metadata.
ds.attrs["author"] = "Tyler Howe"
ds.attrs["company"] = "MinersAI"
ds.attrs["license"] = "CCBY 4.0 (Original data is shared under CCBY 4.0 at the time of release)"
ds.attrs["date_minted"] = datetime.utcnow().isoformat() + "Z"  # ISO formatted UTC date/time.
ds.attrs["country"] = "Zambia"
ds.attrs["attribution"] = "Original data provided by KoBold Metals" 
# Note: The following two attributes are for the UTM projection metadata.
ds.attrs["crs"] = "EPSG:32735"  # This informs users of the new CRS (UTM Zone 35S).
ds.attrs["utm_zone"] = "35S"

# --------------------------------------------------------------------------------
# 3. Add Units to All Layers (Data Variables)
# --------------------------------------------------------------------------------
# Iterate through each data variable and add a "units" attribute if it isn’t already specified.
#for var in ds.data_vars:
#    if "units" not in ds[var].attrs:
#        ds[var].attrs["units"] = "your_unit_here"  # Replace with actual units if known.
#        print(f"Added default units to variable: {var}")

# --------------------------------------------------------------------------------
# 4. Reproject the Dataset to UTM Zone 35S
# --------------------------------------------------------------------------------
# UTM Zone 35S is typically represented by EPSG:32735 when using WGS84 in the southern hemisphere.
target_crs = "EPSG:32735"
ds_reprojected = ds.rio.reproject(target_crs)

print(f"Dataset reprojected from EPSG:4326 to {target_crs} (UTM Zone 35S).")


  ds.attrs["date_minted"] = datetime.utcnow().isoformat() + "Z"  # ISO formatted UTC date/time.


TypeError: can't multiply sequence by non-int of type 'float'

In [None]:

# --------------------------------------------------------------------------------
# 5. Save the Updated and Reprojected Dataset to a New NetCDF File
# --------------------------------------------------------------------------------
ds_reprojected.to_netcdf(output_file)
print(f"Reprojected dataset with updated metadata saved to: {output_file}")