In [8]:
import xarray as xr
import rioxarray 
from datetime import datetime
import json
from affine import Affine

In [2]:
# Locations on disk: the source NetCDF file, and the destination for the copy
# that will carry the updated metadata.
input_file = r"C:\Users\TyHow\Downloads\5m_output\5m_output.nc"  # original NetCDF file (read)
output_file = r"C:\Users\TyHow\Downloads\5m_output\5m_output_metadata.nc"  # new file (written at the end)

# Load the source file into an xarray Dataset; later cells work on `ds`.
ds = xr.open_dataset(input_file)

In [3]:
### Update Global Metadata Attributes

# If rioxarray cannot find a CRS on the dataset, assume the data is in
# EPSG:4326 (geographic lon/lat) and record that on the dataset.
if not ds.rio.crs:
    ds = ds.rio.write_crs("EPSG:4326")
    print("No CRS found in the dataset; setting default to EPSG:4326.")

# Provenance / attribution attributes stored as global attributes of the file.
ds.attrs["author"] = "Tyler Howe"
ds.attrs["company"] = "MinersAI"
ds.attrs["license"] = "CCBY 4.0 (Original data is shared under CCBY 4.0 at the time of release)"
# ISO-formatted UTC timestamp with a trailing "Z".
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; consider
# switching to datetime.now(timezone.utc) once `timezone` is imported.
ds.attrs["date_created"] = datetime.utcnow().isoformat() + "Z"
ds.attrs["data_source"] = (
    "https://www.kaggle.com/datasets/nickwilliams99/kobold-metals-sitatunga-airborne-geophysics-survey"
)
ds.attrs["date_accessed"] = "04/07/2025"
ds.attrs["country"] = "Zambia"
ds.attrs["attribution"] = "Original data provided by KoBold Metals"

# The following two attributes describe the UTM projection the data is
# reprojected to later in the notebook, not the CRS the data has at this point.
ds.attrs["crs"] = "EPSG:32735"
ds.attrs["utm_zone"] = "35S"

# (A stray, indented duplicate of the "date_created" assignment used to follow
# here; at top level it is an IndentationError and has been removed.)


In [5]:
### Define layer units

# Physical unit of every layer stacked in "combined_layers".
# The entries are assumed to be listed in the same order as the "layer"
# dimension -- TODO confirm against the source file.
layer_units = {
    "Bouguer230_Processed": "mGal",
    "Bouguer267_Processed": "mGal",
    "Digital_terrain": "Meters",
    "Gravity_disturbance_Processed": "mGal",
    "Grav_model_Processed": "mGal",
    "Processed_magnetics": "nT",
    "Gradient_levelled": "nT/m",
    "Potassium_NASVD_processed": "Percentage",
    "Uranium_NASVD_processed": "PPM",
    "Thorium_NASVD_processed": "PPM",
    "Total_count_NASVD_processed": "CPS",
}

# Guard the positional assumption: one unit per layer, no more and no fewer.
n_layers = ds["combined_layers"].sizes["layer"]
if n_layers != len(layer_units):
    raise ValueError(
        f"'combined_layers' has {n_layers} layers but {len(layer_units)} units were defined."
    )

# `.isel(layer=i)` returns a *new* DataArray, so setting `.attrs` on its result
# (as this cell used to do) never reaches `ds` and the units were silently lost.
# Store the whole mapping on the variable itself instead. NetCDF attributes
# cannot hold a dict, so it is serialised as a JSON string (layer name -> unit).
ds["combined_layers"].attrs["layer_units"] = json.dumps(layer_units)

In [None]:

# --------------------------------------------------------------------------------
# 3. Add Units to All Layers (Data Variables)
# --------------------------------------------------------------------------------
# Disabled template, kept for reference: iterate through each data variable and add a placeholder "units" attribute if one isn't already specified.
#for var in ds.data_vars:
#    if "units" not in ds[var].attrs:
#        ds[var].attrs["units"] = "your_unit_here"  # Replace with actual units if known.
#        print(f"Added default units to variable: {var}")


In [None]:
### Reproject the Dataset to UTM Zone 35S

# 1. Work on the stacked layers as a single DataArray.
da = ds["combined_layers"]

# 2. (Optional) ensure your spatial dims are named 'x' & 'y'
#    da = da.rename({'lon':'x','lat':'y'})

# 3. Build the affine transform from the coordinate arrays.
#    Assumes regularly spaced coordinates that mark pixel *centres* (the
#    xarray/rioxarray convention) -- TODO confirm for this file.
x = da["x"].values
y = da["y"].values
resx = x[1] - x[0]
resy = y[1] - y[0]  # signed: negative when y is stored north -> south
# The affine origin is the outer corner of the first pixel, i.e. half a pixel
# before the first centre on each axis. Using the signed steps keeps the
# transform consistent with the array whichever way y is ordered (the previous
# `Affine(resx, 0, x.min(), 0, -resy, y.max())` was shifted by half a pixel and
# had the wrong row direction for either ordering of y).
transform = Affine.translation(x[0] - resx / 2, y[0] - resy / 2) * Affine.scale(resx, resy)

# 4. Write the transform, then the source CRS.
da = da.rio.write_transform(transform, inplace=False)
da = da.rio.write_crs("EPSG:4326", inplace=False)

# 5. Reproject to UTM zone 35S.
da_utm = da.rio.reproject("EPSG:32735")

# 6. Put the result back into the dataset under a new name.
#    The old lon/lat x/y index coordinates and the old grid-mapping coordinate
#    must be dropped first: `assign` aligns a new DataArray to the dataset's
#    existing indexes and discards conflicting non-index coordinates, so
#    keeping them would reindex the UTM grid onto the degree coordinates
#    (yielding NaNs) and keep the EPSG:4326 grid mapping.
#    NOTE(review): if `ds` holds other variables on the x/y dims, this assign
#    will fail with a size conflict; reproject those variables too in that case.
stale_coords = [name for name in ("x", "y", da.rio.grid_mapping) if name in ds.coords]
ds = ds.drop_vars(["combined_layers", *stale_coords]).assign(combined_layers_utm=da_utm)

In [18]:
### Write the updated, reprojected dataset out as a new NetCDF file

# Serialise `ds` to the output path, then report where the file was written.
ds.to_netcdf(output_file)
saved_message = f"Reprojected dataset with updated metadata saved to: {output_file}"
print(saved_message)

Reprojected dataset with updated metadata saved to: C:\Users\TyHow\Downloads\5m_output\5m_output_metadata.nc
