In [None]:
# netCDF files
import xarray as xr
import numpy as np
import os

# Directory scanned for NetCDF files, and the year used in the file-name filter.
folder_path = r"C:\Users\zscho\OneDrive\Documents\Capstone\Weather"
year = 2015

# Keep only files named "Weather_data_*_<year>.nc".
# os.listdir returns entries in arbitrary order, so sort the result: every
# later cell (including the one that picks nc_files[0]) then sees the same
# order on every run and on every machine.
nc_files = sorted(
    f for f in os.listdir(folder_path)
    if f.startswith("Weather_data_") and f.endswith(f"_{year}.nc")
)
print(f"Found {len(nc_files)} NetCDF files")


In [None]:
# file structure: print the dimension names and lengths of every file
for file in nc_files:
    # The context manager closes the file even if reading or printing raises.
    with xr.open_dataset(os.path.join(folder_path, file)) as ds:
        # Dataset.sizes is the name -> length mapping. Dataset.dims is slated
        # to return only a set of names, which would break dict(ds.dims).
        print(f"{file}: {dict(ds.sizes)}")

In [None]:
# variable inspection: shape, minimum and maximum of every data variable
for file in nc_files:
    # The context manager closes the file even if a reduction or the numeric
    # formatting below raises (e.g. on a non-numeric variable).
    with xr.open_dataset(os.path.join(folder_path, file)) as ds:
        for var in ds.data_vars:
            data = ds[var]
            print(f"{file} - {var}: shape {data.shape}, min {data.min().values:.3f}, max {data.max().values:.3f}")

In [None]:
# coordinate ranges: extent of the lat, lon and day coordinates of every file
for file in nc_files:
    # The context manager closes the file even if a reduction or print raises.
    with xr.open_dataset(os.path.join(folder_path, file)) as ds:
        # Each coordinate is optional; files lacking one are simply not reported
        # for that coordinate.
        if 'lat' in ds.coords:
            lat_min, lat_max = ds.lat.min().values, ds.lat.max().values
            print(f"{file} - lat: {lat_min:.3f} to {lat_max:.3f}")

        if 'lon' in ds.coords:
            lon_min, lon_max = ds.lon.min().values, ds.lon.max().values
            print(f"{file} - lon: {lon_min:.3f} to {lon_max:.3f}")

        if 'day' in ds.coords:
            # Day values are printed unformatted, followed by the number of
            # entries along the day coordinate.
            day_min, day_max = ds.day.min().values, ds.day.max().values
            print(f"{file} - days: {day_min} to {day_max} ({len(ds.day)} total)")

In [None]:
# missing data: report every data variable that contains missing values
for file in nc_files:
    # The context manager closes the file even if counting or printing raises.
    with xr.open_dataset(os.path.join(folder_path, file)) as ds:
        for var in ds.data_vars:
            # isnull() works for any dtype, whereas np.isnan raises TypeError on
            # string/object variables. int() turns the 0-d result into a plain
            # Python integer for the comparison and the message.
            nan_count = int(ds[var].isnull().sum())
            if nan_count > 0:
                total_values = ds[var].size
                print(f"{file} - {var}: {nan_count}/{total_values} missing values")

In [None]:
# file sizes: on-disk size of every file, in mebibytes (1024 * 1024 bytes)
BYTES_PER_MB = 1024 ** 2
for file in nc_files:
    file_path = os.path.join(folder_path, file)
    # st_size is the file size in bytes, the same value os.path.getsize returns.
    size_mb = os.stat(file_path).st_size / BYTES_PER_MB
    print(f"{file}: {size_mb:.1f} MB")

In [None]:
# checking memory requirements: total size of each file's data variables
for file in nc_files:
    # The context manager closes the file even if the summation or print raises.
    with xr.open_dataset(os.path.join(folder_path, file)) as ds:
        # Only data variables are summed; coordinate arrays are not included.
        total_bytes = sum(ds[var].nbytes for var in ds.data_vars)
        print(f"{file}: {total_bytes / (1024**3):.2f} GB in memory")

In [None]:
# data types: dtype of every data variable in every file
for file in nc_files:
    # The context manager closes the file even if printing raises.
    with xr.open_dataset(os.path.join(folder_path, file)) as ds:
        for var in ds.data_vars:
            print(f"{file} - {var}: {ds[var].dtype}")

In [None]:
# sample values: a small slice of every data variable from the first file
sample_file = nc_files[0]
# The context manager closes the file even if isel raises, e.g. when the file
# lacks a lat, lon or day dimension.
with xr.open_dataset(os.path.join(folder_path, sample_file)) as ds:
    # Index 0 along lat and lon, and at most the first five entries along day.
    subset = ds.isel(lat=0, lon=0, day=slice(0, 5))
    # Values are read inside the with-block, while the file is still open.
    for var in subset.data_vars:
        print(f"{var}: {subset[var].values}")