This notebook renames saved .nc files and, separately, renames the variables stored inside NetCDF files.

In [15]:
# import functions
import os
import sys
import glob
import numpy as np
import netCDF4 as nc
import xarray as xr

In [2]:
# Root directory of the ERA5 data on scratch storage.
# NOTE(review): not referenced by any of the visible cells below -- confirm whether it is still needed.
my_era5_path = '/glade/u/home/zcleveland/scratch/ERA5/'
# Output directory for plots; likewise not referenced by the visible cells.
plot_out_path = '/glade/u/home/zcleveland/NAM_soil-moisture/ERA5_analysis/plots/'

In [3]:
# Variable lists
# Every short name listed here is expected to be a key of `var_dict` and
# `var_units`, so spelling must match those dictionaries exactly.

# surface instantaneous variables
sfc_instan_list = [
    'sd',  # snow depth  (m of water equivalent)
    'msl',  # mean sea level pressure (Pa)
    'tcc',  # total cloud cover (0-1)
    'stl1',  # soil temp layer 1 (K)
    'stl2',  # soil temp layer 2 (K)
    'stl3',  # soil temp layer 3 (K)
    'stl4',  # soil temp layer 4 (K)
    'swvl1',  # soil volume water content layer 1 (m^3 m^-3)
    'swvl2',  # soil volume water content layer 2 (m^3 m^-3)
    'swvl3',  # soil volume water content layer 3 (m^3 m^-3)
    'swvl4',  # soil volume water content layer 4 (m^3 m^-3)
    '2t',  # 2 meter temp (K)
    '2d',  # 2 meter dew point (K)
    'ishf',  # instant surface heat flux (W m^-2)
    'ie',  # instant moisture flux (kg m^-2 s^-1)
    'cape',  # convective available potential energy (J kg^-1)
    'tcw',  # total column water (kg m^-2) -- sum total of solid, liquid, and vapor in a column
    'sstk',  # sea surface temperature (K)
    'vipile',  # vertical integral of potential, internal, and latent energy (J m^-2)
    'viwve',  # vertical integral of eastward water vapour flux (kg m^-1 s^-1) - positive west -> east
    'viwvn',  # vertical integral of northward water vapour flux (kg m^-1 s^-1) - positive south -> north
    'viwvd',  # vertical integral of divergence of moisture flux (kg m^-2 s^-1) - positive divergence
    # spelled with an underscore to match the 'z_thick' key in var_dict / var_units
    'z_thick',  # geopotential height thickness (m) - difference between two height levels
]

# surface accumulation variables
sfc_accumu_list = [
    'lsp',  # large scale precipitation (m of water)
    'cp',  # convective precipitation (m of water)
    'tp',  # total precipitation (m of water) -- DERIVED
    'sshf',  # surface sensible heat flux (J m^-2)
    'slhf',  # surface latent heat flux (J m^-2)
    'ssr',  # surface net solar radiation (J m^-2)
    'str',  # surface net thermal radiation (J m^-2)
    'sro',  # surface runoff (m)
    'sf',  # total snowfall (m of water equivalent)
    'ssrd',  # surface solar radiation downwards (J m^-2)
    'strd',  # surface thermal radiation downwards (J m^-2)
    'ttr',  # top net thermal radiation (OLR, J m^-2) -- divide by time (s) for W m^-2
]

# pressure level variables
pl_var_list = [
    # 'pv',  # potential vorticity (K m^2 kg^-1 s^-1)
    # 'crwc',  # specific rain water content (kg kg^-1)
    # 'cswc',  # specific snow water content (kg kg^-1)
    'z',  # geopotential (m^2 s^-2)
    'z_height',  # geopotential height (m)
    't',  # temperature (K)
    'u',  # u component of wind (m s^-1)
    'v',  # v component of wind (m s^-1)
    'q',  # specific humidity (kg kg^-1)
    'w',  # vertical velocity (Pa s^-1)
    # 'vo',  # vorticity - relative (s^-1)
    # 'd',  # divergence (s^-1)
    'r',  # relative humidity (%)
    # 'clwc',  # specific cloud liquid water content
    # 'ciwc',  # specific cloud ice water content
    # 'cc',  # fraction of cloud cover (0-1)
]

# NAM variables
NAM_var_list = [
    'onset',
    'retreat',
    'length',
    'precipitation',
    'precipitation-rate'
]

# all ERA5 variables in one list (NAM_var_list and misc_var_list are kept separate)
var_list = sfc_instan_list + sfc_accumu_list + pl_var_list

# region average list
region_avg_list = [
    'cp',
    'mr',
    'son',
    'chi',
    'moj',
    'MeNmAz',
]

# variables that are fluxes and need to be multiplied by -1 for easier understanding
flux_var_list = [
    'sshf',  # surface sensible heat flux (J m^-2)
    'slhf',  # surface latent heat flux (J m^-2)
    'ttr',  # top net thermal radiation (OLR, J m^-2) -- divide by time (s) for W m^-2
    'ishf',  # instant surface heat flux (W m^-2)
    'ie',  # instant moisture flux (kg m^-2 s^-1)
    'str',  # surface net thermal radiation (J m^-2)
]

# misc variables
misc_var_list = [
    'nino-3',
]


# Variable dictionaries

# dictionary of variables and their names
# Maps each variable short name to a human-readable label.
var_dict = {
    'sd': 'Snow Depth',
    'msl': 'Mean Sea Level Pressure',
    'tcc': 'Total Cloud Cover',
    'stl1': 'Soil Temp Layer 1',
    'stl2': 'Soil Temp Layer 2',
    'stl3': 'Soil Temp Layer 3',
    'stl4': 'Soil Temp Layer 4',
    'swvl1': 'Soil Volume Water Content Layer 1',
    'swvl2': 'Soil Volume Water Content Layer 2',
    'swvl3': 'Soil Volume Water Content Layer 3',
    'swvl4': 'Soil Volume Water Content Layer 4',
    '2t': '2 Meter Temp',
    '2d': '2 Meter Dew Point',
    'ishf': 'Instant Surface Heat Flux',
    'ie': 'Instant Moisture Flux',
    'cape': 'Convective Available Potential Energy',
    'tcw': 'Total Column Water',
    'sstk': 'Sea Surface Temperature',
    'vipile': 'vertical integral of potential, internal, and latent energy',
    'viwve': 'vertical integral of eastward water vapour flux',
    'viwvn': 'vertical integral of northward water vapour flux',
    'viwvd': 'vertical integral of divergence of moisture flux',
    'lsp': 'Large Scale Precipitation',
    'cp': 'Convective Precipitation',
    'tp': 'Total Precipitation',
    'sshf': 'Surface Sensible Heat Flux',
    'slhf': 'Surface Latent Heat Flux',
    'ssr': 'Surface Net Solar Radiation',
    'str': 'Surface Net Thermal Radiation',
    'sro': 'Surface Runoff',
    'sf': 'Total Snowfall',
    'ssrd': 'Surface Solar Radiation Downwards',
    'strd': 'Surface Thermal Radiation Downwards',
    'ttr': 'Top Net Thermal Radiation (OLR)',
    'z': 'Geopotential',
    'z_height': 'Geopotential Height',
    'z_thick': 'Geopotential Height Thickness',
    't': 'Temperature',
    'u': 'U Component of Wind',
    'v': 'V Component of Wind',
    'q': 'Specific Humidity',
    'w': 'Vertical Velocity',
    'r': 'Relative Humidity',
    'onset': 'NAM Onset',
    'retreat': 'NAM Retreat',
    'length': 'NAM Length',
    'precipitation': 'Yearly NAM Season Precipitation',
    'precipitation-rate': 'NAM Precipitation Rate',
    # \tilde is the accent command; "\tilda" is not a known math symbol
    'nino-3': r'Ni$\tilde{n}$o-3 Index',
}

# variable units in latex format for plotting
# Same keys as var_dict; text between $...$ is rendered as math.
var_units = {
    'sd': r'(m)',
    'msl': r'(Pa)',
    'tcc': r'(0-1)',
    'stl1': r'(K)',
    'stl2': r'(K)',
    'stl3': r'(K)',
    'stl4': r'(K)',
    'swvl1': r'$(m^3 m^{-3})$',
    'swvl2': r'$(m^3 m^{-3})$',
    'swvl3': r'$(m^3 m^{-3})$',
    'swvl4': r'$(m^3 m^{-3})$',
    '2t': r'(K)',
    '2d': r'(K)',
    'ishf': r'$(W m^{-2})$',
    'ie': r'$(kg m^{-2} s^{-1})$',
    'cape': r'$(J kg^{-1})$',
    'tcw': r'$(kg m^{-2})$',
    'sstk': r'(K)',
    'vipile': r'$(J m^{-2})$',
    'viwve': r'$(kg m^{-1} s^{-1})$',
    'viwvn': r'$(kg m^{-1} s^{-1})$',
    'viwvd': r'$(kg m^{-2} s^{-1})$',
    'lsp': r'(m)',
    'cp': r'(m)',
    'tp': r'(m)',
    'sshf': r'$(J m^{-2})$',
    'slhf': r'$(J m^{-2})$',
    'ssr': r'$(J m^{-2})$',
    'str': r'$(J m^{-2})$',
    'sro': r'(m)',
    'sf': r'(m)',
    'ssrd': r'$(J m^{-2})$',
    'strd': r'$(J m^{-2})$',
    'ttr': r'$(J m^{-2})$',
    'z': r'$(m^2 s^{-2})$',
    'z_height': '$(m)$',
    'z_thick': '$(m)$',
    't': r'(K)',
    'u': r'$(m s^{-1})$',
    'v': r'$(m s^{-1})$',
    'q': r'$(kg kg^{-1})$',
    'w': r'$(Pa s^{-1})$',
    'r': r'(%)',
    'onset': '',
    'retreat': '',
    'length': r'# of days',
    'precipitation': r'(m)',
    # the exponent must sit inside $...$ or "^{-1}" is drawn literally
    'precipitation-rate': r'($m\ day^{-1}$, NAM Season Precip / NAM Length)',
    'nino-3': r'(Ni$\tilde{n}$o-3 Index Anomaly)',
}

# dictionary of regions and their names
region_avg_dict = {
    'cp': 'Colorado Plateau',
    'mr': 'Mogollon Rim',
    'son': 'Sonoran Desert',
    'chi': 'Chihuahuan Desert',
    'moj': 'Mojave Desert',
    'MeNmAz': 'Mexico, New Mexico, and Arizona Border',
}

# dictionary of regions and their coordinate boundaries
# [WEST, EAST, NORTH, SOUTH] -- WEST and EAST are on 0-360 longitude grid system
region_avg_coords = {
    'cp': [249, 253, 39, 35],
    'mr': [249, 251, 34, 33],
    'son': [246, 250, 32, 28],
    'chi': [252, 256, 33, 29],
    'moj': [243, 247, 37, 33],
    'MeNmAz': [246, 256, 38, 28],
}

# dictionary of colors for the plot of each region
# 'dsw' has a color here but no entry in region_avg_dict / region_avg_coords.
region_colors_dict = {
    'cp': 'blue',
    'mr': 'darkorange',
    'son': 'green',
    'chi': 'red',
    'moj': 'purple',
    'MeNmAz': 'brown',
    'dsw': 'black'
}

Prior to this code, I had run some correlations with 700 mb geopotential height, but had saved the files with only _z_ in the file name.
Now, I want to run correlations with other height levels, so I'm adding the pressure level to the file name to distinguish between them.
The next cell will replace _z_ with _z-xxx_, where xxx is the pressure level in mb.

The functions below update a file name with a given input string pattern to another file name with a new string pattern.

In [16]:
def get_files_to_rename(directory, old_pattern, new_pattern, recursive):
    """
    Collects files that match the old pattern within the specified directory.

    Nothing is renamed here; the function only builds the rename plan.
    A file is selected when its name contains ``old_pattern`` and does not
    already contain ``new_pattern`` (so re-running after a rename is a no-op).
    Every occurrence of ``old_pattern`` in the name is replaced.

    :param directory: The directory to search.
    :param old_pattern: The old filename pattern to match. An empty string
        selects nothing (see comment below).
    :param new_pattern: The new filename pattern to replace with. May be
        empty, in which case ``old_pattern`` is simply removed from the name.
    :param recursive: Whether to search directories recursively or not.
    :return: A list of tuples (old_file_path, new_file_path).
    """
    # An empty pattern is a substring of every name, and str.replace('', x)
    # splices x between every character -- never plan renames for it.
    if not old_pattern:
        return []

    # Produce (folder, filename) pairs for regular files only, so both search
    # modes share the matching logic below.
    if recursive:
        candidates = (
            (root, filename)
            for root, _, files in os.walk(directory)
            for filename in files
        )
    else:
        candidates = (
            (directory, filename)
            for filename in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, filename))
        )

    files_to_rename = []
    for folder, filename in candidates:
        if old_pattern not in filename:
            continue
        # Skip names that already carry the new pattern. The check is only
        # meaningful for a non-empty new_pattern ('' is contained in
        # everything and would otherwise reject every file).
        if new_pattern and new_pattern in filename:
            continue
        new_filename = filename.replace(old_pattern, new_pattern)
        files_to_rename.append(
            (os.path.join(folder, filename), os.path.join(folder, new_filename))
        )
    return files_to_rename

In [5]:
def rename_files(files_to_rename):
    """
    Renames the files based on the provided list of old and new file paths.

    A pair whose target path already exists is skipped and reported instead
    of renamed, because ``os.rename`` would silently replace the existing
    file on POSIX systems.

    :param files_to_rename: A list of tuples (old_file_path, new_file_path).
    """
    for old_file, new_file in files_to_rename:
        # lexists also catches dangling symlinks occupying the target name
        if os.path.lexists(new_file):
            print(f'Skipped (target exists): {old_file} -> {new_file}')
            continue
        os.rename(old_file, new_file)
        print(f'Renamed: {old_file} -> {new_file}')

In [11]:
def main_file_names(directory, old_pattern, new_pattern, recursive):
    """
    Entry point for the file-renaming workflow: build the rename plan, then apply it.

    :param directory: The directory to search.
    :param old_pattern: The old filename pattern to match.
    :param new_pattern: The new filename pattern to replace with.
    :param recursive: Whether to search directories recursively or not.
    """
    # plan first, then hand the (old_path, new_path) pairs straight to the renamer
    rename_files(
        get_files_to_rename(directory, old_pattern, new_pattern, recursive)
    )

The code below finds files in a given directory and updates the variable names inside the netcdf file to something new specified by the user.

In [16]:
def collect_files(directory, file_pattern, recursive):
    """
    Collects files matching the pattern from the specified directory.

    A file matches when ``file_pattern`` is a substring of its name. Only
    regular files are returned; in non-recursive mode, sub-directories whose
    name happens to contain the pattern are ignored.

    :param directory: The directory to search.
    :param file_pattern: The filename pattern to match.
    :param recursive: Whether to search directories recursively or not.
    :return: A list of file paths matching the pattern.
    :raises SystemExit: With exit code 1, after printing the error, when no
        file matches or when ``os.listdir`` cannot find ``directory``.
    """
    try:
        files_matching_pattern = []
        if recursive:
            for root, _, files in os.walk(directory):
                for file in files:
                    if file_pattern in file:
                        files_matching_pattern.append(os.path.join(root, file))
        else:
            for file in os.listdir(directory):
                candidate = os.path.join(directory, file)
                # listdir also returns directories; keep regular files only
                if file_pattern in file and os.path.isfile(candidate):
                    files_matching_pattern.append(candidate)

        if not files_matching_pattern:
            raise FileNotFoundError("No files found matching the pattern.")

        return files_matching_pattern

    except FileNotFoundError as e:
        print(f"Error: {e}")
        # Raise SystemExit directly rather than calling exit(): the `exit`
        # name is injected by `site`/IPython and is not a language builtin.
        raise SystemExit(1) from e

In [3]:
def open_and_rename_var(file_path, new_var_name, old_var_name=None):
    """
    Opens a NetCDF file, renames the variable, and handles cases where the old variable name isn't known upfront.

    The variable to rename is chosen as follows:

    * ``old_var_name`` itself, when it is given and present in the file;
    * otherwise the file's only data variable, when there is exactly one;
    * otherwise the user is prompted to pick one by number.

    Because the name actually used is returned, a caller that passed
    ``old_var_name`` can detect that the file held a different variable by
    comparing the two.

    The rename happens on the in-memory dataset only; nothing is written.
    The returned dataset is still open -- the caller is responsible for
    closing it. If anything fails before returning, the dataset is closed.

    :param file_path: Path to the NetCDF file.
    :param new_var_name: The new variable name to replace in the NetCDF file.
    :param old_var_name: The old variable name, if known. If None (or not
        present in the file), it is detected or the user is prompted to select.
    :return: The old variable name and the new dataset with the renamed variable.
    :raises ValueError: If the file contains no data variables.
    """
    ds = xr.open_dataset(file_path)
    try:
        var_names = list(ds.data_vars)
        if not var_names:
            raise ValueError(f"No data variables found in {file_path}")

        if old_var_name is None or old_var_name not in var_names:
            if len(var_names) == 1:
                old_var_name = var_names[0]
            else:
                old_var_name = _prompt_for_variable(var_names)

        # renaming a variable to its own name is a no-op, so skip it
        if old_var_name != new_var_name:
            ds = ds.rename({old_var_name: new_var_name})
    except Exception:
        # do not leak the open file handle when selection or rename fails
        ds.close()
        raise

    return old_var_name, ds


def _prompt_for_variable(var_names):
    """Lists the variables and asks until the user enters a valid 1-based number."""
    print(f"Variables in the dataset: {var_names}")
    for i, var in enumerate(var_names):
        print(f"{i + 1}. {var}")
    while True:
        raw = input("Enter the number of the variable to rename: ")
        try:
            choice = int(raw)
        except ValueError:
            choice = 0
        # Reject 0 and negatives explicitly: var_names[choice - 1] would
        # otherwise wrap around and silently pick a variable from the end.
        if 1 <= choice <= len(var_names):
            return var_names[choice - 1]
        print(f"Please enter a number between 1 and {len(var_names)}.")

In [5]:
def verify_overwrite(old_ds, new_ds, overwrite_flag, prompt_user):
    """
    Shows the pending rename and, when overwriting, asks the user to confirm.

    With ``prompt_user`` False the datasets are not inspected and the answer
    is always True. Otherwise the first data variable of each dataset is
    printed; a confirmation is requested only when ``overwrite_flag`` is set,
    and only the answer "yes" (any letter case) counts as consent.

    :param old_ds: The old dataset.
    :param new_ds: The new dataset.
    :param overwrite_flag: Bool; True to overwrite current file or False to save a copy.
    :param prompt_user: Bool; True to prompt user for verification.
    :return: Bool; True to continue, False to exit.
    """
    # silent mode: nothing to show, nothing to ask
    if not prompt_user:
        return True

    name_before = list(old_ds.data_vars)[0]
    name_after = list(new_ds.data_vars)[0]
    print(f"Old variable name: {name_before}")
    print(f"New variable name: {name_after}")

    # saving a copy never needs confirmation
    if not overwrite_flag:
        return True

    answer = input("Are you sure you want to overwrite the existing file? (yes/no): ")
    return answer.lower() == 'yes'

In [6]:
def update_variable_names(files, new_var_name, overwrite_flag, old_var_name=None):
    """
    Manages the renaming process across multiple files and handles user verification.

    Each file is opened and renamed in memory via ``open_and_rename_var``,
    confirmed via ``verify_overwrite``, and written via ``save_dataset``.
    The overwrite confirmation is requested until one file has been saved
    successfully; later files are processed without asking again. Any
    exception raised while handling a file is printed and the file is
    recorded as failed; processing continues with the next file.

    :param files: List of file paths to update.
    :param new_var_name: The new variable name to replace in the NetCDF files.
    :param overwrite_flag: Bool; True to overwrite current files or False to save copies.
    :param old_var_name: The old variable name, if known.
    :return: Lists of successful updates, failed updates, and files not updated.
        In copy mode the "successful" entries are the paths of the copies.
    """
    files_not_updated = []
    successful_updates = []
    failed_updates = []
    prompt_user = True

    for file_path in files:
        new_ds = None
        try:
            old_var_name_in_file, new_ds = open_and_rename_var(file_path, new_var_name, old_var_name)

            # Only reachable when open_and_rename_var reports a name other
            # than the one requested.
            if old_var_name and old_var_name != old_var_name_in_file:
                response = input(f"Variable name in {file_path} is {old_var_name_in_file}. Update to {new_var_name}? (yes/no): ")
                if response.lower() != 'yes':
                    files_not_updated.append(file_path)
                    continue

            # The untouched file is opened only for the confirmation display;
            # the context manager closes it again right away.
            with xr.open_dataset(file_path) as original_ds:
                proceed = verify_overwrite(original_ds, new_ds, overwrite_flag, prompt_user)
            if not proceed:
                files_not_updated.append(file_path)
                continue

            if overwrite_flag:
                save_dataset(new_ds, file_path, overwrite=True)
                successful_updates.append(file_path)
            else:
                # Insert "_copy" before the extension only. str.replace('.nc', ...)
                # would touch every ".nc" in the path and, for a name without
                # ".nc", leave the path unchanged so the "copy" would target
                # the original file.
                base, extension = os.path.splitext(file_path)
                new_file_path = f'{base}_copy{extension}'
                save_dataset(new_ds, new_file_path, overwrite=False)
                successful_updates.append(new_file_path)

            prompt_user = False  # Do not prompt for subsequent files unless needed

        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            failed_updates.append(file_path)

        finally:
            # release the file handle on every path: success, skip, or failure
            if new_ds is not None:
                new_ds.close()

    return successful_updates, failed_updates, files_not_updated

In [7]:
def save_dataset(ds, file_path, overwrite):
    """
    Writes the dataset to disk, either as a new file or in place of an existing one.

    In overwrite mode the data is first written to ``file_path + '.tmp'`` and
    then moved over ``file_path``, so the target is only replaced after the
    write succeeded. If that fails, the error is printed, a leftover
    temporary file is removed, and the exception is re-raised.

    :param ds: The dataset to save.
    :param file_path: The path to save the dataset.
    :param overwrite: Bool; True to overwrite current file or False to save a copy.
    """
    # plain write: no staging file involved
    if not overwrite:
        ds.to_netcdf(file_path)
        return

    staging_path = file_path + '.tmp'
    try:
        ds.to_netcdf(staging_path)
        os.replace(staging_path, file_path)
    except Exception as e:
        print(f"Error saving file {file_path}: {e}")
        staging_left_behind = os.path.exists(staging_path)
        if staging_left_behind:
            os.remove(staging_path)
        raise

In [8]:
def print_summary(successful_updates, failed_updates, files_not_updated, summary_flag):
    """
    Reports the outcome of a run, either as counts or as the full path lists.

    :param successful_updates: List of successfully updated file paths.
    :param failed_updates: List of failed update file paths.
    :param files_not_updated: List of file paths not updated.
    :param summary_flag: Bool; True to print summary counts, False to print lists.
    """
    if not summary_flag:
        # detailed mode: one line per category, showing the list itself
        outcome_lists = (
            ("Successful updates:", successful_updates),
            ("Failed updates:", failed_updates),
            ("Files not updated:", files_not_updated),
        )
        for heading, paths in outcome_lists:
            print(heading, paths)
        return

    # summary mode: the total is the sum of the three categories
    n_ok = len(successful_updates)
    n_failed = len(failed_updates)
    n_skipped = len(files_not_updated)
    print(f"Total files matching pattern: {n_ok + n_failed + n_skipped}")
    print(f"Successful updates: {n_ok}")
    print(f"Failed updates: {n_failed}")
    print(f"Files not updated: {n_skipped}")

In [9]:
def main_netcdf_variables(directory, file_pattern, new_var_name, recursive, overwrite_flag=False, summary_flag=False, old_var_name=None):
    """
    Entry point for the variable-renaming workflow: find the files, rename the variable in each, report.

    :param directory: The directory to search.
    :param file_pattern: The filename pattern to match.
    :param new_var_name: The new variable name to replace in the NetCDF file.
    :param recursive: Whether to search directories recursively or not.
    :param overwrite_flag: Bool; True to overwrite current file or False to save a copy.
    :param summary_flag: Bool; True to print summary counts, False to print lists.
    :param old_var_name: The old variable name, if known.
    """
    # step 1: locate candidate files
    matched_files = collect_files(directory, file_pattern, recursive)

    # step 2: rename the variable file by file, collecting the three outcome lists
    updated, failed, skipped = update_variable_names(
        matched_files, new_var_name, overwrite_flag, old_var_name
    )

    # step 3: report
    print_summary(updated, failed, skipped, summary_flag)

In [None]:
# running the code to rename files
# Fill in old_pattern / new_pattern before running; both are empty
# placeholders here, and no file is selected for renaming while they are empty.

if __name__ == '__main__':
    # define arguments
    directory = '/glade/u/home/zcleveland/'  # search root
    old_pattern = ''  # substring of the file name to replace
    new_pattern = ''  # replacement substring
    recursive = True  # also descend into sub-directories

    # call main function for renaming file names
    main_file_names(directory, old_pattern, new_pattern, recursive)

In [None]:
# run the code to update variable names in netcdf files
# With these settings, every file under `directory` whose name contains
# `file_pattern` gets its selected data variable renamed to `new_var_name`,
# and the file itself is overwritten.

if __name__ == '__main__':
    # define arguments
    directory = '/glade/u/home/zcleveland/scratch/ERA5/dsw/'  # search root
    file_pattern = 'z_thick_1000-500'  # substring a file name must contain
    new_var_name = 'z_thick_1000-500'  # variable name to write into each file
    recursive = True  # also descend into sub-directories
    overwrite_flag = True  # replace the original files instead of writing *_copy files
    summary_flag = False  # print the full path lists rather than counts
    old_var_name = None  # not known upfront: detected per file, or asked interactively

    # call main function for renaming variables
    main_netcdf_variables(directory, file_pattern, new_var_name, recursive, overwrite_flag=overwrite_flag, summary_flag=summary_flag, old_var_name=old_var_name)