In [1]:
import hashlib
import netCDF4 as nc4

In [2]:
def attribute_filter(attributes):
    "Filter for elements which do not start with '__NCH'."
    return filter(lambda a: not a.startswith("__NCH"), attributes)

In [3]:
def update_hash_attr(name, value, hash_obj=None):
    """Update `hash_obj` with the UTF8 encoded string version of first `name` and then `value`."""
    if hash_obj is None:
        hash_obj = hashlib.sha256()
    hash_obj.update(str(name).encode('utf8'))
    hash_obj.update(str(value).encode('utf8'))
    return hash_obj

In [4]:
def update_hash_var(var_obj, hash_obj=None):
    """Update `hash_obj` from a variable."""
    if hash_obj is None:
        hash_obj = hashlib.sha256()
    hash_obj.update(str(var_obj.name).encode('utf8'))
    for attr in attribute_filter(sorted(var_obj.ncattrs())):
        update_hash_attr(attr, hash_obj)

In [5]:
def calculate_file_hash(file_name):
    """Calculate hash for a given netCDF file."""
    data_set = nc4.Dataset(str(file_name))
    hash_obj = None
    for key, var in sorted(data_set.variables.items()):
        hash_obj = update_hash_var(var, hash_obj)
    for name in data_set.ncattrs():
        value = data_set.getncattr(name)
        hash_obj = update_hash_attr(name, value, hash_obj)
    return hash_obj.hexdigest()

In [6]:
base_example_file = "example_data/madis-sao.nc"
print(calculate_file_hash(base_example_file), base_example_file)

for kind in [3, 4, 6, 7]:
    example_file = base_example_file + ".{}".format(kind)
    print(calculate_file_hash(example_file), example_file)

d03d576afef618549f44a49bb7a988ff9801670cf0944ad5254e26a2dd6397d8 example_data/madis-sao.nc
d03d576afef618549f44a49bb7a988ff9801670cf0944ad5254e26a2dd6397d8 example_data/madis-sao.nc.3
d03d576afef618549f44a49bb7a988ff9801670cf0944ad5254e26a2dd6397d8 example_data/madis-sao.nc.4
d03d576afef618549f44a49bb7a988ff9801670cf0944ad5254e26a2dd6397d8 example_data/madis-sao.nc.6
d03d576afef618549f44a49bb7a988ff9801670cf0944ad5254e26a2dd6397d8 example_data/madis-sao.nc.7
