In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
from netCDF4 import Dataset

# Function to create and write NetCDF data
def parquet_to_netcdf(parquet_file, netcdf_file, headerdict: dict) -> None:
    """Write the rows of a Parquet table into a NetCDF4 file as PROBE records.

    Every entry of ``headerdict`` is stored as a global attribute of the
    output file (and echoed to stdout). Each row of the table becomes one
    variable inside the ``ccbv`` group, named after the row's ``uda_name``
    and holding a single ``PROBE`` compound record (name, version,
    coordinate, geometry).

    Args:
        parquet_file: Path of the Parquet file to read.
        netcdf_file: Path of the NetCDF4 file to create; it is opened in
            ``'w'`` mode.
        headerdict: Global attributes to write. Must contain ``"version"``
            and ``"revision"``; the per-record version is
            ``version + 0.1 * revision``.

    Raises:
        KeyError: If ``headerdict`` lacks ``"version"``/``"revision"`` or the
            table lacks one of the required columns.
    """
    # Read the Parquet file
    df = pd.read_parquet(parquet_file)

    #############
    # Describe the compound record layout
    #############
    coord_dtype = np.dtype([("r", "<f8"),
                            ("z", "<f8"),
                            ("poloidal_angle", "<f8"),
                            ("toroidal_angle1", "<f8"),
                            ("toroidal_angle2", "<f8")])
    geom_dtype = np.dtype([("length", "<f8")])
    lp_dtype = np.dtype([("name", "S50"),
                         ("version", "<f8"),
                         ("coordinate", coord_dtype),
                         ("geometry", geom_dtype)])

    # Check all inputs BEFORE opening the output: opening in 'w' mode happens
    # first otherwise, so a failure part-way through would leave a
    # half-written file behind.
    required_columns = ("uda_name",) + coord_dtype.names + geom_dtype.names
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(
            f"{parquet_file}: missing required column(s): {', '.join(missing)}"
        )
    version = headerdict["version"] + 0.1 * headerdict["revision"]

    # Open a new NetCDF file for writing
    with Dataset(netcdf_file, 'w', format='NETCDF4') as ncfile:

        for key, value in headerdict.items():
            setattr(ncfile, key, value)
            print(key, value)

        # Create the compound types in the file. The nested types are
        # registered before the PROBE type that contains them.
        ncfile.createCompoundType(coord_dtype, "COORDINATE")
        ncfile.createCompoundType(geom_dtype, "GEOMETRY")
        lp_cp = ncfile.createCompoundType(lp_dtype, "PROBE")

        # Every variable holds exactly one record
        ncfile.createDimension('singleDim', 1)

        # All variables live in a single group called 'ccbv'
        ccbv_group = ncfile.createGroup("ccbv")

        #############
        # One variable per row of the table
        #############
        for _, row in df.iterrows():
            var = ccbv_group.createVariable(row["uda_name"],
                                            lp_cp,
                                            ("singleDim",))

            # Zero-filled rather than np.empty so that the alignment padding
            # inside the record is not left as uninitialised memory.
            data = np.zeros(1, lp_cp.dtype_view)
            data["name"][:] = row["uda_name"]
            data["version"] = version
            for field in coord_dtype.names:
                data["coordinate"][field] = row[field]
            for field in geom_dtype.names:
                data["geometry"][field] = row[field]

            # Assign the prepared data to the variable
            var[:] = data

if __name__ == "__main__":

    # Take a single timestamp so that the date and time attributes all
    # describe the same instant (separate now() calls could straddle midnight).
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")

    # Global attributes written to the output file
    headerdict = {
        "creationDate": today,
        "coordinateSystem": "Cylindrical",
        "device": "MAST",
        "shotRangeStart": 0,
        "shotRangeEnd": 50000,
        "createdBy": "jhodson",
        "system": "?",
        "class_": "?",
        "units": "SI",
        "version": 0,
        "revision": 1,
        "status": "development",
        "releaseDate": today,
        "releaseTime": now.strftime("%H:%M:%S"),
    }

    # Example usage
    parquet_to_netcdf("geometry/data/amb/ccbv.parquet", "output.nc", headerdict)


creationDate 2025-01-02
coordinateSystem Cylindrical
device MAST
shotRangeStart 0
shotRangeEnd 50000
createdBy jhodson
system ?
class_ ?
units SI
version 0
revision 1
status development
releaseDate 2025-01-02
releaseTime 09:33:07
<class 'netCDF4.Group'>
group /ccbv:
    dimensions(sizes): 
    variables(dimensions): 
    groups: 
<class 'netCDF4.Group'>
group /ccbv:
    dimensions(sizes): 
    variables(dimensions): {'names': ['name', 'version', 'coordinate', 'geometry'], 'formats': [('S1', (50,)), '<f8', [('r', '<f8'), ('z', '<f8'), ('poloidal_angle', '<f8'), ('toroidal_angle1', '<f8'), ('toroidal_angle2', '<f8')], [('length', '<f8')]], 'offsets': [0, 56, 64, 104], 'itemsize': 112, 'aligned': True} ccbv01(entries)
    groups: 
<class 'netCDF4.Group'>
group /ccbv:
    dimensions(sizes): 
    variables(dimensions): {'names': ['name', 'version', 'coordinate', 'geometry'], 'formats': [('S1', (50,)), '<f8', [('r', '<f8'), ('z', '<f8'), ('poloidal_angle', '<f8'), ('toroidal_angle1', '<f8'),