# In [44]:
import pandas as pd
import numpy as np
from datetime import datetime
from netCDF4 import Dataset

# Coil groups created under "magnetics/fluxloops" and the sub-groups each one
# holds, listed in the order the groups are created in the output file.
_FLUXLOOP_LAYOUT = (
    ("p2", ("upper", "lower")),
    ("p3", ("upper", "lower")),
    ("p4", ("upper", "lower")),
    ("p5", ("upper", "lower")),
    ("p6", ("lower",)),
)


def _write_fluxloops(group, frame, compound_type, version, location):
    """Write one single-element FLUXLOOP variable into *group* per row of *frame*.

    Each row must provide the columns "uda_name", "r" and "z". The variable is
    named after "uda_name" with "/" replaced by "_".
    """
    for _, row in frame.iterrows():
        name = row["uda_name"].replace("/", "_")
        var = group.createVariable(name, compound_type, ("singleDim",))

        # Zero-filled (not np.empty) so the record starts from a defined state.
        data = np.zeros(1, compound_type.dtype_view)
        data["name"] = name
        data["version"] = version
        data["location"] = location
        data["coordinate"]["r"] = row["r"]
        data["coordinate"]["z"] = row["z"]
        # The toroidal extent is not provided by the parquet rows.
        data["geometry"]["phi_start"] = np.nan
        data["geometry"]["phi_end"] = np.nan

        var[:] = data
        var.setncattr("units", "SI units: degrees, m")


def parquet_to_netcdf(netcdf_file, headerdict, data_dir="geometry/data/amb"):
    """Build a NetCDF4 file of fluxloop geometry from per-coil parquet tables.

    The file gets the group tree ``magnetics/fluxloops/<coil>/<upper|lower>``
    for the coils listed in ``_FLUXLOOP_LAYOUT``. For every row of
    ``<data_dir>/fl_<coil><u|l>.parquet`` a variable of compound type
    ``FLUXLOOP`` is created in the matching group.

    Args:
        netcdf_file: Path of the NetCDF file to create (opened in 'w' mode).
        headerdict: Mapping written as global attributes of the file. It must
            contain "version" and "revision"; each record's version field is
            ``version + 0.1 * revision``.
        data_dir: Directory holding the ``fl_*.parquet`` input files.

    Raises:
        KeyError: If "version" or "revision" is missing from ``headerdict``.
    """
    from pathlib import Path

    version = headerdict["version"] + 0.1 * headerdict["revision"]

    # Load every input before opening the output, so a missing or unreadable
    # parquet file cannot leave a truncated NetCDF file behind.
    source_dir = Path(data_dir)
    frames = {
        (coil, side): pd.read_parquet(source_dir / f"fl_{coil}{side[0]}.parquet")
        for coil, sides in _FLUXLOOP_LAYOUT
        for side in sides
    }

    # Open a new NetCDF file for writing
    with Dataset(netcdf_file, 'w', format='NETCDF4') as ncfile:

        # setncattr also copes with keys that clash with Python attribute names.
        for key, value in headerdict.items():
            ncfile.setncattr(key, value)

        mag_grp = ncfile.createGroup("magnetics")
        fluxloop_group = mag_grp.createGroup("fluxloops")

        groups = {}
        for coil, sides in _FLUXLOOP_LAYOUT:
            coil_group = fluxloop_group.createGroup(coil)
            for side in sides:
                groups[coil, side] = coil_group.createGroup(side)

        coord_dtype = np.dtype([("r", "<f8"),
                                ("z", "<f8")])

        geometry_dtype = np.dtype([("phi_start", "<f4"),
                                   ("phi_end", "<f4")])

        fl_dtype = np.dtype([("name", "S50"),
                             ("version", "<f8"),
                             ("location", "S50"),
                             ("coordinate", coord_dtype),
                             ("geometry", geometry_dtype)])

        # Create the compound types in the file within the fluxloop group; the
        # nested types are registered before the type that uses them.
        fluxloop_group.createCompoundType(coord_dtype, "COORDINATE")
        fluxloop_group.createCompoundType(geometry_dtype, "GEOMETRY")
        lp_cp = fluxloop_group.createCompoundType(fl_dtype, "FLUXLOOP")

        # Dimension shared by every fluxloop variable (one record each)
        fluxloop_group.createDimension('singleDim', 1)

        # Variables are written lower-then-upper for each coil.
        for coil, sides in _FLUXLOOP_LAYOUT:
            for side in reversed(sides):
                _write_fluxloops(groups[coil, side],
                                 frames[coil, side],
                                 lp_cp,
                                 version,
                                 f"{coil.upper()} {side}")
 
if __name__ == "__main__":

    # Take a single timestamp so the release date, release time and creation
    # date always agree, even if the script runs across a midnight boundary.
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")

    # Metadata for the netcdf file (written as global attributes)
    headerdict = {
        "Conventions": "",
        "device": "MAST",
        "class": "magnetics",
        "system": "fluxloops",
        "configuration": "geometry",
        "shotRangeStart": 0,
        "shotRangeStop": 400000,
        "content": "geometry of the fluxloops for MAST",
        "comment": "",
        "units": "SI, degrees, m",
        "coordinateSystem": "Cylindrical",
        "structureCastType": "unknown",
        "calibration": "None",
        "version": 0,
        "revision": 0,
        "status": "development",
        "releaseDate": today,
        "releaseTime": now.strftime("%H:%M:%S"),
        "owner": "jhodson",
        "signedOffBy": "",
        "signedOffDate": "",
        "creatorCode": "python create_netcdf_fluxloops.py",
        "creationDate": today,
        "createdBy": "jhodson",
        "testCode": "",
        "testDate": "",
        "testedBy": ""
    }

    # Example usage
    parquet_to_netcdf("test.nc", headerdict)
