In [None]:
import os
import glob
import pandas as pd
import numpy as np
from netCDF4 import Dataset

# Configuration
csv_folder = '/data'
lat_column = 'Latitude'  # Adjust according to your CSV column name
lon_column = 'Longitude'  # Adjust according to your CSV column name
value_column = 'XCH4'  # Adjust according to your CSV column name
lat_min, lat_max = -90, 90
lon_min, lon_max = -180, 180
lat_resolution = 0.1
lon_resolution = 0.1

# Define grid parameters.
# The cell count is computed as an integer and the edges are generated with
# np.linspace. np.arange with a fractional step is subject to floating-point
# round-off, so the number of edges it returns is not guaranteed and its last
# edge need not equal the requested maximum.
# NOTE: this assumes the extent is a whole multiple of the resolution; if it
# is not, the cells are resized slightly so that they tile [min, max] exactly.
n_lat_cells = int(round((lat_max - lat_min) / lat_resolution))
n_lon_cells = int(round((lon_max - lon_min) / lon_resolution))
# Cell edges: one more edge than there are cells.
lat_bins = np.linspace(lat_min, lat_max, n_lat_cells + 1)
lon_bins = np.linspace(lon_min, lon_max, n_lon_cells + 1)
# Cell centres: midpoint of each pair of neighbouring edges.
lat_grid = (lat_bins[:-1] + lat_bins[1:]) / 2
lon_grid = (lon_bins[:-1] + lon_bins[1:]) / 2

# Build a fresh latitude x longitude array in which every cell starts as NaN
def initialize_empty_grid():
    """Return a new 2-D float array of shape (len(lat_grid), len(lon_grid)) filled with NaN."""
    grid_shape = (len(lat_grid), len(lon_grid))
    blank = np.empty(grid_shape)
    blank.fill(np.nan)
    return blank

# Function to process each CSV file and return data grid and timestamp
def process_csv_file(csv_file):
    """Grid the observations of one CSV file onto the global lat/lon grid.

    Every row is assigned to the cell whose edges (``lat_bins`` /
    ``lon_bins``) contain its coordinates. Rows that fall outside the grid or
    whose value is NaN are ignored. When several rows land in the same cell,
    the cell holds the mean of their values, so the result does not depend on
    the order of the rows in the file.

    Args:
        csv_file: Path of a CSV file containing the columns named by
            ``lat_column``, ``lon_column`` and ``value_column``.

    Returns:
        A ``(timestamp, grid)`` tuple. ``timestamp`` is the fifth
        underscore-separated field of the file name. ``grid`` is a float
        array of shape ``(len(lat_grid), len(lon_grid))`` with NaN in every
        cell that received no observation.

    Raises:
        KeyError: If one of the required columns is missing from the file.
        IndexError: If the file name has fewer than five
            underscore-separated fields.
    """
    print(csv_file)
    df = pd.read_csv(csv_file)
    df = df[[lat_column, lon_column, value_column]]

    # Extract timestamp from filename
    name_fields = os.path.basename(csv_file).split('_')  # Adjust based on filename pattern
    if len(name_fields) < 5:
        raise IndexError(
            f"Cannot extract timestamp from {csv_file!r}: expected at least "
            f"5 underscore-separated fields in the file name"
        )
    timestamp = name_fields[4]

    lats = df[lat_column].to_numpy(dtype=float)
    lons = df[lon_column].to_numpy(dtype=float)
    values = df[value_column].to_numpy(dtype=float)

    n_lat, n_lon = len(lat_grid), len(lon_grid)
    lat_indices = np.digitize(lats, lat_bins) - 1
    lon_indices = np.digitize(lons, lon_bins) - 1

    # np.digitize uses half-open bins [edge_i, edge_i+1), so a point lying
    # exactly on the upper boundary would land past the final cell and be
    # discarded. Fold such points into the last cell instead.
    lat_indices[lats == lat_max] = n_lat - 1
    lon_indices[lons == lon_max] = n_lon - 1

    keep = (
        (lat_indices >= 0) & (lat_indices < n_lat)
        & (lon_indices >= 0) & (lon_indices < n_lon)
        & ~np.isnan(values)  # a missing value must not erase a valid one
    )

    xch4_grid = initialize_empty_grid()
    if keep.any():
        # Plain fancy-index assignment keeps only the last value written to a
        # repeated index, so aggregate per cell explicitly: collapse each
        # (lat, lon) pair to one flat cell id, then average per unique id.
        flat_cells = lat_indices[keep] * n_lon + lon_indices[keep]
        cells, inverse = np.unique(flat_cells, return_inverse=True)
        sums = np.bincount(inverse, weights=values[keep], minlength=len(cells))
        counts = np.bincount(inverse, minlength=len(cells))
        xch4_grid[cells // n_lon, cells % n_lon] = sums / counts
    return timestamp, xch4_grid

# Process every CSV file and write one NetCDF file per input file.
# The file list is sorted so the processing order (and which file wins if two
# inputs share a timestamp) is deterministic; glob returns arbitrary order.
for csv_file in sorted(glob.glob(os.path.join(csv_folder, '*.csv'))):
    timestamp, xch4_grid = process_csv_file(csv_file)
    # Output is written next to the inputs, named after the file's timestamp.
    output_netcdf = os.path.join(csv_folder, timestamp + '01.nc')

    # The context manager closes the NetCDF file even if a write below fails,
    # so no open handle is left behind on error.
    with Dataset(output_netcdf, 'w', format='NETCDF4') as nc:
        # Create dimensions
        nc.createDimension('latitude', len(lat_grid))
        nc.createDimension('longitude', len(lon_grid))

        # Create coordinate variables holding the cell centres
        latitudes = nc.createVariable('latitude', 'f4', ('latitude',))
        longitudes = nc.createVariable('longitude', 'f4', ('longitude',))
        latitudes[:] = lat_grid
        longitudes[:] = lon_grid
        latitudes.units = 'degrees_north'
        longitudes.units = 'degrees_east'

        # Gridded XCH4 values; NaN marks cells without data
        var = nc.createVariable('xch4', 'f4', ('latitude', 'longitude',), fill_value=np.nan)
        var[:, :] = xch4_grid
        var.units = 'ppb'

        # Add global attributes
        nc.title = 'Global Gridded XCH4 Values'
        nc.source = 'Generated from multiple CSV files'
        nc.timestamps = timestamp

    print(f"NetCDF file created at {output_netcdf}")