Skip to content

Commit

Permalink
feat(ch4): add jupyter notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
yfr committed May 13, 2024
1 parent 999e46c commit 4f78af0
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 2 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
/storage/**/*.zip
/storage/basemaps
/download
*.ipynb
.ipynb*
.DS_Store
assets/legend-images/

pipeline/logs
pipeline/workdir
pipeline/*/__pycache__
service-account.json
service-account.json
119 changes: 119 additions & 0 deletions data/ch4-csv-netcdf4.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "7d28e820-0ad0-43b1-ae6d-253460083110",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import glob\n",
"import pandas as pd\n",
"import numpy as np\n",
"from netCDF4 import Dataset\n",
"\n",
"# Configuration\n",
"csv_folder = '/data'\n",
"lat_column = 'Latitude' # Adjust according to your CSV column name\n",
"lon_column = 'Longitude' # Adjust according to your CSV column name\n",
"value_column = 'XCH4' # Adjust according to your CSV column name\n",
"lat_min, lat_max = -90, 90\n",
"lon_min, lon_max = -180, 180\n",
"lat_resolution = 0.1\n",
"lon_resolution = 0.1\n",
"\n",
"# Define grid parameters\n",
"lat_bins = np.arange(lat_min, lat_max + lat_resolution, lat_resolution)\n",
"lon_bins = np.arange(lon_min, lon_max + lon_resolution, lon_resolution)\n",
"lat_grid = (lat_bins[:-1] + lat_bins[1:]) / 2\n",
"lon_grid = (lon_bins[:-1] + lon_bins[1:]) / 2\n",
"\n",
"# Initialize an empty grid with NaN values\n",
"def initialize_empty_grid():\n",
" return np.full((len(lat_grid), len(lon_grid)), np.nan)\n",
"\n",
"# Function to process each CSV file and return data grid and timestamp\n",
"def process_csv_file(csv_file):\n",
" print(csv_file)\n",
" df = pd.read_csv(csv_file)\n",
" df = df[[lat_column, lon_column, value_column]]\n",
"\n",
" # Extract timestamp from filename\n",
" timestamp = os.path.basename(csv_file).split('_')[4] # Adjust based on filename pattern\n",
"\n",
" lat_indices = np.digitize(df[lat_column], lat_bins) - 1\n",
" lon_indices = np.digitize(df[lon_column], lon_bins) - 1\n",
"\n",
" mask = (lat_indices >= 0) & (lat_indices < len(lat_grid)) & \\\n",
" (lon_indices >= 0) & (lon_indices < len(lon_grid))\n",
"\n",
" xch4_grid = initialize_empty_grid()\n",
" xch4_grid[lat_indices[mask], lon_indices[mask]] = df[value_column][mask]\n",
" return timestamp, xch4_grid\n",
"\n",
"# Process all CSV files and gather the data grids\n",
"for csv_file in glob.glob(os.path.join(csv_folder, '*.csv')):\n",
" data_grids = {}\n",
" timestamp, xch4_grid = process_csv_file(csv_file)\n",
" output_netcdf = '/data/' + timestamp + '01.nc'\n",
" data_grids[timestamp] = xch4_grid\n",
"\n",
" # Create a new NetCDF file\n",
" nc = Dataset(output_netcdf, 'w', format='NETCDF4')\n",
" \n",
" # Create dimensions\n",
" nc.createDimension('latitude', len(lat_grid))\n",
" nc.createDimension('longitude', len(lon_grid))\n",
" \n",
" # Create coordinate variables\n",
" latitudes = nc.createVariable('latitude', 'f4', ('latitude',))\n",
" longitudes = nc.createVariable('longitude', 'f4', ('longitude',))\n",
" \n",
" # Assign data to coordinate variables\n",
" latitudes[:] = lat_grid\n",
" longitudes[:] = lon_grid\n",
" \n",
" # Create a variable for each timestamp's XCH4 values\n",
" for timestamp, grid in data_grids.items():\n",
" var = nc.createVariable('xch4', 'f4', ('latitude', 'longitude',), fill_value=np.nan)\n",
" var[:, :] = grid\n",
" var.units = 'ppb'\n",
" \n",
" # Assign units and other attributes\n",
" latitudes.units = 'degrees_north'\n",
" longitudes.units = 'degrees_east'\n",
" \n",
" # Add global attributes\n",
" nc.title = 'Global Gridded XCH4 Values'\n",
" nc.source = 'Generated from multiple CSV files'\n",
" nc.timestamps = ', '.join(data_grids.keys())\n",
" \n",
" # Close the NetCDF file\n",
" nc.close()\n",
" print(f\"NetCDF file created at {output_netcdf}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 4f78af0

Please sign in to comment.