In [2]:
import netCDF4 as nc

file_path = 'AQUA_MODIS.20040101_20041231.L3m.YR.CHL.chlor_a.4km.nc'

dataset = nc.Dataset(file_path, 'r')

print("Variables and their dimensions:")
for var_name in dataset.variables.keys():
    var = dataset.variables[var_name]
    print(f"{var_name}: {var.shape}")

print("\nData in the file:")
for var_name in dataset.variables.keys():
    var = dataset.variables[var_name]
    print(f"\nVariable: {var_name}")
    print(var[:])

dataset.close()


Variables and their dimensions:
chlor_a: (4320, 8640)
lat: (4320,)
lon: (8640,)
palette: (3, 256)

Data in the file:

Variable: chlor_a
[[-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 ...
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]]

Variable: lat
[ 89.979164  89.9375    89.895836 ... -89.895836 -89.93751  -89.97917 ]

Variable: lon
[-179.97917 -179.9375  -179.89583 ...  179.89584  179.93752  179.97917]

Variable: palette
[[147 0 108 144 0 111 141 0 114 138 0 117 135 0 120 132 0 123 129 0 126
  126 0 129 123 0 132 120 0 135 117 0 138 114 0 141 111 0 144 108 0 147
  105 0 150 102 0 153 99 0 -- 0 106 -- 0 112 -- 0 117 -- 0 122 -- 0 128
  -- 0 133 -- 0 138 -- 0 144 -- 0 149 -- 0 154 -- 0 160 -- 0 165 -- 0 170
  -- 0 176 -- 0 181 -- 0 186 -- 0]
 [192 -- 0 197 -- 0 202 -- 0 208 -- 0 213 -- 0 218 -- 0 224 -- 0 229 -- 0
  234 -- 0 240 -- 0 245 -- 0 250 -- 0 -- -- 0 -- 247 0 -- 239 0 -- 231 0
  -- 223 208 -- 0 216 -- 0 224 -- 0 232 -- 

In [3]:
import pandas as pd

file_path = 'AQUA_MODIS.20040101_20041231.L3m.YR.CHL.chlor_a.4km.nc'
output_csv = 'output.csv'

dataset = nc.Dataset(file_path, 'r')

latitudes = dataset.variables['lat'][:]
longitudes = dataset.variables['lon'][:]
chlorophyll = dataset.variables['chlor_a'][:]

dataset.close()

In [4]:
import numpy as np
# Use meshgrid to create pairs of latitudes and longitudes
lon_grid, lat_grid = np.meshgrid(longitudes, latitudes)
fill_value=-32767.0

lat_flat = lat_grid.flatten()
lon_flat = lon_grid.flatten()
chlorophyll = np.where(chlorophyll == fill_value, np.nan,chlorophyll)
chlorophyll_flat = chlorophyll.flatten()

df = pd.DataFrame({
    'Latitude': lat_flat,
    'Longitude': lon_flat,
    'Chlorophyll': chlorophyll_flat
})

df.to_csv(output_csv, index=False)

print(f"Data has been successfully converted to {output_csv}")

Data has been successfully converted to output.csv


In [None]:
dataset=pd.read_csv('output.csv')

In [21]:
dataset.dropna()

Unnamed: 0,Latitude,Longitude,Chlorophyll
1195213,84.229164,-59.437496,0.013342
1195214,84.229164,-59.395830,0.013342
1195215,84.229164,-59.354164,0.013342
1195216,84.229164,-59.312496,0.013342
1195217,84.229164,-59.270830,0.013342
...,...,...,...
34957844,-78.604170,-163.145830,0.139341
34957845,-78.604170,-163.104170,0.136865
34957846,-78.604170,-163.062500,0.136865
34957847,-78.604170,-163.020830,0.136865


In [22]:
dataset.head()

Unnamed: 0,Latitude,Longitude,Chlorophyll
0,89.979164,-179.97917,
1,89.979164,-179.9375,
2,89.979164,-179.89583,
3,89.979164,-179.85417,
4,89.979164,-179.8125,


In [10]:
data[1]

[89.979164, -179.9375, masked]

In [9]:
file_path = 'AQUA_MODIS.20040101_20041231.L3m.YR.CHL.chlor_a.4km.nc'

dataset = nc.Dataset(file_path, 'r')
fill_value=-32767.0
latitudes = dataset.variables['lat'][:]
longitudes = dataset.variables['lon'][:]
chlorophyll = dataset.variables['chlor_a'][:]
print(chlorophyll)
chlorophyll = np.where(chlorophyll == fill_value, np.nan,chlorophyll)
print(chlorophyll_flat)
print(chlorophyll_flat[~np.isnan(chlorophyll_flat)])

[[-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 ...
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]]
[-32767. -32767. -32767. ... -32767. -32767. -32767.]
[-32767. -32767. -32767. ... -32767. -32767. -32767.]


In [20]:
import netCDF4 as nc
import numpy as np

file_path = 'AQUA_MODIS.20040101_20041231.L3m.YR.CHL.chlor_a.4km.nc'

dataset = nc.Dataset(file_path, 'r')
fill_value = -32767.0

latitudes = dataset.variables['lat'][:]
longitudes = dataset.variables['lon'][:]
chlorophyll = np.array(dataset.variables['chlor_a'][:])

print("Original Chlorophyll Data:")
print(chlorophyll)

print(np.any(chlorophyll == fill_value))

chlorophyll = np.where(chlorophyll == fill_value, np.nan, chlorophyll)

print(np.any(chlorophyll == fill_value))
#chlorophyll = np.where(chlorophyll == fill_value, np.nan, chlorophyll)
#print(np.any(chlorophyll == fill_value))
chlorophyll_flat = chlorophyll.flatten()

print("Flattened Chlorophyll Data:")
print(chlorophyll_flat)

print("Non-NaN Chlorophyll Values:")
print(chlorophyll_flat[~np.isnan(chlorophyll_flat)])


Original Chlorophyll Data:
[[-32767. -32767. -32767. ... -32767. -32767. -32767.]
 [-32767. -32767. -32767. ... -32767. -32767. -32767.]
 [-32767. -32767. -32767. ... -32767. -32767. -32767.]
 ...
 [-32767. -32767. -32767. ... -32767. -32767. -32767.]
 [-32767. -32767. -32767. ... -32767. -32767. -32767.]
 [-32767. -32767. -32767. ... -32767. -32767. -32767.]]
True
False
Flattened Chlorophyll Data:
[nan nan nan ... nan nan nan]
Non-NaN Chlorophyll Values:
[0.01334156 0.01334155 0.01334155 ... 0.13686493 0.13686493 0.13686493]
