In [54]:
import netCDF4 as nc

dataset = nc.Dataset('AQUA_MODIS.20020101_20021231.L3m.YR.CHL.chlor_a.4km.nc')

print("Variables in the dataset:")
print(dataset.variables.keys())

i=1

for var_name in dataset.variables:
    print(" data no. ::",i,"\n")
    i+=1
    var = dataset.variables[var_name]
    print(f"Variable: {var_name}")
    print(f"Dimensions: {var.dimensions}")
    print(f"Shape: {var.shape}")
    print(f"Attributes:")
    for attr_name in var.ncattrs():
        print(f"  {attr_name}: {getattr(var, attr_name)}")
    print()

dataset.close()

Variables in the dataset:
dict_keys(['chlor_a', 'lat', 'lon', 'palette'])
 data no. :: 1 

Variable: chlor_a
Dimensions: ('lat', 'lon')
Shape: (4320, 8640)
Attributes:
  long_name: Chlorophyll Concentration, OCI Algorithm
  units: mg m^-3
  standard_name: mass_concentration_of_chlorophyll_in_sea_water
  _FillValue: -32767.0
  valid_min: 0.0010000000474974513
  valid_max: 100.0
  reference: Hu, C., Lee Z., and Franz, B.A. (2012). Chlorophyll-a algorithms for oligotrophic oceans: A novel approach based on three-band reflectance difference, J. Geophys. Res., 117, C01011, doi:10.1029/2011JC007395.
  display_scale: log
  display_min: 0.009999999776482582
  display_max: 20.0

 data no. :: 2 

Variable: lat
Dimensions: ('lat',)
Shape: (4320,)
Attributes:
  long_name: Latitude
  units: degrees_north
  standard_name: latitude
  _FillValue: -999.0
  valid_min: -90.0
  valid_max: 90.0

 data no. :: 3 

Variable: lon
Dimensions: ('lon',)
Shape: (8640,)
Attributes:
  long_name: Longitude
  units: d

In [39]:
import netCDF4 as nc
import numpy as np

dataset = nc.Dataset('AQUA_MODIS.20020101_20021231.L3m.YR.CHL.chlor_a.4km.nc')

chlorophyll_var = dataset.variables['chlor_a']
latitudes = dataset.variables['lat'][:]
longitudes = dataset.variables['lon'][:]
chlorophyll_data = chlorophyll_var[:]    
print("Chlorophyll Data Shape:", chlorophyll_data.shape)
    
print("Sample Chlorophyll Data:")
print(chlorophyll_data[:10, :10])  # Print a small subset of the data for illustration
    
for i in range(0, len(latitudes), len(latitudes) // 10):  
    for j in range(0, len(longitudes), len(longitudes) // 10):
        print(f"Lat: {latitudes[i]}, Lon: {longitudes[j]}, Chlorophyll: {chlorophyll_data[i, j]}")


dataset.close()



Chlorophyll Data Shape: (4320, 8640)
Sample Chlorophyll Data:
[[-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]
 [-- -- -- -- -- -- -- -- -- --]]
Lat: 89.97916412353516, Lon: -179.9791717529297, Chlorophyll: --
Lat: 89.97916412353516, Lon: -143.9791717529297, Chlorophyll: --
Lat: 89.97916412353516, Lon: -107.97916412353516, Chlorophyll: --
Lat: 89.97916412353516, Lon: -71.97916412353516, Chlorophyll: --
Lat: 89.97916412353516, Lon: -35.979164123535156, Chlorophyll: --
Lat: 89.97916412353516, Lon: 0.020838698372244835, Chlorophyll: --
Lat: 89.97916412353516, Lon: 36.02083969116211, Chlorophyll: --
Lat: 89.97916412353516, Lon: 72.02084350585938, Chlorophyll: --
Lat: 89.97916412353516, Lon: 108.02084350585938, Chlorophyll: --
Lat: 89.979164123535

In [25]:
import netCDF4 as nc
import numpy as np

dataset = nc.DatasetAQUA_MODIS.20020101_20021231.L3m.YR.CHL.chlor_a.4km.nc')

try:
    chlorophyll_data = dataset.variables['chlor_a'][:]
    
    chlorophyll_flat = chlorophyll_data.flatten() # Flatten the data to a 1D array
    
    total_count = chlorophyll_flat.size # Calculate the number of total elements including NaNs
    
    chlorophyll_non_nan = chlorophyll_flat[~np.isnan(chlorophyll_flat)] # Remove NaN values for the following calculations

    mean_value = np.nansum(chlorophyll_flat) / total_count # Calculate mean, considering the total count including NaNs
    
    std_dev = np.nanstd(chlorophyll_flat) # Calculate standard deviation, ignoring NaNs
    
    thresholds = np.nanpercentile(chlorophyll_flat, [50, 80, 85, 90, 95]) # Calculate threshold values, ignoring NaNs
    
    print(f"Mean Chlorophyll Value (including NaNs in count): {mean_value}")
    print(f"Standard Deviation of Chlorophyll: {std_dev}")
    print(f"50% Threshold (Median): {thresholds[0]}")
    print(f"80% Threshold: {thresholds[1]}")
    print(f"85% Threshold: {thresholds[2]}")
    print(f"90% Threshold: {thresholds[3]}")
    print(f"95% Threshold: {thresholds[4]}")
    
except KeyError:
    print("The variable 'chlorophyll' does not exist in the dataset.")

# Close the dataset when done
dataset.close()


Mean Chlorophyll Value (including NaNs in count): 0.2713905499828532
Standard Deviation of Chlorophyll: 1.7573108673095703
50% Threshold (Median): 0.05408300831913948
80% Threshold: 0.22539727687835698
85% Threshold: 0.303694312274456
90% Threshold: --
95% Threshold: --


In [34]:
import netCDF4 as nc
import numpy as np
import pandas as pd

# Open the NetCDF file
dataset = nc.Dataset('AQUA_MODIS.20020101_20021231.L3m.YR.CHL.chlor_a.4km.nc')

# Access the chlorophyll data
chlorophyll_data = dataset.variables['chlor_a'][:]

# Flatten the data to a 1D array, ignoring NaN values
chlorophyll_flat = chlorophyll_data.flatten()
print(chlorophyll_flat.shape)
for i in range(0,len(chlorophyll_flat),len(chlorophyll_flat)//30) :
    print(chlorophyll_flat[i],"    ")
print("\n")

np_mask=np.isnan(chlorophyll_flat)
chlorophyll_flat=chlorophyll_flat[~np_mask]

print(chlorophyll_flat.shape)

for i in range(0,len(chlorophyll_flat),len(chlorophyll_flat)//30) :
    print(chlorophyll_flat[i],"    ")

# Calculate mean and standard deviation, excluding NaNs
mean_value = np.mean(chlorophyll_flat)
std_dev = np.std(chlorophyll_flat)

# Calculate threshold values, excluding NaNs
thresholds = np.percentile(chlorophyll_flat, [50, 80, 85, 90, 95])

# Print the results
print(f"Mean Chlorophyll Value (excluding NaNs): {mean_value}")
print(f"Standard Deviation of Chlorophyll (excluding NaNs): {std_dev}")
print(f"50% Threshold (Median): {thresholds[0]}")
print(f"80% Threshold: {thresholds[1]}")
print(f"85% Threshold: {thresholds[2]}")
print(f"90% Threshold: {thresholds[3]}")
print(f"95% Threshold: {thresholds[4]}")

# Close the dataset when done
dataset.close()


(37324800,)
--     
--     
--     
0.62537783     
--     
2.0386786     
0.73767656     
0.27157608     
0.26030195     
0.13116094     
0.06527373     
0.05059677     
0.03244455     
0.037151903     
0.037155163     
0.068611935     
0.08054615     
0.04584014     
0.093482755     
0.0648528     
0.15914991     
0.27275884     
0.5677251     
0.20385638     
0.172366     
0.28279415     
0.64808637     
--     
--     
--     


(37324800,)
--     
--     
--     
0.62537783     
--     
2.0386786     
0.73767656     
0.27157608     
0.26030195     
0.13116094     
0.06527373     
0.05059677     
0.03244455     
0.037151903     
0.037155163     
0.068611935     
0.08054615     
0.04584014     
0.093482755     
0.0648528     
0.15914991     
0.27275884     
0.5677251     
0.20385638     
0.172366     
0.28279415     
0.64808637     
--     
--     
--     
Mean Chlorophyll Value (excluding NaNs): 0.4757873124704764
Standard Deviation of Chlorophyll (excluding NaNs): 1.75731173744321

In [80]:
import netCDF4 as nc
import numpy as np

# Open netCDF file
nc_file = nc.Dataset('AQUA_MODIS.20020101_20021231.L3m.YR.CHL.chlor_a.4km.nc', 'r')  # Open file in read mode

# Assuming you have a variable named 'data' in the netCDF file
data = nc_file.variables['chlor_a'][:]
for i in range(0,len(data),len(data)//10) :
    print(data[i])

# Define your fill value (replace with your actual fill value)
fill_value = -32767.0
  # Example fill value
# Create a mask for fill values in the 2D array
fill_mask = (data != fill_value)

# Apply the mask to filter out fill values
valid_data = np.where(fill_mask, data, np.nan)

# Remove fill values
for i in range(0,len(valid_data),len(valid_data)//10) :
    print(valid_data[i])
# Close the netCDF file
flatten_data=valid_data.flatten()

for i in range(0,len(flatten_data),len(flatten_data)//10) :
    print(flatten_data[i])

print(flatten_data.shape)
flatten_data=flatten_data[flatten_data != fill_value]
print(flatten_data.shape)


for i in range(0,len(flatten_data),len(flatten_data)//10) :
    print(flatten_data[i])
mean_value = np.mean(flatten_data)
std_dev = np.std(flatten_data)

# Calculate threshold values, excluding NaNs
thresholds = np.percentile(flatten_data, [50, 80, 85, 90, 95])

# Print the results
print(f"Mean Chlorophyll Value : {mean_value}")
print(f"Standard Deviation of Chlorophyll : {std_dev}")
print(f"50% Threshold (Median): {thresholds[0]}")
print(f"80% Threshold: {thresholds[1]}")
print(f"85% Threshold: {thresholds[2]}")
print(f"90% Threshold: {thresholds[3]}")
print(f"95% Threshold: {thresholds[4]}")


[-- -- -- ... -- -- --]
[0.6253778338432312 0.6253778338432312 0.6253778338432312 ...
 0.6102795004844666 0.6102795004844666 0.6102795004844666]
[0.7376765608787537 0.7381399273872375 0.7390739321708679 ...
 0.7589828372001648 0.7483915090560913 0.7429216504096985]
[0.1311609447002411 0.11578325927257538 0.10906032472848892 ...
 0.14048294723033905 0.13759779930114746 0.13091370463371277]
[0.0324445515871048 0.03389525040984154 0.03382660448551178 ...
 0.033401694148778915 0.03372397646307945 0.03250875696539879]
[0.06861193478107452 0.06272122263908386 0.07242410629987717 ...
 0.07023441791534424 0.06439971178770065 0.06806684285402298]
[0.09348275512456894 0.09091464430093765 0.09373467415571213 ...
 0.08912638574838638 0.08792906999588013 0.09064151346683502]
[0.2727588415145874 0.26620569825172424 0.2613604962825775 ...
 0.28461989760398865 0.27013063430786133 0.27328088879585266]
[0.17236599326133728 0.17725075781345367 0.19157181680202484 ...
 0.22245025634765625 0.21287375688552

In [85]:
import netCDF4 as nc
import numpy as np

# Open netCDF file
nc_file = nc.Dataset('AQUA_MODIS.20020101_20021231.L3m.YR.CHL.chlor_a.4km.nc', 'r')  # Open file in read mode

# Assuming you have a variable named 'chlor_a' in the netCDF file
data = nc_file.variables['chlor_a'][:]

# Define fill value
fill_value = -32767.0

# Replace fill values with NaN in the original data
data[data == fill_value] = np.nan

# Close the netCDF file
nc_file.close()

# Print a subset of original data for verification
print("Original data (subset):")
for i in range(0, len(data), len(data) // 10):
    print(data[i])

# Print a subset of valid data after removing fill values
print("\nValid data without fill values (subset):")
valid_data = np.where(~np.isnan(data), data, np.nan)
for i in range(0, len(valid_data), len(valid_data) // 10):
    print(valid_data[i])

# Flatten valid data to 1D array
flatten_data = valid_data.flatten()

# Print a subset of flattened data for verification
print("\nFlattened data (subset):")
print(flatten_data.shape,"\n")
for i in range(0, len(flatten_data), len(flatten_data) // 10):
    print(flatten_data[i])
print("\n")

# Remove fill values (-32767.0) from flattened data
flatten_data = flatten_data[flatten_data != fill_value]
print(flatten_data.shape,"\n")

for i in range(0, len(flatten_data), len(flatten_data) // 10):
    print(flatten_data[i])

# Calculate statistics
mean_value = np.mean(flatten_data)
std_dev = np.std(flatten_data)
thresholds = np.percentile(flatten_data, [50, 80, 85, 90, 95])

# Print the results
print("\nStatistics:")
print(f"Mean Chlorophyll Value: {mean_value}")
print(f"Standard Deviation of Chlorophyll: {std_dev}")
print(f"50% Threshold (Median): {thresholds[0]}")
print(f"80% Threshold: {thresholds[1]}")
print(f"85% Threshold: {thresholds[2]}")
print(f"90% Threshold: {thresholds[3]}")
print(f"95% Threshold: {thresholds[4]}")


Original data (subset):
[-- -- -- ... -- -- --]
[0.6253778338432312 0.6253778338432312 0.6253778338432312 ...
 0.6102795004844666 0.6102795004844666 0.6102795004844666]
[0.7376765608787537 0.7381399273872375 0.7390739321708679 ...
 0.7589828372001648 0.7483915090560913 0.7429216504096985]
[0.1311609447002411 0.11578325927257538 0.10906032472848892 ...
 0.14048294723033905 0.13759779930114746 0.13091370463371277]
[0.0324445515871048 0.03389525040984154 0.03382660448551178 ...
 0.033401694148778915 0.03372397646307945 0.03250875696539879]
[0.06861193478107452 0.06272122263908386 0.07242410629987717 ...
 0.07023441791534424 0.06439971178770065 0.06806684285402298]
[0.09348275512456894 0.09091464430093765 0.09373467415571213 ...
 0.08912638574838638 0.08792906999588013 0.09064151346683502]
[0.2727588415145874 0.26620569825172424 0.2613604962825775 ...
 0.28461989760398865 0.27013063430786133 0.27328088879585266]
[0.17236599326133728 0.17725075781345367 0.19157181680202484 ...
 0.2224502563