In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import os
import subprocess
from datetime import datetime, timedelta
import json
import argparse

In [38]:
# Scratch cell: pull ONE variable's time series at the grid point nearest to
# the target location, so the raw netCDF4 behaviour can be inspected below.
nc_file = './output/netcdf/2023/0101.nc'
variables_of_interest = ['psea', 'sp', 'u', 'v', 'temp', 'rh', 'r1h', 'ncld', 'dswrf']
target_lat = 43.5789
target_lon = 144.5288

# The dataset is intentionally left open: later cells display `var`, which
# needs a live file handle.
dataset = nc.Dataset(nc_file)
lats = dataset.variables['lat'][:]
lons = dataset.variables['lon'][:]

# Nearest grid point along each coordinate axis.
lat_idx = np.abs(lats - target_lat).argmin()
lon_idx = np.abs(lons - target_lon).argmin()

times = dataset.variables['time'][:]
data = {'time': nc.num2date(times, units=dataset.variables['time'].units)}

var_name = variables_of_interest[2]
var = dataset.variables[var_name]
values = var[:, lat_idx, lon_idx]

# Packing attributes are read for inspection only. netCDF4 applies
# scale_factor/add_offset automatically when a Variable is sliced, so `values`
# is already in physical units and must NOT be rescaled again here.
scale_factor = getattr(var, 'scale_factor', 1.0)
add_offset = getattr(var, 'add_offset', 0.0)
corrected_values = values

# Convert temperature to Celsius if the variable is 'temp'.
# Compare the variable *name*: `var` is a netCDF4.Variable object and never
# equals a string. Build a new array instead of using `-=`, because
# `corrected_values` is the same object as `values`.
if var_name == 'temp':
    corrected_values = corrected_values - 273.15  # Convert Kelvin to Celsius

# NOTE: the series is stored under the Variable object itself (the cells below
# read it back with `data[var]`), so `data['u']` will not find it.
data[var] = corrected_values

In [41]:
# Packing scale factor read from the selected variable's attributes (falls back to 1.0 when the attribute is absent).
scale_factor

np.float64(0.006116208155)

In [34]:
# Type of the stored series; the entry is keyed by the Variable object `var`, not by the variable's name.
type(data[var])

numpy.ma.MaskedArray

In [29]:
# Raises KeyError: the extraction cell stores the series with `data[var] = ...`, i.e. under the
# netCDF4 Variable object, so no entry exists under the string key 'u'.
data['u']

KeyError: 'u'

In [12]:
# Show the selected Variable's metadata (dtype, dimensions, packing attributes).
# NOTE(review): the saved output describes `sp`, while the extraction cell currently selects
# index 2 ('u'); this output looks stale (execution count 12 vs. 38) - re-run to refresh.
var

<class 'netCDF4.Variable'>
int16 sp(time, lat, lon)
    scale_factor: 0.9174311758
    add_offset: 80000.0
    long_name: surface air pressure
    units: Pa
    standard_name: surface_air_pressure
unlimited dimensions: 
current shape = (24, 505, 481)
filling on, default _FillValue of -32767 used

In [33]:
# Check what slicing the netCDF4 Variable returned (a NumPy masked array, per the saved output).
type(values)

numpy.ma.MaskedArray

In [39]:
# Same array as `values`: the extraction cell assigns `corrected_values = values` without rescaling.
corrected_values

masked_array(data=[1.44342512, 1.44342512, 1.73700312, 2.12844044,
                   2.44648326, 2.49541293, 1.97553523, 1.79204899,
                   1.44342512, 1.49235479, 1.5718655 , 1.58409791,
                   1.14984713, 1.21712542, 1.03975539, 0.96636089,
                   0.88685018, 1.08868505, 1.96941903, 2.3975536 ,
                   2.12232423, 1.39449546, 1.65749241, 1.33333338],
             mask=False,
       fill_value=1e+20)

In [40]:
# Read the series back through the Variable object, which is the key the extraction cell used.
data[var]

masked_array(data=[1.44342512, 1.44342512, 1.73700312, 2.12844044,
                   2.44648326, 2.49541293, 1.97553523, 1.79204899,
                   1.44342512, 1.49235479, 1.5718655 , 1.58409791,
                   1.14984713, 1.21712542, 1.03975539, 0.96636089,
                   0.88685018, 1.08868505, 1.96941903, 2.3975536 ,
                   2.12232423, 1.39449546, 1.65749241, 1.33333338],
             mask=False,
       fill_value=1e+20)

In [35]:
# Slice at the nearest grid point exactly as netCDF4 returned it. The saved output is already
# floating-point, so no manual scale_factor/add_offset step is needed on top of it.
values

masked_array(data=[1.44342512, 1.44342512, 1.73700312, 2.12844044,
                   2.44648326, 2.49541293, 1.97553523, 1.79204899,
                   1.44342512, 1.49235479, 1.5718655 , 1.58409791,
                   1.14984713, 1.21712542, 1.03975539, 0.96636089,
                   0.88685018, 1.08868505, 1.96941903, 2.3975536 ,
                   2.12232423, 1.39449546, 1.65749241, 1.33333338],
             mask=False,
       fill_value=1e+20)

In [17]:
def extract_msm_data_to_csv(nc_file, target_lat, target_lon, output_csv):
    """Export the MSM time series at the grid point nearest a location to CSV.

    For every variable in ``variables_of_interest`` the series at the grid
    point closest to (``target_lat``, ``target_lon``) is read, ``temp`` is
    converted from Kelvin to Celsius, and ``wind_direction`` / ``wind_speed``
    are derived from ``u`` and ``v``.

    Parameters
    ----------
    nc_file : str
        Path of the netCDF file. It must contain ``lat``, ``lon`` and ``time``
        (with a ``units`` attribute) plus the variables of interest, each
        indexed as ``(time, lat, lon)``.
    target_lat, target_lon : float
        Location of interest, in the same units as the file's ``lat``/``lon``.
    output_csv : str
        Destination path of the CSV file (written without the index column).

    Returns
    -------
    None
        The result is written to ``output_csv`` and a confirmation is printed.

    Notes
    -----
    netCDF4 unpacks ``scale_factor`` / ``add_offset`` automatically when a
    Variable is sliced, so the values read here are already physical values.
    Applying the packing attributes a second time (as an earlier version did)
    corrupts every column.
    """
    variables_of_interest = ['psea', 'sp', 'u', 'v', 'temp', 'rh', 'r1h', 'ncld', 'dswrf']

    # The context manager guarantees the file handle is released even when a
    # variable is missing or a read fails.
    with nc.Dataset(nc_file) as dataset:
        lats = dataset.variables['lat'][:]
        lons = dataset.variables['lon'][:]

        # Nearest grid point along each coordinate axis.
        lat_idx = np.abs(lats - target_lat).argmin()
        lon_idx = np.abs(lons - target_lon).argmin()

        time_var = dataset.variables['time']
        data = {'time': nc.num2date(time_var[:], units=time_var.units)}

        for var_name in variables_of_interest:
            # Already unpacked by netCDF4 - do not rescale.
            values = dataset.variables[var_name][:, lat_idx, lon_idx]

            # Convert temperature to Celsius if the variable is 'temp'
            if var_name == 'temp':
                values = values - 273.15  # Convert Kelvin to Celsius

            data[var_name] = values

    # Calculate wind direction and speed.
    # Direction is in degrees within [0, 360); assuming u/v are the eastward and
    # northward components, this is the direction the wind blows FROM
    # (0 = north, 90 = east).
    data['wind_direction'] = (270 - np.degrees(np.arctan2(data['v'], data['u']))) % 360
    data['wind_speed'] = np.sqrt(data['u']**2 + data['v']**2)

    df = pd.DataFrame(data)
    df.to_csv(output_csv, index=False)

    print(f"Data successfully extracted to {output_csv}")

In [18]:
# Run the extraction for one day of MSM data at the target location.
nc_file, output_csv = 'output/netcdf/2023/0101.nc', 'test.csv'
target_lat, target_lon = 43.5789, 144.5288
extract_msm_data_to_csv(
    nc_file=nc_file,
    target_lat=target_lat,
    target_lon=target_lon,
    output_csv=output_csv,
)

Data successfully extracted to test.csv
