# Verifying Seebuoy data against NDBC realtime data for Station 44097

In [101]:
import sys

# Install seebuoy using the interpreter that runs this kernel (sys.executable),
# so the package is installed for the same Python that will import it.
!{sys.executable} -m pip install seebuoy

Defaulting to user installation because normal site-packages is not writeable


In [102]:
import os

# Current working directory; the downloaded station file is written here.
path = os.getcwd()
# Station identifier used for both the raw NDBC download and the seebuoy query.
buoyNum = 44097

In [103]:
import requests

# Download the station's realtime text file straight from NDBC.
url = f"https://www.ndbc.noaa.gov/data/realtime2/{buoyNum}.txt"
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(url, allow_redirects=True, timeout=30)
# Fail loudly on HTTP errors so an error page is never saved as if it were data.
r.raise_for_status()

# The context manager flushes and closes the file before a later cell reads it back.
with open(f"{path}/{buoyNum}.txt", 'wb') as raw_file:
    bytes_written = raw_file.write(r.content)

# Number of bytes saved (shown as the cell output).
bytes_written

205484

In [104]:
from seebuoy import NDBC
import pandas as pd
import numpy as np

# The seebuoy client gets its own name: `ndbc` is rebound to the raw-text
# DataFrame further down, and one name should not mean two different things.
ndbc_client = NDBC()

# Information on NDBC's ~1800 buoys and gliders
wave_df = ndbc_client.stations()

# list all available data for all buoys
df_data = ndbc_client.available_data()

# SEEBUOY DATA for the station selected by buoyNum.
# Rows are reversed so they run newest-first, the same order as the raw NDBC
# text file. .copy() makes the result an independent frame rather than a slice
# of the fetched one, so later edits to see_buoy cannot act on shared data.
see_buoy = ndbc_client.get_data(str(buoyNum)).iloc[::-1].copy()
see_buoy

Unnamed: 0_level_0,wind_direction,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,air_temp,water_temp,dewpoint,visibility,pressure_tendency,tide
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-02-08 22:26:00,,,,1.1,11,6.5,121,,,5.4,,,,
2024-02-08 21:56:00,,,,1.1,11,6.7,121,,,5.5,,,,
2024-02-08 21:26:00,,,,1.2,8,6.7,118,,,5.6,,,,
2024-02-08 20:56:00,,,,1.2,11,7.0,122,,,5.7,,,,
2024-02-08 20:26:00,,,,1.2,10,7.1,122,,,5.8,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-25 02:26:00,,,,0.8,10,5.0,145,,,10.3,,,,
2023-12-25 01:56:00,,,,0.8,11,4.9,136,,,10.3,,,,
2023-12-25 01:26:00,,,,0.9,6,4.7,169,,,10.3,,,,
2023-12-25 00:56:00,,,,0.9,6,4.8,165,,,10.3,,,,


In [105]:
# NDBC TXT
# Character width of each fixed-width column in the downloaded text file
column_widths = [4, 3, 3, 3, 3, 5, 5, 5, 6, 6, 5, 5, 7, 7, 5, 5, 6, 4, 6]

# Read the file from the exact location the download cell wrote it to
# ({path}/{buoyNum}.txt) instead of relying on the working directory being unchanged.
ndbc = pd.read_fwf(f"{path}/{buoyNum}.txt", widths=column_widths, header=[0])
# Discard row 0 — presumably the units line that follows the column-name header
# in NDBC realtime files (TODO confirm) — so only observations remain.
ndbc = ndbc.drop(index=0)
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,PTDY,TIDE
1,2024,02,08,22,26,MM,MM,MM,1.1,11,6.5,121,MM,MM,5.4,MM,MM,MM,MM
2,2024,02,08,21,56,MM,MM,MM,1.1,11,6.7,121,MM,MM,5.5,MM,MM,MM,MM
3,2024,02,08,21,26,MM,MM,MM,1.2,8,6.7,118,MM,MM,5.6,MM,MM,MM,MM
4,2024,02,08,20,56,MM,MM,MM,1.2,11,7.0,122,MM,MM,5.7,MM,MM,MM,MM
5,2024,02,08,20,26,MM,MM,MM,1.2,10,7.1,122,MM,MM,5.8,MM,MM,MM,MM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2180,2023,12,25,02,26,MM,MM,MM,0.8,10,5.0,145,MM,MM,10.3,MM,MM,MM,MM
2181,2023,12,25,01,56,MM,MM,MM,0.8,11,4.9,136,MM,MM,10.3,MM,MM,MM,MM
2182,2023,12,25,01,26,MM,MM,MM,0.9,6,4.7,169,MM,MM,10.3,MM,MM,MM,MM
2183,2023,12,25,00,56,MM,MM,MM,0.9,6,4.8,165,MM,MM,10.3,MM,MM,MM,MM


In [106]:
# Consolidate the split date/time columns into one "YYYY-MM-DD hh:mm:00" string
# column. pd.to_datetime assembles timestamps from year/month/day/hour/minute
# columns in one vectorised call (no per-row Python formatting) and rejects
# impossible dates; strftime keeps `date` a string as before.
stamp_parts = (
    ndbc[['#YY', 'MM', 'DD', 'hh', 'mm']]
    .astype(int)
    .rename(columns={'#YY': 'year', 'MM': 'month', 'DD': 'day', 'hh': 'hour', 'mm': 'minute'})
)

# Each step returns a new frame, so re-running the cell never works on a
# half-modified object:
#   1. add the consolidated date column
#   2. replace NDBC's column labels with see_buoy's column names
#   3. replace the 'MM' missing-value marker with NaN
ndbc = (
    ndbc
    .assign(date=pd.to_datetime(stamp_parts).dt.strftime('%Y-%m-%d %H:%M:00'))
    .rename(columns={'WDIR': 'wind_direction', 'WSPD': 'wind_speed', 'GST': 'wind_gust',
                     'WVHT': 'wave_height', 'DPD': 'dominant_period',
                     'APD': 'average_period', 'MWD': 'mean_wave_direction',
                     'PRES': 'pressure', 'ATMP': 'air_temp', 'WTMP': 'water_temp',
                     'DEWP': 'dewpoint', 'VIS': 'visibility',
                     'PTDY': 'pressure_tendency', 'TIDE': 'tide'})
    .replace('MM', np.nan)
)
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,wind_direction,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,air_temp,water_temp,dewpoint,visibility,pressure_tendency,tide,date
1,2024,02,08,22,26,,,,1.1,11,6.5,121,,,5.4,,,,,2024-02-08 22:26:00
2,2024,02,08,21,56,,,,1.1,11,6.7,121,,,5.5,,,,,2024-02-08 21:56:00
3,2024,02,08,21,26,,,,1.2,8,6.7,118,,,5.6,,,,,2024-02-08 21:26:00
4,2024,02,08,20,56,,,,1.2,11,7.0,122,,,5.7,,,,,2024-02-08 20:56:00
5,2024,02,08,20,26,,,,1.2,10,7.1,122,,,5.8,,,,,2024-02-08 20:26:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2180,2023,12,25,02,26,,,,0.8,10,5.0,145,,,10.3,,,,,2023-12-25 02:26:00
2181,2023,12,25,01,56,,,,0.8,11,4.9,136,,,10.3,,,,,2023-12-25 01:56:00
2182,2023,12,25,01,26,,,,0.9,6,4.7,169,,,10.3,,,,,2023-12-25 01:26:00
2183,2023,12,25,00,56,,,,0.9,6,4.8,165,,,10.3,,,,,2023-12-25 00:56:00


In [107]:
# Show ndbc's column labels as they stand after the rename.
ndbc.columns

Index(['#YY', 'MM', 'DD', 'hh', 'mm', 'wind_direction', 'wind_speed',
       'wind_gust', 'wave_height', 'dominant_period', 'average_period',
       'mean_wave_direction', 'pressure', 'air_temp', 'water_temp', 'dewpoint',
       'visibility', 'pressure_tendency', 'tide', 'date'],
      dtype='object')

In [108]:
# Drop the columns that contain no data at all, in both frames.
# Rebinding the names (rather than inplace=True) leaves any previously
# displayed frame untouched and never modifies a slice of another frame.
see_buoy = see_buoy.dropna(axis=1, how='all')
ndbc = ndbc.dropna(axis=1, how='all')

In [109]:
# Display see_buoy after its all-NaN columns were dropped.
see_buoy

Unnamed: 0_level_0,wave_height,dominant_period,average_period,mean_wave_direction,water_temp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-02-08 22:26:00,1.1,11,6.5,121,5.4
2024-02-08 21:56:00,1.1,11,6.7,121,5.5
2024-02-08 21:26:00,1.2,8,6.7,118,5.6
2024-02-08 20:56:00,1.2,11,7.0,122,5.7
2024-02-08 20:26:00,1.2,10,7.1,122,5.8
...,...,...,...,...,...
2023-12-25 02:26:00,0.8,10,5.0,145,10.3
2023-12-25 01:56:00,0.8,11,4.9,136,10.3
2023-12-25 01:26:00,0.9,6,4.7,169,10.3
2023-12-25 00:56:00,0.9,6,4.8,165,10.3


In [110]:
# Display ndbc after its all-NaN columns were dropped.
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,wave_height,dominant_period,average_period,mean_wave_direction,water_temp,date
1,2024,02,08,22,26,1.1,11,6.5,121,5.4,2024-02-08 22:26:00
2,2024,02,08,21,56,1.1,11,6.7,121,5.5,2024-02-08 21:56:00
3,2024,02,08,21,26,1.2,8,6.7,118,5.6,2024-02-08 21:26:00
4,2024,02,08,20,56,1.2,11,7.0,122,5.7,2024-02-08 20:56:00
5,2024,02,08,20,26,1.2,10,7.1,122,5.8,2024-02-08 20:26:00
...,...,...,...,...,...,...,...,...,...,...,...
2180,2023,12,25,02,26,0.8,10,5.0,145,10.3,2023-12-25 02:26:00
2181,2023,12,25,01,56,0.8,11,4.9,136,10.3,2023-12-25 01:56:00
2182,2023,12,25,01,26,0.9,6,4.7,169,10.3,2023-12-25 01:26:00
2183,2023,12,25,00,56,0.9,6,4.8,165,10.3,2023-12-25 00:56:00


In [111]:
# Columns that remain in see_buoy; the comparison loop iterates over these.
cols = see_buoy.columns
cols

Index(['wave_height', 'dominant_period', 'average_period',
       'mean_wave_direction', 'water_temp'],
      dtype='object')

In [112]:
# Compare seebuoy's values with the raw NDBC text values, column by column.
#
# Rows are matched on their timestamp rather than on list position. The two
# sources are fetched separately, so one may hold an observation the other
# lacks; a positional comparison would then report every later row as a
# mismatch (or index past the end of the shorter list).

# Only columns present in both frames can be compared.
comparable_cols = [col for col in cols if col in ndbc.columns]
missing_cols = [col for col in cols if col not in ndbc.columns]
if missing_cols:
    print("columns missing from ndbc:", missing_cols)
    print()

# The raw text values are strings; keep the conversion to float on `ndbc`
# itself, as this cell has always done, but without dropping any rows.
ndbc = ndbc.astype({col: float for col in comparable_cols})

# Index both frames by timestamp. pd.to_datetime accepts either strings or
# datetimes, so this works whichever form the index / `date` column holds.
see_by_time = see_buoy.copy()
see_by_time.index = pd.to_datetime(see_by_time.index)
ndbc_by_time = ndbc.set_index(pd.to_datetime(ndbc['date']))

# A repeated timestamp would make label-based lookup ambiguous; keep the first.
see_by_time = see_by_time[~see_by_time.index.duplicated(keep='first')]
ndbc_by_time = ndbc_by_time[~ndbc_by_time.index.duplicated(keep='first')]

# Timestamps present in both sources, newest first.
shared_times = (
    see_by_time.index.intersection(ndbc_by_time.index).sort_values(ascending=False)
)

# Report observations that exist in only one source instead of silently
# skipping them.
only_in_see_buoy = see_by_time.index.difference(ndbc_by_time.index)
only_in_ndbc = ndbc_by_time.index.difference(see_by_time.index)
if len(only_in_see_buoy) or len(only_in_ndbc):
    print("timestamps only in see_buoy:", len(only_in_see_buoy))
    print("timestamps only in ndbc:", len(only_in_ndbc))
    print()

for col in comparable_cols:
    see_vals = see_by_time.loc[shared_times, col].astype(float)
    ndbc_vals = ndbc_by_time.loc[shared_times, col].astype(float)

    # NaN != NaN is True, so rows missing in BOTH sources are excluded here;
    # a value missing in only one source is still reported as a mismatch.
    both_missing = see_vals.isna() & ndbc_vals.isna()
    differs = ((see_vals != ndbc_vals) & ~both_missing).to_numpy()

    for when, see_val, ndbc_val in zip(
        shared_times[differs], see_vals[differs], ndbc_vals[differs]
    ):
        print(col)
        print(when)
        print("see_buoy ", see_val)
        print("ndbc ", ndbc_val)
        print()