# Verifying Seebuoy data for NDBC on Station 44150

In [1]:
import sys

# Install seebuoy using the interpreter that runs this kernel (sys.executable)
!{sys.executable} -m pip install seebuoy

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import os

# Current working directory; the downloaded NDBC text file is written here
path = os.getcwd()
# Station identifier used for both the NDBC download URL and the seebuoy query.
# NOTE(review): the notebook title mentions station 44150, but 44065 is set here — confirm which is intended.
buoyNum = 44065

In [3]:
import requests

# Download the station's realtime2 text file from NDBC and store it next to the notebook.
url = f"https://www.ndbc.noaa.gov/data/realtime2/{buoyNum}.txt"
# A timeout keeps the cell from hanging forever if the server does not answer.
r = requests.get(url, allow_redirects=True, timeout=30)
# Fail loudly on 4xx/5xx so an HTTP error page is never saved as if it were buoy data.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed before it is read back later.
with open(f"{path}/{buoyNum}.txt", 'wb') as raw_file:
    bytes_written = raw_file.write(r.content)
# Show the number of bytes written as the cell output
bytes_written



613165

In [4]:
from seebuoy import NDBC
import pandas as pd
import numpy as np

# seebuoy client used for every NDBC query in this cell
ndbc = NDBC()

# Station metadata table (NDBC's ~1800 buoys and gliders)
wave_df = ndbc.stations()

# Listing of the data available for all buoys
df_data = ndbc.available_data()

# SEEBUOY DATA
# Fetch the records for the station in `buoyNum` and reverse the row order,
# so the displayed frame starts with the most recent timestamp.
# NOTE(review): the previous comment here named La Jave Bank (42.500N 64.02W);
# confirm that this matches the station held in `buoyNum`.
see_buoy = ndbc.get_data(str(buoyNum)).iloc[::-1]
see_buoy

Unnamed: 0_level_0,wind_direction,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,air_temp,water_temp,dewpoint,visibility,pressure_tendency,tide
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-02-13 22:30:00,,9.0,11.0,,,,,1004.4,,5.8,,,,
2024-02-13 22:20:00,,9.0,11.0,1.1,8.0,4.3,108.0,,,5.8,,,,
2024-02-13 22:10:00,,9.0,11.0,1.1,,4.3,108.0,1004.1,,5.8,,,,
2024-02-13 22:00:00,,10.0,12.0,,,,,1003.8,,5.8,,,3.9,
2024-02-13 21:50:00,,10.0,13.0,1.2,8.0,4.4,109.0,1003.6,,5.8,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-30 00:40:00,,5.0,6.0,0.8,,5.4,140.0,1004.1,,9.5,,,,
2023-12-30 00:30:00,,5.0,7.0,,,,,1004.2,,,,,,
2023-12-30 00:20:00,,4.0,6.0,0.8,9.0,5.4,124.0,1004.1,,9.6,,,,
2023-12-30 00:10:00,,5.0,6.0,0.8,,5.4,124.0,1004.2,,9.6,,,,


In [5]:
# NDBC TXT
# Character width of each fixed-width field in the downloaded file, in column order
column_widths = [4, 3, 3, 3, 3, 5, 5, 5, 6, 6, 5, 5, 7, 7, 5, 5, 6, 4, 6]

# Parse the text file; its first line provides the column names
ndbc_txt_raw = pd.read_fwf(f"{buoyNum}.txt", widths=column_widths, header=[0])

# Discard the row labelled 0 (presumably the units line under the header — confirm).
# NOTE: from here on `ndbc` is a DataFrame, no longer the seebuoy client.
ndbc = ndbc_txt_raw.drop(index=0)
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,PTDY,TIDE
1,2024,02,13,22,30,MM,9.0,11.0,MM,MM,MM,MM,1004.4,MM,5.8,MM,MM,MM,MM
2,2024,02,13,22,20,MM,9.0,11.0,1.1,8,4.3,108,MM,MM,5.8,MM,MM,MM,MM
3,2024,02,13,22,10,MM,9.0,11.0,1.1,MM,4.3,108,1004.1,MM,5.8,MM,MM,MM,MM
4,2024,02,13,22,00,MM,10.0,12.0,MM,MM,MM,MM,1003.8,MM,5.8,MM,MM,+3.9,MM
5,2024,02,13,21,50,MM,10.0,13.0,1.2,8,4.4,109,1003.6,MM,5.8,MM,MM,MM,MM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6517,2023,12,30,00,40,MM,5.0,6.0,0.8,MM,5.4,140,1004.1,MM,9.5,MM,MM,MM,MM
6518,2023,12,30,00,30,MM,5.0,7.0,MM,MM,MM,MM,1004.2,MM,MM,MM,MM,MM,MM
6519,2023,12,30,00,20,MM,4.0,6.0,0.8,9,5.4,124,1004.1,MM,9.6,MM,MM,MM,MM
6520,2023,12,30,00,10,MM,5.0,6.0,0.8,MM,5.4,124,1004.2,MM,9.6,MM,MM,MM,MM


In [6]:
# Assemble one "YYYY-MM-DD hh:mm:00" string per row from the split date/time fields.
# Each field goes through int first so the zero padding is rebuilt uniformly.
date_parts = ndbc[['#YY', 'MM', 'DD', 'hh', 'mm']].astype(int).astype(str)
ndbc['date'] = (
    date_parts['#YY']
    + '-' + date_parts['MM'].str.zfill(2)
    + '-' + date_parts['DD'].str.zfill(2)
    + ' ' + date_parts['hh'].str.zfill(2)
    + ':' + date_parts['mm'].str.zfill(2)
    + ':00'
)

# The original split columns ('#YY', 'MM', 'DD', 'hh', 'mm') are kept alongside 'date'.

# Map the NDBC header abbreviations onto the column names seebuoy uses
seebuoy_names = {
    'WDIR': 'wind_direction',
    'WSPD': 'wind_speed',
    'GST': 'wind_gust',
    'WVHT': 'wave_height',
    'DPD': 'dominant_period',
    'APD': 'average_period',
    'MWD': 'mean_wave_direction',
    'PRES': 'pressure',
    'ATMP': 'air_temp',
    'WTMP': 'water_temp',
    'DEWP': 'dewpoint',
    'VIS': 'visibility',
    'PTDY': 'pressure_tendency',
    'TIDE': 'tide',
}
ndbc = ndbc.rename(columns=seebuoy_names)

# 'MM' is the file's missing-value marker; turn every such cell into NaN
# (this acts on cell values only, so the month column named 'MM' keeps its label)
ndbc = ndbc.replace('MM', np.nan)
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,wind_direction,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,air_temp,water_temp,dewpoint,visibility,pressure_tendency,tide,date
1,2024,02,13,22,30,,9.0,11.0,,,,,1004.4,,5.8,,,,,2024-02-13 22:30:00
2,2024,02,13,22,20,,9.0,11.0,1.1,8,4.3,108,,,5.8,,,,,2024-02-13 22:20:00
3,2024,02,13,22,10,,9.0,11.0,1.1,,4.3,108,1004.1,,5.8,,,,,2024-02-13 22:10:00
4,2024,02,13,22,00,,10.0,12.0,,,,,1003.8,,5.8,,,+3.9,,2024-02-13 22:00:00
5,2024,02,13,21,50,,10.0,13.0,1.2,8,4.4,109,1003.6,,5.8,,,,,2024-02-13 21:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6517,2023,12,30,00,40,,5.0,6.0,0.8,,5.4,140,1004.1,,9.5,,,,,2023-12-30 00:40:00
6518,2023,12,30,00,30,,5.0,7.0,,,,,1004.2,,,,,,,2023-12-30 00:30:00
6519,2023,12,30,00,20,,4.0,6.0,0.8,9,5.4,124,1004.1,,9.6,,,,,2023-12-30 00:20:00
6520,2023,12,30,00,10,,5.0,6.0,0.8,,5.4,124,1004.2,,9.6,,,,,2023-12-30 00:10:00


In [7]:
# Show the column labels of the NDBC frame
ndbc.columns

Index(['#YY', 'MM', 'DD', 'hh', 'mm', 'wind_direction', 'wind_speed',
       'wind_gust', 'wave_height', 'dominant_period', 'average_period',
       'mean_wave_direction', 'pressure', 'air_temp', 'water_temp', 'dewpoint',
       'visibility', 'pressure_tendency', 'tide', 'date'],
      dtype='object')

In [8]:
# Remove every column that holds no data at all, separately for each source
see_buoy = see_buoy.dropna(axis='columns', how='all')
ndbc = ndbc.dropna(axis='columns', how='all')

In [9]:
# Display the seebuoy frame
see_buoy

Unnamed: 0_level_0,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,water_temp,pressure_tendency
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-02-13 22:30:00,9.0,11.0,,,,,1004.4,5.8,
2024-02-13 22:20:00,9.0,11.0,1.1,8.0,4.3,108.0,,5.8,
2024-02-13 22:10:00,9.0,11.0,1.1,,4.3,108.0,1004.1,5.8,
2024-02-13 22:00:00,10.0,12.0,,,,,1003.8,5.8,3.9
2024-02-13 21:50:00,10.0,13.0,1.2,8.0,4.4,109.0,1003.6,5.8,
...,...,...,...,...,...,...,...,...,...
2023-12-30 00:40:00,5.0,6.0,0.8,,5.4,140.0,1004.1,9.5,
2023-12-30 00:30:00,5.0,7.0,,,,,1004.2,,
2023-12-30 00:20:00,4.0,6.0,0.8,9.0,5.4,124.0,1004.1,9.6,
2023-12-30 00:10:00,5.0,6.0,0.8,,5.4,124.0,1004.2,9.6,


In [10]:
# Display the NDBC text-file frame
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,water_temp,pressure_tendency,tide,date
1,2024,02,13,22,30,9.0,11.0,,,,,1004.4,5.8,,,2024-02-13 22:30:00
2,2024,02,13,22,20,9.0,11.0,1.1,8,4.3,108,,5.8,,,2024-02-13 22:20:00
3,2024,02,13,22,10,9.0,11.0,1.1,,4.3,108,1004.1,5.8,,,2024-02-13 22:10:00
4,2024,02,13,22,00,10.0,12.0,,,,,1003.8,5.8,+3.9,,2024-02-13 22:00:00
5,2024,02,13,21,50,10.0,13.0,1.2,8,4.4,109,1003.6,5.8,,,2024-02-13 21:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6517,2023,12,30,00,40,5.0,6.0,0.8,,5.4,140,1004.1,9.5,,,2023-12-30 00:40:00
6518,2023,12,30,00,30,5.0,7.0,,,,,1004.2,,,,2023-12-30 00:30:00
6519,2023,12,30,00,20,4.0,6.0,0.8,9,5.4,124,1004.1,9.6,,,2023-12-30 00:20:00
6520,2023,12,30,00,10,5.0,6.0,0.8,,5.4,124,1004.2,9.6,,,2023-12-30 00:10:00


In [11]:
# Columns of the seebuoy frame; the comparison loop iterates over these
cols = see_buoy.columns
cols

Index(['wind_speed', 'wind_gust', 'wave_height', 'dominant_period',
       'average_period', 'mean_wave_direction', 'pressure', 'water_temp',
       'pressure_tendency'],
      dtype='object')

In [12]:
# Compare seebuoy against the raw NDBC text file, one column at a time.
#
# Missing values are dropped per column on temporary Series only. Dropping rows
# from the frames themselves (dropna(..., inplace=True) inside the loop) would
# shrink both frames cumulatively, so later columns would only be checked on
# rows where every earlier column happened to be non-null.
for col in cols:
    if col not in ndbc.columns:
        # The column does not exist in the NDBC frame (e.g. it was removed as all-NaN).
        print(col)
        print("missing from ndbc")
        print()
        continue

    # Keep the numeric conversion on the frame, as before (strings such as '+3.9' become floats).
    ndbc[col] = ndbc[col].astype(float)

    # Non-null values of this column only, in row order, for each source
    see_buoy_vals = see_buoy[col].dropna().to_numpy()
    ndbc_vals = ndbc[col].dropna().to_numpy()

    # Different counts mean the sources disagree on which rows have data;
    # report it instead of failing with an IndexError further down.
    if len(see_buoy_vals) != len(ndbc_vals):
        print(col)
        print("length mismatch: see_buoy", len(see_buoy_vals), "ndbc", len(ndbc_vals))
        print()

    # Position-by-position comparison over the part both sources share
    n_common = min(len(see_buoy_vals), len(ndbc_vals))
    for i in np.flatnonzero(see_buoy_vals[:n_common] != ndbc_vals[:n_common]):
        print(col)
        print(i)
        print("see_buoy ", see_buoy_vals[i])
        print("ndbc ", ndbc_vals[i])
        print()