# Verifying seebuoy data against the raw NDBC realtime file for Station 44065

In [55]:
import sys

!{sys.executable} -m pip install seebuoy

Defaulting to user installation because normal site-packages is not writeable


In [56]:
import os

# NDBC station identifier; it is interpolated into the download URL and the
# local file name in the cells below.
buoyNum = 44065

# Directory the downloaded station file is written to (the current working directory).
path = os.getcwd()

In [57]:
import requests

# Download the station's realtime2 text file from NDBC and save it as
# "<path>/<buoyNum>.txt".
url = f"https://www.ndbc.noaa.gov/data/realtime2/{buoyNum}.txt"

# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, allow_redirects=True, timeout=30)
# Fail loudly on an HTTP error instead of saving an error page as station data.
r.raise_for_status()

# The context manager closes the handle, so the content is flushed to disk
# before a later cell reads the file back.
with open(f"{path}/{buoyNum}.txt", 'wb') as station_file:
    bytes_written = station_file.write(r.content)

# Show the number of bytes written as the cell output.
bytes_written

599535

In [58]:
from seebuoy import NDBC
import pandas as pd
import numpy as np

# seebuoy client used for every NDBC query in this cell.
ndbc = NDBC()

# Information on NDBC's ~1800 buoys and gliders
wave_df = ndbc.stations()

# Listing of the data available across all buoys.
df_data = ndbc.available_data()

# SEEBUOY DATA
# Fetch the observations for the station held in buoyNum and reverse the row
# order; the displayed result is newest-first, like the raw NDBC text file.
# NOTE(review): this comment used to name La Have Bank (42.500N 64.02W), which
# looks like station 44150 rather than the buoyNum set above - confirm which
# station is intended.
see_buoy = ndbc.get_data(str(buoyNum)).iloc[::-1]
see_buoy

Unnamed: 0_level_0,wind_direction,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,air_temp,water_temp,dewpoint,visibility,pressure_tendency,tide
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-02-08 22:10:00,,4.0,5.0,,,,,1024.1,,6.2,,,,
2024-02-08 22:00:00,,4.0,5.0,,,,,1024.1,,6.3,,,,
2024-02-08 21:50:00,,4.0,5.0,1.2,11.0,7.1,112.0,1024.2,,6.3,,,,
2024-02-08 21:40:00,,4.0,4.0,,,,,1024.4,,6.3,,,,
2024-02-08 21:30:00,,4.0,5.0,,,,,,,6.3,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-25 00:40:00,,1.0,1.0,,,,,1028.4,,9.0,,,,
2023-12-25 00:30:00,,0.0,1.0,,,,,1028.4,,9.0,,,,
2023-12-25 00:20:00,,1.0,1.0,0.8,6.0,5.6,122.0,1028.4,,9.1,,,,
2023-12-25 00:10:00,,1.0,2.0,0.8,,5.6,122.0,1028.3,,9.1,,,,


In [59]:
# NDBC TXT
# Character width of each fixed-width column in the downloaded text file
# (one entry per column, left to right).
column_widths = [4, 3, 3, 3, 3, 5, 5, 5, 6, 6, 5, 5, 7, 7, 5, 5, 6, 4, 6]

# Read the file written by the download cell. The name is built from path and
# buoyNum, exactly as it was when saving, so changing buoyNum cannot leave this
# cell parsing a different station's file.
ndbc = pd.read_fwf(f"{path}/{buoyNum}.txt", widths=column_widths, header=[0])

# The first file line supplies the column names; row 0 is the line right after
# it (presumably the units line - TODO confirm) and is not an observation.
ndbc = ndbc.drop(labels=0, axis=0)
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,WDIR,WSPD,GST,WVHT,DPD,APD,MWD,PRES,ATMP,WTMP,DEWP,VIS,PTDY,TIDE
1,2024,02,08,22,10,MM,4.0,5.0,MM,MM,MM,MM,1024.1,MM,6.2,MM,MM,MM,MM
2,2024,02,08,22,00,MM,4.0,5.0,MM,MM,MM,MM,1024.1,MM,6.3,MM,MM,MM,MM
3,2024,02,08,21,50,MM,4.0,5.0,1.2,11,7.1,112,1024.2,MM,6.3,MM,MM,MM,MM
4,2024,02,08,21,40,MM,4.0,4.0,MM,MM,MM,MM,1024.4,MM,6.3,MM,MM,MM,MM
5,2024,02,08,21,30,MM,4.0,5.0,MM,MM,MM,MM,MM,MM,6.3,MM,MM,MM,MM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6372,2023,12,25,00,40,MM,1.0,1.0,MM,MM,MM,MM,1028.4,MM,9.0,MM,MM,MM,MM
6373,2023,12,25,00,30,MM,0.0,1.0,MM,MM,MM,MM,1028.4,MM,9.0,MM,MM,MM,MM
6374,2023,12,25,00,20,MM,1.0,1.0,0.8,6,5.6,122,1028.4,MM,9.1,MM,MM,MM,MM
6375,2023,12,25,00,10,MM,1.0,2.0,0.8,MM,5.6,122,1028.3,MM,9.1,MM,MM,MM,MM


In [60]:
def _timestamp_from_parts(row):
    """Format a row's #YY / MM / DD / hh / mm fields as 'YYYY-MM-DD hh:mm:00'."""
    year, month, day = int(row['#YY']), int(row['MM']), int(row['DD'])
    hour, minute = int(row['hh']), int(row['mm'])
    return f"{year}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}:00"


# Build a single 'date' string per row from the separate date/time columns.
# The original component columns are left in the frame.
ndbc['date'] = ndbc.apply(_timestamp_from_parts, axis=1)

# Map the NDBC header abbreviations onto the column names used by see_buoy.
seebuoy_names = {
    'WDIR': 'wind_direction',
    'WSPD': 'wind_speed',
    'GST': 'wind_gust',
    'WVHT': 'wave_height',
    'DPD': 'dominant_period',
    'APD': 'average_period',
    'MWD': 'mean_wave_direction',
    'PRES': 'pressure',
    'ATMP': 'air_temp',
    'WTMP': 'water_temp',
    'DEWP': 'dewpoint',
    'VIS': 'visibility',
    'PTDY': 'pressure_tendency',
    'TIDE': 'tide',
}
ndbc.rename(columns=seebuoy_names, inplace=True)

# Cells holding the text 'MM' become NaN. This matches cell values only, so
# the month column, which is also labelled 'MM', keeps its name.
ndbc.replace('MM', np.nan, inplace=True)
ndbc

Unnamed: 0,#YY,MM,DD,hh,mm,wind_direction,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,air_temp,water_temp,dewpoint,visibility,pressure_tendency,tide,date
1,2024,02,08,22,10,,4.0,5.0,,,,,1024.1,,6.2,,,,,2024-02-08 22:10:00
2,2024,02,08,22,00,,4.0,5.0,,,,,1024.1,,6.3,,,,,2024-02-08 22:00:00
3,2024,02,08,21,50,,4.0,5.0,1.2,11,7.1,112,1024.2,,6.3,,,,,2024-02-08 21:50:00
4,2024,02,08,21,40,,4.0,4.0,,,,,1024.4,,6.3,,,,,2024-02-08 21:40:00
5,2024,02,08,21,30,,4.0,5.0,,,,,,,6.3,,,,,2024-02-08 21:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6372,2023,12,25,00,40,,1.0,1.0,,,,,1028.4,,9.0,,,,,2023-12-25 00:40:00
6373,2023,12,25,00,30,,0.0,1.0,,,,,1028.4,,9.0,,,,,2023-12-25 00:30:00
6374,2023,12,25,00,20,,1.0,1.0,0.8,6,5.6,122,1028.4,,9.1,,,,,2023-12-25 00:20:00
6375,2023,12,25,00,10,,1.0,2.0,0.8,,5.6,122,1028.3,,9.1,,,,,2023-12-25 00:10:00


In [61]:
# Display the column labels of ndbc to check the result of the rename above.
ndbc.columns

Index(['#YY', 'MM', 'DD', 'hh', 'mm', 'wind_direction', 'wind_speed',
       'wind_gust', 'wave_height', 'dominant_period', 'average_period',
       'mean_wave_direction', 'pressure', 'air_temp', 'water_temp', 'dewpoint',
       'visibility', 'pressure_tendency', 'tide', 'date'],
      dtype='object')

In [62]:
# Remove, in place, every column that holds no data at all from both frames.
# Each frame is filtered independently of the other.
for frame in (see_buoy, ndbc):
    frame.dropna(axis="columns", how="all", inplace=True)

In [63]:
# Display see_buoy after its all-NaN columns were dropped.
see_buoy

Unnamed: 0_level_0,wind_speed,wind_gust,wave_height,dominant_period,average_period,mean_wave_direction,pressure,water_temp,pressure_tendency
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-02-08 22:10:00,4.0,5.0,,,,,1024.1,6.2,
2024-02-08 22:00:00,4.0,5.0,,,,,1024.1,6.3,
2024-02-08 21:50:00,4.0,5.0,1.2,11.0,7.1,112.0,1024.2,6.3,
2024-02-08 21:40:00,4.0,4.0,,,,,1024.4,6.3,
2024-02-08 21:30:00,4.0,5.0,,,,,,6.3,
...,...,...,...,...,...,...,...,...,...
2023-12-25 00:40:00,1.0,1.0,,,,,1028.4,9.0,
2023-12-25 00:30:00,0.0,1.0,,,,,1028.4,9.0,
2023-12-25 00:20:00,1.0,1.0,0.8,6.0,5.6,122.0,1028.4,9.1,
2023-12-25 00:10:00,1.0,2.0,0.8,,5.6,122.0,1028.3,9.1,


In [64]:
# Columns remaining in see_buoy; the comparison cell below iterates over these.
cols = see_buoy.columns
cols

Index(['wind_speed', 'wind_gust', 'wave_height', 'dominant_period',
       'average_period', 'mean_wave_direction', 'pressure', 'water_temp',
       'pressure_tendency'],
      dtype='object')

In [65]:
def _values_by_timestamp(values, timestamps):
    """Return ``values`` as a float Series indexed by parsed ``timestamps``.

    ``values`` must be convertible with ``astype(float)``; anything that is
    not raises, as the plain conversion did before. When a timestamp occurs
    more than once only its first row is kept, so the two sources can be
    matched one-to-one.
    """
    series = pd.Series(
        values.astype(float).to_numpy(),
        index=pd.to_datetime(timestamps),
    )
    return series[~series.index.duplicated(keep="first")]


# Compare see_buoy against the raw NDBC file column by column, matching rows on
# their timestamp rather than on list position. Neither frame loses rows here,
# so every column is checked over the full record and the cell can be re-run.
# Assumes see_buoy is indexed by observation time in the same timezone as the
# ndbc 'date' strings - TODO confirm.
for col in cols:
    # Keep the float conversion of the raw column that this cell has always applied.
    ndbc[col] = ndbc[col].astype(float)

    see_values = _values_by_timestamp(see_buoy[col], see_buoy.index)
    ndbc_values = _values_by_timestamp(ndbc[col], ndbc['date'])

    # Outer alignment keeps timestamps present in only one source; the other
    # side becomes NaN there and is reported as a mismatch below.
    see_values, ndbc_values = see_values.align(ndbc_values, join="outer")

    # NaN never compares equal, so a value missing on both sides is excluded
    # explicitly; a value missing on only one side still counts.
    both_missing = see_values.isna() & ndbc_values.isna()
    mismatched = (see_values.ne(ndbc_values) & ~both_missing).to_numpy()

    for timestamp, see_value, ndbc_value in zip(
        see_values.index[mismatched],
        see_values[mismatched],
        ndbc_values[mismatched],
    ):
        print(col)
        print(timestamp)
        print("see_buoy ", see_value)
        print("ndbc ", ndbc_value)
        print()