In [1]:
import pandas as pd
import hopsworks
import openmeteo_requests
import requests_cache
from retry_requests import retry

def get_historical_wind_data():
    """Download daily weather history for Flores Island (Azores) from Open-Meteo.

    Queries the Open-Meteo archive endpoint for 2015-01-01 through 2025-01-01,
    builds one row per daily interval, drops every row that contains a NaN and
    adds a ``date_str`` column (``YYYY-MM-DD``) next to the timestamp column.

    Returns:
        pandas.DataFrame with the columns ``date``, ``date_str``,
        ``temperature_max``, ``precipitation``, ``wind_speed``,
        ``wind_gusts`` and ``wind_direction``, in that order.
    """
    print("Connecting to Open-Meteo Archive API for historical data...")

    # Requested API variable -> output column name. The response variables are
    # read back by position, so this single ordered mapping drives both the
    # request and the extraction below.
    column_for_variable = {
        "temperature_2m_max": "temperature_max",
        "precipitation_sum": "precipitation",
        "wind_speed_10m_max": "wind_speed",  # prediction target
        "wind_gusts_10m_max": "wind_gusts",
        "wind_direction_10m_dominant": "wind_direction",
    }

    # HTTP session: on-disk cache (expire_after=-1) wrapped with retry/backoff.
    session = retry(
        requests_cache.CachedSession('.cache', expire_after=-1),
        retries=5,
        backoff_factor=0.2,
    )
    client = openmeteo_requests.Client(session=session)

    # Coordinates for Flores Island, Azores.
    query = {
        "latitude": 39.4532,
        "longitude": -31.1274,
        "start_date": "2015-01-01",
        "end_date": "2025-01-01",
        "daily": list(column_for_variable),
        "timezone": "Atlantic/Azores",
    }
    archive_url = "https://archive-api.open-meteo.com/v1/archive"

    # Only the first response is used.
    daily = client.weather_api(archive_url, params=query)[0].Daily()

    # Rebuild the time axis from the response's start, end and step (seconds);
    # the end bound is excluded. Timestamps are created as UTC-aware.
    timestamps = pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    )

    table = {"date": timestamps}
    for position, column in enumerate(column_for_variable.values()):
        table[column] = daily.Variables(position).ValuesAsNumpy()

    # Data cleaning: discard rows with any missing value.
    frame = pd.DataFrame(data=table).dropna()
    print(f"Data fetched and cleaned. Total rows: {len(frame)}")

    # String form of the date, used as the primary key downstream.
    frame['date_str'] = frame['date'].dt.strftime('%Y-%m-%d')

    # Put the key columns first, then the measurements.
    ordered_columns = ['date', 'date_str', *column_for_variable.values()]
    return frame[ordered_columns]

def run_job():
    """Backfill the ``azores_wind_data`` feature group in Hopsworks.

    Steps:
        1. Fetch the historical data; on any error the message is printed and
           the job stops without touching the feature store.
        2. Log in to Hopsworks and get the project's feature store.
        3. Best-effort removal of version 1 of the feature group, so the
           upload starts from a clean state. Failures here never abort the job.
        4. Get or create the feature group (primary key ``date_str``, event
           time ``date``, online-enabled) and insert the DataFrame.

    Returns:
        None.
    """
    # 1. Fetch Data
    try:
        df = get_historical_wind_data()
        print(f"Data Preview:\n{df.head()}")
    except Exception as e:
        # Top-level boundary: report the problem and stop before any upload.
        print(e)
        return

    # 2. Login to Hopsworks
    print(" Logging into Hopsworks...")
    project = hopsworks.login()
    fs = project.get_feature_store()

    # 3. Delete old Feature Group (if exists) to ensure a clean state
    print("🧹 Checking for old data...")
    try:
        old_fg = fs.get_feature_group(name="azores_wind_data", version=1)
        if old_fg is None:
            # Handle a missing group explicitly instead of relying on the
            # AttributeError that calling .delete() on None would raise.
            print("No old version found, continuing.")
        else:
            old_fg.delete()
            print("Old Feature Group deleted.")
    except Exception as exc:
        # Still best-effort, but no longer a bare ``except``: Ctrl-C and
        # SystemExit propagate, and the real reason is shown so an auth or
        # network failure is not mistaken for "nothing to delete".
        print("No old version found, continuing.")
        print(f"Reason: {exc!r}")

    # 4. Create and Upload Feature Group
    print(" Uploading to Feature Store...")

    wind_fg = fs.get_or_create_feature_group(
        name="azores_wind_data",
        version=1,
        primary_key=["date_str"],
        event_time="date",
        description="Daily weather and wind speed for Flores Island boat trips",
        online_enabled=True
    )

    wind_fg.insert(df)
    print(" Success! Data backfilled to Feature Store.")
    print(" Please wait 1-2 minutes for data indexing before training.")

# Entry point: run the backfill only when this module is executed directly
# (its __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    run_job()

📡 Connecting to Open-Meteo Archive API for historical data...
✅ Data fetched and cleaned. Total rows: 3654
📊 Data Preview:
                       date    date_str  temperature_max  precipitation  \
0 2015-01-01 01:00:00+00:00  2015-01-01          18.2675            0.0   
1 2015-01-02 01:00:00+00:00  2015-01-02          17.9175            0.2   
2 2015-01-03 01:00:00+00:00  2015-01-03          17.2675            1.0   
3 2015-01-04 01:00:00+00:00  2015-01-04          16.9175           16.9   
4 2015-01-05 01:00:00+00:00  2015-01-05          12.7675            0.0   

   wind_speed  wind_gusts  wind_direction  
0   43.857986   56.880001      214.742599  
1   38.168491   50.039997      211.329620  
2   40.550766   52.560001      191.000580  
3   44.162769   60.120003      258.250427  
4   38.168491   52.560001      322.784210  
🔐 Logging into Hopsworks...
2026-01-13 22:16:00,539 INFO: Initializing external client
2026-01-13 22:16:00,539 INFO: Base URL: https://c.app.hopsworks.





2026-01-13 22:16:02,033 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1303706
🧹 Checking for old data...
ℹ️ No old version found, continuing.
🚀 Uploading to Feature Store...
Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1303706/fs/1291336/fg/1946986


Uploading Dataframe: 100.00% |█| Rows 3654/3654 | Elapsed Time: 00:02 | Remainin


Launching job: azores_wind_data_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1303706/jobs/named/azores_wind_data_1_offline_fg_materialization/executions
🎉 Success! Data backfilled to Feature Store.
⏳ Please wait 1-2 minutes for data indexing before training.
