Code to fetch latest historical weather data

open-meteo.com provides high-resolution hourly weather data with hourly updates: predictions up to 16 days into the future, as well as historical data.

You can access past weather data dating back to 1940 with the historical weather API.
*However, there is a 5-day delay in the data*. If you want information for the most recent days, you can use the forecast API and adjust the Past Days setting.


https://open-meteo.com/en/docs/knmi-api?models=knmi_seamless (forecast API): OBSERVED weather data from up to 3 months ago through the 'current' (actual) weather
https://open-meteo.com/en/docs/historical-weather-api (historical API): weather data older than 3 months (secondary)


In [1]:
# Install the packages imported by the cells below: the Open-Meteo client
# (openmeteo-requests), the HTTP cache/retry helpers (requests-cache,
# retry-requests), and numpy/pandas.
# `%pip` is the IPython magic form of pip; as the captured output notes, the
# kernel may need a restart before newly installed packages can be imported.

%pip install openmeteo-requests
%pip install requests-cache retry-requests numpy pandas


[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.

[notice] A new release of pip is available: 25.0.1 -> 25.1
[notice] To update, run: pip install --upgrade pip
Note: you may need to restart the kernel to use updated packages.


# weather features and model involved
temperature_2m 🌡️
wind_speed_10m 💨
wind_speed_100m 💨
cloud_cover ☁️
snowfall ☃
apparent_temperature 🥵
radiation features (3x): diffuse_radiation,direct_normal_irradiance,shortwave_radiation
(Diffuse Solar Radiation DHI, Direct Normal Irradiance DNI, Shortwave Solar Radiation GHI)

Location: De Bilt; model used: "KNMI-seamless"

In [None]:
# forecast API to fetch 'recent' historical data

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
from datetime import datetime, timezone, timedelta

# Reference time (timezone-aware UTC). Used below to drop hours that lie in
# the future, so the resulting frame ends at the 'current' weather.
now = datetime.now(timezone.utc)

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Single source of truth for the requested hourly variables. The API returns
# the variables in request order, so the same list drives both the request
# and the column assignment further down (no hand-maintained indices).
hourly_variables = [
    "temperature_2m",
    "wind_speed_10m",
    "apparent_temperature",
    "cloud_cover",
    "snowfall",
    "diffuse_radiation",
    "direct_normal_irradiance",
    "shortwave_radiation",
]

url = "https://api.open-meteo.com/v1/forecast"
params = {
    "latitude": 52.12949,
    "longitude": 5.20514,
    "hourly": hourly_variables,
    "models": "knmi_seamless",
    # The API only accepts ISO dates (YYYY-MM-DD) for start_date/end_date and
    # requires both together, so the previous `"end_date": "now"` was not a
    # valid request. Use the Past Days setting instead: 92 is the documented
    # maximum (~3 months back); forecast_days=1 adds the current day.
    "past_days": 92,
    "forecast_days": 1,
}
responses = openmeteo.weather_api(url, params=params)

# Process single location. (would have to add a for-loop for multiple locations or weather models)
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data: rebuild the time axis from the response's start/end
# epoch seconds and step size, then attach one column per requested variable.
hourly = response.Hourly()
hourly_data = {"date": pd.date_range(
    start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
    end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
    freq=pd.Timedelta(seconds=hourly.Interval()),
    inclusive="left",
)}
for index, variable_name in enumerate(hourly_variables):
    hourly_data[variable_name] = hourly.Variables(index).ValuesAsNumpy()

hourly_dataframe = pd.DataFrame(data=hourly_data)

# forecast_days=1 returns the whole current day; keep only the hours up to
# `now` so the frame holds recent history rather than forecast values.
hourly_dataframe = hourly_dataframe[
    hourly_dataframe["date"] <= pd.Timestamp(now)
].reset_index(drop=True)
print(hourly_dataframe)


In [11]:
# historical API to fetch historical data

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
from datetime import datetime, timezone, timedelta

# The archive API lags roughly five days behind, so the request window ends
# five days before the current UTC time.
now = datetime.now(timezone.utc)
end_date = now - timedelta(days=5)
end_date_date_str = end_date.strftime("%Y-%m-%d")
print(f"5 days back from today: {end_date_date_str}")

# Open-Meteo client on top of a cached (1 h expiry) session that retries on error.
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition. The order of the "hourly" entries matters: the values
# are read back by position further down.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 52.12949,
    "longitude": 5.20514,
    "hourly": [
        "temperature_2m",
        "wind_speed_10m",
        "apparent_temperature",
        "cloud_cover",
        "snowfall",
        "diffuse_radiation",
        "direct_normal_irradiance",
        "shortwave_radiation",
    ],
    "models": "knmi_seamless",
    "end_date": end_date_date_str,
    "start_date": "2025-01-01",
}
responses = openmeteo.weather_api(url, params=params)

# Only one location/model is requested, so the first response is the only one.
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Pull the eight hourly series out by position (same order as requested).
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_wind_speed_10m,
    hourly_apparent_temperature,
    hourly_cloud_cover,
    hourly_snowfall,
    hourly_diffuse_radiation,
    hourly_direct_normal_irradiance,
    hourly_shortwave_radiation,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(8))

# Time axis rebuilt from the response's start/end epoch seconds and interval,
# followed by one column per variable.
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "wind_speed_10m": hourly_wind_speed_10m,
    "apparent_temperature": hourly_apparent_temperature,
    "cloud_cover": hourly_cloud_cover,
    "snowfall": hourly_snowfall,
    "diffuse_radiation": hourly_diffuse_radiation,
    "direct_normal_irradiance": hourly_direct_normal_irradiance,
    "shortwave_radiation": hourly_shortwave_radiation,
}

weather_dataframe_obs = pd.DataFrame(hourly_data)
print(weather_dataframe_obs)

5 days back from today: 2025-04-27
Coordinates 52.13199996948242°N 5.190999984741211°E
Elevation 5.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
                          date  temperature_2m  wind_speed_10m  \
0    2025-01-01 00:00:00+00:00        7.432500       22.680000   
1    2025-01-01 01:00:00+00:00        7.632500       20.880001   
2    2025-01-01 02:00:00+00:00        7.282500       28.799999   
3    2025-01-01 03:00:00+00:00        7.782500       29.519999   
4    2025-01-01 04:00:00+00:00        7.682500       28.799999   
...                        ...             ...             ...   
2803 2025-04-27 19:00:00+00:00       15.582500        5.760000   
2804 2025-04-27 20:00:00+00:00       13.932500        6.840000   
2805 2025-04-27 21:00:00+00:00       12.382501        5.400000   
2806 2025-04-27 22:00:00+00:00       10.932500        4.320000   
2807 2025-04-27 23:00:00+00:00        8.932500        3.240000   

      apparent_temperature  cloud_cover  snowfall

In [None]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime, timezone
import os

# Setup the Open-Meteo API client with cache and retry on error.
# expire_after=-1 is passed so cached responses are kept rather than expired
# (NOTE(review): confirm against the requests-cache docs that -1 means
# "never expire"); the requested window is fixed historical data.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Define locations as (name, latitude, longitude)
locations = [
    ("DeBilt", 52.12949, 5.20514),
]

# Single source of truth for the hourly variables: the same list is sent to
# the API and used to label the returned series, so request order and column
# names cannot drift apart.
hourly_variables = [
    "temperature_2m",
    "apparent_temperature",
    "cloud_cover",
    "wind_speed_10m",
    "diffuse_radiation",
    "direct_normal_irradiance",
    "shortwave_radiation",
    "wind_speed_100m",
]

# Open-Meteo API parameters
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": [loc[1] for loc in locations],
    "longitude": [loc[2] for loc in locations],
    "start_date": "2022-01-01",
    "end_date": "2025-03-31",
    "hourly": hourly_variables,
}

# Fetch data from Open-Meteo (one response per requested location)
responses = openmeteo.weather_api(url, params=params)

# Build one DataFrame per location
dataframes = []
for (location_name, _latitude, _longitude), response in zip(locations, responses):
    hourly = response.Hourly()

    # Time axis rebuilt from the response's start/end epoch seconds and interval
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        ),
    }
    for index, variable_name in enumerate(hourly_variables):
        hourly_data[variable_name] = hourly.Variables(index).ValuesAsNumpy()
    hourly_data["location"] = location_name  # Add location column

    # Convert to DataFrame
    df = pd.DataFrame(hourly_data)
    dataframes.append(df)

# Combine all data into a single DataFrame
final_dataframe = pd.concat(dataframes, ignore_index=True)

# Generate timestamp for filename; ':' is replaced because it is not allowed
# in file names on some platforms.
fetch_timestamp = datetime.now(timezone.utc)
timestamp_str = fetch_timestamp.isoformat(timespec='seconds').replace(':', '-')

# Make sure the target directory exists; to_csv does not create it and would
# otherwise fail with an OSError after the download has already completed.
output_dir = "../Data"
os.makedirs(output_dir, exist_ok=True)

csv_filename = f"{output_dir}/hist_weather_data_2022-2025Mar31_{timestamp_str}.csv"
final_dataframe.to_csv(csv_filename, index=False)

print(f"Data saved to {csv_filename}")

In [None]:
# Latest timestamp found in the "date" column of the combined dataframe.
most_recent_date = final_dataframe.loc[:, "date"].max()
print(f"The most recent date in the dataframe is: {most_recent_date}")

In [None]:
# Total in-memory footprint of the dataframe: per-column byte counts
# (deep=True, as passed to memory_usage) summed up, then reported in KB.
bytes_per_kilobyte = 1024
memory_usage_bytes = final_dataframe.memory_usage(deep=True).sum()
memory_usage_kb = memory_usage_bytes / bytes_per_kilobyte

print(f"The size of the dataframe in kilobytes is: {memory_usage_kb:.2f} KB")

In [None]:
# Generate descriptive statistics for the dataframe using describe() with its
# default arguments and print the resulting summary table.
# NOTE(review): with defaults, describe() may not cover every column (the
# string `location` column is presumably left out) - confirm if a summary of
# all columns is required.
descriptive_stats = final_dataframe.describe()
print(descriptive_stats)