Code to fetch latest historical weather data

open-meteo.com provides hourly weather data, with high resolution. with hourly updates. predictions up to 16 days into to the future, as well as historical data.. 

You can access past weather data dating back to 1940 with the historical weather API 
offered. *However, there is a 5-day delay in the data*. If you want information for the most recent days, you can use the forecast API and adjust the Past Days setting.


https://open-meteo.com/en/docs/knmi-api?models=knmi_seamless (forecast API) for OBSERVED weather data up to 3 months  ago, up to 'current' (actual) weather
https://open-meteo.com/en/docs/historical-weather-api for weather data > 3 months  ago (secondary)


In [1]:
# install related packages

%pip install openmeteo-requests
%pip install requests-cache retry-requests numpy pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


# weather features and model involved
temperature_2m 🌡️
wind_speed_10m 💨
wind_speed_100m 💨
cloud_cover ☁️
snowfall ☃
apparent_temperature 🥵
radiation features (3x): diffuse_radiation,direct_normal_irradiance,shortwave_radiation
(Diffuse Solar Radiation DHI, Direct Normal Irradiance DNI, Shortwave Solar Radiation GHI)

Location DeBilt ; model used "KNMI-seamless"

In [2]:
# historical API to fetch historical data

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
from datetime import datetime, timezone, timedelta

# define now
now = datetime.now(timezone.utc)
# calculate 5 days back from now
end_date = now - timedelta(days=5)
end_date_date_str = end_date.strftime("%Y-%m-%d")
print(f"5 days back from today: {end_date_date_str}")


# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 52.12949,
	"longitude": 5.20514,
	"hourly": ["temperature_2m", "wind_speed_10m", "apparent_temperature", "cloud_cover", "snowfall", 
            "diffuse_radiation", "direct_normal_irradiance", "shortwave_radiation"],
	"models": "knmi_seamless",
	"end_date": end_date_date_str,
	"start_date": "2025-01-01",    
}
responses = openmeteo.weather_api(url, params=params)

# Process single location. (would have to add a for-loop for multiple locations or weather models)
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(1).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(2).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(3).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(4).ValuesAsNumpy()
hourly_diffuse_radiation = hourly.Variables(5).ValuesAsNumpy()
hourly_direct_normal_irradiance = hourly.Variables(6).ValuesAsNumpy()
hourly_shortwave_radiation = hourly.Variables(7).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m_obs"] = hourly_temperature_2m
hourly_data["wind_speed_10m_obs"] = hourly_wind_speed_10m
hourly_data["apparent_temperature_obs"] = hourly_apparent_temperature
hourly_data["cloud_cover_obs"] = hourly_cloud_cover
hourly_data["snowfall_obs"] = hourly_snowfall
hourly_data["diffuse_radiation_obs"] = hourly_diffuse_radiation
hourly_data["direct_normal_irradiance_obs"] = hourly_direct_normal_irradiance
hourly_data["shortwave_radiation_obs"] = hourly_shortwave_radiation

weather_dataframe_obs = pd.DataFrame(data = hourly_data)
print(weather_dataframe_obs)

5 days back from today: 2025-04-27
Coordinates 52.13199996948242°N 5.190999984741211°E
Elevation 5.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
                          date  temperature_2m_obs  wind_speed_10m_obs  \
0    2025-01-01 00:00:00+00:00            7.432500           22.680000   
1    2025-01-01 01:00:00+00:00            7.632500           20.880001   
2    2025-01-01 02:00:00+00:00            7.282500           28.799999   
3    2025-01-01 03:00:00+00:00            7.782500           29.519999   
4    2025-01-01 04:00:00+00:00            7.682500           28.799999   
...                        ...                 ...                 ...   
2803 2025-04-27 19:00:00+00:00           15.582500            5.760000   
2804 2025-04-27 20:00:00+00:00           13.932500            6.840000   
2805 2025-04-27 21:00:00+00:00           12.382501            5.400000   
2806 2025-04-27 22:00:00+00:00           10.932500            4.320000   
2807 2025-04-27 23:00:00+00:

In [3]:
import sqlite3

# Connect to the SQLite database
db_path = '/Users/Twan/Library/Mobile Documents/com~apple~CloudDocs/Data Science/Data Projects EASI/ENEXIS/src/data/WARP.db'
conn = sqlite3.connect(db_path)

# Write the DataFrame to the database table 'raw_weather_obs'
# If table exists, replace it. If not, create new table
weather_dataframe_obs.to_sql('raw_weather_obs', conn, if_exists='replace', index=False)

# Close the connection
conn.close()

print("Data successfully written to database table 'raw_weather_obs'")

Data successfully written to database table 'raw_weather_obs'


In [4]:


# Connect to the SQLite database using the existing db_path
conn = sqlite3.connect(db_path)
# Query the last 5 rows from raw_weather_obs table
df_head = pd.read_sql_query("SELECT * FROM raw_weather_obs ORDER BY date DESC LIMIT 5", conn)

# Close the connection
conn.close()

# Display the results
print(df_head)
print(df_head.tail())
# Get the max timestamp from the database
conn = sqlite3.connect(db_path)
final_dataframe = pd.read_sql_query("SELECT date FROM raw_weather_obs", conn)
conn.close()

# Convert to datetime and find max date
final_dataframe['date'] = pd.to_datetime(final_dataframe['date'])
most_recent_date = final_dataframe['date'].max().strftime('%Y-%m-%d')
print(f"The most recent date in raw_weather_obs is: {most_recent_date}")

                        date  temperature_2m_obs  wind_speed_10m_obs  \
0  2025-04-27 23:00:00+00:00            8.932500                3.24   
1  2025-04-27 22:00:00+00:00           10.932500                4.32   
2  2025-04-27 21:00:00+00:00           12.382501                5.40   
3  2025-04-27 20:00:00+00:00           13.932500                6.84   
4  2025-04-27 19:00:00+00:00           15.582500                5.76   

   apparent_temperature_obs  cloud_cover_obs  snowfall_obs  \
0                  7.386863              0.0           0.0   
1                  9.502896              0.0           0.0   
2                 10.842266              0.0           0.0   
3                 12.001608              0.0           0.0   
4                 13.504770              0.0           0.0   

   diffuse_radiation_obs  direct_normal_irradiance_obs  \
0               0.000000                      0.000000   
1               0.000000                      0.000000   
2               0.00

In [5]:
# forecast API to fetch 'recent' historical data

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry
from datetime import datetime, timezone, timedelta

# define now
now = datetime.now(timezone.utc)
# Convert current time to YYYY-MM-DD format
now_date_str = now.strftime("%Y-%m-%d")

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": 52.12949,
	"longitude": 5.20514,
	"hourly": ["temperature_2m", "wind_speed_10m", "apparent_temperature", "cloud_cover", "snowfall", "diffuse_radiation", "direct_normal_irradiance", "shortwave_radiation"],
	"models": "knmi_seamless",
	"end_date": now_date_str,
	"start_date": most_recent_date,
}
responses = openmeteo.weather_api(url, params=params)

# Process single location. (would have to add a for-loop for multiple locations or weather models)
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(1).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(2).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(3).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(4).ValuesAsNumpy()
hourly_diffuse_radiation = hourly.Variables(5).ValuesAsNumpy()
hourly_direct_normal_irradiance = hourly.Variables(6).ValuesAsNumpy()
hourly_shortwave_radiation = hourly.Variables(7).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m_obs"] = hourly_temperature_2m
hourly_data["wind_speed_10m_obs"] = hourly_wind_speed_10m
hourly_data["apparent_temperature_obs"] = hourly_apparent_temperature
hourly_data["cloud_cover_obs"] = hourly_cloud_cover
hourly_data["snowfall_obs"] = hourly_snowfall
hourly_data["diffuse_radiation_obs"] = hourly_diffuse_radiation
hourly_data["direct_normal_irradiance_obs"] = hourly_direct_normal_irradiance
hourly_data["shortwave_radiation_obs"] = hourly_shortwave_radiation

recent_obs_dataframe = pd.DataFrame(data = hourly_data)
print(recent_obs_dataframe)


Coordinates 52.13199996948242°N 5.190999984741211°E
Elevation 5.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
                         date  temperature_2m_obs  wind_speed_10m_obs  \
0   2025-04-27 00:00:00+00:00            9.032500            7.200000   
1   2025-04-27 01:00:00+00:00            8.432500            7.200000   
2   2025-04-27 02:00:00+00:00            8.432500            7.200000   
3   2025-04-27 03:00:00+00:00            7.382500            5.040000   
4   2025-04-27 04:00:00+00:00            7.682500            5.760000   
..                        ...                 ...                 ...   
139 2025-05-02 19:00:00+00:00           17.132500            9.360000   
140 2025-05-02 20:00:00+00:00           15.682500            9.000000   
141 2025-05-02 21:00:00+00:00           14.882501           10.080000   
142 2025-05-02 22:00:00+00:00           13.882501           10.799999   
143 2025-05-02 23:00:00+00:00           12.932500           10.080000   



In [6]:
conn = sqlite3.connect(db_path)

# Read existing data from raw_weather_obs
existing_data = pd.read_sql_query("SELECT * FROM raw_weather_obs", conn)

# Convert date columns to datetime for both dataframes
existing_data['date'] = pd.to_datetime(existing_data['date'])
recent_obs_dataframe['date'] = pd.to_datetime(recent_obs_dataframe['date'])

# Remove any duplicates based on date
merged_df = pd.concat([existing_data, recent_obs_dataframe]).drop_duplicates(subset='date', keep='last')

# Sort by date
merged_df = merged_df.sort_values('date')

# Write the merged dataframe back to the database
merged_df.to_sql('raw_weather_obs', conn, if_exists='replace', index=False)

# Close the connection
conn.close()

print(f"Data successfully merged. Total rows: {len(merged_df)}")
print(f"Date range: {merged_df['date'].min()} to {merged_df['date'].max()}")

Data successfully merged. Total rows: 2928
Date range: 2025-01-01 00:00:00+00:00 to 2025-05-02 23:00:00+00:00
