# Open Weather Map

OpenWeather provides weather data for any location on the globe using a proprietary ML-powered hyperlocal forecasting model with resolution from 500 m to 2 km, globally.

Sources:
- [API Documentation](https://openweathermap.org/api)

In [308]:
import requests
import time
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from datetime import datetime, timedelta
from shapely.geometry import MultiPolygon
from getpass import getpass

In [28]:
# Register tqdm with pandas so that `progress_apply` (used below) is available.
tqdm.pandas()

## Load data

In [142]:
# Read the boundary polygons into a GeoDataFrame.
# NOTE(review): going by the file name this is presumably the GADM 4.1 level-1 layer for Sudan — confirm.
gdf = gpd.read_file("data/administrative_boundaries/gadm/gadm41_SDN_1.json")

## Extract lat-long from multipolygon format

In [145]:
# Ensure geometries are in EPSG:4326 (lat/lon), so that the x/y values read
# from them later can be treated as longitude/latitude.
gdf = gdf.to_crs(epsg=4326)

In [147]:
def convert_coordinates_format(geometry):
    """
    Turn a geometry into nested lists of its exterior-ring coordinates.

    Parameters:
        geometry (shapely.geometry): The geometry object (Polygon or MultiPolygon).

    Returns:
        list: For a Polygon, a one-element list holding its exterior ring as a
            list of coordinate tuples. For a MultiPolygon, a one-element list
            holding the list of exterior rings, one ring per member polygon.
            Any other geometry type gives an empty list. Interior rings
            (holes) are not included.
    """
    kind = geometry.geom_type

    if kind == 'Polygon':
        exterior_ring = list(geometry.exterior.coords)
        return [exterior_ring]

    if kind == 'MultiPolygon':
        # one exterior ring per member polygon, in the order shapely yields them
        exterior_rings = []
        for part in geometry.geoms:
            exterior_rings.append(list(part.exterior.coords))
        return [exterior_rings]

    # unsupported geometry type
    return []

In [149]:
# Convert every geometry to nested coordinate lists, showing a progress bar.
gdf["coordinates"] = gdf["geometry"].progress_apply(convert_coordinates_format)

100%|██████████| 18/18 [00:00<00:00, 1400.80it/s]


The OpenWeatherMap API expects a single latitude and longitude point, so it's necessary to calculate the central point (centroid) of the polygon or use representative points from the coordinates.

In [152]:
def get_centroid_coordinates(gdf):
    """
    Get the centroid of each region in the GeoDataFrame.

    The result always has exactly one entry per row, in row order, so it can
    be assigned straight back to the frame as a new column.

    Args:
    - gdf (GeoDataFrame): A GeoDataFrame with polygon or multipolygon geometries
      in its 'geometry' column.

    Returns:
    - list: A list of (latitude, longitude) tuples, one per row. Rows whose
      geometry is missing (None) or invalid get (nan, nan) instead of a centroid.
    """
    placeholder = (float('nan'), float('nan'))
    centroids = []

    for _, row in gdf.iterrows():
        geometry = row['geometry']

        if geometry is None or not geometry.is_valid:
            # Skipping the row would shift every later centroid onto the wrong
            # region and make the list too short to assign as a column, so a
            # placeholder keeps the positions aligned with the rows.
            centroids.append(placeholder)
            continue

        centroid = geometry.centroid
        centroids.append((centroid.y, centroid.x))  # (lat, lon)

    return centroids

In [154]:
# Store the centroids as a new column; the returned list needs one entry per
# row of gdf for this assignment to succeed.
gdf["centroid_coordinates"] = get_centroid_coordinates(gdf)

## Get weather data

Example API call: ```https://api.openweathermap.org/data/3.0/onecall/day_summary?lat={lat}&lon={lon}&date={date}&appid={API key}```

Example response:
```
{
   "lat":33,
   "lon":35,
   "tz":"+02:00",
   "date":"2020-03-04",
   "units":"standard",
   "cloud_cover":{
      "afternoon":0
   },
   "humidity":{
      "afternoon":33
   },
   "precipitation":{
      "total":0
   },
   "temperature":{
      "min":286.48,
      "max":299.24,
      "afternoon":296.15,
      "night":289.56,
      "evening":295.93,
      "morning":287.59
   },
   "pressure":{
      "afternoon":1015
   },
   "wind":{
      "max":{
         "speed":8.7,
         "direction":120
      }
   }
}                
```

In [310]:
# `getpass` is imported at the top of the notebook as the function itself
# (`from getpass import getpass`), so it is called directly here;
# `getpass.getpass(...)` would raise AttributeError.
# Prompting keeps the key out of the notebook source and its saved output.
API_KEY = getpass("OpenWeatherMap API key: ")
# Endpoint used for every request in this notebook (see the example call above).
BASE_URL = "https://api.openweathermap.org/data/3.0/onecall/day_summary"

OpenWeatherMap API key:  ········


In [178]:
def get_daily_weather(lat, lon, date, tz):
    """
    Requests the daily weather summary for a given latitude, longitude, date, and timezone.

    The request is sent to BASE_URL with metric units. Network failures and
    unreadable responses are reported through the returned dictionary rather
    than raised, so a long batch of calls is not aborted by one bad request.

    Args:
    - lat (float): Latitude coordinate.
    - lon (float): Longitude coordinate.
    - date (str): Date in 'YYYY-MM-DD' format.
    - tz (str): Timezone for the location.

    Returns:
    - dict: Always contains 'latitude', 'longitude' and 'date'. On success it
      also contains 'data' (the decoded JSON response); otherwise it contains
      'error' with a short message.
    """
    params = {
        'lat': lat,
        'lon': lon,
        'date': date,
        'tz': tz,
        'appid': API_KEY,
        'units': 'metric'
    }

    result = {
        'latitude': lat,
        'longitude': lon,
        'date': date
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(BASE_URL, params=params, timeout=30)
    except requests.RequestException as exc:
        # Only the exception type is reported: the exception text can include
        # the request URL, and with it the API key.
        result['error'] = f'Request failed ({type(exc).__name__})'
        return result

    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON (for example an HTML error page from a proxy).
        payload = None

    if response.status_code == 200 and payload is not None:
        result['data'] = payload
        return result

    if isinstance(payload, dict):
        result['error'] = payload.get('message', 'Failed to fetch data')
    else:
        result['error'] = 'Failed to fetch data'
    return result

In [187]:
def fetch_weather_for_geodataframe(gdf, start_date, end_date, tz='+02:00', step_days=7, pause_seconds=1):
    """
    Fetch weather data for each row in a GeoPandas DataFrame.

    Every row is sampled at `start_date` and then every `step_days` days, up to
    and including `end_date`. Failed requests are reported with `print` and
    left out of the result.

    Args:
    - gdf (GeoDataFrame): A GeoPandas DataFrame with a 'centroid_coordinates'
      column holding (latitude, longitude) tuples.
    - start_date (str): Start date in 'YYYY-MM-DD' format.
    - end_date (str): End date in 'YYYY-MM-DD' format (inclusive).
    - tz (str): Timezone offset sent with every request and stored in the
      'timezone' column. Defaults to '+02:00'.
    - step_days (int): Number of days between two sampled dates. Defaults to 7
      (one sample per week); use 1 to fetch every day.
    - pause_seconds (float): Pause after each request, in seconds. Defaults to 1.

    Returns:
    - DataFrame: One row per successful request, with the columns 'latitude',
      'longitude', 'timezone', 'date' and 'data' (the raw API payload).

    Raises:
    - ValueError: If `step_days` is smaller than 1, or if a date string does
      not match 'YYYY-MM-DD'.
    """
    if step_days < 1:
        # A zero or negative step would never move past end_date.
        raise ValueError("step_days must be at least 1")

    weather_records = []

    start_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.strptime(end_date, '%Y-%m-%d')
    step = timedelta(days=step_days)

    # `total` lets tqdm draw a real progress bar; iterrows() is a generator
    # and has no length of its own.
    for _, row in tqdm(gdf.iterrows(), total=len(gdf)):
        lat, lon = row['centroid_coordinates'][0], row['centroid_coordinates'][1]

        current_date = start_date
        while current_date <= end_date:
            date_str = current_date.strftime('%Y-%m-%d')
            weather_data = get_daily_weather(lat, lon, date_str, tz)

            if 'error' not in weather_data:
                weather_records.append({
                    'latitude': lat,
                    'longitude': lon,
                    'timezone': tz,
                    'date': date_str,
                    'data': weather_data['data']
                })
            else:
                print(f"Error fetching data for {lat}, {lon} on {date_str}: {weather_data['error']}")

            current_date += step
            # Space the requests out.
            time.sleep(pause_seconds)

    return pd.DataFrame(weather_records)

In [189]:
# Date window for the weather requests, as 'YYYY-MM-DD' strings; the end date is inclusive.
start_date = '2024-01-01'
end_date = '2024-12-31'

In [196]:
# Long-running cell: one API request per sampled date per region, with a pause after each request.
weather_df = fetch_weather_for_geodataframe(gdf, start_date, end_date)

18it [28:26, 94.83s/it]


In [223]:
# Quick check of the row count, column dtypes and non-null counts.
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 954 entries, 0 to 953
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   latitude   954 non-null    float64
 1   longitude  954 non-null    float64
 2   timezone   954 non-null    object 
 3   date       954 non-null    object 
 4   data       954 non-null    object 
dtypes: float64(2), object(3)
memory usage: 37.4+ KB


In [255]:
def process_weather_data(data):
    """
    Flatten one daily-summary API payload into a single-level record.

    The column names assume the payload was requested with metric units, as
    the requests in this notebook are: temperatures are then already in
    degrees Celsius, while wind speed arrives in metres per second and is
    converted here so the value matches its `_kmh` column name.

    Args:
    - data (dict): Decoded JSON payload with the keys 'cloud_cover', 'humidity',
      'precipitation', 'temperature', 'pressure' and 'wind' (see the example
      response above).

    Returns:
    - dict: Flat mapping from measurement name to value.

    Raises:
    - KeyError: If an expected key is missing from the payload.
    """
    temperature = data['temperature']
    strongest_wind = data['wind']['max']

    return {
        'cloud_cover': data['cloud_cover']['afternoon'],
        'humidity_percentage': data['humidity']['afternoon'],
        'precipitation_total_mm': data['precipitation']['total'],
        'temperature_min_celsius': temperature['min'],
        'temperature_max_celsius': temperature['max'],
        'temperature_afternoon_celsius': temperature['afternoon'],
        'temperature_night_celsius': temperature['night'],
        'temperature_evening_celsius': temperature['evening'],
        'temperature_morning_celsius': temperature['morning'],
        'pressure_hpa': data['pressure']['afternoon'],
        # m/s -> km/h; rounding only strips floating-point noise from the product
        'wind_max_speed_kmh': round(strongest_wind['speed'] * 3.6, 3),
        'wind_max_direction_degrees': strongest_wind['direction']
    }

In [259]:
# Flatten each nested API payload in the 'data' column into a flat dict of named measurements.
processed_data = weather_df['data'].apply(process_weather_data)

In [261]:
# Expand the dicts into a DataFrame with one column per measurement.
processed_df = pd.DataFrame(processed_data.tolist())

In [265]:
# Attach the flattened columns side by side. Rows are matched on index labels,
# so both frames need the same index.
weather_df = pd.concat([weather_df, processed_df], axis=1)

In [279]:
# Split the (lat, lon) centroid tuples into two separate columns, keeping gdf's index.
gdf[['latitude', 'longitude']] = pd.DataFrame(gdf['centroid_coordinates'].tolist(), index=gdf.index)

In [283]:
# Left join keeps every region. The join key is exact equality of the float
# centroid coordinates; weather_df stores the same values that were read from
# gdf's centroid column when the data was fetched.
merged_df = pd.merge(gdf, weather_df, on=['latitude', 'longitude'], how='left')

## Saving the data

In [293]:
# Select the columns to export: region attributes, geometry and coordinate
# columns, the date and the flattened weather measurements.
gdf_to_save = (
    merged_df[
        [
            # region attributes
            'GID_1', 'GID_0', 'COUNTRY', 'NAME_1', 'VARNAME_1', 'NL_NAME_1',
            'TYPE_1', 'ENGTYPE_1', 'CC_1', 'HASC_1', 'ISO_1',
            # geometry and derived coordinates
            'geometry', 'coordinates', 'centroid_coordinates',
            'latitude', 'longitude',
            # weather measurements
            'date',
            'cloud_cover',
            'humidity_percentage',
            'precipitation_total_mm',
            'temperature_min_celsius',
            'temperature_max_celsius',
            'temperature_afternoon_celsius',
            'temperature_night_celsius',
            'temperature_evening_celsius',
            'temperature_morning_celsius',
            'pressure_hpa',
            'wind_max_speed_kmh',
            'wind_max_direction_degrees',
        ]
    ]
    .copy()
    .reset_index(drop=True)
)

In [301]:
# Tabular export (CSV) without the geometry column and without the index.
gdf_to_save.drop(columns='geometry').to_csv('data/openweathermap/openweathermap_for_sdn_by_state.csv', index=False)

In [303]:
# Same table as JSON, one object per row (orient='records'), again without the geometry column.
gdf_to_save.drop(columns='geometry').to_json('data/openweathermap/openweathermap_for_sdn_by_state.json', orient='records')

In [305]:
# Spatial export including the geometry column, as GeoJSON.
gdf_to_save.to_file("data/openweathermap/openweathermap_for_sdn_by_state.geojson", driver="GeoJSON")

# Same data as an ESRI Shapefile.
# NOTE(review): the repeated ogr_write warnings in this cell's output are presumably
# about field names being shortened to the Shapefile 10-character limit — confirm
# before relying on the column names in the .shp output.
gdf_to_save.to_file("data/openweathermap/openweathermap_for_sdn_by_state.shp", driver="ESRI Shapefile")

  gdf_to_save.to_file("data/openweathermap/openweathermap_for_sdn_by_state.shp", driver="ESRI Shapefile")
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
  ogr_write(
