In [None]:
import os
import cdsapi
from tqdm import tqdm
import time

# Shared CDS API client; download_data() issues every retrieve call through it.
c = cdsapi.Client()  # created once and reused for all requests

# Number of .nc files a complete <year>/<country>/<type> folder holds.
# These counts match the lengths of the pressure-level and surface variable
# lists defined further down (one file is written per variable).
expected_files = {
    'pressure': 7,
    'surface': 26
}

# Years to process. range() excludes its stop value, so this covers 2024 only.
# (The previous start value, 20024, was a typo that produced an empty range.)
years = range(2024, 2025)

# Two-letter country codes mapped to the country names. The names double as
# folder names and as a component of every downloaded file name; the codes
# are the keys used to look up each country's bounding box.
countries_dict = dict(
    SA="Saudi Arabia",
    YE="Yemen",
    OM="Oman",
    QA="Qatar",
    BH="Bahrain",
    AE="United Arab Emirates",
    KW="Kuwait",
)

# Bounding box per country code, passed unchanged as the 'area' key of each
# CDS request. The CDS API documents 'area' as [North, West, South, East]
# (degrees), so the larger latitude comes first. The boxes were previously
# listed South-first, which relied on the service tolerating swapped
# latitudes.
country_areas = {
    "SA": [32.0, 34.5, 16.0, 55.6667],
    "YE": [18.999, 41.833, 12.1111, 54.5303],
    "OM": [26.4, 52.0, 16.642, 59.8],
    "QA": [26.1546, 50.757, 24.4814, 51.6369],
    "BH": [26.2640, 50.454, 25.5350, 50.6640],
    "AE": [26.084, 51.5833, 22.6333, 56.3817],
    "KW": [30.1037, 46.5527, 28.5243, 48.4315]
}

# ERA5 single-level variables to request. One file is downloaded per entry,
# in list order.
surface_variables = [
    # near-surface temperature and wind
    '2m_temperature',
    '2m_dewpoint_temperature',
    '10m_u_component_of_wind',
    '10m_v_component_of_wind',
    # downward radiation, evaporation, sea surface
    'surface_solar_radiation_downwards',
    'surface_thermal_radiation_downwards',
    'evaporation',
    'potential_evaporation',
    'sea_surface_temperature',
    # soil temperature and soil water, levels/layers 1-4
    'soil_temperature_level_1',
    'soil_temperature_level_2',
    'soil_temperature_level_3',
    'soil_temperature_level_4',
    'volumetric_soil_water_layer_1',
    'volumetric_soil_water_layer_2',
    'volumetric_soil_water_layer_3',
    'volumetric_soil_water_layer_4',
    # vegetation
    'leaf_area_index_high_vegetation',
    'leaf_area_index_low_vegetation',
    # pressure, instability, gusts, precipitation
    'surface_pressure',
    'mean_sea_level_pressure',
    'convective_available_potential_energy',
    'geopotential',
    'instantaneous_10m_wind_gust',
    'total_precipitation',
    'k_index',
]

# ERA5 pressure-level variables to request (one file per entry as well).
pressure_level_variables = [
    'geopotential',
    'relative_humidity',
    'temperature',
    'u_component_of_wind',
    'v_component_of_wind',
    'vertical_velocity',
    'vorticity',
]

# Root folder under which <year>/<country>/<surface|pressure>/ is created.
base_dir = r"Z:\Thesis\Data\Met\ERA5"

# Function to verify missing files
def verify_missing_files(directory, file_type):
    """Report how many ``.nc`` files a download folder still lacks.

    Only the *number* of entries ending in ``.nc`` is compared with
    ``expected_files[file_type]``; file names and contents are not inspected.
    A status line is printed in every case.

    Args:
        directory: Folder expected to hold the downloaded files.
        file_type: Key into ``expected_files`` ('pressure' or 'surface').

    Returns:
        The number of files still missing. This is the full expected count
        when the folder does not exist, and 0 when the folder holds the
        expected number of files or more (never negative).

    Raises:
        KeyError: If ``file_type`` is not a key of ``expected_files``.
    """
    expected = expected_files[file_type]

    if not os.path.isdir(directory):
        print(f"Directory does not exist: {directory}")
        return expected

    num_files = sum(1 for entry in os.listdir(directory) if entry.endswith('.nc'))

    if num_files == expected:
        print(f"All {file_type} files are present in {directory}. ({num_files} files)")
        return 0

    if num_files > expected:
        # Extra files must not yield a negative "missing" count: callers add
        # the counts of several folders, and a negative value would cancel
        # out a real shortfall elsewhere.
        print(f"Found {num_files} {file_type} files in {directory}, more than the {expected} expected.")
        return 0

    missing_files = expected - num_files
    print(f"Missing {missing_files} {file_type} files in {directory}. Expected {expected}, but found {num_files} files.")
    return missing_files

# Function to download data
def download_data(year, country_code, country_name, area, output_directory):
    """Download every not-yet-present ERA5 variable file for one country and year.

    For each entry of ``surface_variables`` and ``pressure_level_variables``
    one request is sent through the module-level client ``c`` and stored as
    ``<output_directory>/<year>/<country_name>/<surface|pressure>/
    <year>_<country_name>_<type>_<variable>.nc``. Files that already exist are
    skipped, so the function can be called again after a failure to resume.

    Each request covers all 12 months, days 01-31 and the hours
    00:00, 03:00, ..., 21:00 of ``year``.

    Args:
        year: Year to download; converted to ``str`` for the path, the file
            name and the request.
        country_code: Not used by this function; kept so existing callers
            keep working.
        country_name: Used for the folder name, the file name and log output.
        area: Bounding box passed unchanged as the request's 'area'.
        output_directory: Root folder for the downloaded files.

    Raises:
        Exception: Whatever the client's ``retrieve`` or the file-system
            calls raise is propagated to the caller.
    """
    year = str(year)
    datasets = [
        ("surface", "reanalysis-era5-single-levels", surface_variables),
        ("pressure", "reanalysis-era5-pressure-levels", pressure_level_variables),
    ]

    for variable_type, dataset, variables in datasets:
        output_path = os.path.join(output_directory, year, country_name, variable_type)
        os.makedirs(output_path, exist_ok=True)

        for variable in variables:
            file_name = f"{year}_{country_name}_{variable_type}_{variable}.nc"
            target = os.path.join(output_path, file_name)
            if os.path.exists(target):  # already downloaded on an earlier run
                continue

            request = {
                'product_type': 'reanalysis',
                'format': 'netcdf',
                'area': area,
                'variable': variable,
                'year': year,
                'month': [f"{i:02d}" for i in range(1, 13)],
                # range() excludes its stop value: 32 is needed to include day 31.
                'day': [f"{i:02d}" for i in range(1, 32)],
                'time': [f"{i:02d}:00" for i in range(0, 24, 3)],
            }
            if variable_type == 'pressure':
                request['pressure_level'] = ['1000', '925', '850', '700', '500', '300', '200', '100', '50', '10']

            # Log file download
            print(f"Downloading {file_name} for {country_name}, {year}...")

            # Download under a temporary name and rename only on success.
            # Otherwise an interrupted transfer would leave a truncated .nc
            # file that the exists-check above treats as complete on retry.
            partial_target = target + ".part"
            c.retrieve(dataset, request, partial_target)
            os.replace(partial_target, target)

# Main logic to verify and download missing files
MAX_ATTEMPTS = 10         # download attempts per country/year before giving up
RETRY_DELAY_SECONDS = 5   # pause between two attempts

for year in range(2024, 2025):
    print(f"\nChecking files for the year {year}:")
    for country_code, country_name in countries_dict.items():
        area = country_areas[country_code]

        print(f"\nChecking {country_name} for the year {year}:")

        # Define paths for pressure and surface files
        pressure_path = os.path.join(base_dir, str(year), country_name, 'pressure')
        surface_path = os.path.join(base_dir, str(year), country_name, 'surface')

        # Clamp at zero so that a folder holding extra files can never offset
        # a real shortfall in the other folder when the counts are summed.
        missing_pressure_files = max(0, verify_missing_files(pressure_path, 'pressure'))
        missing_surface_files = max(0, verify_missing_files(surface_path, 'surface'))
        total_missing = missing_pressure_files + missing_surface_files

        # Only download missing files if there are any
        if total_missing == 0:
            continue

        with tqdm(total=total_missing,
                  desc=f"Downloading missing data for {country_name} {year}") as pbar:
            success = False
            for attempt in range(1, MAX_ATTEMPTS + 1):
                try:
                    # download_data() skips files that already exist, so each
                    # new attempt resumes where the previous one failed.
                    download_data(str(year), country_code, country_name, area, base_dir)
                except Exception as e:
                    # Broad on purpose: this is the top-level boundary and any
                    # client or network error should trigger a retry.
                    print(f"Error downloading data for {country_name} {year}: {e}")
                    if attempt < MAX_ATTEMPTS:
                        # Announce and wait only when another attempt follows.
                        print(f"Retrying {attempt}/{MAX_ATTEMPTS}...")
                        time.sleep(RETRY_DELAY_SECONDS)
                else:
                    # The bar advances in one step because download_data()
                    # reports no per-file progress.
                    pbar.update(total_missing)
                    success = True
                    break

            if not success:
                print(f"Failed to download missing data for {country_name} {year} after {MAX_ATTEMPTS} attempts.")
                # Leaves the country loop for this year; the remaining
                # countries of the year are not attempted.
                break



Checking files for the year 2024:

Checking Saudi Arabia for the year 2024:
Directory does not exist: Z:\Thesis\Data\Met\ERA5\2024\Saudi Arabia\pressure
Directory does not exist: Z:\Thesis\Data\Met\ERA5\2024\Saudi Arabia\surface


Downloading missing data for Saudi Arabia 2024:   0%|                                                             | 0/33 [00:00<?, ?it/s]

Downloading 2024_Saudi Arabia_surface_2m_temperature.nc for Saudi Arabia, 2024...


2025-10-15 12:25:08,670 INFO Request ID is 53d881a1-f9f7-459b-8ffb-466ffacb3cb6
2025-10-15 12:25:08,868 INFO status has been updated to accepted
2025-10-15 12:25:41,415 INFO status has been updated to running
2025-10-15 12:28:02,107 INFO status has been updated to successful


b84f4e016fbe65a816c5e5e65b1d4d89.nc:   0%|          | 0.00/26.1M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_2m_dewpoint_temperature.nc for Saudi Arabia, 2024...


2025-10-15 12:28:11,332 INFO Request ID is 20f85729-8f30-46ec-b57f-a9b95f9a0203
2025-10-15 12:28:11,695 INFO status has been updated to accepted
2025-10-15 12:28:24,759 INFO status has been updated to running
2025-10-15 12:32:32,511 INFO status has been updated to successful


7ad6818e31f6d3223603a69ffd787860.nc:   0%|          | 0.00/26.7M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_10m_u_component_of_wind.nc for Saudi Arabia, 2024...


2025-10-15 12:32:39,192 INFO Request ID is 87b83500-46f9-4fd3-b8b7-66d4652a0267
2025-10-15 12:32:39,436 INFO status has been updated to accepted
2025-10-15 12:32:52,427 INFO status has been updated to running
2025-10-15 12:35:32,331 INFO status has been updated to successful


d182f539539fd3425a29fe7fa50bb424.nc:   0%|          | 0.00/37.1M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_10m_v_component_of_wind.nc for Saudi Arabia, 2024...


2025-10-15 12:35:41,054 INFO Request ID is 67e20ee6-ddfb-4f52-b5d5-388e09ee58a6
2025-10-15 12:35:41,306 INFO status has been updated to accepted
2025-10-15 12:36:13,779 INFO status has been updated to running
2025-10-15 12:38:34,242 INFO status has been updated to successful


1f701bc80cda92ecda6796d5024d368f.nc:   0%|          | 0.00/36.8M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_surface_solar_radiation_downwards.nc for Saudi Arabia, 2024...


2025-10-15 12:38:45,861 INFO Request ID is bc2a7c89-ea23-44d4-8a6e-7bcc60a9189f
2025-10-15 12:38:46,027 INFO status has been updated to accepted
2025-10-15 12:43:06,681 INFO status has been updated to running
2025-10-15 12:47:08,878 INFO status has been updated to successful


b5f5f14ca0972469f093fad9a6d5f9ed.nc:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_surface_thermal_radiation_downwards.nc for Saudi Arabia, 2024...


2025-10-15 12:47:16,418 INFO Request ID is ac96d864-6539-4398-83d0-61a2cd1d4142
2025-10-15 12:47:16,662 INFO status has been updated to accepted
2025-10-15 12:50:09,739 INFO status has been updated to running
2025-10-15 12:55:39,251 INFO status has been updated to successful


ec65cba17b8c8d7c20483e0816a18cbe.nc:   0%|          | 0.00/24.6M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_evaporation.nc for Saudi Arabia, 2024...


2025-10-15 12:55:46,201 INFO Request ID is 1df39a7f-c7b2-4d4f-90ec-e592761b05f4
2025-10-15 12:55:46,457 INFO status has been updated to accepted
2025-10-15 12:58:40,180 INFO status has been updated to running
2025-10-15 13:04:10,645 INFO status has been updated to successful


31cbda1576ed352c6f117b5504082e91.nc:   0%|          | 0.00/27.7M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_potential_evaporation.nc for Saudi Arabia, 2024...


2025-10-15 13:04:16,884 INFO Request ID is 11457fb3-28e4-486c-b8ff-41802f7e05de
2025-10-15 13:04:17,095 INFO status has been updated to accepted
2025-10-15 13:07:10,116 INFO status has been updated to running
2025-10-15 13:10:38,706 INFO status has been updated to successful


c814ee0ec35b245a38bbac4ad193d318.nc:   0%|          | 0.00/30.8M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_sea_surface_temperature.nc for Saudi Arabia, 2024...


2025-10-15 13:10:45,747 INFO Request ID is 4a8c8f63-a5f2-445e-b65f-a4936d85c048
2025-10-15 13:10:45,938 INFO status has been updated to accepted
2025-10-15 13:17:07,396 INFO status has been updated to running
2025-10-15 13:21:09,390 INFO status has been updated to successful


6d8c48973b96b1f0de734ba611806156.nc:   0%|          | 0.00/2.60M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_soil_temperature_level_1.nc for Saudi Arabia, 2024...


2025-10-15 13:21:13,141 INFO Request ID is b3045f13-192f-4308-a468-85fae2781252
2025-10-15 13:21:13,340 INFO status has been updated to accepted
2025-10-15 13:24:07,076 INFO status has been updated to running
2025-10-15 13:29:36,839 INFO status has been updated to successful


429cf9d8363d7aa3cd41d6ab0d3478f5.nc:   0%|          | 0.00/25.5M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_soil_temperature_level_2.nc for Saudi Arabia, 2024...


2025-10-15 13:29:55,368 INFO Request ID is 06f6354c-c134-4147-b315-5b5fee626191
2025-10-15 13:29:55,563 INFO status has been updated to accepted
2025-10-15 13:34:16,711 INFO status has been updated to running
2025-10-15 13:38:18,957 INFO status has been updated to successful


805fe7fc4c0cc11391ce87c741a4e180.nc:   0%|          | 0.00/24.5M [00:00<?, ?B/s]

Downloading 2024_Saudi Arabia_surface_soil_temperature_level_3.nc for Saudi Arabia, 2024...


2025-10-15 13:38:25,466 INFO Request ID is b9b75a79-d1eb-468e-9691-1c2a0343837a
2025-10-15 13:38:26,206 INFO status has been updated to accepted
Recovering from connection error [('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))], attemps 1 of 500
Retrying in 120 seconds
