## Download Data from AWS S3 Bucket

This notebook downloads NOAA Multi-Radar/Multi-Sensor System (MRMS) data from an AWS S3 bucket using anonymous (unsigned) requests.

In [1]:
# import libraries
import boto3
import os
from datetime import datetime, timedelta
from botocore import UNSIGNED
from botocore.client import Config

In [2]:
# List all files in a folder in an S3 bucket
def list_files_in_folder(bucket_name, folder_name):
    """Return the keys of every object stored under a prefix in an S3 bucket.

    Requests are sent unsigned (anonymous) to the ``us-east-1`` region. The
    listing is read through a ``list_objects_v2`` paginator and every page it
    yields is consumed.

    Args:
        bucket_name: Name of the S3 bucket to query.
        folder_name: Key prefix ("folder") whose objects should be listed.

    Returns:
        A list of object keys in the order the pages deliver them; empty when
        no page carries a ``'Contents'`` entry.
    """
    anonymous = Config(signature_version=UNSIGNED)
    client = boto3.client('s3', region_name='us-east-1', config=anonymous)
    listing = client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket_name,
        Prefix=folder_name,
    )

    # A page without a 'Contents' entry simply contributes no keys.
    return [
        entry['Key']
        for page in listing
        for entry in page.get('Contents', ())
    ]

In [3]:
# Download all files in a folder in an S3 bucket
def download_files_from_folder(bucket_name, folder_name, local_folder):
    """Download every ``.grib2.gz`` object under an S3 prefix into a local folder.

    The prefix is listed with ``list_files_in_folder``. If the listing is
    empty, a "No data available" message is printed and nothing is created or
    downloaded. Otherwise ``local_folder`` is created if needed and each key
    ending in ``.grib2.gz`` is downloaded to a file named after the last
    component of its key; keys with any other suffix are skipped.

    Args:
        bucket_name: Name of the S3 bucket to download from.
        folder_name: Key prefix ("folder") to download, with or without a
            trailing slash.
        local_folder: Directory that receives the downloaded files.

    Returns:
        None. Progress is reported with ``print``.
    """
    file_keys = list_files_in_folder(bucket_name, folder_name)

    # Check if there are any files in the folder
    if not file_keys:
        # Use the last path component of the prefix as the label. Stripping
        # trailing slashes first keeps this correct for 'a/b/20201013/',
        # 'a/b/20201013' and a bare '20201013' alike (indexing [-2] raised
        # IndexError or picked the wrong component for the latter two).
        label = folder_name.rstrip('/').split('/')[-1]
        print(f"No data available for {label}.")
        return

    # Create local folder if it doesn't exist; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(local_folder, exist_ok=True)

    # The client is only needed once there is something to download.
    s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED), region_name='us-east-1')

    # Download all files
    for key in file_keys:
        if key.endswith('.grib2.gz'):
            file_name = os.path.join(local_folder, key.split('/')[-1])
            s3.download_file(bucket_name, key, file_name)
            print(f"Downloaded {key} to {file_name}")

In [4]:
# Download all files in a folder in an S3 bucket within a date range
def download_files_in_date_range(bucket_name, data_type, start_date_str, end_date_str):
    """Download one data type for every day in an inclusive date range.

    Both dates are parsed with the ``%Y%m%d`` format. If the start date is
    after the end date an error message is printed and nothing is downloaded.
    Otherwise, for each day from start to end (both included), the S3 prefix
    ``CONUS/<data_type>/<YYYYMMDD>/`` is handed to
    ``download_files_from_folder`` with ``<data_type>/<YYYYMMDD>`` as the
    local destination.

    Args:
        bucket_name: Name of the S3 bucket to download from.
        data_type: Product name used in both the S3 prefix and the local path.
        start_date_str: First day of the range, as ``YYYYMMDD``.
        end_date_str: Last day of the range (inclusive), as ``YYYYMMDD``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        ValueError: If either date string does not match ``%Y%m%d``.
    """
    first_day = datetime.strptime(start_date_str, '%Y%m%d')
    last_day = datetime.strptime(end_date_str, '%Y%m%d')

    # Guard clause: a reversed range is reported, not processed.
    if first_day > last_day:
        print("Error: start_date should not be after end_date.")
        return

    one_day = timedelta(days=1)
    current_day = first_day
    # Walk the calendar one day at a time, end date included.
    while current_day <= last_day:
        date_str = current_day.strftime('%Y%m%d')
        folder_name = f'CONUS/{data_type}/{date_str}/'
        # Local copies are grouped by data type, then by date.
        local_folder = f'{data_type}/{date_str}'

        print(f"Downloading files for {data_type} on {date_str}...")
        download_files_from_folder(bucket_name, folder_name, local_folder)
        print(f"Completed downloading files for {data_type} on {date_str}.\n")

        current_day += one_day

In [5]:
# Parameters
# Edit these values to choose what to download; they are passed to
# download_files_in_date_range() in the next cell.
bucket_name = 'noaa-mrms-pds'  # S3 bucket that is queried for the MRMS files
data_type = 'MergedBaseReflectivity_00.50'  # Product name; used in the S3 prefix CONUS/<data_type>/<date>/ and in the local folder name
start_date_str = '20201013'  # First day to download, formatted YYYYMMDD
end_date_str = '20201014'  # Last day to download (inclusive), formatted YYYYMMDD

In [6]:
# Download files for the specified date range and data type
# Timing note: each file takes roughly 1-2 s to download.
# Files are written to <data_type>/<YYYYMMDD>/ relative to the working directory.
download_files_in_date_range(bucket_name, data_type, start_date_str, end_date_str)

Downloading files for MergedBaseReflectivity_00.50 on 20201013...
No data available for 20201013.
Completed downloading files for MergedBaseReflectivity_00.50 on 20201013.

Downloading files for MergedBaseReflectivity_00.50 on 20201014...
Downloaded CONUS/MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-211615.grib2.gz to MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-211615.grib2.gz
Downloaded CONUS/MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-211819.grib2.gz to MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-211819.grib2.gz
Downloaded CONUS/MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-212007.grib2.gz to MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-212007.grib2.gz
Downloaded CONUS/MergedBaseReflectivity_00.50/20201014/MRMS_MergedBaseReflectivity_00.50_20201014-212228.grib2.gz to Me