In [205]:
from datetime import datetime, timedelta
import pandas as pd
import boto3
import botocore
from botocore.exceptions import NoCredentialsError

import gzip
import shutil
import os

In [206]:
# Replace with the directory that holds your CSV file (keep the trailing slash:
# it is concatenated directly with csv_name) and with your CSV file name
csv_storage = "your_path_to_csv_storage/"
csv_name = "2021_torn.csv"

# Define bucket name and prefix; replace the prefix with your preferred folder
bucket_name = 'noaa-mrms-pds'
s3_file_prefix = 'CONUS/EchoTop_18_00.50/'

# Set how many records (taken from the start of the CSV) you want to download
n = 2

# Replace with the directory where downloaded files should be stored
# (include the trailing slash)
MRMS_storage = 'your_path_to_MRMS_storage/'

In [207]:
# Function definitions
def download_file_from_s3(bucket, s3_path, local_path):
    """Download one S3 object to a local path using unsigned (anonymous) requests.

    Args:
        bucket: Name of the S3 bucket.
        s3_path: Key of the object inside the bucket.
        local_path: Destination path on the local filesystem.

    Returns:
        True when the object was downloaded, False when a handled error
        (missing object, missing local path, missing credentials) was
        reported on stdout instead.

    Raises:
        botocore.exceptions.ClientError: For S3 errors other than a missing
            object (e.g. access denied), which are not silently swallowed.
    """
    s3 = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
    try:
        s3.download_file(bucket, s3_path, local_path)
    except botocore.exceptions.ClientError as err:
        # S3 reports a missing key as a ClientError (404 / NoSuchKey), not as
        # FileNotFoundError, so it has to be recognised here explicitly.
        error_code = err.response.get('Error', {}).get('Code')
        if error_code not in ('404', 'NoSuchKey'):
            raise
        print("The file was not found")
        return False
    except FileNotFoundError:
        # Presumably raised when the local destination directory is missing.
        print("The file was not found")
        return False
    except NoCredentialsError:
        print("Credentials not available")
        return False
    print(f"File downloaded successfully: {local_path}")
    return True

def decompress_gzip(gzip_path, decompressed_path):
    """Write the decompressed contents of a gzip file to a new file.

    Args:
        gzip_path: Path of the gzip-compressed input file.
        decompressed_path: Path the decompressed bytes are written to
            (opened in 'wb' mode, so existing content is overwritten).
    """
    # The compressed source is opened first, so a missing input fails
    # before any output file is created.
    with gzip.open(gzip_path, 'rb') as compressed_stream:
        with open(decompressed_path, 'wb') as plain_stream:
            shutil.copyfileobj(compressed_stream, plain_stream)
    print(f"File decompressed successfully: {decompressed_path}")

def remove_file(path):
    """Delete the file at `path` and report the removal on stdout.

    Args:
        path: Path of the file to delete.
    """
    os.unlink(path)
    message = f"File removed: {path}"
    print(message)

def download_closest_file_from_s3(bucket, prefix, time):
    """Download and decompress the object whose timestamp is closest to `time`.

    Only the folder for the calendar day of `time` (``<prefix>YYYYMMDD/``) is
    searched, so a closer file stored under an adjacent day is not considered.
    The object is saved under the module-level ``MRMS_storage`` directory,
    decompressed next to it, and the compressed download is then removed.

    Args:
        bucket: Name of the S3 bucket to search.
        prefix: Key prefix of the folder to search; the date folder name is
            appended directly, so it should end with '/'.
        time: Target datetime. It is subtracted from timezone-naive
            timestamps parsed from the keys, so it must be naive as well.

    Returns:
        Path of the decompressed local file, or None when no matching object
        was found or the download did not produce a file.
    """
    s3 = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
    closest_file = None
    min_diff = timedelta.max
    prefix_with_date = f"{prefix}{time.strftime('%Y%m%d')}/"

    # A single list_objects_v2 call returns at most 1000 keys; paginate so
    # every object in the day folder takes part in the comparison.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix_with_date):
        for obj in page.get('Contents', []):
            key = obj['Key']
            # The timestamp is the text between the last '_' and the first
            # following '.', formatted as YYYYMMDD-HHMMSS.
            stamp = key.split('_')[-1].split('.')[0]
            try:
                file_time = datetime.strptime(stamp, '%Y%m%d-%H%M%S')
            except ValueError:
                # Skip keys that do not carry a parsable timestamp instead of
                # aborting the whole search.
                continue
            diff = abs(time - file_time)
            if diff < min_diff:
                min_diff = diff
                closest_file = key
    print('closest_file name', closest_file)

    if not closest_file:
        print("No files found")
        return None

    # Make sure the destination directory exists before downloading into it.
    if MRMS_storage:
        os.makedirs(MRMS_storage, exist_ok=True)
    local_file = os.path.join(MRMS_storage, closest_file.split('/')[-1])
    download_file_from_s3(bucket, closest_file, local_file)

    # The download helper reports some failures by printing only, so confirm
    # the file really exists before trying to decompress it.
    if not os.path.exists(local_file):
        print(f"Download did not produce a file, skipping: {closest_file}")
        return None

    # Strip the final extension (the compression suffix) for the output name.
    decompressed_file = local_file.rsplit('.', 1)[0]
    decompress_gzip(local_file, decompressed_file)
    remove_file(local_file)
    return decompressed_file

In [211]:
# Load the event records from the configured CSV location
csv_file_path = csv_storage + csv_name
df = pd.read_csv(csv_file_path)

# Combine the 'date' and 'time' text columns into one timestamp column
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])

# Rebuild every timestamp as a plain datetime at whole-second resolution
target_times = [
    datetime(stamp.year, stamp.month, stamp.day, stamp.hour, stamp.minute, stamp.second)
    for stamp in df['datetime']
]

# Fetch the closest file for each of the first n target times
for target_time in target_times[:n]:
    download_closest_file_from_s3(bucket_name, s3_file_prefix, target_time)