In [205]:
from datetime import datetime, timedelta
import pandas as pd
import boto3
import botocore
from botocore.exceptions import NoCredentialsError

import gzip
import shutil
import os

In [206]:
# Repeplace with your pwd and csv file name
csv_storage = "your_path_to_csv_storage/"
csv_name = "2021_torn.csv"

# Define bucket name and prefix, replace with your prefered folder
bucket_name = 'noaa-mrms-pds'
s3_file_prefix = 'CONUS/EchoTop_18_00.50/'

# set how many records you want to download
n = 2

# Replace with your download file storage address
MRMS_storage = 'your_path_to_MRMS_storage/'

In [207]:
def download_file_from_s3(bucket_name, s3_file_path, local_file_path):
    s3 = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
    try:
        s3.download_file(bucket_name, s3_file_path, local_file_path)
        print(f"File downloaded successfully: {local_file_path}")
    except FileNotFoundError:
        print("The file was not found")
    except NoCredentialsError:
        print("Credentials not available")

In [208]:
def decompress_gzip(gzip_file_path, decompressed_file_path):
    with gzip.open(gzip_file_path, 'rb') as f_in:
        with open(decompressed_file_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    print(f"File decompressed successfully: {decompressed_file_path}")

In [209]:
def remove_file(file_path):
    os.remove(file_path)
    print(f"File removed: {file_path}")

In [210]:
def download_closest_file_from_s3(bucket_name, s3_file_prefix, target_time):
    s3 = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
    min_time_diff = timedelta.max
    closest_file = None

    # Combine the s3_file_prefix with the date of the target time
    s3_file_prefix_with_date = f"{s3_file_prefix}{target_time.strftime('%Y%m%d')}/"
    
    # Iterate through files with the specified prefix
    for obj in s3.list_objects_v2(Bucket=bucket_name, Prefix=s3_file_prefix_with_date).get('Contents', []):
        # Extract file timestamp from its key
        file_time_str = obj['Key'].split('_')[-1].split('.')[0]
        file_time = datetime.strptime(file_time_str, '%Y%m%d-%H%M%S')
        # print("file_time_str", target_time)
        
        # Calculate time difference
        time_diff = abs(target_time - file_time)
        # print("time_diff", target_time, " ", file_time)
        
        # Update closest file if time difference is smaller
        if time_diff < min_time_diff:
            min_time_diff = time_diff
            closest_file = obj['Key']

    print(closest_file)

    return closest_file

In [211]:
# read csv file
csv_file_path = f"{csv_storage}{csv_name}"

df = pd.read_csv(csv_file_path)

In [212]:
#Merge date and time columns into a datatime column
df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])

#Convert the datatime column to a Python datatime object and store it in a list
target_times = [datetime(x.year, x.month, x.day, x.hour, x.minute, x.second) for x in df['datetime']]

#Get the top n elements and convert them into a list
target_times_first_n = target_times[:n]
print(target_times_first_n)

[datetime.datetime(2021, 1, 1, 13, 1), datetime.datetime(2021, 1, 1, 13, 18)]


In [214]:
if closest_file_name:
    # Define local file path
    local_file_name = closest_file_name.split('/')[-1]
    local_file_path = f'{MRMS_storage}{local_file_name}'
    print(f"Local file path: {local_file_path}")

    # Download the closest file from S3
    download_file_from_s3(bucket_name, closest_file_name, local_file_path)
else:
    print("No files found")

Local file path: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz
File downloaded successfully: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz


In [215]:
decompressed_file_path = local_file_path.rsplit('.', 1)[0]
decompress_gzip(local_file_path, decompressed_file_path)
remove_file(local_file_path)

File decompressed successfully: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2
File removed: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz


In [216]:
for target_time in target_times_first_n:
    closest_file_name = download_closest_file_from_s3(bucket_name, s3_file_prefix, target_time)

    if closest_file_name:
        # Define local file path
        local_file_name = closest_file_name.split('/')[-1]
        # replace with your own storage address
        MRMS_storage = '/Users/yinayi/Downloads/AWS analysis/MRMS Data/'
        local_file_path = f'{MRMS_storage}{local_file_name}'
        print(f"Local file path: {local_file_path}")

        # Download the closest file from S3
        download_file_from_s3(bucket_name, closest_file_name, local_file_path)
        
        # Decompress the downloaded file
        decompressed_file_path = local_file_path.rsplit('.', 1)[0]
        decompress_gzip(local_file_path, decompressed_file_path)
        remove_file(local_file_path)
    else:
        print("No files found")

CONUS/EchoTop_18_00.50/20210101/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz
Local file path: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz
File downloaded successfully: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz
File decompressed successfully: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2
File removed: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-130037.grib2.gz
CONUS/EchoTop_18_00.50/20210101/MRMS_EchoTop_18_00.50_20210101-131839.grib2.gz
Local file path: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-131839.grib2.gz
File downloaded successfully: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-131839.grib2.gz
File decompressed successfully: /Users/yinayi/Downloads/AWS analysis/MRMS Data/MRMS_EchoTop_18_00.50_20210101-131839.grib2
File removed: /Use