# Generate download links for the IMERG v7.0 final half-hourly product

In [1]:
import datetime

**Example**

https://data.gesdisc.earthdata.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/2024/270/3B-HHR.MS.MRG.3IMERG.20240926-S233000-E235959.1410.V07B.HDF5


In [2]:

def generate_links_for_year(year, day_of_year=163):
    """
    Generates a list of URLs for all 48 half-hourly IMERG files for a given year
    on the specified day_of_year.
    
    Parameters:
      year (int): The year for which to generate links.
      day_of_year (int): The day of the year, 1-365 (1-366 in leap years).
        Default is 163.
      
    Returns:
      list: A list of 48 URL strings, ordered from the 00:00 file to the
        23:30 file.

    Raises:
      ValueError: If day_of_year is not a valid day number for the given year.
    """
    # Reject out-of-range days up front. Otherwise the date arithmetic below
    # rolls over into a neighbouring year, and the year directory in the URL
    # no longer agrees with the date embedded in the file name.
    days_in_year = datetime.date(year, 12, 31).timetuple().tm_yday
    if not 1 <= day_of_year <= days_in_year:
        raise ValueError(
            f"day_of_year must be between 1 and {days_in_year} for year {year}, "
            f"got {day_of_year}"
        )

    # Calendar date corresponding to day_of_year.
    date = datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year - 1)
    date_str = date.strftime("%Y%m%d")
    # The directory level below the year is the zero-padded day of year, e.g. '001'.
    doy_str = f"{date.timetuple().tm_yday:03d}"
    base_url = f"https://data.gesdisc.earthdata.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/{year}/{doy_str}/"

    # Midnight of the requested day; identical for every interval, so build it once.
    midnight = datetime.datetime(year, date.month, date.day)

    links = []
    # One file per half hour: start offsets of 0, 30, ..., 1410 minutes (48 files).
    for minutes_since_midnight in range(0, 24 * 60, 30):
        start_time = midnight + datetime.timedelta(minutes=minutes_since_midnight)
        # Each file covers 29 minutes and 59 seconds.
        end_time = start_time + datetime.timedelta(minutes=29, seconds=59)
        # The file number in the name is the start offset in minutes since
        # midnight, zero-padded to four digits (00:00 -> 0000, 00:30 -> 0030).
        file_name = (
            f"3B-HHR.MS.MRG.3IMERG.{date_str}-S{start_time:%H%M%S}"
            f"-E{end_time:%H%M%S}.{minutes_since_midnight:04d}.V07B.HDF5"
        )
        links.append(base_url + file_name)
    return links

# Collect the links for every half-hourly file of every day from 2000 through 2022.
full_links = []

for year in range(2000, 2023):

    # The day-of-year of December 31 is the number of days in the year
    # (365 or 366). This follows the full Gregorian leap-year rule, unlike a
    # bare `year % 4 == 0` test, which misclassifies century years such as 2100.
    N_days = datetime.date(year, 12, 31).timetuple().tm_yday

    for day in range(1, N_days + 1):
        full_links.extend(generate_links_for_year(year, day))
        

In [8]:
# Inspect the entry at index 33500 to see which date and half hour it corresponds to.
full_links[33500]

'https://data.gesdisc.earthdata.nasa.gov/data/GPM_L3/GPM_3IMERGHH.07/2001/332/3B-HHR.MS.MRG.3IMERG.20011128-S220000-E222959.1320.V07B.HDF5'

In [9]:
# Drop the first 33500 links and keep everything from index 33500 onward
# (the 2001-11-28 22:00 file, per the output shown above).
# NOTE(review): presumably the earlier files were already downloaded — confirm.
full_links = full_links[33500:]

In [10]:
# Dump the remaining links to download.sh: a bash shebang and a blank line
# first, then one URL per line.
# NOTE(review): the lines after the shebang are bare URLs, not shell commands —
# presumably the file is consumed as a URL list by a separate step; confirm.
with open("/glade/campaign/cisl/aiml/ksha/IMERG_V7/half_hour/download.sh", "w") as file:
    file.write("#!/bin/bash\n\n")
    file.writelines(link + "\n" for link in full_links)

## Downloading script