In [1]:
# Install the goes2go package if it is not already available in this kernel's environment:

# %pip install goes2go

In [2]:
from goes2go import GOES
import pandas as pd
from datetime import datetime
import xarray as xr
import subprocess
from netCDF4 import Dataset
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Product names (the `product=` strings below) can be looked up here:

# https://docs.opendata.aws/noaa-goes16/cics-readme.html#accessing-goes-data-on-aws
# The page is written for GOES16, but the naming convention is consistent across the constellation.

# These objects target GOES17; change `satellite` below to point at any other GOES satellite.
goes17_full_domain = {"satellite": 17, "domain": 'F'}

# TODO: confirm MCMIPF (rather than CMIPF) is the intended product — the difference
# between the MCMIP and CMIP products has not been checked yet.
cloud_and_moisture_download = GOES(product="ABI-L2-MCMIPF", **goes17_full_domain)
cloud_optical_depth_download = GOES(product="ABI-L2-CODF", **goes17_full_domain)
reflected_sw_download = GOES(product="ABI-L2-RSRF", **goes17_full_domain)
cloud_top_temp_download = GOES(product="ABI-L2-ACHTF", **goes17_full_domain)
cloud_top_height_download = GOES(product="ABI-L2-ACHAF", **goes17_full_domain)
cloud_top_pressure_download = GOES(product="ABI-L2-CTPF", **goes17_full_domain)
downward_shortwave_download = GOES(product="ABI-L2-DSRF", **goes17_full_domain)
total_precip_water_download = GOES(product="ABI-L2-TPWF", **goes17_full_domain)

In [4]:
import os
import pandas as pd
import subprocess
from datetime import datetime, timedelta

# Add the AWS CLI binary directory to PATH.
# The directory is appended only when it is not already listed, so re-running
# this cell does not keep growing PATH, and a missing PATH variable is tolerated.
aws_bin_dir = '/glade/u/home/adhall/bin'
current_path = os.environ.get('PATH', '')
if aws_bin_dir not in current_path.split(os.pathsep):
    os.environ['PATH'] = f'{current_path}{os.pathsep}{aws_bin_dir}' if current_path else aws_bin_dir
aws_path = os.path.join(aws_bin_dir, 'aws')  # Full path to the AWS CLI

# Debugging: Print the current PATH to ensure it's correctly set
print("Current PATH:", os.environ['PATH'])

# Inclusive range of years to process, and the month of interest
# NOTE(review): the original note here read "GOES16 became operational in
# November 2016" — verify the actual data availability before widening the range.
start_year = 2020
end_year = 2020
month = 4  # April

# Product code for each dataset label; the label is what ends up in the
# local directory and file names.
product_codes = {
    "CloudAndMoistureImagery": "ABI-L2-MCMIPF",
    "CloudOpticalDepth": "ABI-L2-CODF",
    "ReflectedSW": "ABI-L2-RSRF",
    "CloudTopTemperature": "ABI-L2-ACHTF",
    "CloudTopHeight": "ABI-L2-ACHAF",
    "CloudTopPressure": "ABI-L2-CTPF",
    "DownwardShortwave": "ABI-L2-DSRF",
    "TotalPrecipWater": "ABI-L2-TPWF",
}

# One GOES16 download object (domain 'F') per dataset label
download_objects = {
    label: GOES(satellite=16, product=code, domain='F')
    for label, code in product_codes.items()
}

# Function to download files
def get_files_closest_to_time(df, dataset_name, year, target_time):
    """Download, for every calendar day in ``df``, the file nearest ``target_time``.

    Parameters
    ----------
    df : pandas.DataFrame or None
        File listing with a ``start`` column (anything ``pd.to_datetime``
        accepts) and a ``file`` column holding the S3 path without the
        ``s3://`` prefix. The frame is not modified.
    dataset_name : str
        Label used in the local directory/file names and in log messages.
    year : int
        Year used in the local directory/file names and in log messages.
    target_time : datetime-like
        Time to match against; anything ``pd.to_datetime`` accepts.

    Returns
    -------
    list of str
        Local paths of the files whose ``aws s3 cp`` exited successfully.
        Empty when the listing is empty or nothing could be downloaded.

    Notes
    -----
    Relies on the module-level ``aws_path`` (full path to the AWS CLI binary).
    """
    target_time = pd.to_datetime(target_time)

    # Bail out before touching any column: an empty listing may not even
    # carry a 'start' column, which would otherwise raise a KeyError.
    if df is None or df.empty:
        print(f"No data found for {dataset_name} in {target_time.strftime('%B')} {year} close to {target_time}.")
        return []

    # Build the helper columns on a copy so the caller's frame is left untouched
    start_times = pd.to_datetime(df['start'])
    candidates = df.assign(datetime=start_times,
                           time_diff=(start_times - target_time).abs())

    # Get the file closest to the target time for each day
    nearest_rows = candidates.groupby(candidates['datetime'].dt.date)['time_diff'].idxmin()
    closest_files = candidates.loc[nearest_rows]

    # Handle case where no data is found
    if closest_files.empty:
        print(f"No data found for {dataset_name} in {target_time.strftime('%B')} {year} close to {target_time}.")
        return []

    # The destination directory and time tag are the same for every row
    local_dir = f'/glade/derecho/scratch/adhall/GOESdata/raw_data/GOES_{dataset_name}/{year}'
    os.makedirs(local_dir, exist_ok=True)
    time_tag = target_time.strftime("%H%M")

    # Download files
    local_file_paths = []
    for _, row in closest_files.iterrows():
        day = row['datetime'].strftime('%j')  # Julian day
        s3_file_url = f"s3://{row['file']}"
        local_file_path = f'{local_dir}/GOES16_{dataset_name}_{year}day{day}_time{time_tag}.nc'

        # Check if the file exists on the S3 server
        listing = subprocess.run([aws_path, 's3', 'ls', s3_file_url, '--no-sign-request'],
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if listing.returncode != 0:
            print(f"File does not exist on S3: {s3_file_url}")
            continue

        # Only report and record the file when the copy itself succeeded;
        # previously a failed copy was still logged as a successful download.
        copy = subprocess.run([aws_path, 's3', 'cp', s3_file_url, local_file_path, '--no-sign-request'])
        if copy.returncode == 0:
            local_file_paths.append(local_file_path)
            print(f"File downloaded successfully to {local_file_path}")
        else:
            print(f"Download failed (exit code {copy.returncode}): {s3_file_url}")

    return local_file_paths

# Loop over each year, product, and day to download files
for year in range(start_year, end_year + 1):
    # First and last day of the configured month. The last day is found by
    # stepping back one day from the first of the following month, so `month`
    # is honoured instead of April being hard-coded (month=4 behaves as before).
    start_date = datetime(year, month, 1)
    end_date = datetime(year + month // 12, month % 12 + 1, 1) - timedelta(days=1)

    for dataset_name, download_object in download_objects.items():
        current_date = start_date
        while current_date <= end_date:
            # Define target time for each day (15:00).
            # NOTE(review): the original comment said 14:00 UTC while the code
            # has always used hour=15 — confirm which one is intended.
            target_time = current_date.replace(hour=15, minute=0, second=0)
            print(f"Processing {dataset_name} for target time: {target_time}")

            # Query data for the specific day
            df = download_object.df(
                start=current_date.strftime('%Y-%m-%d 00:00'),
                end=current_date.strftime('%Y-%m-%d 23:59')
            )
            # Download the files for this day and dataset
            get_files_closest_to_time(df, dataset_name, current_date.year, target_time=target_time)

            # Increment the day
            current_date += timedelta(days=1)


Current PATH: /glade/u/apps/casper/23.10/spack/opt/spack/texlive/20220321/gcc/7.5.0/oyzx/bin/x86_64-linux:/glade/u/apps/casper/23.10/spack/opt/spack/texlive/20220321/gcc/7.5.0/oyzx/bin:/glade/u/apps/casper/23.10/spack/opt/spack/netcdf/4.9.2/oneapi/2023.2.1/gfig/bin:/glade/u/apps/casper/23.10/spack/opt/spack/hdf5/1.12.2/oneapi/2023.2.1/6vf2/bin:/glade/u/apps/casper/23.10/spack/opt/spack/ncarcompilers/1.0.0/oneapi/2023.2.1/mai6/bin/mpi:/glade/u/apps/casper/23.10/spack/opt/spack/openmpi/4.1.6/oneapi/2023.2.1/dgcv/bin:/glade/u/apps/casper/23.10/spack/opt/spack/ucx/1.14.1/gcc/7.5.0/vf2h/bin:/glade/u/apps/common/23.08/spack/opt/spack/cuda/12.2.1/bin:/glade/u/apps/casper/23.10/spack/opt/spack/ncarcompilers/1.0.0/oneapi/2023.2.1/mai6/bin:/glade/u/apps/common/23.08/spack/opt/spack/intel-oneapi-compilers/2023.2.1/compiler/2023.2.1/linux/lib/oclfpga/bin:/glade/u/apps/common/23.08/spack/opt/spack/intel-oneapi-compilers/2023.2.1/compiler/2023.2.1/linux/bin/intel64:/glade/u/apps/common/23.08/spack/o

In [5]:
# The download cell above needs the AWS CLI (the `aws` command). Install it from the command line as follows.

# First change to the directory where the installer should be downloaded, e.g. /glade/derecho/scratch/adhall/GOESdata

# curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"

# enter

# unzip awscliv2.zip

# enter

# ./aws/install --install-dir ~/aws-cli --bin-dir ~/bin

# enter

# echo 'export PATH=$PATH:~/bin' >> ~/.bashrc
# source ~/.bashrc

# enter

# aws --version

# enter
# this should output something like this: aws-cli/2.22.6 Python/3.12.6 Linux/5.14.21-150400.24.46-default exe/x86_64.opensuse.15

# which aws

# enter

# aws configure

# Still getting an error message (e.g. `aws` not found)? Add the bin directory to PATH using its absolute path:

# echo 'export PATH=$PATH:/glade/u/home/adhall/bin' >> ~/.bashrc
# source ~/.bashrc

# See the download script for how the full path to the AWS CLI is set, to ensure the correct binary is used.
