IMPORTS

In [None]:
# Install dependencies: 
!pip install -r requirements.txt > /dev/null

In [23]:
from datetime import datetime
import glob
import gzip
import os
import re
import shutil
from typing import List, Optional

import act
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

DEFINE LOCATION, DATE RANGE, AND VARIABLES

In [None]:
# --- Directories, taken from the environment --------------------------------
# DATA_DIR is the root that the LOAD DATA cell joins the site/datastream onto;
# HOME is where the EXPORT cells write their output. Either is None when the
# corresponding environment variable is not set.
DATA_DIR = os.getenv("DATA_DIR")
HOME = os.getenv("HOME")

# --- Which location to subsample --------------------------------------------
SITE, FACILITY = "sgp", "C1"

# --- Date range, as ARM-style YYYYMMDD strings ------------------------------
BEGIN_DATE = "20231001"
# NOTE(review): the end date is intended to be inclusive -- confirm that the
# date filter in the LOAD DATA cell actually treats it that way.
END_DATE = "20231101"

# --- Optional variable selection --------------------------------------------
# Leave as None to keep every variable; otherwise list the names to keep.
VARIABLES: Optional[List[str]] = None

print(f"Subsampling ldquants data from {SITE} {FACILITY} between {BEGIN_DATE} and {END_DATE}")

Subsampling ldquants data from sgp C1 between 20231001 and 20231101


LOAD DATA

In [7]:
# Pattern for the data file names in the datastream directory. The fifth
# capture group is the "YYYYMMDD.HHMMSS" timestamp embedded in the file name.
ARM_FILENAME_PATTERN = re.compile(
    r"^([a-z]{3})(.+)([A-Z]\d+)\.([a-z0]\d)\.(\d{8}\.\d{6})\.(.+)$"
)

# Fail early with a readable message instead of the TypeError that
# os.path.join(None, ...) would raise when DATA_DIR is not set.
if not DATA_DIR:
    raise EnvironmentError(
        "The DATA_DIR environment variable is not set; it must point to the "
        "root directory containing the ARM data."
    )

# Compile list of files
date_start = datetime.strptime(BEGIN_DATE, '%Y%m%d')
date_end = datetime.strptime(END_DATE, '%Y%m%d')
dir_path = os.path.join(DATA_DIR, SITE, f"{SITE}ldquants{FACILITY}.c1")

# os.listdir() returns entries in arbitrary order; sort so the file list
# (and therefore the read) is deterministic and chronological.
all_files = sorted(os.listdir(dir_path))
files_for_dates = []
for filename in all_files:
    match = ARM_FILENAME_PATTERN.match(filename)
    if not match:
        continue  # not a data file following the expected naming scheme
    file_date = datetime.strptime(match.group(5), "%Y%m%d.%H%M%S")
    # Compare calendar dates so that BOTH endpoints are inclusive: a file
    # stamped at any time of day on END_DATE is kept. (Comparing the full
    # timestamp against midnight of END_DATE would drop that whole day.)
    if date_start.date() <= file_date.date() <= date_end.date():
        files_for_dates.append(os.path.join(dir_path, filename))

if not files_for_dates:
    raise FileNotFoundError(
        f"No ldquants files found in {dir_path} between {BEGIN_DATE} and {END_DATE}"
    )

# Read the file data into an xarray Dataset
# NOTE(review): the saved output of this cell shows a warning pointing at this
# call -- presumably a deprecation notice; confirm against the installed ACT
# version before switching to a newer reader function.
ds = act.io.armfiles.read_netcdf(files_for_dates)
ds.clean.cleanup()

  ds = act.io.armfiles.read_netcdf(files_for_dates)


EXPORT SUBSAMPLED DATA

In [24]:
# Filter out variables if needed.
# The names to drop are collected first and removed in one call: deleting
# entries from `ds` while iterating over `ds.data_vars` changes the mapping
# being iterated, which can raise a RuntimeError as soon as the first
# variable is removed. Re-running this cell is harmless, since the second
# pass simply finds nothing left to drop.
if VARIABLES:
    unwanted_vars = [name for name in ds.data_vars if name not in VARIABLES]
    ds = ds.drop_vars(unwanted_vars)

# Store the subsampled data to a new file, named after the site, facility
# and requested date range, in the HOME directory.
output_filename = f"{SITE}ldquants{FACILITY}.c1.{BEGIN_DATE}.{END_DATE}.nc"
output_nc = os.path.join(HOME, output_filename)
ds.write.write_netcdf(path=output_nc, FillValue=-9999)

In [26]:
# Compress the exported NetCDF file with gzip so it is smaller to transfer.
# The archive is written next to the original, with ".gz" appended.
output_zip = os.path.join(HOME, f"{output_filename}.gz")
with open(output_nc, 'rb') as raw_file, gzip.open(output_zip, 'wb') as gz_file:
    # Stream the bytes across rather than reading the whole file at once.
    shutil.copyfileobj(raw_file, gz_file)

# The uncompressed copy is no longer needed once the archive has been written.
os.remove(output_nc)