# SNODAS Data Access

This script is designed to access and process data from the Snow Data Assimilation (SNODAS) system. 

Data is accessed through the National Snow and Ice Data Center (NSIDC). Because SNODAS is not available through the cloud, we must download the archive files directly over HTTPS and then process them locally.

This script is adapted from code written by Aakash Ahamed (https://github.com/kashingtonDC/SNODAS).

In [None]:
import requests
from bs4 import BeautifulSoup

import os
import time
import gzip
import shutil

import datetime
import subprocess 

from tqdm import tqdm

In [None]:
# Year/month/day setup for SNODAS archive
year = "2023"
month = "Mar" # 3-character abbreviation for month
day = "03" # 2-digit number for day of month
# NOTE(review): `day` is not used to filter the download below; every archive
# listed in the month directory is fetched, as before.

# NSIDC groups the masked archives into one directory per month named
# "<MM>_<Mon>" (e.g. "03_Mar"), so the numeric prefix must be the month
# number rather than the day of month.
month_number = datetime.datetime.strptime(month, "%b").month

# Get urls for SNODAS archive
archive_url = f'https://noaadata.apps.nsidc.org/NOAA/G02158/masked/{year}/{month_number:02d}_{month}/'
r = requests.get(archive_url, timeout=60)
# Fail loudly on a bad listing instead of parsing an HTML error page.
r.raise_for_status()
data = BeautifulSoup(r.text, "html.parser")

# Extract data from SNODAS archive
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# here only because other cells may rely on it.
dir = "/home/jovyan/shared-public/SnowPit/tmp/"
os.makedirs(dir, exist_ok=True)
for link in data.find_all("a"):
    href = link.get("href", "")
    # Only the daily tar archives are wanted; this also skips the parent
    # directory link and any sort/navigation links in the listing.
    if not href.endswith(".tar"):
        continue
    resp = requests.get(archive_url + href, timeout=300)
    # Do not save an error page under an archive file name.
    resp.raise_for_status()
    with open(os.path.join(dir, os.path.basename(href)), 'wb') as f:
        f.write(resp.content)

In [None]:
def process_tarfile(tarfile, writedir, snovars=('1034',)):
    """Untar a SNODAS archive and gunzip the members for the wanted variables.

    Args:
        tarfile (str): Path to the archive, e.g. ".../SNODAS_20230303.tar".
        writedir (str): Existing directory the archive is extracted into.
        snovars: Collection of 4-character product codes to decompress
            (the characters that follow "ssmvN" in the member file name).

    Returns:
        tuple: Three sorted lists of paths in ``writedir`` whose names contain
        the archive date: the ``.dat`` files, the ``.txt`` files and the
        ``.gz`` files.

    Raises:
        OSError: If the ``tar`` executable cannot be started.
    """
    # ".../SNODAS_20230303.tar" -> "20230303"
    date = os.path.splitext(os.path.basename(tarfile))[0].replace("SNODAS_", "")

    # Pass the arguments as a list (no shell) so that paths containing spaces
    # or shell metacharacters are handled correctly.
    result = subprocess.run(["tar", "-xvf", tarfile, "-C", writedir])
    if result.returncode != 0:
        # Keep going, as before, but do not hide the failure.
        print("tar exited with status {} for {}".format(result.returncode, tarfile))

    def _dated_files(suffix):
        # Paths in writedir belonging to this archive's date with `suffix`.
        return [
            os.path.join(writedir, name)
            for name in sorted(os.listdir(writedir))
            if date in name and name.endswith(suffix)
        ]

    for gz_path in _dated_files(".gz"):
        # Parse the product code from the file name only, so a directory
        # that happens to contain "ssmv" cannot shift the offsets.
        name = os.path.basename(gz_path)
        marker = name.find("ssmv")
        if marker == -1:
            continue
        varstr = name[marker + 5:marker + 9]
        if varstr not in snovars:
            continue

        # Strip the trailing ".gz" to get the decompressed file name.
        outfn = os.path.splitext(gz_path)[0]
        with gzip.open(gz_path, 'rb') as f_in, open(outfn, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

    return _dated_files(".dat"), _dated_files(".txt"), _dated_files(".gz")

In [None]:
def txt2hdr(txtfiles, writedir):
    """Copy the matching template ENVI header next to each SNODAS file.

    The date is read from the 8 characters that follow "TS" in each file
    name. Files dated before 2013-10-01 get a copy of
    ``<writedir>/../pre_10_2013.hdr``; later files get a copy of
    ``<writedir>/../post_10_2013.hdr`` (accounts for the datum change in
    2013).

    Args:
        txtfiles (list[str]): Paths of SNODAS ``.txt`` metadata files.
        writedir (str): Directory that receives the ``.hdr`` copies.

    Returns:
        list[str]: Paths of the header files written, in input order.

    Raises:
        ValueError: If a file name does not carry a ``TSYYYYMMDD`` date.
        OSError: If the template header is missing or cannot be copied.
    """
    datum_change = datetime.datetime(2013, 10, 1)
    hdrfiles = []

    for txt_path in txtfiles:
        # Parse the date from the file name only: a directory component that
        # contains "TS" must not shift the offsets.
        name = os.path.basename(txt_path)
        marker = name.find("TS")
        date = datetime.datetime.strptime(name[marker + 2:marker + 10], '%Y%m%d')

        # Account for datum change in 2013
        if date < datum_change:
            hdrfile = os.path.join(writedir, "../pre_10_2013.hdr")
        else:
            hdrfile = os.path.join(writedir, "../post_10_2013.hdr")

        # Destination: same base name as the .txt file, with a .hdr extension
        snowpath = os.path.join(writedir, os.path.splitext(name)[0] + ".hdr")
        shutil.copy(hdrfile, snowpath)
        hdrfiles.append(snowpath)

    return hdrfiles

In [None]:
def dat2tif(datfiles, writedir):
    """Convert SNODAS ``.dat`` rasters to GeoTIFFs with ``gdal_translate``.

    Only files whose name contains a product code listed in ``prod_lookup``
    are converted. The output is named ``<YYYYMMDD><label>.tif``, where the
    date is the 8 characters that follow "TS" in the input file name.
    Outputs that already exist are not regenerated.

    Args:
        datfiles (list[str]): Paths of decompressed ``.dat`` files.
        writedir (str): Directory that receives the ``.tif`` files.

    Returns:
        list[str]: Output paths for every recognised input, in input order
        (including outputs that already existed).

    Raises:
        OSError: If the ``gdal_translate`` executable cannot be started.
    """
    prod_lookup = {
        "1034": "SNWE"
    }

    # Map each recognised input file to its output path.
    targets = {}
    for dat_path in datfiles:
        # Work on the file name only, so directory components cannot be
        # mistaken for the "TS" date marker or for a product code.
        name = os.path.basename(dat_path)
        marker = name.find("TS")
        date = name[marker + 2:marker + 10]
        for code, label in prod_lookup.items():
            if code in name:
                targets[dat_path] = os.path.join(writedir, date + label + ".tif")

    outfiles = []
    for infile, outfile in targets.items():
        if os.path.exists(outfile):
            print("{} already exists - moving to next file".format(outfile))
        else:
            # Argument list (no shell): paths with spaces are passed intact.
            cmd = [
                "gdal_translate",
                "-of", "GTiff",
                "-a_srs", "+proj=longlat +ellps=WGS84 +no_defs",
                "-a_nodata", "-9999",
                "-a_ullr", "-124.73333333", "52.87500000", "-66.94166667", "24.95000000",
                infile,
                outfile,
            ]
            result = subprocess.run(cmd)
            if result.returncode != 0:
                # Keep going, as before, but do not hide the failure.
                print("gdal_translate exited with status {} for {}".format(result.returncode, infile))

        outfiles.append(outfile)

    return outfiles

In [None]:
tmp_dir = "/home/jovyan/shared-public/SnowPit/tmp/"
# Extraction writes .gz/.dat/.txt/.hdr/.tif files into this same directory,
# so restrict the loop to the tar archives themselves; otherwise every
# extracted file would be fed back in as if it were an archive.
tarfiles = sorted(name for name in os.listdir(tmp_dir) if name.endswith(".tar"))

for tar in tarfiles:
    file = os.path.join(tmp_dir, tar)
    # Untar and decompress, then build a header and a GeoTIFF per raster.
    dat, txt, gz = process_tarfile(file, tmp_dir)

    hdrfiles = txt2hdr(txt, tmp_dir)
    tiffiles = dat2tif(dat, tmp_dir)

In [None]:
# Values for the fields of an ENVI header (.hdr) file.

# File-level fields
file_type = "ENVI Standard"
header_offset = 0

# Raster dimensions: columns, rows and number of bands
samples = 6935
lines = 3351
bands = 1

# Binary layout of the raster data
data_type = 2       # ENVI data type code
interleave = "bsq"  # band interleave: 'bsq', 'bip' or 'bil'
byte_order = 1      # 0 = little-endian, 1 = big-endian

In [None]:
def create_envi_header(txt_path, hdr_path, samples, lines, bands, data_type, interleave, byte_order, description=""):
    """
    Creates an ENVI header (.hdr) file.

    Args:
        txt_path (str): Path to the input .txt file (used only for the 'map info' field).
        hdr_path (str): Path to save the output .hdr file.
        samples (int): Number of samples (columns).
        lines (int): Number of lines (rows).
        bands (int): Number of bands.
        data_type (int): ENVI data type code (e.g., 1 for byte, 4 for float).
        interleave (str): Interleave type ('bsq', 'bip', or 'bil').
        byte_order (int): Byte order (0 for little-endian, 1 for big-endian).
        description (str): Description of the data, written inside the
            braces of the 'description' field. Defaults to an empty string.

    Raises:
        OSError: If txt_path cannot be read or hdr_path cannot be written.
    """
    # Reuse the first line of the .txt file that mentions "map info"
    # (case-insensitive), if there is one.
    with open(txt_path, 'r') as txt_file:
        map_info_line = next(
            (row for row in txt_file if "map info" in row.lower()),
            None,
        )

    if map_info_line:
        # Normalise the line ending so the header always ends with a newline.
        map_info = map_info_line.rstrip("\r\n") + "\n"
    else:
        # No map info in the .txt file: fall back to a generic entry.
        map_info = "map info = {UTM, 1.000, 1.000, 0.000, 0.000, 1, 1, WGS-84, units=Meters}\n"

    header_lines = [
        "ENVI\n",
        f"description = {{{description}}}\n",
        f"samples = {samples}\n",
        f"lines = {lines}\n",
        f"bands = {bands}\n",
        "header offset = 0\n",
        "file type = ENVI Standard\n",
        f"data type = {data_type}\n",
        f"interleave = {interleave}\n",
        "sensor type = Unknown\n",
        f"byte order = {byte_order}\n",
        map_info,
    ]

    with open(hdr_path, 'w') as hdr_file:
        hdr_file.writelines(header_lines)

In [None]:
## TODO: Test the above function for developing the headers