## Data Preprocessing

This notebook applies the new file naming convention and reprojects the data using the parameters examined in the `daily_eda` notebook.

In [1]:
from pathlib import Path

import rasterio as rio
import matplotlib.pyplot as plt
import dask
import dask.distributed as dd

import eda as eda
import preprocess as preprocess
from luts import ice_zones, ice_years, data_sources
from config import INPUT_DIR, DAILY_BEAUFORT_DIR, DAILY_CHUKCHI_DIR

In [2]:
# Display the region codes imported from `luts`; the cells below slice this
# list to select which region's files to gather.
ice_zones

['Beau', 'Chuk']

In [5]:
# Gather the daily SLIE GeoTIFFs for the first entry of `ice_zones`
# ('Beau' in the list displayed above), walking one input subdirectory per year.
beauf_geotiffs = [
    tiff
    for region in ice_zones[:1]
    for year in ice_years
    for tiff in eda.list_geotiffs(
        Path(INPUT_DIR / region / year), str_to_match="dailyslie"
    )
]
print(len(beauf_geotiffs))

7797


In [3]:
# Gather the daily SLIE GeoTIFFs for every entry of `ice_zones` after the first
# (only 'Chuk' in the list displayed above), walking one input subdirectory per year.
chuk_geotiffs = [
    tiff
    for region in ice_zones[1:]
    for year in ice_years
    for tiff in eda.list_geotiffs(
        Path(INPUT_DIR / region / year), str_to_match="dailyslie"
    )
]
print(len(chuk_geotiffs))

7668


In [7]:
# Demonstrate `preprocess.rename` on one file from each region: the result is a
# path in the output directory whose file name spells out the data source.
first_beaufort = beauf_geotiffs[0]
print(preprocess.rename(first_beaufort))
last_chukchi = chuk_geotiffs[-1]
print(preprocess.rename(last_chukchi))

/beegfs/CMIP6/cparr4/landfast_sea_ice_products/Beaufort_Daily/beaufort_19970414_radarsat_slie.tif
/beegfs/CMIP6/cparr4/landfast_sea_ice_products/Chukchi_Daily/chukchi_20230323_asip_slie.tif


In [8]:
# Start a dask.distributed client with default arguments before the
# `dask.compute` calls below; it is closed once both regions are processed.
# NOTE(review): with no arguments this presumably starts a local cluster —
# confirm the default worker/memory settings suit the host.
client = dd.Client()

In [9]:
# Build one lazy reprojection task per Beaufort GeoTIFF, then run them all.
# The results are discarded by assigning them to `_`.
reproject = dask.delayed(preprocess.tap_reproject_raster)
beaufort_tasks = [reproject(src) for src in beauf_geotiffs]
_ = dask.compute(*beaufort_tasks)

In [10]:
# Build one lazy reprojection task per Chukchi GeoTIFF, then run them all.
# The results are discarded by assigning them to `_`.
reproject = dask.delayed(preprocess.tap_reproject_raster)
chukchi_tasks = [reproject(src) for src in chuk_geotiffs]
_ = dask.compute(*chukchi_tasks)

In [11]:
# Both reprojection batches have been computed; shut the dask client down.
client.close()

In [12]:
# Sanity check: the number of GeoTIFFs found in the Beaufort output directory
# must equal the number of input files. The message reports both counts so a
# failure is diagnosable without re-running the listing.
n_beaufort_outputs = len(eda.list_geotiffs(DAILY_BEAUFORT_DIR))
assert n_beaufort_outputs == len(beauf_geotiffs), (
    f"Beaufort: found {n_beaufort_outputs} output GeoTIFFs in {DAILY_BEAUFORT_DIR}, "
    f"expected {len(beauf_geotiffs)}"
)

In [13]:
# Sanity check: the number of GeoTIFFs found in the Chukchi output directory
# must equal the number of input files. The message reports both counts so a
# failure is diagnosable without re-running the listing.
n_chukchi_outputs = len(eda.list_geotiffs(DAILY_CHUKCHI_DIR))
assert n_chukchi_outputs == len(chuk_geotiffs), (
    f"Chukchi: found {n_chukchi_outputs} output GeoTIFFs in {DAILY_CHUKCHI_DIR}, "
    f"expected {len(chuk_geotiffs)}"
)