In [1]:
import re

import rasterio as rio
import xarray as xr
import numpy as np
import pandas as pd
import tqdm
import dask.array as da
from dask.diagnostics import ProgressBar
from datetime import datetime

from eda import list_geotiffs
from config import DAILY_BEAUFORT_DIR, DAILY_CHUKCHI_DIR


In [2]:
def get_dates(target_directory):
    
    dates = []
    tiffs_to_merge = list_geotiffs(target_directory)

    for file in tqdm.tqdm(tiffs_to_merge):
        date = re.search(r'(\d{4})(\d{2})(\d{2})', file.name).groups()
        dates.append(datetime(int(date[0]), int(date[1]), int(date[2])))
    return dates

In [4]:
chukchi_dates = get_dates(DAILY_CHUKCHI_DIR)
beaufort_dates = get_dates(DAILY_BEAUFORT_DIR)


100%|████████████████████████████████████████████████████████████| 7668/7668 [00:00<00:00, 323228.91it/s]
100%|████████████████████████████████████████████████████████████| 7797/7797 [00:00<00:00, 373436.88it/s]


In [5]:
print(len(beaufort_dates))

7797


In [6]:
print(len(chukchi_dates))

7668


In [8]:
dates_in_beaufort_not_in_chukchi = list(set(beaufort_dates) - set(chukchi_dates))

In [9]:
len(dates_in_beaufort_not_in_chukchi)

160

In [10]:
dates_in_beaufort_not_in_chukchi

[datetime.datetime(2002, 7, 17, 0, 0),
 datetime.datetime(2004, 7, 4, 0, 0),
 datetime.datetime(2002, 7, 20, 0, 0),
 datetime.datetime(2004, 7, 2, 0, 0),
 datetime.datetime(2003, 7, 14, 0, 0),
 datetime.datetime(2002, 6, 26, 0, 0),
 datetime.datetime(2001, 7, 17, 0, 0),
 datetime.datetime(2004, 7, 13, 0, 0),
 datetime.datetime(2004, 7, 1, 0, 0),
 datetime.datetime(2002, 7, 14, 0, 0),
 datetime.datetime(2003, 7, 1, 0, 0),
 datetime.datetime(2001, 7, 14, 0, 0),
 datetime.datetime(2000, 7, 15, 0, 0),
 datetime.datetime(1997, 11, 13, 0, 0),
 datetime.datetime(1997, 6, 26, 0, 0),
 datetime.datetime(2004, 7, 17, 0, 0),
 datetime.datetime(2004, 7, 11, 0, 0),
 datetime.datetime(2000, 7, 14, 0, 0),
 datetime.datetime(2004, 6, 27, 0, 0),
 datetime.datetime(2004, 7, 5, 0, 0),
 datetime.datetime(2008, 7, 5, 0, 0),
 datetime.datetime(2008, 7, 3, 0, 0),
 datetime.datetime(2004, 7, 16, 0, 0),
 datetime.datetime(2002, 7, 1, 0, 0),
 datetime.datetime(2001, 7, 18, 0, 0),
 datetime.datetime(2004, 6, 20, 

In [14]:
time_index = pd.to_datetime(dates_in_beaufort_not_in_chukchi)

In [22]:
missing_dates = time_index.sort_values()

In [27]:
missing_dates.to_series().to_csv("dates_in_beau_but_not_in_chuk.csv", index=False)