In [None]:
from senorge_downloader.utils import fetch, open_dataset
import geopandas as gpd
import re
from pathlib import Path
import pandas as pd

In [None]:
# Project root relative to the notebook's working directory. `__file__` is not
# defined inside a notebook, so the root is spelled out explicitly.
base_path = Path("..")

# Municipality cells used as the left side of the spatial join below.
muni_cells = gpd.read_file(base_path / "data/geo/muni-cells-utm33.geojson")

In [None]:
def snow_depth_christmas_eve(filename: Path):
    """Return one snow-depth value per municipality for 24 December.

    The year is taken from the file name (``..._YYYY.nc``). The dataset is
    sliced to 24 December of that year, turned into points from its ``x``/``y``
    columns and spatially joined onto the module-level ``muni_cells`` frame.
    For every (year, kommunekode, kommunenavn) group the row with the lowest
    ``rank`` that has a non-missing value is kept; if every value in the group
    is missing, the lowest-``rank`` row is kept instead.

    Parameters
    ----------
    filename : Path
        Path to a file whose name ends in ``_YYYY.nc``.

    Returns
    -------
    DataFrame
        Columns ``year`` (str), ``kommunekode``, ``kommunenavn``, ``value``
        (rounded to 4 decimals), plus ``population``, ``count`` and ``rank``
        when those columns are present.

    Raises
    ------
    ValueError
        If no four-digit year can be extracted from the file name.
    """
    # Validate the file name before touching the file so a bad name never
    # leaves an open dataset behind.
    year_match = re.match(r'.*_(\d{4})\.nc', filename.name)

    if not year_match:
        raise ValueError(f"Could not extract year from filename {filename}")

    year = year_match.group(1)

    # Keep a reference to the handle that was actually opened and always close
    # it, even when the selection fails.
    # NOTE(review): assumes `open_dataset` returns an xarray-like dataset whose
    # `.sel()` result does not own the underlying file handle — confirm.
    ds = open_dataset(filename)
    try:
        christmas_eve = ds.sel(time=pd.to_datetime(f'{year}-12-24'))
        dat = (
            christmas_eve.to_dataframe()
            .reset_index()
            .rename(columns={"snow_depth": "value"})
        )
    finally:
        ds.close()

    dat["year"] = dat["time"].dt.year.astype(int)

    geo = gpd.GeoDataFrame(
        data=dat,
        geometry=gpd.points_from_xy(dat['x'], dat['y']),
        crs=muni_cells.crs,
        copy=False
    ) # type: ignore

    group_keys = ['year', 'kommunekode', 'kommunenavn']

    muni_dat = muni_cells.sjoin(geo, how='left', predicate='intersects').reset_index()

    # Cells without a matching point carry NaN in the joined columns. They were
    # implicitly discarded by the former groupby on `year`; drop them explicitly.
    # NOTE(review): a municipality with no matching point at all is therefore
    # absent from the result — confirm this is intended.
    muni_dat = muni_dat.dropna(subset=group_keys)

    # Order each group so rows with a value come first, then by rank; the first
    # row per group is then "first non-missing by rank, else first by rank".
    muni_dat = muni_dat.assign(_value_missing=muni_dat['value'].isna())
    muni_dat = muni_dat.sort_values(by=group_keys + ['_value_missing', 'rank'])

    result = muni_dat.drop_duplicates(subset=group_keys, keep='first').reset_index(drop=True)

    result["value"] = result["value"].round(4)
    # Use the year parsed from the file name: the joined `year` column turns
    # float when the left join introduces NaN, which would render as "2018.0".
    result["year"] = year

    cols = ['year', 'kommunekode', 'kommunenavn', 'value']
    cols += [name for name in ('population', 'count', 'rank') if name in result.columns]

    return result[cols]

In [None]:
# Run the aggregation on a single year as a quick check before the full batch.
sample_file = Path('../data/raw/sd_2018.nc')
res = snow_depth_christmas_eve(sample_file)

In [None]:
# `year` is returned as a string column, so it must be compared with "2018";
# comparing against the integer 2018 never matches any row.
res.query('kommunenavn == "Træna" and year == "2018"')

In [None]:
from tqdm import tqdm

# Sort the inputs so the row order of the CSV is reproducible; glob order
# depends on the filesystem.
files = sorted((base_path / 'data/raw').glob('sd_*.nc'))
out = base_path / 'data/christmas-snow-days'
out.mkdir(parents=True, exist_ok=True)

# Collect the per-file frames and concatenate once instead of growing a
# DataFrame inside the loop (quadratic copying, and concatenating with an
# empty frame is deprecated in recent pandas).
frames = [snow_depth_christmas_eve(file) for file in tqdm(files)]

# With no input files, still write an (empty) CSV as before.
data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

data.to_csv(out / "christmas-eve-snow-days.csv", index=False)
