# Notebook pour le calcul des séries temporelles VHI (MODIS)

### Initialisation contexte test appli

In [None]:
import os

# --- Working directory: every relative path below resolves from here ---
WRK_DIR = os.path.normpath('/media/archive/EO4DM')
os.chdir(WRK_DIR)
# Optional override kept for reference (points WRK_DIR one level up):
# WRK_DIR = os.path.join(WRK_DIR,'..')

# --- Run parameters ---
TERRITORY = 'French Polynesia (Fr)'   # territory label, also handed to the geostats extraction
PRODUCT = 'VHI'                       # prefix of the input rasters and of the output stats files
PERIOD = 'DECADE'                     # temporal aggregation; the notebook handles 'MONTH' and 'DECADE'
areas_key = 'Name'                    # forwarded to prepareGeoStatsMasks as `areas_key`
CLEAN_RUNFOLDER = 0                   # 1 -> wipe and recreate the output folder before running

# --- Derived paths ---
# Folder-friendly territory name: spaces become underscores, parentheses are dropped.
TERRITORY_str = TERRITORY.translate(str.maketrans(' ', '_', '()'))
DATA_HISTO = os.path.join(WRK_DIR, 'DATA_HISTO', TERRITORY_str)
ANNEX_DIR = os.path.join(WRK_DIR, 'ANNEX', TERRITORY_str)
INDIR_DATA = os.path.join(DATA_HISTO, '1_INDICATEURS', 'GLOBAL', PERIOD)

In [None]:
import glob
import shutil
import rasterio
import numpy as np
import pandas as pd
from tqdm import tqdm
import dmpipeline.ALERT_Processing.ALERT_main_functions as alertmain
import dmpipeline.GEOSTATS_Processing.GEOSTATS_processing_functions as geostats

### Prépare le dossier de sortie

In [None]:
# --- Create the output directory (optionally wiping a previous run) ---
OUTDIR_STATS = os.path.join(WRK_DIR, f'TIME_SERIES_GLOBAL_DROUGHT_{TERRITORY_str}')

# Clear the process umask so the permissions requested below are applied as-is.
os.umask(0)

outdir_present = os.path.exists(OUTDIR_STATS)

# An existing folder is removed only when a clean run is explicitly requested.
if outdir_present and int(CLEAN_RUNFOLDER) == 1:
    shutil.rmtree(OUTDIR_STATS)
    outdir_present = False

# Create the folder when it is missing (or has just been removed) and open its permissions.
if not outdir_present:
    os.makedirs(OUTDIR_STATS)
    os.chmod(OUTDIR_STATS, 0o777)


### Prépare données d'entrée (sat, masque)

In [None]:
# --- List the input indicator rasters for the selected product/period ---
in_files = glob.glob(os.path.join(INDIR_DATA, f'{PRODUCT}*.tif'))

# --- Prepare input masks and look-up table (for estimating geostats) ---
# The annex inputs are always located, so the checks below never reference an
# undefined name when masks from a previous run are already present.
file_areas = glob.glob(os.path.join(ANNEX_DIR, 'Areas', '*.shp'))
file_landcover = glob.glob(os.path.join(ANNEX_DIR, 'Landcover', '*.tif'))
mask_ok = len(glob.glob(os.path.join(OUTDIR_STATS,'mask_Areas_*.tif')))>0

if len(file_areas)==0:
    # NOTE(review): the geostats loop needs the masks loaded in the `else`
    # branch; without an areas shapefile it cannot run.
    print('Drought spatial stats will not be estimated : missing input shapefile containing geometries/areas to identify')
else:
    file_areas = file_areas[0]
    if len(file_landcover)!=0:
        file_landcover = file_landcover[0]
        go_landcover = 1
    else:
        go_landcover = 0

    # Masks are (re)generated only when none are found in the output folder.
    if not mask_ok:
        if len(in_files)==0:
            raise FileNotFoundError(f'No {PRODUCT}*.tif input raster found in {INDIR_DATA}')
        # The first input raster is passed as the reference file for the masks.
        geostats.prepareGeoStatsMasks(in_files[0], file_areas, OUTDIR_STATS, file_landcover=file_landcover, areas_key=areas_key)

    # Areas mask: pixels different from 0 are flagged in `mask`.
    with rasterio.open(glob.glob(os.path.join(OUTDIR_STATS,'mask_Areas.tif'))[0]) as area_ds:
        maskAREA = area_ds.read(1)
        mask = (maskAREA != 0)

    # Look-up table: skip the stats files this notebook appends to the same
    # folder, otherwise a re-run could load one of them instead of the LUT.
    lut_files = sorted(
        csv_f for csv_f in glob.glob(os.path.join(OUTDIR_STATS,'*.csv'))
        if not os.path.basename(csv_f).startswith(f'{PRODUCT}_STATS_'))
    area_lut = pd.read_csv(lut_files[0], sep=';')

    if go_landcover==1:
        with rasterio.open(glob.glob(os.path.join(OUTDIR_STATS,'mask_Areas_NOTrees_NOBuild.tif'))[0]) as NOTrees_ds, \
            rasterio.open(glob.glob(os.path.join(OUTDIR_STATS,'mask_Areas_Trees.tif'))[0]) as Trees_ds :
            mask_NOTrees_NOBuild = NOTrees_ds.read(1)
            mask_Trees = Trees_ds.read(1)

# --- Verify if output stats dataframes already exist (yes -> do not add header in csv file) ---
if PERIOD=='MONTH':
    stats_pattern = f'{PRODUCT}_STATS_M*.csv'
elif PERIOD=='DECADE':
    stats_pattern = f'{PRODUCT}_STATS_D*.csv'
else:
    raise ValueError(f"Unsupported PERIOD '{PERIOD}': expected 'MONTH' or 'DECADE'")
stats_ok = len(glob.glob(os.path.join(OUTDIR_STATS, stats_pattern)))>0


### Calcul Geo Statistiques

In [None]:
def _append_stats_csv(stats_df, csv_name, write_header):
    """Sort `stats_df` by LOCATION/DATE and append it to `csv_name` in OUTDIR_STATS.

    The file is ';'-separated with 2-decimal floats; `write_header` tells
    whether the column names are written before the rows.
    """
    stats_df.sort_values(by=['LOCATION','DATE']).to_csv(
        os.path.join(OUTDIR_STATS, csv_name),
        index = False,
        float_format='%.2f',
        decimal = '.',
        sep = ';',
        mode='a',
        header = write_header)


# First day of month associated with each decade suffix found in the file names.
_DECADE_START_DAY = {'D1': '01', 'D2': '11', 'D3': '21'}

count_head = 0 # the first time, add header to geostats data frame

# glob returns files in arbitrary order: sort them so rows are appended
# in a reproducible, file-name order.
for in_f in tqdm(sorted(in_files)):

    # --- Parse the date/period from the file name (<PRODUCT>_<YYYYMM><M|D1|D2|D3>...) ---
    in_file_name = os.path.basename(in_f).split('.tif')[0]
    full_date = in_file_name.split('_')[1]
    period_indic = full_date[6:]

    if period_indic=='M':
        date_df = pd.to_datetime(full_date[:-1], format='%Y%m')
        period_df = period_indic
    elif period_indic in _DECADE_START_DAY:
        date_df = pd.to_datetime(full_date[:-2]+_DECADE_START_DAY[period_indic], format='%Y%m%d')
        period_df = 'D'
    else:
        # Fail early with a clear message instead of a later NameError on date_df.
        raise ValueError(f"Unrecognised period '{period_indic}' in file name '{in_file_name}'")

    # --- Read input file ---
    with rasterio.open(in_f) as in_ds:
        profile_in = in_ds.profile
        DATA = in_ds.read(1)
        if profile_in['count']==2:
            # Two-band file: the second band is used as the quality score.
            DATA_QSCORE = in_ds.read(2)
        else:
            # Otherwise the quality score is a boolean "not NaN" mask of the data.
            DATA_QSCORE = ~np.isnan(DATA)

    # Header only for the very first write of a run that starts from empty stats files.
    write_header = (count_head==0 and stats_ok==0)

    # --- Estimate spatial stats ---
    if go_landcover==1:
        GeoStats_df, GeoStats_df_NOTrees_NOBuild, GeoStats_df_Trees = geostats.extractGeoStats(
            DATA, DATA_QSCORE, date_df, mask, maskAREA,
            area_lut, TERRITORY, mask_NOTrees_NOBuild, mask_Trees)

        _append_stats_csv(GeoStats_df_NOTrees_NOBuild, f'{PRODUCT}_STATS_{period_df}_NoTrees_NoBuild.csv', write_header)
        _append_stats_csv(GeoStats_df_Trees, f'{PRODUCT}_STATS_{period_df}_Trees.csv', write_header)

        del GeoStats_df_NOTrees_NOBuild, GeoStats_df_Trees

    else:
        GeoStats_df, _, _ = geostats.extractGeoStats(DATA, DATA_QSCORE, date_df, mask, maskAREA, area_lut, TERRITORY)

    _append_stats_csv(GeoStats_df, f'{PRODUCT}_STATS_{period_df}.csv', write_header)

    count_head += 1

    # Release per-file objects before the next raster is loaded.
    del GeoStats_df, period_df, date_df, DATA, DATA_QSCORE, in_file_name, full_date
