# Notebook pour le calcul des corrélations croisées (servant à l'évaluation/comparaison des indicateurs)

### Initialisation contexte test appli

In [1]:
import os

# --- Comparison set-up: observed indicator vs reference indicator ---
TERRITORY = 'New Caledonia (Fr)'
PRODUCT_OBS = 'VHI'
PRODUCT_REF = 'SPI_ref_1991_2020'
PERIOD_OBS = 'M'  # if no period, set ''
PERIOD_REF = ''   # if no period, set ''
LANDMASK_OBS = 'NoTrees_NoBuild'  # if no mask, set ''
LANDMASK_REF = ''  # if no mask, set ''

# Folder-friendly territory name: spaces become underscores, parentheses are dropped.
TERRITORY_str = TERRITORY.translate(str.maketrans(' ', '_', '()'))

# --- Working directories ---
# The process first moves into the EO4DM folder (presumably so that the
# `dmpipeline` imports of the next cell resolve -- confirm), then WRK_DIR is
# re-pointed to the parent of that folder, which the data paths below hang from.
WRK_DIR = os.path.normpath('D:/MATHIS/0_Projet_Secheresse/1_Scripts/toolbox/eo4dm-oeil/EO4DM')
os.chdir(WRK_DIR)
WRK_DIR = os.path.join(WRK_DIR, '..')

# --- Input locations ---
# Alternative layout kept for reference:
#   DATA_HISTO = os.path.join(WRK_DIR,'DATA_HISTO',TERRITORY_str)
#   ANNEX_DIR = os.path.join(WRK_DIR,'ANNEX',TERRITORY_str)
DATA_HISTO = os.path.join(WRK_DIR, 'DMPIPELINE_WORKST/DATA_HISTO_backup', TERRITORY_str)
ANNEX_DIR = os.path.join(WRK_DIR, 'DMPIPELINE_WORKST/ANNEX', TERRITORY_str)
INDIR_STATS_OBS = os.path.join(DATA_HISTO, '1_INDICATEURS/GLOBAL/STATS')
INDIR_STATS_REF = os.path.join(DATA_HISTO, '1_INDICATEURS/ALERT/METEO')



In [2]:
import glob
import shutil
import rasterio
import numpy as np
import pandas as pd
from tqdm import tqdm
import dmpipeline.ALERT_Processing.ALERT_main_functions as alertmain
import dmpipeline.GEOSTATS_Processing.GEOSTATS_processing_functions as geostats

import logging
# Root logger set-up: INFO level, messages prefixed with a dd/mm/YYYY HH:MM:SS
# timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%d/%m/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
)


### Fonction calcul et mise à jour RScores : adaptée à données globales (SAT, GEE)

In [3]:
def _alignMonthlySeries(stats_df, date_start, date_end, nb_expect_months):
    '''
    Restrict a statistics table to [date_start, date_end] on a gap-free monthly grid.

    Parameters
    ----------
    stats_df : DataFrame holding a datetime 'DATE' column plus the numeric
        'MEAN' and 'QSCORE' columns (no text column: the table is averaged
        per month when it has to be resampled).
    date_start, date_end : inclusive bounds of the period to keep.
    nb_expect_months : number of rows a complete monthly series has over the period.

    Returns
    -------
    (aligned_df, resampled, count_nan)
        aligned_df : the table over the period with NaN filled (interpolation,
                     then backward fill for whatever interpolation leaves empty),
                     or None when 'MEAN' has no valid value over the period
                     (this includes an empty period).
        resampled  : True when the row count did not match `nb_expect_months`
                     and the table was rebuilt by monthly averaging.
        count_nan  : number of NaN cells found over the period before filling.
    '''

    def _inPeriod(df):
        keep = (df['DATE'] >= date_start) & (df['DATE'] <= date_end)
        return df.loc[keep].reset_index(drop=True)

    aligned = _inPeriod(stats_df)
    resampled = len(aligned) != nb_expect_months
    if resampled:
        # 'MS' labels every calendar-month bin with its first day, i.e. the
        # YYYY-MM-01 convention used by DATE; missing months show up as NaN rows.
        aligned = _inPeriod(stats_df.resample('MS', on='DATE').mean().reset_index())

    count_nan = int(aligned.isnull().values.sum())

    # "No data" is decided on the correlated column only: counting NaN over
    # every column and comparing with the row count gives wrong answers as
    # soon as MEAN and QSCORE are missing together.
    if aligned['MEAN'].isnull().all():
        return None, resampled, count_nan

    if count_nan:
        aligned = aligned.interpolate()
        if aligned.isnull().values.any():
            aligned = aligned.bfill()

    return aligned, resampled, count_nan


def updateRScore(ref_df, obs_df):
    '''
    Estimating Pearson correlation (Rscore) between Observed and Reference time series.

    Both tables must provide the columns 'LOCATION', 'DATE' (datetime), 'MEAN'
    and 'QSCORE'. They are cut to their common period, put on a monthly grid
    and gap-filled before being correlated.

    Note: Rscore is the maximum score obtained from lag cross correlation varying
          between -4 and +4 months

    Returns
    -------
    (rmax, lagpeak, ppeak, qscore_ref, qscore_obs, rs, ps)
        rmax       : highest correlation over the tested lags (rounded to 2 decimals)
        lagpeak    : lag (in months) at which rmax is reached
        ppeak      : second output of alertmain.crosscorr_pearson at lagpeak
                     (assumed to be the p-value -- confirm against alertmain)
        qscore_ref : mean 'QSCORE' of the reference over the common period
        qscore_obs : mean 'QSCORE' of the observation over the common period
        rs, ps     : arrays of length 9 with the values for lags -4..+4

        When the reference or the observation has no valid 'MEAN' over the
        common period, every scalar is NaN and rs/ps are NaN-filled arrays.
    '''

    lagmax = 4
    nb_lags = 2 * lagmax + 1

    # Common period covered by both series
    date_start = max(ref_df['DATE'].min(), obs_df['DATE'].min())
    date_end = min(ref_df['DATE'].max(), obs_df['DATE'].max())
    Nb_expect_months = (date_end.year - date_start.year) * 12 + date_end.month - date_start.month + 1

    no_data = (np.nan, np.nan, np.nan, np.nan, np.nan,
               np.full(nb_lags, np.nan), np.full(nb_lags, np.nan))

    # --- Extracting data on the same period + Interpolation (if needed) ---
    ref_df_filt, resampled, count_nan = _alignMonthlySeries(
        ref_df.drop(columns=['LOCATION']), date_start, date_end, Nb_expect_months)
    if resampled:
        logging.info('\nREF RESAMPLING')
    if ref_df_filt is None:
        logging.info('\nNo ref data !')
        return no_data
    if count_nan:
        logging.info('\nREF INTERPOLATION : {} NaN'.format(str(count_nan)))
    qscore_ref = np.round(np.mean(ref_df_filt['QSCORE']), 2)

    obs_df_filt, resampled, count_nan = _alignMonthlySeries(
        obs_df.drop(columns=['LOCATION']), date_start, date_end, Nb_expect_months)
    if resampled:
        logging.info('\nobs RESAMPLING')
    if obs_df_filt is None:
        logging.info('\nNo obs data !')
        return no_data
    if count_nan:
        logging.info('\nOBS INTERPOLATION : {} NaN'.format(str(count_nan)))
    qscore_obs = np.round(np.mean(obs_df_filt['QSCORE']), 2)

    # --- Computing Pearson time lag correlation ---
    out_crosscorr = np.array([alertmain.crosscorr_pearson(ref_df_filt['MEAN'], obs_df_filt['MEAN'], lag)
                              for lag in range(-lagmax, lagmax + 1)])
    rs = np.round(out_crosscorr[:, 0], 2)
    ps = out_crosscorr[:, 1]
    ind_peak = int(np.argmax(rs))
    lagpeak = ind_peak - lagmax   # position 0 of rs corresponds to lag -lagmax
    rmax = np.round(rs[ind_peak], 2)
    ppeak = ps[ind_peak]

    return rmax, lagpeak, ppeak, qscore_ref, qscore_obs, rs, ps

### Fonction calcul et mise à jour RScores : adaptée à données globales (SAT, GEE) et in-situ (STATIONS MF)

In [4]:
def updateRScore_insitu(meteo_df, obs_df):
    '''
    Estimating Pearson correlation (Rscore) between meteo and sat time series.

    Parameters
    ----------
    meteo_df : DataFrame with a datetime 'DATE' column; the values to correlate
        are read from its second column (position 1). The table is expected to
        be sorted by date and is left untouched by this function.
    obs_df : DataFrame with the columns 'LOCATION', 'DATE' (datetime), 'MEAN'
        and 'QSCORE'.

    Note: Rscore is the maximum score obtained from lag cross correlation varying
          between -4 and +4 months

    Returns
    -------
    (rmax, lagpeak, ppeak, qscore_meteo, qscore_obs, rs, ps)
        rmax         : highest correlation over the tested lags (rounded to 2 decimals)
        lagpeak      : lag (in months) at which rmax is reached
        ppeak        : second output of alertmain.crosscorr_pearson at lagpeak
                       (assumed to be the p-value -- confirm against alertmain)
        qscore_meteo : share of non-missing meteo values over the common period
        qscore_obs   : mean 'QSCORE' of the observation over the common period
        rs, ps       : arrays of length 9 with the values for lags -4..+4

        When the observation has no valid 'MEAN' over the common period, every
        scalar is NaN and rs/ps are NaN-filled arrays.

    Raises
    ------
    ValueError : the meteo series has no record at the last date of the common period.
    '''

    lagmax = 4
    nb_lags = 2 * lagmax + 1

    # Common period covered by both series
    date_start = max(meteo_df['DATE'].min(), obs_df['DATE'].min())
    date_end = min(meteo_df['DATE'].max(), obs_df['DATE'].max())
    Nb_expect_months = (date_end.year - date_start.year) * 12 + date_end.month - date_start.month + 1

    def _inPeriod(df):
        keep = (df['DATE'] >= date_start) & (df['DATE'] <= date_end)
        return df.loc[keep].reset_index(drop=True)

    # --- Extracting data on the same period + Interpolation (if needed) ---

    # Meteo time series
    if not (meteo_df['DATE'] == date_end).any():
        logging.critical('\nMeteo data is not available for the last month')
        raise ValueError('Meteo data is not available for the last month')
    meteo_in_period = (meteo_df['DATE'] >= date_start) & (meteo_df['DATE'] <= date_end)
    meteo_values = meteo_df.iloc[:, 1]
    count_nan = meteo_values[meteo_in_period].isnull().sum()
    if count_nan:
        qscore_meteo = 1 - (count_nan / meteo_in_period.sum())
        # Gaps are filled on the whole series (records outside the period help
        # interpolating its edges), on a new Series so that the caller's table
        # is not modified.
        meteo_values = meteo_values.interpolate()
        if meteo_values.isnull().values.any():
            meteo_values = meteo_values.bfill()
    else:
        qscore_meteo = 1
    meteo_values = meteo_values[meteo_in_period].reset_index(drop=True)

    # Observed time series
    obs_df = obs_df.drop(columns=['LOCATION'])
    obs_df_filt = _inPeriod(obs_df)
    if len(obs_df_filt) != Nb_expect_months:
        # 'MS' labels every calendar-month bin with its first day, i.e. the
        # YYYY-MM-01 convention used by DATE; missing months show up as NaN rows.
        obs_df_filt = _inPeriod(obs_df.resample('MS', on='DATE').mean().reset_index())
        logging.info('\nobs RESAMPLING')
    count_nan = int(obs_df_filt.isnull().values.sum())
    # "No data" is decided on the correlated column only: counting NaN over
    # every column and comparing with the row count gives wrong answers as
    # soon as MEAN and QSCORE are missing together.
    if obs_df_filt['MEAN'].isnull().all():
        logging.info('\nNo obs data !')
        return (np.nan, np.nan, np.nan, np.nan, np.nan,
                np.full(nb_lags, np.nan), np.full(nb_lags, np.nan))
    if count_nan:
        obs_df_filt = obs_df_filt.interpolate()
        if obs_df_filt.isnull().values.any():
            obs_df_filt = obs_df_filt.bfill()
        logging.info('\nOBS INTERPOLATION : {} NaN'.format(str(count_nan)))
    qscore_obs = np.round(np.mean(obs_df_filt['QSCORE']), 2)

    # --- Computing Pearson time lag correlation ---
    out_crosscorr = np.array([alertmain.crosscorr_pearson(meteo_values, obs_df_filt['MEAN'], lag)
                              for lag in range(-lagmax, lagmax + 1)])
    rs = np.round(out_crosscorr[:, 0], 2)
    ps = out_crosscorr[:, 1]
    ind_peak = int(np.argmax(rs))
    lagpeak = ind_peak - lagmax   # position 0 of rs corresponds to lag -lagmax
    rmax = np.round(rs[ind_peak], 2)
    ppeak = ps[ind_peak]

    return rmax, lagpeak, ppeak, qscore_meteo, qscore_obs, rs, ps


### Calcul Lagged Cross-Correlation

In [5]:
def _firstMatchingFile(pattern):
    '''Return the first file (alphabetical order) matching `pattern`; raise FileNotFoundError if none.'''
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'No file matches pattern: {pattern}')
    return matches[0]


def _parseDateColumn(dates):
    '''Parse dates written as YYYY-MM-DD, falling back to DD/MM/YYYY.'''
    try:
        return pd.to_datetime(dates, format='%Y-%m-%d')
    except ValueError:
        return pd.to_datetime(dates, format='%d/%m/%Y')


# Reading and preparing the OBS dataframe

instats_obs_csv = _firstMatchingFile(os.path.join(INDIR_STATS_OBS, f'*STATS*{PERIOD_OBS}*{LANDMASK_OBS}*.csv'))
instats_obs_df = pd.read_csv(instats_obs_csv, sep=';', decimal='.')
instats_obs_df['DATE'] = _parseDateColumn(instats_obs_df.DATE)
instats_obs_df = instats_obs_df[['LOCATION','DATE','MEAN','QSCORE']].copy()


# Reading and preparing the REF dataframe

if 'SPI_ref_1991_2020' in PRODUCT_REF:
    # In-situ reference: one SPI series per station (comma decimal separator,
    # dates written as YYYYMM), plus the stations table handed to
    # alertmain.aggregStation in the per-area loop.
    insitu_key=1
    instats_ref_csv = _firstMatchingFile(os.path.join(INDIR_STATS_REF, f'*{PRODUCT_REF}*.csv'))
    instats_ref_df = pd.read_csv(instats_ref_csv, sep=';', decimal=',')
    instats_ref_df = instats_ref_df[['NOM','DATE','SPI3_MENS']].copy()
    instats_ref_df['DATE'] = pd.to_datetime(instats_ref_df.DATE, format='%Y%m')
    stations_spi_csv = os.path.join(ANNEX_DIR, 'Stations', 'SPI_communes_stations.csv')
    stations_spi_df = pd.read_csv(stations_spi_csv, sep=';')

else:
    # Reference with the same layout as the OBS statistics file
    insitu_key=0
    instats_ref_csv = _firstMatchingFile(os.path.join(INDIR_STATS_REF, f'*STATS*{PERIOD_REF}*{LANDMASK_REF}*.csv'))
    instats_ref_df = pd.read_csv(instats_ref_csv, sep=';', decimal='.')
    instats_ref_df = instats_ref_df[['LOCATION','DATE','MEAN','QSCORE']].copy()
    instats_ref_df['DATE'] = _parseDateColumn(instats_ref_df.DATE)

# Lookup table of the sub-areas (its 'nom' column drives the per-area loop)
area_lut = pd.read_csv(_firstMatchingFile(os.path.join(INDIR_STATS_OBS, 'ID_Name_Areas-lookup*.csv')), sep=';')

# Empty result table: peak correlation, its lag and p-value, both quality
# scores, then correlation (R) and p-value (PV) for every lag from -4 to +4.
rscore_df = pd.DataFrame(columns=['LOCATION','RMAX','LAGMAX','PVMAX',f'QSCORE {PRODUCT_REF}',f'QSCORE {PRODUCT_OBS}',
                                  'R-4','R-3','R-2','R-1','R0','R+1','R+2','R+3','R+4',
                                  'PV-4','PV-3','PV-2','PV-1','PV0','PV+1','PV+2','PV+3','PV+4'])


In [6]:
# Show the two statistics files selected by the glob patterns (OBS first, then REF).
print(instats_obs_csv, instats_ref_csv, sep='\n')

D:\MATHIS\0_Projet_Secheresse\1_Scripts\toolbox\eo4dm-oeil\EO4DM\..\DMPIPELINE_WORKST/DATA_HISTO_backup\New_Caledonia_Fr\1_INDICATEURS/GLOBAL/STATS\VHI_STATS_M_NoTrees_NoBuild.csv
D:\MATHIS\0_Projet_Secheresse\1_Scripts\toolbox\eo4dm-oeil\EO4DM\..\DMPIPELINE_WORKST/DATA_HISTO_backup\New_Caledonia_Fr\1_INDICATEURS/ALERT/METEO\SPI_ref_1991_2020.csv


In [7]:
# Quick visual check of the loaded tables (OBS first, then REF).
print(instats_obs_df, instats_ref_df, sep='\n')

         LOCATION       DATE  MEAN  QSCORE
0           BELEP 2001-01-01  0.61    0.12
1     BOULOUPARIS 2001-01-01  0.46    0.20
2         BOURAIL 2001-01-01  0.43    0.19
3          CANALA 2001-01-01  0.69    0.17
4          DUMBEA 2001-01-01  0.45    0.13
...           ...        ...   ...     ...
9787     SARRAMEA 2024-01-01  0.67    0.13
9788         THIO 2024-01-01  0.69    0.16
9789        TOUHO 2024-01-01  0.80    0.12
9790          VOH 2024-01-01  0.64    0.14
9791         YATE 2024-01-01  0.66    0.10

[9792 rows x 4 columns]
               NOM       DATE  SPI3_MENS
0      BOULOUPARIS 1956-01-01        NaN
1      BOULOUPARIS 1956-02-01        NaN
2      BOULOUPARIS 1956-03-01       1.13
3      BOULOUPARIS 1956-04-01       1.23
4      BOULOUPARIS 1956-05-01       1.11
...            ...        ...        ...
35111       OUINNE 2023-09-01       1.00
35112       OUINNE 2023-10-01       1.83
35113       OUINNE 2023-11-01       1.25
35114       OUINNE 2023-12-01      -0.34
35115   

In [8]:
# # (Optional) Used to remove duplicate dates from the reference dataframe

# instats_ref_cleaned = pd.DataFrame(columns=list(instats_ref_df))

# for a in tqdm(area_lut['nom'], desc='SUB-AREA'):
#     instats_ref_a = instats_ref_df.loc[instats_ref_df['LOCATION']==str(a)].reset_index(drop=True)
#     instats_ref_a_cleaned = instats_ref_a.drop_duplicates(subset='DATE')
#     instats_ref_cleaned = pd.concat([instats_ref_cleaned, instats_ref_a_cleaned], ignore_index=True)
#     del instats_ref_a, instats_ref_a_cleaned

# instats_ref_cleaned = instats_ref_cleaned.sort_values(by=['LOCATION','DATE'])
# instats_ref_cleaned.to_csv(
#   os.path.join(WRK_DIR, os.path.basename(instats_ref_csv)),
#   index = False,
#   float_format='%.2f',
#   decimal = '.',
#   sep = ';')

In [9]:
# Rows are collected in a list and the result table is built once at the end:
# re-running this cell then rebuilds the table instead of appending the same
# areas a second time, and no DataFrame is concatenated inside the loop.
lag_labels = ['-4', '-3', '-2', '-1', '0', '+1', '+2', '+3', '+4']
rscore_columns = list(rscore_df.columns)
rscore_rows = []

for a in tqdm(area_lut['nom'], desc='SUB-AREA'):

    # tqdm.write keeps the progress bar on its own line
    tqdm.write(f'SUB-AREA : {str(a)}')

    # Extract time series on Sub-area
    instats_obs_a = instats_obs_df.loc[instats_obs_df['LOCATION'] == str(a)].reset_index(drop=True)
    testnan_obs_a = instats_obs_a['MEAN']
    if insitu_key == 1:
        instats_ref_a = alertmain.aggregStation(instats_ref_df, stations_spi_df, a)
        # The values to test sit in the second column; an empty table is kept
        # as is so that the emptiness test below catches it.
        testnan_ref_a = instats_ref_a if instats_ref_a.empty else instats_ref_a.iloc[:, 1]
    else:
        instats_ref_a = instats_ref_df.loc[instats_ref_df['LOCATION'] == str(a)].reset_index(drop=True)
        testnan_ref_a = instats_ref_a['MEAN']

    # Row pre-filled with nan: left as is for an area without data
    r_dict = {'LOCATION': str(a), 'RMAX': np.nan, 'LAGMAX': np.nan, 'PVMAX': np.nan,
              f'QSCORE {PRODUCT_REF}': np.nan, f'QSCORE {PRODUCT_OBS}': np.nan}
    r_dict.update({f'R{lab}': np.nan for lab in lag_labels})
    r_dict.update({f'PV{lab}': np.nan for lab in lag_labels})

    ref_without_data = testnan_ref_a.empty or testnan_ref_a.isnull().values.all()
    obs_without_data = testnan_obs_a.empty or testnan_obs_a.isnull().values.all()

    # Compute Pearson correlation (RSCORE)
    if not (ref_without_data or obs_without_data):
        instats_obs_a = instats_obs_a.sort_values(by=['DATE'])
        instats_ref_a = instats_ref_a.sort_values(by=['DATE'])
        if insitu_key == 1:
            rmax, lagpeak, ppeak, qscore_ref, qscore_obs, rs, ps = updateRScore_insitu(instats_ref_a, instats_obs_a)
        else:
            rmax, lagpeak, ppeak, qscore_ref, qscore_obs, rs, ps = updateRScore(instats_ref_a, instats_obs_a)
        r_dict.update({'RMAX': rmax, 'LAGMAX': lagpeak, 'PVMAX': ppeak,
                       f'QSCORE {PRODUCT_REF}': qscore_ref, f'QSCORE {PRODUCT_OBS}': qscore_obs})
        r_dict.update({f'R{lab}': r for lab, r in zip(lag_labels, rs)})
        r_dict.update({f'PV{lab}': p for lab, p in zip(lag_labels, ps)})

    rscore_rows.append(r_dict)

rscore_df = pd.DataFrame(rscore_rows, columns=rscore_columns)

SUB-AREA:   0%|          | 0/33 [00:00<?, ?it/s]

SUB-AREA : BELEP
SUB-AREA : BOULOUPARIS
SUB-AREA : BOURAIL
SUB-AREA : CANALA


SUB-AREA:  12%|█▏        | 4/33 [00:00<00:00, 31.75it/s]

SUB-AREA : DUMBEA
SUB-AREA : FARINO
SUB-AREA : HIENGHENE


SUB-AREA:  24%|██▍       | 8/33 [00:00<00:00, 27.42it/s]

SUB-AREA : HOUAILOU
SUB-AREA : ILE DES PINS
SUB-AREA : KAALA GOMEN
SUB-AREA : KONE


SUB-AREA:  33%|███▎      | 11/33 [00:00<00:00, 27.59it/s]

SUB-AREA : KOUAOUA


SUB-AREA:  45%|████▌     | 15/33 [00:00<00:00, 27.89it/s]

SUB-AREA : KOUMAC
SUB-AREA : LA FOA
SUB-AREA : LIFOU
SUB-AREA : MARE
SUB-AREA : MOINDOU
SUB-AREA : MONT DORE


SUB-AREA:  55%|█████▍    | 18/33 [00:00<00:00, 27.78it/s]

SUB-AREA : NOUMEA
SUB-AREA : OUEGOA
SUB-AREA : OUVEA
SUB-AREA : PAITA


SUB-AREA:  67%|██████▋   | 22/33 [00:00<00:00, 28.16it/s]

SUB-AREA : POINDIMIE
SUB-AREA : PONERIHOUEN
SUB-AREA : POUEBO


SUB-AREA:  79%|███████▉  | 26/33 [00:00<00:00, 29.22it/s]

SUB-AREA : POUEMBOUT
SUB-AREA : POUM
SUB-AREA : POYA
SUB-AREA : SARRAMEA


SUB-AREA:  91%|█████████ | 30/33 [00:01<00:00, 30.12it/s]

SUB-AREA : THIO
SUB-AREA : TOUHO


SUB-AREA: 100%|██████████| 33/33 [00:01<00:00, 29.09it/s]

SUB-AREA : VOH
SUB-AREA : YATE





### Prépare export rscores

In [10]:
# Export folder, next to the data folders under WRK_DIR
outdir = os.path.join(WRK_DIR, f'RUN_RSCORES_DROUGHT_{TERRITORY_str}')

# used to reset the directories permission
os.umask(0)

# A folder left by a previous run is reused untouched; a new one is opened to everybody.
outdir_missing = not os.path.exists(outdir)
if outdir_missing:
    os.makedirs(outdir)
    os.chmod(outdir, 0o777)

In [11]:
# Export the Rscore table as a ';'-separated CSV file

# File-name suffix: the OBS land mask when one is set, otherwise the REF one,
# otherwise no suffix at all.
LANDMASK = next((f'_{mask}' for mask in (LANDMASK_OBS, LANDMASK_REF) if mask != ''), '')

rscore_filename = f'{PRODUCT_OBS}_{PRODUCT_REF}_RSCORE_QSCORES{LANDMASK}.csv'
rscore_path = os.path.join(outdir, rscore_filename)

rscore_df.to_csv(rscore_path, sep=';', decimal='.', index=False)