In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from config.config import OBSERVACIONS_DIR, OPTIMIZATION_DIR
from src.Prophet import Prophet
from src.Optimizer import ProphetOptimizer
import multiprocessing
from joblib import Parallel, delayed

In [5]:
# Make sure <OPTIMIZATION_DIR>/prophet exists before anything is written to it.
# parents=True also creates OPTIMIZATION_DIR (and any missing ancestors);
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of testing .exists() first.
(OPTIMIZATION_DIR / 'prophet').mkdir(parents=True, exist_ok=True)

In [4]:
# Pick one station file to experiment with. The paths are sorted first because
# glob yields them in directory-listing order, which is not guaranteed to be
# stable -- without sorting, index 4 could point at a different station from
# one machine (or run) to the next.
file = sorted(OBSERVACIONS_DIR.glob("*.csv"))[4]
# Rename to the 'ds'/'y' column names used by the model cells below
# (presumably the names Prophet expects -- confirm in src.Prophet).
observacio_df = pd.read_csv(file).rename(columns = {'Date': 'ds', 'Flow': 'y'})
observacio_df

Unnamed: 0,ds,y
0,2001-01-01,0.160000
1,2001-01-02,0.160000
2,2001-01-03,0.160000
3,2001-01-04,0.281100
4,2001-01-05,0.358000
...,...,...
7347,2021-02-12,0.269524
7348,2021-02-13,0.269247
7349,2021-02-14,0.269267
7350,2021-02-15,0.262014


In [5]:
# Build the project's Prophet wrapper (src.Prophet) with its default arguments
# and fit it on the single-station frame loaded above. The object returned by
# fit() is the one queried with nash() and plot() in the following cells.
m = Prophet()
fitted_model = m.fit(observacio_df)

In [6]:
# Goodness-of-fit score of the fitted model (presumably the Nash-Sutcliffe
# efficiency -- confirm against the implementation in src.Prophet).
fitted_model.nash()

0.15565973804103073

In [7]:
# Visual check of the fit; what exactly is drawn is defined by plot() in src.Prophet.
fitted_model.plot()

In [10]:
def optimize_file(file):
    """Run the Prophet hyper-parameter optimization for one station file.

    Parameters
    ----------
    file : pathlib.Path
        CSV file for one station. Its 'Date' and 'Flow' columns are renamed
        to 'ds' and 'y' before being handed to the optimizer, and the file
        stem is used as the station identifier.

    Returns
    -------
    dict
        The first row of the table returned by ``ProphetOptimizer.optimize()``
        converted to a plain dict, plus a 'station' key holding ``file.stem``.
    """
    observacio_df = pd.read_csv(file).rename(columns = {'Date': 'ds', 'Flow': 'y'})

    opt = ProphetOptimizer(observacio_df)
    results = opt.optimize()
    # Report completion only after the optimization has actually run
    # (previously this was printed before optimize() was called).
    print(f"Optimization for {file.stem} finished")

    # Keep the first row as a dict -- assumes optimize() returns its rows
    # ordered best-first (TODO confirm in src.Optimizer).
    dict_results = results.iloc[0].to_dict()
    dict_results['station'] = file.stem
    return dict_results

# Use one worker per CPU core.
num_cores = multiprocessing.cpu_count()
# Sort the station files so that the list of results (and therefore the row
# order of the table and CSV built from it) is reproducible; the raw glob
# order depends on the filesystem.
station_files = sorted(OBSERVACIONS_DIR.glob("*.csv"))
results_optimization = Parallel(n_jobs=num_cores)(delayed(optimize_file)(file) for file in station_files)



In [11]:
# Collect the per-station result dicts into one table, keyed by station name
results_frame = pd.DataFrame(results_optimization)
results_optimization_df = results_frame.set_index('station')

results_optimization_df

Unnamed: 0_level_0,changepoint_prior_scale,seasonality_prior_scale,nash,pbias
station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a01,0.500000,3.34,0.165774,-0.001125
a03,0.500000,10.00,0.135972,0.002443
a04,0.500000,4.45,0.377337,-0.002716
a05,0.500000,1.12,0.167856,0.010779
a07,0.500000,7.78,0.170520,0.004614
...,...,...,...,...
a69,0.500000,3.34,0.066087,-0.006476
a70,0.444556,0.01,0.087125,0.001792
a71,0.500000,5.56,0.085032,-0.005867
a72,0.500000,1.12,0.050226,-0.029132


In [12]:
# Persist the per-station optimization results inside the 'prophet' subfolder
results_csv_path = OPTIMIZATION_DIR / 'prophet' / 'results_optimization.csv'
results_optimization_df.to_csv(results_csv_path)

In [16]:
# Mean of the nash and pbias columns across all stations
score_columns = ['nash', 'pbias']
results_optimization_df[score_columns].mean()

nash     0.141231
pbias    0.001904
dtype: float64