In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from config.config import LSTM_DATA_DIR, OBSERVACIONS_FILTRAT_DIR, LSTM_PREDICTIONS_DIR, OBSERVACIONS_DIR, AUTOENCODER_DIR
import pandas as pd
import os
from keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np
from keras.layers import Input, Dropout, Dense, LSTM, TimeDistributed, RepeatVector
from keras import regularizers
from keras.models import Model
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from keras.models import load_model
import joblib
from src.utils import parse_station


In [4]:
# Names (file stems) of the stations that have a filtered-observations CSV.
# If a station does not appear here, all of its data is considered erroneous.
observacions_filtrades = {
    csv_path.stem for csv_path in OBSERVACIONS_FILTRAT_DIR.glob("*.csv")
}

In [7]:
# Collect anomaly-prediction statistics for a single station (see the `break`
# at the bottom of the loop).
statistics = []

# List the LSTM data directory once instead of on every iteration.
available_lstm_files = set(os.listdir(LSTM_DATA_DIR))

# Sort the files so the station that gets processed is the same on every run;
# the order returned by glob() depends on the filesystem.
for file in sorted(OBSERVACIONS_DIR.glob("*.csv")):
    observacio_df = pd.read_csv(file).rename(columns={'Date': 'ds', 'Flow': 'y'})

    # Rows with missing values are dropped before looking for anomalies.
    observacio_df_non_na = observacio_df.dropna()

    if file.stem in observacions_filtrades:
        # A filtered version of this station exists: read it with the same schema.
        observacio_filtrada_df = pd.read_csv(OBSERVACIONS_FILTRAT_DIR / file.name).rename(columns={'Date': 'ds', 'Flow': 'y'})
        observacio_filtrada_df_non_na = observacio_filtrada_df.dropna()

        # Anomalies are the dates present in the raw observations but absent
        # from the filtered ones.
        anomalies_df = observacio_df_non_na[~observacio_df_non_na['ds'].isin(observacio_filtrada_df_non_na['ds'])]
    else:
        # No filtered file: every observation of this station is an anomaly.
        # Use the NaN-free frame so both branches are built from the same rows.
        anomalies_df = observacio_df_non_na.copy()

    meteo_data_file = file.stem + '_stats.csv'

    # Stations without LSTM input data cannot be evaluated; try the next one.
    if meteo_data_file not in available_lstm_files:
        continue

    lstm_fitted = parse_station(meteo_data_file, only_testing = False, model_with_flow = False, transfer_learning = False, replace_simulated_by_nans = True, observacions_filtrades=False)
    stats = lstm_fitted.get_statistics_anomaly_prediction(anomalies_df)

    stats['station'] = file.stem

    statistics.append(stats)

    lstm_fitted.plot()

    # Stop after the first station that was actually processed, so that
    # `lstm_fitted` is defined for the following cells whenever at least one
    # station has LSTM data. Remove this `break` to process every station.
    break
    




In [8]:
# Fetch the results DataFrame of the station fitted in the loop cell and
# display it (the rendered output shows the columns ds, y, yhat, anomaly, loss).
df = lstm_fitted.get_df()
df

Unnamed: 0,ds,y,yhat,anomaly,loss
163,2001-06-13,10.1198,8.361575,False,0.174955
164,2001-06-14,9.0888,8.315607,False,0.119805
165,2001-06-15,9.5613,8.211052,False,0.123098
166,2001-06-16,9.5899,8.435744,False,0.079660
167,2001-06-17,8.8069,8.271614,False,0.040985
...,...,...,...,...,...
7347,2021-02-12,,3.606065,False,
7348,2021-02-13,,3.689963,False,
7349,2021-02-14,,3.506598,False,
7350,2021-02-15,0.0000,3.753137,False,


In [13]:
def min_max_scale(series):
    """Linearly rescale a pandas Series so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        ``(series - series.min()) / (series.max() - series.min())``.
    """
    lowest = series.min()
    return (series - lowest) / (series.max() - lowest)


# Scale each series with its own min and max. The previous expression for `y`
# lacked parentheses around `y - y.min()`, so it only subtracted a constant
# (min / range) instead of normalising; using one helper for both series
# keeps them consistent.
y = min_max_scale(df['y'])
yhat = min_max_scale(df['yhat'])


In [16]:
# Draw y and yhat against the date column as two line traces in one plotly figure.
import plotly.graph_objects as go

fig = go.Figure(
    data=[
        go.Scatter(x=df['ds'], y=y, mode='lines', name='y'),
        go.Scatter(x=df['ds'], y=yhat, mode='lines', name='yhat'),
    ]
)
fig.show()

