In [258]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 52.52,
	"longitude": 13.41,
	"start_date": "1990-01-01",
	"end_date": "2010-01-01",
	"hourly": ["temperature_2m", "dew_point_2m", "rain", "snowfall", "surface_pressure", "cloud_cover", "et0_fao_evapotranspiration", "wind_speed_10m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm"],
	"timezone": "Europe/Berlin"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_rain = hourly.Variables(2).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(3).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(4).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(5).ValuesAsNumpy()
hourly_et0_fao_evapotranspiration = hourly.Variables(6).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(7).ValuesAsNumpy()
hourly_soil_temperature_0_to_7cm = hourly.Variables(8).ValuesAsNumpy()
hourly_soil_moisture_0_to_7cm = hourly.Variables(9).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["rain"] = hourly_rain
hourly_data["snowfall"] = hourly_snowfall
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["soil_temperature_0_to_7cm"] = hourly_soil_temperature_0_to_7cm
hourly_data["soil_moisture_0_to_7cm"] = hourly_soil_moisture_0_to_7cm

hourly_dataframe = pd.DataFrame(data = hourly_data)
hourly_dataframe['time'] = (hourly_dataframe['date'].dt.hour)/24
hourly_dataframe['month'] = (hourly_dataframe['date'].dt.month)/12
print(hourly_dataframe)
print(hourly_dataframe.dtypes)



Coordinates 52.5483283996582°N 13.407821655273438°E
Elevation 38.0 m asl
Timezone b'Europe/Berlin' b'CEST'
Timezone difference to GMT+0 7200 s
                            date  temperature_2m  dew_point_2m  rain  \
0      1989-12-31 22:00:00+00:00          -1.261        -1.961   0.0   
1      1989-12-31 23:00:00+00:00          -1.111        -1.911   0.0   
2      1990-01-01 00:00:00+00:00          -1.311        -2.111   0.0   
3      1990-01-01 01:00:00+00:00          -1.311        -2.511   0.0   
4      1990-01-01 02:00:00+00:00          -1.461        -3.211   0.0   
...                          ...             ...           ...   ...   
175339 2010-01-01 17:00:00+00:00          -2.811        -5.511   0.0   
175340 2010-01-01 18:00:00+00:00          -3.161        -5.761   0.0   
175341 2010-01-01 19:00:00+00:00          -2.911        -5.011   0.0   
175342 2010-01-01 20:00:00+00:00          -2.961        -4.911   0.0   
175343 2010-01-01 21:00:00+00:00          -2.911        -4.761   

In [259]:
hourly_dataframe


Unnamed: 0,date,temperature_2m,dew_point_2m,rain,snowfall,surface_pressure,cloud_cover,et0_fao_evapotranspiration,wind_speed_10m,soil_temperature_0_to_7cm,soil_moisture_0_to_7cm,time,month
0,1989-12-31 22:00:00+00:00,-1.261,-1.961,0.0,0.00,1017.432617,84.599998,0.000000,3.319036,-0.361,0.350,0.916667,1.000000
1,1989-12-31 23:00:00+00:00,-1.111,-1.911,0.0,0.00,1017.335876,83.699997,0.000000,3.396233,-0.311,0.350,0.958333,1.000000
2,1990-01-01 00:00:00+00:00,-1.311,-2.111,0.0,0.00,1017.232788,84.599998,0.000000,3.075841,-0.361,0.350,0.000000,0.083333
3,1990-01-01 01:00:00+00:00,-1.311,-2.511,0.0,0.00,1016.934265,84.599998,0.000000,3.259938,-0.361,0.350,0.041667,0.083333
4,1990-01-01 02:00:00+00:00,-1.461,-3.211,0.0,0.00,1016.632935,82.799995,0.000000,3.259938,-0.361,0.350,0.083333,0.083333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
175339,2010-01-01 17:00:00+00:00,-2.811,-5.511,0.0,0.00,995.112122,100.000000,0.003735,10.805998,-0.561,0.368,0.708333,0.083333
175340,2010-01-01 18:00:00+00:00,-3.161,-5.761,0.0,0.00,995.603455,100.000000,0.002769,10.315115,-0.561,0.368,0.750000,0.083333
175341,2010-01-01 19:00:00+00:00,-2.911,-5.011,0.0,0.07,996.304688,100.000000,0.000725,10.464797,-0.561,0.368,0.791667,0.083333
175342,2010-01-01 20:00:00+00:00,-2.961,-4.911,0.0,0.07,997.000488,100.000000,0.000000,9.983106,-0.561,0.368,0.833333,0.083333


In [None]:
hourly_dataframe.to_csv('data.csv', index=False)  # Set index=False to exclude row numbers from the CSV file

In [260]:
training_columns = hourly_dataframe.select_dtypes(include=['float']).columns
means = hourly_dataframe[training_columns].mean()
stds = hourly_dataframe[training_columns].std()

# Normalize the float columns
df_normalized = hourly_dataframe[training_columns].apply(lambda x: (x - x.mean()) / x.std())

# Convert the normalized DataFrame to a NumPy array
normalized_array = df_normalized.to_numpy()

In [261]:
def unnormalize_data(normalized_data, means, stds):
    # Convert means and stds to numpy arrays if they are pandas Series
    if isinstance(means, pd.Series):
        means = means.to_numpy()
    if isinstance(stds, pd.Series):
        stds = stds.to_numpy()
    
    # Unnormalize the data
    unnormalized_data = normalized_data * stds + means
    
    return unnormalized_data

In [262]:
normalized_array.shape

(175344, 12)

In [263]:
import numpy as np

In [264]:
def prepare_sequential_data(data, past_window_size, prediction_window_size):

    X, y = [], []
    total_window_size = past_window_size + prediction_window_size
    
    for i in range(len(data) - total_window_size + 1):
        past_window = data[i:i+past_window_size]
        prediction_window = data[i+past_window_size:i+total_window_size]
        
        X.append(past_window)
        y.append(prediction_window)
    
    return np.array(X), np.array(y)

In [265]:
INPUT_WINDOW_SIZE = 72
OUTPUT_WINDOW_SIZE = 12
NUMBER_OF_FEATURES = 12

X, y = prepare_sequential_data(normalized_array, INPUT_WINDOW_SIZE, OUTPUT_WINDOW_SIZE)

In [266]:
means, stds

means.to_csv('dane\means9010.csv', index=True) 
stds.to_csv('dane\stds9010.csv', index=True) 

np.save('dane\X.npy', X)
np.save('dane\y.npy', y)

In [246]:
Xload = np.load('dane\X.npy')
yload = np.load('dane\y.npy')
meansloaded = pd.read_csv('dane\means6080.csv', skiprows=1, header=None)
stdsloaded = pd.read_csv('dane\stds6080.csv', skiprows=1, header=None)

In [267]:

def reconstruct_original_data(X, y):

    past_window_size = X.shape[1]
    prediction_window_size = y.shape[1]
    
    # Initialize the list to store the reconstructed data
    reconstructed_data = []
    
    # Start with the past window of the first sample
    reconstructed_data.extend(X[0])
    
    for i in range(len(y)):
        if i == 0:
            # The first prediction window is added directly
            reconstructed_data.extend(y[i])
        else:
            # For subsequent windows, add only the new part of the prediction window
            reconstructed_data.extend(y[i][-(prediction_window_size):])
    
    return np.array(reconstructed_data)




In [268]:
reconstructed_data = reconstruct_original_data(Xload, yload)


In [269]:
(X * stdsloaded[1].to_numpy() + meansloaded[1].to_numpy())[:, :, 4]

array([[1016.5125389 , 1016.41914686, 1016.31962688, ..., 1022.4600525 ,
        1022.75171884, 1023.04857024],
       [1016.41914686, 1016.31962688, 1016.03143705, ..., 1022.75171884,
        1023.04857024, 1023.34023658],
       [1016.31962688, 1016.03143705, 1015.74053691, ..., 1023.04857024,
        1023.34023658, 1023.14626389],
       ...,
       [1004.84772085, 1005.1608347 , 1005.08087702, ...,  991.31635886,
         991.31464812,  991.50850303],
       [1005.1608347 , 1005.08087702, 1004.5120988 , ...,  991.31464812,
         991.50850303,  991.8928552 ],
       [1005.08087702, 1004.5120988 , 1003.84709989, ...,  991.50850303,
         991.8928552 ,  992.37743301]])

In [270]:
y.shape

(175261, 12, 12)

In [271]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Reshape, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint


In [272]:
model = Sequential([
    LSTM(64, input_shape=(INPUT_WINDOW_SIZE, NUMBER_OF_FEATURES), return_sequences=True),
    Flatten(),
    Dense(NUMBER_OF_FEATURES*OUTPUT_WINDOW_SIZE),
    Dense(NUMBER_OF_FEATURES*OUTPUT_WINDOW_SIZE),
    Reshape((OUTPUT_WINDOW_SIZE, NUMBER_OF_FEATURES))  # Adjust units as needed to match the output shape (24, 10)
])
model.compile(optimizer='adam', loss='mse')
checkpoint_path = "model_9010.h5"
checkpoint_callback = ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, mode='min', verbose=1)


In [273]:
# Example data shapes: X_train.shape = (samples, 72, 10), y_train.shape = (samples, 24, 10)
batch_size = 32
epochs = 3

model.fit(X, y, batch_size=batch_size, epochs=epochs, validation_split=0.2, callbacks=[checkpoint_callback])


Epoch 1/3
Epoch 1: val_loss improved from inf to 0.23870, saving model to model_9010.h5
Epoch 2/3
   1/4382 [..............................] - ETA: 3:28 - loss: 0.1307

  saving_api.save_model(


Epoch 2: val_loss improved from 0.23870 to 0.23527, saving model to model_9010.h5
Epoch 3/3
Epoch 3: val_loss improved from 0.23527 to 0.23229, saving model to model_9010.h5


<keras.src.callbacks.History at 0x2ba09a57940>

In [None]:
predict = (model.predict(X[:5]) * stdsloaded[1].to_numpy() + meansloaded[1].to_numpy())



In [231]:
X[:5].shape

(5, 72, 12)

In [234]:
predict.shape

(5, 12, 12)

In [232]:
sequence_prediction = predict[:, 0]
sequence_prediction.shape

(5, 12)

In [256]:
df = pd.DataFrame(sequence_prediction)
df.columns = stdsloaded[0]
df['month'] = (df['month']*12).astype(int)
df['time'] = (df['time']*24).astype(int)

In [257]:
df

Unnamed: 0,temperature_2m,dew_point_2m,rain,snowfall,surface_pressure,cloud_cover,et0_fao_evapotranspiration,wind_speed_10m,soil_temperature_0_to_7cm,soil_moisture_0_to_7cm,time,month
0,2.61485,0.480985,0.088726,-0.014858,1000.012901,80.01982,0.03681,15.134235,2.912408,0.380317,21,1
1,2.616761,0.906959,0.096469,-0.015229,999.793066,74.730668,0.031107,13.965767,2.684157,0.379344,23,1
2,2.830885,0.528426,0.067722,-0.013225,999.534081,63.595867,0.038523,16.286743,2.822498,0.375378,0,1
3,2.798419,0.419022,-0.045237,-0.016207,998.978918,46.643071,0.044044,16.803752,2.463991,0.375158,1,1
4,2.62675,-0.416102,0.034539,-0.012433,999.206236,47.721638,0.046697,17.062625,2.07343,0.378415,3,1


In [None]:
# ostatnie trzy dni jako np.ndarray (1, 72, 12)
predict.reshape(1, 5, 12,12).shape