In [7]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.metrics import MeanAbsoluteError
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib

def convert_df_columns(df):
    for col in df.columns:
        if df[col].dtype == 'float64' or df[col].dtype == 'float32':
            # If the column is already a float, no action needed
            continue
        elif df[col].dtype == 'int64' or df[col].dtype == 'int32':
            # If the column is already an int, no action needed
            continue
        else:
            try:
                # Try converting to int first
                df[col] = df[col].astype(int)
            except ValueError:
                try:
                    # If int conversion fails, try converting to float
                    df[col] = df[col].astype(float)
                except ValueError:
                    # If both conversions fail, leave the column as is
                    print(f"Column {col} cannot be converted to int or float.")
    return df



def train_lstm_on_dfs(df_dict, input_columns, output_columns, batch_size=32, epochs=1):
    # Initialize the LSTM model
    model = Sequential([
        LSTM(50, activation='relu', input_shape=(1, len(input_columns))),  # Adjust the input shape
        Dense(len(output_columns))  # Adjust the output layer based on the number of output columns
    ])
    model.compile(optimizer='adam', loss='mean_absolute_error')
    
    # Iterate over each dataframe in the dictionary
    for df in df_dict.values():
        df=convert_df_columns(df)
        # Preprocess the data for LSTM
        # Scale each input column and combine them
        scaler = StandardScaler()
        X_scaled = np.array([scaler.fit_transform(df[col].values.reshape(-1, 1)) for col in input_columns])
        X_scaled = np.transpose(X_scaled, (1, 0, 2)).reshape(-1, 1, len(input_columns))

        # Prepare output data
        y = df[output_columns].values

        # Split the data into training and validation
        X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
        
        # Train the model on the data from the current dataframe
        model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=epochs)
    
    # After training on all dataframes, evaluate the model on the full dataset or a separate test set
    # Evaluation code goes here...

    # Return the trained model
    return model

# Example usage:
# file_list = ['batch1.csv', 'batch2.csv', ...]  # List of filenames
# directory = '/path/to/directory'
# input_columns = ['input1', 'input2', ...]  # List of input column names
# output_columns = ['output1', 'output2', ...]  # List of output column names
# model = train_lstm_on_files(file_list, directory, input_columns, output_columns)


In [8]:
df_dict=joblib.load(r"./train_dataset_complete.pkl")

train_lstm_on_dfs(df_dict, lstm_input, lstm_output, batch_size=32, epochs=1)


TypeError: cannot astype a datetimelike from [datetime64[ns]] to [int32]

In [3]:
lstm_output=['target_normalized']

lstm_input=['county',
'is_business',
'product_type',
'is_consumption',
'weekday',
'hour_of_day',
'day_of_month',
'month',
'es_festive',
'product_type_county_is_business',
'eic_count',
'latitude',
'longitude',
'mergecolumn',
'forecast_datetime_f',
'temperature_f',
'dewpoint_f',
'cloudcover_high_f',
'cloudcover_low_f',
'cloudcover_mid_f',
'cloudcover_total_f',
'10_metre_u_wind_component_f',
'10_metre_v_wind_component_f',
'direct_solar_radiation_f',
'surface_solar_radiation_downwards_f',
'snowfall_f',
'total_precipitation_f',
'latitude_f',
'longitude_f',
'origin_datetime_f',
'hours_ahead_f',
'data_block_id_f',
'forecast_datetime_f_original',
'euros_per_mwh',
'target_normalized_1',
'temperature_f_1',
'cloudcover_high_f_1',
'cloudcover_low_f_1',
'cloudcover_mid_f_1',
'cloudcover_total_f_1',
'direct_solar_radiation_f_1',
'snowfall_f_1',
'total_precipitation_f_1',
'target_normalized_2',
'temperature_f_2',
'cloudcover_high_f_2',
'cloudcover_low_f_2',
'cloudcover_mid_f_2',
'cloudcover_total_f_2',
'direct_solar_radiation_f_2',
'snowfall_f_2',
'total_precipitation_f_2',
'target_normalized_3',
'temperature_f_3',
'cloudcover_high_f_3',
'cloudcover_low_f_3',
'cloudcover_mid_f_3',
'cloudcover_total_f_3',
'direct_solar_radiation_f_3',
'snowfall_f_3',
'total_precipitation_f_3',
'target_normalized_4',
'temperature_f_4',
'cloudcover_high_f_4',
'cloudcover_low_f_4',
'cloudcover_mid_f_4',
'cloudcover_total_f_4',
'direct_solar_radiation_f_4',
'snowfall_f_4',
'total_precipitation_f_4',
'target_normalized_5',
'temperature_f_5',
'cloudcover_high_f_5',
'cloudcover_low_f_5',
'cloudcover_mid_f_5',
'cloudcover_total_f_5',
'direct_solar_radiation_f_5',
'snowfall_f_5',
'total_precipitation_f_5',
'target_normalized_6',
'temperature_f_6',
'cloudcover_high_f_6',
'cloudcover_low_f_6',
'cloudcover_mid_f_6',
'cloudcover_total_f_6',
'direct_solar_radiation_f_6',
'snowfall_f_6',
'total_precipitation_f_6',
'target_normalized_7',
'temperature_f_7',
'cloudcover_high_f_7',
'cloudcover_low_f_7',
'cloudcover_mid_f_7',
'cloudcover_total_f_7',
'direct_solar_radiation_f_7',
'snowfall_f_7',
'total_precipitation_f_7',
'target_normalized_8',
'temperature_f_8',
'cloudcover_high_f_8',
'cloudcover_low_f_8',
'cloudcover_mid_f_8',
'cloudcover_total_f_8',
'direct_solar_radiation_f_8',
'snowfall_f_8',
'total_precipitation_f_8',
'target_normalized_9',
'temperature_f_9',
'cloudcover_high_f_9',
'cloudcover_low_f_9',
'cloudcover_mid_f_9',
'cloudcover_total_f_9',
'direct_solar_radiation_f_9',
'snowfall_f_9',
'total_precipitation_f_9',
'target_normalized_10',
'temperature_f_10',
'cloudcover_high_f_10',
'cloudcover_low_f_10',
'cloudcover_mid_f_10',
'cloudcover_total_f_10',
'direct_solar_radiation_f_10',
'snowfall_f_10',
'total_precipitation_f_10',
'target_normalized_11',
'temperature_f_11',
'cloudcover_high_f_11',
'cloudcover_low_f_11',
'cloudcover_mid_f_11',
'cloudcover_total_f_11',
'direct_solar_radiation_f_11',
'snowfall_f_11',
'total_precipitation_f_11',
'target_normalized_12',
'temperature_f_12',
'cloudcover_high_f_12',
'cloudcover_low_f_12',
'cloudcover_mid_f_12',
'cloudcover_total_f_12',
'direct_solar_radiation_f_12',
'snowfall_f_12',
'total_precipitation_f_12',
'target_normalized_13',
'temperature_f_13',
'cloudcover_high_f_13',
'cloudcover_low_f_13',
'cloudcover_mid_f_13',
'cloudcover_total_f_13',
'direct_solar_radiation_f_13',
'snowfall_f_13',
'total_precipitation_f_13',
'target_normalized_14',
'temperature_f_14',
'cloudcover_high_f_14',
'cloudcover_low_f_14',
'cloudcover_mid_f_14',
'cloudcover_total_f_14',
'direct_solar_radiation_f_14',
'snowfall_f_14',
'total_precipitation_f_14',
'target_normalized_15',
'temperature_f_15',
'cloudcover_high_f_15',
'cloudcover_low_f_15',
'cloudcover_mid_f_15',
'cloudcover_total_f_15',
'direct_solar_radiation_f_15',
'snowfall_f_15',
'total_precipitation_f_15',
'target_normalized_16',
'temperature_f_16',
'cloudcover_high_f_16',
'cloudcover_low_f_16',
'cloudcover_mid_f_16',
'cloudcover_total_f_16',
'direct_solar_radiation_f_16',
'snowfall_f_16',
'total_precipitation_f_16',
'target_normalized_17',
'temperature_f_17',
'cloudcover_high_f_17',
'cloudcover_low_f_17',
'cloudcover_mid_f_17',
'cloudcover_total_f_17',
'direct_solar_radiation_f_17',
'snowfall_f_17',
'total_precipitation_f_17',
'target_normalized_18',
'temperature_f_18',
'cloudcover_high_f_18',
'cloudcover_low_f_18',
'cloudcover_mid_f_18',
'cloudcover_total_f_18',
'direct_solar_radiation_f_18',
'snowfall_f_18',
'total_precipitation_f_18',
'target_normalized_19',
'temperature_f_19',
'cloudcover_high_f_19',
'cloudcover_low_f_19',
'cloudcover_mid_f_19',
'cloudcover_total_f_19',
'direct_solar_radiation_f_19',
'snowfall_f_19',
'total_precipitation_f_19',
'target_normalized_20',
'temperature_f_20',
'cloudcover_high_f_20',
'cloudcover_low_f_20',
'cloudcover_mid_f_20',
'cloudcover_total_f_20',
'direct_solar_radiation_f_20',
'snowfall_f_20',
'total_precipitation_f_20',
'target_normalized_21',
'temperature_f_21',
'cloudcover_high_f_21',
'cloudcover_low_f_21',
'cloudcover_mid_f_21',
'cloudcover_total_f_21',
'direct_solar_radiation_f_21',
'snowfall_f_21',
'total_precipitation_f_21',
'target_normalized_22',
'temperature_f_22',
'cloudcover_high_f_22',
'cloudcover_low_f_22',
'cloudcover_mid_f_22',
'cloudcover_total_f_22',
'direct_solar_radiation_f_22',
'snowfall_f_22',
'total_precipitation_f_22',
'target_normalized_23',
'temperature_f_23',
'cloudcover_high_f_23',
'cloudcover_low_f_23',
'cloudcover_mid_f_23',
'cloudcover_total_f_23',
'direct_solar_radiation_f_23',
'snowfall_f_23',
'total_precipitation_f_23',
'target_normalized_24',
'temperature_f_24',
'cloudcover_high_f_24',
'cloudcover_low_f_24',
'cloudcover_mid_f_24',
'cloudcover_total_f_24',
'direct_solar_radiation_f_24',
'snowfall_f_24',
'total_precipitation_f_24',
'es_festive_24']