In [1]:
import pandas as pd

# Load the dataset
file_path = r"C:\Users\nithe\OneDrive\Desktop\Infosys\the_one\electricityConsumptionAndProductioction.csv"
data = pd.read_csv(file_path)

# Display the first few rows to understand the structure of the dataset
data.head(), data.info(), data.describe()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46011 entries, 0 to 46010
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   DateTime       46011 non-null  object
 1   Consumption    46011 non-null  int64 
 2   Production     46011 non-null  int64 
 3   Nuclear        46011 non-null  int64 
 4   Wind           46011 non-null  int64 
 5   Hydroelectric  46011 non-null  int64 
 6   Oil and Gas    46011 non-null  int64 
 7   Coal           46011 non-null  int64 
 8   Solar          46011 non-null  int64 
 9   Biomass        46011 non-null  int64 
dtypes: int64(9), object(1)
memory usage: 3.5+ MB


(              DateTime  Consumption  Production  Nuclear  Wind  Hydroelectric  \
 0  2019-01-01 00:00:00         6352        6527     1395    79           1383   
 1  2019-01-01 01:00:00         6116        5701     1393    96           1112   
 2  2019-01-01 02:00:00         5873        5676     1393   142           1030   
 3  2019-01-01 03:00:00         5682        5603     1397   191            972   
 4  2019-01-01 04:00:00         5557        5454     1393   159            960   
 
    Oil and Gas  Coal  Solar  Biomass  
 0         1896  1744      0       30  
 1         1429  1641      0       30  
 2         1465  1616      0       30  
 3         1455  1558      0       30  
 4         1454  1458      0       30  ,
 None,
         Consumption    Production       Nuclear          Wind  Hydroelectric  \
 count  46011.000000  46011.000000  46011.000000  46011.000000   46011.000000   
 mean    6587.616440   6518.645628   1291.177501    792.310882    1857.052444   
 std     1043.6

In [2]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Step 1: Parse `DateTime` and set it as the index
data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)

# Step 2: Check for missing values
missing_values = data.isnull().sum()

# Step 3: Scale the numeric data using MinMaxScaler
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

# Step 4: Convert scaled data back into a DataFrame
scaled_df = pd.DataFrame(scaled_data, columns=data.columns, index=data.index)

# Step 5: Prepare sequences for LSTM
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length, :])  # Input: Sequence of length `seq_length`
        y.append(data[i + seq_length, 0])   # Target: Next value of `Consumption`
    return np.array(X), np.array(y)

# Sequence length for LSTM
sequence_length = 24  # Use 24 time steps (e.g., hours) to predict the next step

# Prepare input (X) and output (y)
X, y = create_sequences(scaled_data, sequence_length)

# Split into training and testing sets (80-20 split)
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

X.shape, y.shape, X_train.shape, X_test.shape, y_train.shape, y_test.shape, missing_values


((45987, 24, 9),
 (45987,),
 (36789, 24, 9),
 (9198, 24, 9),
 (36789,),
 (9198,),
 Consumption      0
 Production       0
 Nuclear          0
 Wind             0
 Hydroelectric    0
 Oil and Gas      0
 Coal             0
 Solar            0
 Biomass          0
 dtype: int64)

In [3]:
# Step 1: Data Preparation for Univariate Time Series (using "Consumption" column)
# Extract the `Consumption` column for univariate analysis
univariate_data = scaled_df[['Consumption']].values

# Function to create sequences for univariate LSTM
def create_univariate_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])  # Input: Sequence of `seq_length`
        y.append(data[i + seq_length])   # Target: Next value in the sequence
    return np.array(X), np.array(y)

# Sequence length
seq_length = 24  # Use 24 time steps to predict the next step

# Prepare sequences
X_uni, y_uni = create_univariate_sequences(univariate_data, seq_length)

# Split into training and testing sets (80-20 split)
train_size = int(0.8 * len(X_uni))
X_train_uni, X_test_uni = X_uni[:train_size], X_uni[train_size:]
y_train_uni, y_test_uni = y_uni[:train_size], y_uni[train_size:]

X_train_uni.shape, X_test_uni.shape, y_train_uni.shape, y_test_uni.shape


((36789, 24, 1), (9198, 24, 1), (36789, 1), (9198, 1))

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Define the model
vanilla_lstm_model = Sequential([
    LSTM(50, activation='tanh', input_shape=(seq_length, 1)),
    Dense(1)
])

# Compile the model
vanilla_lstm_model.compile(optimizer='adam', loss='mse')

# Train the model
vanilla_lstm_model.fit(X_train_uni, y_train_uni, epochs=20, batch_size=64, validation_data=(X_test_uni, y_test_uni))

# Evaluate the model
vanilla_loss = vanilla_lstm_model.evaluate(X_test_uni, y_test_uni)
print(f"Vanilla LSTM Test Loss: {vanilla_loss}")


  super().__init__(**kwargs)


Epoch 1/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 13ms/step - loss: 0.0306 - val_loss: 0.0029
Epoch 2/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 0.0023 - val_loss: 0.0013
Epoch 3/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 0.0013 - val_loss: 9.9543e-04
Epoch 4/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 0.0011 - val_loss: 8.6984e-04
Epoch 5/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 9.7252e-04 - val_loss: 0.0011
Epoch 6/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 9.2888e-04 - val_loss: 7.6154e-04
Epoch 7/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 9.4696e-04 - val_loss: 8.7771e-04
Epoch 8/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 8.9785e-04 - val_loss: 7.1691

In [5]:
stacked_lstm_model = Sequential([
    LSTM(50, activation='tanh', return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(50, activation='tanh'),
    Dense(1)
])

stacked_lstm_model.compile(optimizer='adam', loss='mse')

stacked_lstm_model.fit(X_train_uni, y_train_uni, epochs=20, batch_size=64, validation_data=(X_test_uni, y_test_uni))

stacked_loss = stacked_lstm_model.evaluate(X_test_uni, y_test_uni)
print(f"Stacked LSTM Test Loss: {stacked_loss}")


Epoch 1/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 30ms/step - loss: 0.0236 - val_loss: 0.0014
Epoch 2/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - loss: 0.0013 - val_loss: 8.6426e-04
Epoch 3/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 29ms/step - loss: 0.0010 - val_loss: 0.0012
Epoch 4/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - loss: 9.2967e-04 - val_loss: 7.8699e-04
Epoch 5/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 30ms/step - loss: 9.1822e-04 - val_loss: 7.9555e-04
Epoch 6/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 29ms/step - loss: 9.2240e-04 - val_loss: 7.1711e-04
Epoch 7/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 26ms/step - loss: 8.3985e-04 - val_loss: 6.7051e-04
Epoch 8/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 28ms/step - loss: 7.7685e-04 - v

In [6]:
from tensorflow.keras.layers import Bidirectional

bidirectional_lstm_model = Sequential([
    Bidirectional(LSTM(50, activation='tanh', input_shape=(seq_length, 1))),
    Dense(1)
])

bidirectional_lstm_model.compile(optimizer='adam', loss='mse')

bidirectional_lstm_model.fit(X_train_uni, y_train_uni, epochs=20, batch_size=64, validation_data=(X_test_uni, y_test_uni))

bidirectional_loss = bidirectional_lstm_model.evaluate(X_test_uni, y_test_uni)
print(f"Bidirectional LSTM Test Loss: {bidirectional_loss}")


Epoch 1/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - loss: 0.0344 - val_loss: 0.0026
Epoch 2/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 0.0025 - val_loss: 0.0015
Epoch 3/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 0.0016 - val_loss: 0.0010
Epoch 4/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 0.0011 - val_loss: 8.2230e-04
Epoch 5/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 9.2924e-04 - val_loss: 8.0355e-04
Epoch 6/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 8.4979e-04 - val_loss: 6.6355e-04
Epoch 7/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 8.1054e-04 - val_loss: 6.2462e-04
Epoch 8/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - loss: 8.0576e-04 - val_loss: 6.1713e-04
Epoc

In [7]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten

cnn_lstm_model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(seq_length, 1)),
    MaxPooling1D(pool_size=2),
    LSTM(50, activation='tanh'),
    Dense(1)
])

cnn_lstm_model.compile(optimizer='adam', loss='mse')

cnn_lstm_model.fit(X_train_uni, y_train_uni, epochs=20, batch_size=64, validation_data=(X_test_uni, y_test_uni))

cnn_lstm_loss = cnn_lstm_model.evaluate(X_test_uni, y_test_uni)
print(f"CNN-LSTM Test Loss: {cnn_lstm_loss}")


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0343 - val_loss: 0.0020
Epoch 2/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0020 - val_loss: 0.0017
Epoch 3/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0017 - val_loss: 0.0013
Epoch 4/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0014 - val_loss: 0.0011
Epoch 5/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0013 - val_loss: 0.0010
Epoch 6/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0011 - val_loss: 0.0012
Epoch 7/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0011 - val_loss: 0.0011
Epoch 8/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 9.8165e-04 - val_loss: 8.8129e-04
Epoch 9/20
[1m575/575[0m [32m━━━━━━━━━━━

In [9]:
# Reshape data for ConvLSTM: (samples, timesteps, rows, columns, channels)
X_train_clstm = X_train_uni.reshape((X_train_uni.shape[0], seq_length, 1, 1, 1))  # rows=1, channels=1
X_test_clstm = X_test_uni.reshape((X_test_uni.shape[0], seq_length, 1, 1, 1))

# ConvLSTM model
convlstm_model = Sequential([
    ConvLSTM2D(filters=64, kernel_size=(1, 1), activation='relu', input_shape=(seq_length, 1, 1, 1)),
    Flatten(),
    Dense(1)
])

# Compile the model
convlstm_model.compile(optimizer='adam', loss='mse')

# Train the model
convlstm_model.fit(X_train_clstm, y_train_uni, epochs=20, batch_size=64, validation_data=(X_test_clstm, y_test_uni))

# Evaluate the model
convlstm_loss = convlstm_model.evaluate(X_test_clstm, y_test_uni)
print(f"ConvLSTM Test Loss: {convlstm_loss}")


Epoch 1/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0320 - val_loss: 0.0049
Epoch 2/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - loss: 0.0019 - val_loss: 9.5099e-04
Epoch 3/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - loss: 0.0012 - val_loss: 8.5899e-04
Epoch 4/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - loss: 0.0011 - val_loss: 8.0436e-04
Epoch 5/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - loss: 8.6100e-04 - val_loss: 6.7868e-04
Epoch 6/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 15ms/step - loss: 8.3377e-04 - val_loss: 7.2157e-04
Epoch 7/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - loss: 7.7588e-04 - val_loss: 6.9820e-04
Epoch 8/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - loss: 7.1195e-04 - val_los