In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Step 1: Load the Dataset
file_path = r"C:\Users\nithe\OneDrive\Desktop\Infosys\the_one\electricityConsumptionAndProductioction.csv"
data = pd.read_csv(file_path)

data['DateTime'] = pd.to_datetime(data['DateTime'])
data.set_index('DateTime', inplace=True)

# basic information about the dataset
print("Dataset Head:")
print(data.head())
print("\nDataset Info:")
print(data.info())

Dataset Head:
                     Consumption  Production  Nuclear  Wind  Hydroelectric  \
DateTime                                                                     
2019-01-01 00:00:00         6352        6527     1395    79           1383   
2019-01-01 01:00:00         6116        5701     1393    96           1112   
2019-01-01 02:00:00         5873        5676     1393   142           1030   
2019-01-01 03:00:00         5682        5603     1397   191            972   
2019-01-01 04:00:00         5557        5454     1393   159            960   

                     Oil and Gas  Coal  Solar  Biomass  
DateTime                                                
2019-01-01 00:00:00         1896  1744      0       30  
2019-01-01 01:00:00         1429  1641      0       30  
2019-01-01 02:00:00         1465  1616      0       30  
2019-01-01 03:00:00         1455  1558      0       30  
2019-01-01 04:00:00         1454  1458      0       30  

Dataset Info:
<class 'pandas.core.frame

In [2]:
# Step 2: Scale the Data
# Use MinMaxScaler to normalize all columns between 0 and 1
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
scaled_data = pd.DataFrame(scaled_data, columns=data.columns, index=data.index)

In [None]:
# Step 3: Prepare Sequences for LSTM
# Function to create input sequences and corresponding targets
def create_sequences(data, target_column, seq_length=24):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i + seq_length].drop(columns=[target_column]).values
        target = data.iloc[i + seq_length][target_column]
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

# Define sequence length
seq_length = 24 

# Specify target column
target_column = 'Consumption'

# Create sequences
X, y = create_sequences(scaled_data, target_column, seq_length)

# Reshape X for LSTM: [samples, time steps, features]
print(f"Shape of X: {X.shape}, Shape of y: {y.shape}")

Shape of X: (45987, 24, 8), Shape of y: (45987,)


In [4]:
# Step 4: Build the LSTM Model
model = Sequential([
    LSTM(64, activation='relu', return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.2),
    LSTM(32, activation='relu'),
    Dropout(0.2),
    Dense(1)  # Single output: Consumption
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

  super().__init__(**kwargs)


In [None]:
# Step 5: Train the Model
history = model.fit(X, y, epochs=20, batch_size=64, validation_split=0.2, verbose=1)

Epoch 1/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 27ms/step - loss: 0.0319 - val_loss: 0.0116
Epoch 2/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 29ms/step - loss: 0.0116 - val_loss: 0.0100
Epoch 3/20
[1m575/575[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 25ms/step - loss: 0.0093 - val_loss: 0.0091
Epoch 4/20
[1m 65/575[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 24ms/step - loss: 0.0083

In [None]:
# Step 6: Save and Predict
model.save('multivariate_lstm_electricity.h5')

# Predicting (example)
predictions = model.predict(X)
print("Shape of predictions:", predictions.shape)

In [None]:
# Step 7: Visualize Training History
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Step 8: Compare Actual vs Predicted Values
actual = scaler.inverse_transform(np.concatenate((scaled_data.drop(columns=[target_column]).values, y.reshape(-1, 1)), axis=1))[:, -1]
predicted = scaler.inverse_transform(np.concatenate((scaled_data.drop(columns=[target_column]).values, predictions), axis=1))[:, -1]

plt.figure(figsize=(10, 6))
plt.plot(data.index[seq_length:], actual, label='Actual Consumption', color='blue')
plt.plot(data.index[seq_length:], predicted, label='Predicted Consumption', color='orange')
plt.title('Actual vs Predicted Electricity Consumption')
plt.xlabel('Time')
plt.ylabel('Consumption')
plt.legend()
plt.show()