In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the energy-consumption CSV, using the parsed "Timestamp" column as the index.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
df = pd.read_csv(
    '/Users/vimu/Documents/Data Science/SDS/Github/TimeSeriesAnalysis/Data/Energy_consumption.csv',
    index_col="Timestamp",
    parse_dates=True,
)

In [3]:
# Columns excluded from the frame that feeds the lagged-feature builder.
columns_to_remove = [
    'Humidity',
    'SquareFootage',
    'HVACUsage',
    'LightingUsage',
    'RenewableEnergy',
    'DayOfWeek',
    'Holiday',
]

# errors='ignore' skips any listed column that is missing instead of raising,
# which also means a misspelled name here would go unnoticed.
df_cleaned = df.drop(columns_to_remove, axis="columns", errors='ignore')

In [4]:
import numpy as np

def create_lagged_features(df, lookback=10, target_col='EnergyConsumption'):
    """Turn a time-ordered frame into sliding windows for sequence models.

    Each sample consists of the `lookback` rows preceding position ``i``
    (all columns, including the target's own past values) and the value of
    `target_col` at position ``i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows in chronological order; every column is used as a feature.
    lookback : int, default 10
        Number of past rows in each window. Must be non-negative.
    target_col : str, default 'EnergyConsumption'
        Column whose value at the step after the window is the target.

    Returns
    -------
    X : numpy.ndarray, shape (samples, lookback, n_features)
    y : numpy.ndarray, shape (samples,)
        ``samples`` is ``max(len(df) - lookback, 0)``. When the frame is too
        short to yield a window, the arrays are empty but keep these ranks,
        so ``X.shape`` can still be unpacked into three values. ``y`` has the
        dtype of `target_col`.

    Raises
    ------
    ValueError
        If `lookback` is negative.
    KeyError
        If `target_col` is not a column of `df`.
    """
    if lookback < 0:
        raise ValueError(f"lookback must be non-negative, got {lookback}")

    # Convert once up front; slicing a NumPy array per window avoids building
    # a pandas object for every sample.
    values = df.to_numpy()
    target = df[target_col].to_numpy()

    n_rows, n_features = values.shape
    n_samples = max(n_rows - lookback, 0)

    if n_samples == 0:
        # Not enough rows for a single window: return correctly shaped empties.
        return (
            np.empty((0, lookback, n_features), dtype=values.dtype),
            np.empty((0,), dtype=target.dtype),
        )

    # Window k covers rows [k, k + lookback); its target is row k + lookback.
    X = np.stack([values[start:start + lookback] for start in range(n_samples)])
    # Copy so that callers mutating y cannot write through to the frame's data.
    y = target[lookback:].copy()

    return X, y

# Build the windowed dataset from the cleaned frame with a 10-step history.
X, y = create_lagged_features(df_cleaned, lookback=10)

# Report both shapes: X is (samples, timesteps, features), y is (samples,).
print(f"X shape: {X.shape}", f"y shape: {y.shape}", sep="\n")

X shape: (990, 10, 3)
y shape: (990,)


In [6]:
# Display the windowed feature array; the repr below is abbreviated with "...".
X

array([[[25.13943344,  5.        , 75.3643734 ],
        [27.73165052,  1.        , 83.40185495],
        [28.70427686,  2.        , 78.27088774],
        ...,
        [23.18284425,  8.        , 74.13190618],
        [25.39199937,  6.        , 78.20623587],
        [22.21254942,  1.        , 77.9922137 ]],

       [[27.73165052,  1.        , 83.40185495],
        [28.70427686,  2.        , 78.27088774],
        [20.08046949,  1.        , 56.51985049],
        ...,
        [25.39199937,  6.        , 78.20623587],
        [22.21254942,  1.        , 77.9922137 ],
        [28.06481358,  2.        , 82.27443448]],

       [[28.70427686,  2.        , 78.27088774],
        [20.08046949,  1.        , 56.51985049],
        [23.09735926,  9.        , 70.81173247],
        ...,
        [22.21254942,  1.        , 77.9922137 ],
        [28.06481358,  2.        , 82.27443448],
        [23.42254626,  6.        , 73.2786695 ]],

       ...,

       [[23.90781959,  4.        , 73.62123677],
        [29

In [7]:
from sklearn.preprocessing import MinMaxScaler

# Collapse the window axis so that every timestep becomes one row for the scaler.
n_samples, timesteps, n_features = X.shape
X_reshaped = X.reshape(n_samples * timesteps, n_features)

# Separate scalers for the inputs and for the target.
# NOTE(review): both are fit on the full arrays; if a train/test split follows,
# fitting on the training portion only would avoid leaking test statistics.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Scale the flattened inputs, then restore the 3D (samples, timesteps, features) layout.
X_scaled = scaler_X.fit_transform(X_reshaped).reshape(n_samples, timesteps, n_features)

# The scaler expects 2D input, so the target is passed as a single column.
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

print(f"X_scaled shape: {X_scaled.shape}", f"y_scaled shape: {y_scaled.shape}", sep="\n")

X_scaled shape: (990, 10, 3)
y_scaled shape: (990, 1)
