In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping

SEED = 1
np.random.seed(SEED)
tf.random.set_seed(SEED)

plt.rcParams["figure.figsize"] = (12, 4)

file = "merged_5min_spline_interpolated.csv"

TEST_DAYS = 1                    
SAMPLES_PER_DAY = 288            
TEST_SIZE = TEST_DAYS * SAMPLES_PER_DAY

WINDOW_NAME = "1hour"
WINDOW_SAMPLES = 12  
SEQUENCE_LENGTH = 12  

print(f"LSTM Model{WINDOW_NAME.upper()} Window Analysis")
print(f"Sequence length {SEQUENCE_LENGTH} timesteps")

LSTM Model - 1-HOUR Window Analysis
Sequence length: 12 timesteps


In [3]:
full_data = pd.read_csv(file)
full_data["time"] = pd.to_datetime(full_data["time"], utc=True, errors="coerce")
full_data = full_data.dropna(subset=["time"]).sort_values("time").reset_index(drop=True)
full_data = full_data.dropna(subset=["supply_air", "outdoor_temp", "satellite_value", "indoor_temp"]).reset_index(drop=True)

# spliyt
train_data = full_data.iloc[:-TEST_SIZE].reset_index(drop=True)
test_data = full_data.iloc[-TEST_SIZE:].reset_index(drop=True)

print(f"training samples{len(train_data)}")
print(f"testing samples{len(test_data)}")

training samples15801
testing samples288


In [4]:
def create_sequences(data, seq_length):
    features = ["supply_air", "outdoor_temp", "satellite_value", "indoor_temp"] 
    X_sequences = []
    y_targets = []

    for i in range(seq_length, len(data)):
        X_sequences.append(data[features].iloc[i-seq_length:i].values)
        y_targets.append(data["indoor_temp"].iloc[i])
    return np.array(X_sequences, dtype=np.float32), np.array(y_targets, dtype=np.float32)


X_train_seq, y_train = create_sequences(train_data, SEQUENCE_LENGTH)
X_test_seq, y_test = create_sequences(test_data, SEQUENCE_LENGTH)

print(f"train sequences shape{X_train_seq.shape}") 
print(f"test sequences shape{X_test_seq.shape}")
print(f"train targets shape{y_train.shape}")
print(f"test targets shape{y_test.shape}")

train sequences shape(15789, 12, 4)
test sequences shape(276, 12, 4)
train targets shape(15789,)
test targets shape(276,)


In [5]:

scaler_X = RobustScaler()
X_train_flat = X_train_seq.reshape(-1, X_train_seq.shape[-1])
X_test_flat = X_test_seq.reshape(-1, X_test_seq.shape[-1])

X_train_scaled_flat = scaler_X.fit_transform(X_train_flat)
X_test_scaled_flat = scaler_X.transform(X_test_flat)

X_train_scaled = X_train_scaled_flat.reshape(X_train_seq.shape)
X_test_scaled = X_test_scaled_flat.reshape(X_test_seq.shape)

print(f"\nScaled train sequences shape: {X_train_scaled.shape}")
print(f"Scaled test sequences shape: {X_test_scaled.shape}")



Scaled train sequences shape: (15789, 12, 4)
Scaled test sequences shape: (276, 12, 4)


In [6]:
#model
model = Sequential([
    Input(shape=(SEQUENCE_LENGTH, 4)),  # (sequence_length, num_features)
    LSTM(64, activation='tanh', return_sequences=True),
    LSTM(32, activation='tanh', return_sequences=False),
    Dense(16, activation='relu'),
    Dense(1)#singleoutput(indoor)
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),loss='mse',metrics=['mae'])

print(model.summary())
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

print("\ntrainig lstm mode")
history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

None

trainig lstm mode
Epoch 1/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 31ms/step - loss: 199.5770 - mae: 11.4153 - val_loss: 4.1685 - val_mae: 1.5884
Epoch 2/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - loss: 5.4397 - mae: 1.6942 - val_loss: 3.1297 - val_mae: 1.4128
Epoch 3/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - loss: 2.2689 - mae: 1.0240 - val_loss: 0.6598 - val_mae: 0.5831
Epoch 4/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 31ms/step - loss: 0.5340 - mae: 0.4985 - val_loss: 0.4317 - val_mae: 0.4751
Epoch 5/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 32ms/step - loss: 0.4086 - mae: 0.4301 - val_loss: 0.3248 - val_mae: 0.3785
Epoch 6/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 32ms/step - loss: 0.3690 - mae: 0.3955 - val_loss: 0.3038 - val_mae: 0.3596
Epoch 7/10
[1m395/395[0m [32m━━━━━━━━━━━━━━━━━

In [7]:
#train train train ufff killlllllllllll
y_pred_train = model.predict(X_train_scaled, verbose=0).flatten()
y_pred_test = model.predict(X_test_scaled, verbose=0).flatten()
print("\n print to check")

lstm_mae_full = mean_absolute_error(y_test, y_pred_test)
lstm_rmse_full = np.sqrt(mean_squared_error(y_test, y_pred_test))
lstm_r2_full = r2_score(y_test, y_pred_test)
lstm_train_mae = mean_absolute_error(y_train, y_pred_train)

print("\n lstm performance test set:")
print(f"Train MAE{lstm_train_mae:.4f}°C")
print(f"Test MAE {lstm_mae_full:.4f}°C")
print(f"Test RMSE{lstm_rmse_full:.4f}°C")
print(f"Test R²{lstm_r2_full:.4f}")


window_size = min(WINDOW_SAMPLES, len(y_test))
lstm_mae_w = mean_absolute_error(y_test[:window_size], y_pred_test[:window_size])
lstm_rmse_w = np.sqrt(mean_squared_error(y_test[:window_size], y_pred_test[:window_size]))
lstm_r2_w = r2_score(y_test[:window_size], y_pred_test[:window_size])

print(f"\n{'Model':<18} {'MAE (°C)':<12} {'RMSE (°C)':<12} {'R²':<8}")
print(f"{'LSTM':<18} {lstm_mae_w:<12.4f} {lstm_rmse_w:<12.4f} {lstm_r2_w:<8.4f}")



 print to check

 lstm performance test set:
Train MAE0.3537°C
Test MAE 0.2832°C
Test RMSE0.5200°C
Test R²0.7529

Model              MAE (°C)     RMSE (°C)    R²      
LSTM               0.1773       0.2487       -0.1359 


In [None]:
time_test = test_data["time"].iloc[SEQUENCE_LENGTH:SEQUENCE_LENGTH+window_size].values
y_true_w = y_test[:window_size]
lstm_pred_w = y_pred_test[:window_size]

plt.figure(figsize=(16, 7))
plt.plot(time_test, y_true_w, "o-", label="Actual", color="black", linewidth=2.5, markersize=5)
plt.plot(time_test, lstm_pred_w, "s-", label=f"LSTM (MAE {lstm_mae_w:.3f} °C)", color="orange", alpha=0.7, linewidth=1.5, markersize=4)
plt.xlabel("Time", fontsize=12)
plt.ylabel("Indoor temperature (°C)", fontsize=12)
plt.title(f"{WINDOW_NAME.upper()} LSTM Prediction", fontsize=14)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()
