In [49]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [50]:
# Load the dataset
df = pd.read_csv('C:/Users/a0095989/Desktop/important/Thesis/Model_Thesis/imputed_deu_data1.csv')

(261468, 15)

In [51]:
df_aachen = df[df["City"] == "Aachen"]

(3228, 15)

Visualising all the months and all the years for Aachen City

In [13]:
import plotly.express as px

# Filter data for Aachen
df_aachen = df[df["City"] == "Aachen"]

# Create the plot
fig = px.line(df_aachen, x="Year", y="AverageTemperature", 
              title="Average Temperature Trend in Aachen",
              labels={"Year": "Year", "AverageTemperature": "Average Temperature (°C)"},
              line_shape="linear")

# Show the figure
fig.show()


Visualising Only Aachen for one year 1744

In [14]:
import plotly.express as px

# Filter data for Aachen and a specific year (e.g., 2000)
year_to_view = 1744  # Change this to any desired year
df_aachen_year = df_aachen[df_aachen["Year"] == year_to_view]

# Create the plot
fig = px.line(df_aachen_year, x="Month", y="AverageTemperature", 
              title=f"Average Temperature Trend in Aachen ({year_to_view})",
              labels={"Month": "Month", "AverageTemperature": "Average Temperature (°C)"},
              line_shape="linear")

# Show the figure
fig.show()


Visualising every Decade Avg

In [38]:
# Create a new column for decades
df_aachen["Decade"] = (df_aachen["Year"] // 10) * 10

# Compute the mean temperature per decade
df_decadal = df_aachen.groupby("Decade", as_index=False)["AverageTemperature"].mean()

# Create the plot
fig = px.line(df_decadal, x="Decade", y="AverageTemperature",
              title="Average Temperature in Aachen (Decadal Averages)",
              labels={"Decade": "Decade", "AverageTemperature": "Average Temperature (°C)"},
              line_shape="linear")

# Show the figure
fig.show()


Visualising Every Year Avg

In [None]:
import plotly.express as px

# Compute the mean temperature per year (if you want a more granular view)
df_yearly = df_aachen.groupby("Year", as_index=False)["AverageTemperature"].mean()

# Create the plot
fig = px.line(df_yearly, x="Year", y="AverageTemperature",
              title="Average Temperature in Aachen (Yearly Averages)",
              labels={"Year": "Year", "AverageTemperature": "Average Temperature (°C)"},
              line_shape="linear")

# Show the figure
fig.show()


LSTM MODEL

In [32]:
import numpy as np
import pandas as pd
import plotly.express as px
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler


In [54]:
from sklearn.model_selection import train_test_split

# Filter data for Aachen
df_aachen = df[df["City"] == "Aachen"].copy()

# Aggregate yearly temperature (mean)
df_yearly = df_aachen.groupby("Year", as_index=False)["AverageTemperature"].mean()

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
df_yearly["ScaledTemp"] = scaler.fit_transform(df_yearly[["AverageTemperature"]])

# Convert data into sequences (using last 10 years to predict the next year)
def create_sequences(data, time_steps=10):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i: i + time_steps])
        y.append(data[i + time_steps])
    return np.array(X), np.array(y)

# Prepare training data
time_steps = 10  # Use last 10 years to predict the next year
X, y = create_sequences(df_yearly["ScaledTemp"].values, time_steps)

# Reshape X to fit LSTM input (samples, time steps, features)
X = X.reshape(X.shape[0], X.shape[1], 1)

# Split the data into training and validation sets (80% training, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=False)

print(f"Training data shape: {X_train.shape}, {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, {y_val.shape}")


Training data shape: (207, 10, 1), (207,)
Validation data shape: (52, 10, 1), (52,)


In [56]:
# Build the LSTM model
model = Sequential([
    LSTM(50, activation="relu", return_sequences=True, input_shape=(time_steps, 1)),
    LSTM(50, activation="relu"),
    Dense(1)
])

# Compile the model
model.compile(optimizer="adam", loss="mse")

# Train the model
model.fit(X, y, epochs=100, batch_size=8, verbose=1)


Epoch 1/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.2370
Epoch 2/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0422
Epoch 3/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0313
Epoch 4/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0355
Epoch 5/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0326
Epoch 6/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0294
Epoch 7/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0308
Epoch 8/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0339
Epoch 9/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0282
Epoch 10/100
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0334

<keras.src.callbacks.history.History at 0x1ceb3565010>

In [65]:
# Get the last 10 years to start forecasting
last_10_years = df_yearly["ScaledTemp"].values[-time_steps:].reshape(1, time_steps, 1)

# Predict next 10 years
predictions = []
for _ in range(10):
    next_pred = model.predict(last_10_years)[0, 0]  # Extract single value
    
    # Append the predicted value and reshape correctly
    next_pred_reshaped = np.array([[next_pred]]).reshape(1, 1, 1)  # Convert to 3D
    
    # Update input sequence (shift left and add new value at the end)
    last_10_years = np.concatenate((last_10_years[:, 1:, :], next_pred_reshaped), axis=1)
    
    predictions.append(next_pred)

# Convert predictions back to original temperature scale
future_years = np.arange(df_yearly["Year"].max() + 1, df_yearly["Year"].max() + 11)
predicted_temps = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Create a DataFrame for forecasted values
forecast_df = pd.DataFrame({"Year": future_years, "Predicted_Temperature": predicted_temps.flatten()})

# Plot results
fig = px.line(df_yearly, x="Year", y="AverageTemperature", title="LSTM Temperature Forecast for Aachen")
fig.add_scatter(x=forecast_df["Year"], y=forecast_df["Predicted_Temperature"], mode='lines+markers', name="LSTM Forecast")
fig.show()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step


Zahra Task