In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras import layers, models, callbacks
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [2]:
# Load the World Happiness Report dataset
df = pd.read_csv("/Users/admin/Desktop/Projects/HappyLens_NN/data/happiness_data.csv")
df

Unnamed: 0,Year,Rank,Country,HappinessScore,GDP,SocialSupport,LifeExpectancy,Freedom,Generosity,Corruption
0,2024,1,Finland,7.736,1.7490,1.783000,0.8240,0.986000,0.110000,0.502000
1,2024,2,Denmark,7.521,1.8250,1.748000,0.8200,0.955000,0.150000,0.488000
2,2024,3,Iceland,7.515,1.7990,1.840000,0.8730,0.971000,0.201000,0.173000
3,2024,4,Sweden,7.345,1.7830,1.698000,0.8890,0.952000,0.170000,0.467000
4,2024,5,Netherlands,7.306,1.8220,1.667000,0.8440,0.860000,0.186000,0.344000
...,...,...,...,...,...,...,...,...,...,...
1951,2011,152,Burundi,3.678,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1952,2011,153,Sierra Leone,3.586,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1953,2011,154,Central African Republic,3.568,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1954,2011,155,Benin,3.493,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473


# === Model 1: Basic Dense NN ===

In [3]:
# Define features and target variable
features = ['GDP', 'SocialSupport', 'LifeExpectancy', 'Freedom', 'Generosity', 'Corruption']
target = 'HappinessScore'

X = df[features]
y = df[target]

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [4]:
# Build a simple feedforward neural network
model1 = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

model1.compile(optimizer=Adam(learning_rate=0.01), loss='mse', metrics=['mae'])
# Train the model
history = model1.fit(X_train, y_train, epochs=100, batch_size=16,
                    validation_data=(X_val, y_val), verbose=1)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 7.6215 - mae: 2.0840 - val_loss: 1.1814 - val_mae: 0.8999
Epoch 2/100
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.1221 - mae: 0.8550 - val_loss: 0.8789 - val_mae: 0.7499
Epoch 3/100
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.0238 - mae: 0.8048 - val_loss: 0.8939 - val_mae: 0.7681
Epoch 4/100
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9671 - mae: 0.7922 - val_loss: 0.8390 - val_mae: 0.7408
Epoch 5/100
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.9817 - mae: 0.7819 - val_loss: 0.8716 - val_mae: 0.7539
Epoch 6/100
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.0258 - mae: 0.8071 - val_loss: 0.8877 - val_mae: 0.7239
Epoch 7/100
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.9442 - mae:

In [5]:
# Evaluate model performance
train_mse1, train_mae1 = model1.evaluate(X_train, y_train, verbose=0)
val_mse1, val_mae1 = model1.evaluate(X_val, y_val, verbose=0)

print(f"Training MSE: {train_mse1:.4f}, MAE: {train_mae1:.4f}")
print(f"Validation MSE: {val_mse1:.4f}, MAE: {val_mae1:.4f}")

Training MSE: 0.8531, MAE: 0.7002
Validation MSE: 0.8264, MAE: 0.7044


In [6]:
# Make prediction for Ukraine 2024
ukraine_2024 = df[(df['Country'] == 'Ukraine') & (df['Year'] == 2024)]
X_ukraine = ukraine_2024[features]
X_ukraine_scaled = scaler.transform(X_ukraine)

ukraine_pred1 = model1.predict(X_ukraine_scaled)[0][0]
print(f"Neural Network Predicted Happiness Score for Ukraine (2024): {ukraine_pred1:.3f}")

real = ukraine_2024['HappinessScore'].values[0]
print(f"Actual Happiness Score: {real}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Neural Network Predicted Happiness Score for Ukraine (2024): 5.243
Actual Happiness Score: 4.68


### Model 1: Basic NN
- ***Logic***: The simplest model — a basic feedforward neural network with two hidden layers. Only main features are used: GDP, SocialSupport, LifeExpectancy, Freedom, Generosity, Corruption.
- ***Goal***: Get a baseline performance without additional complexities.
- ***Result***:
Val MAE: 0.704,
Ukraine error: 0.563
- ***Conclusion***: The model behaves stably, though slightly overfits (Train MAE < Val MAE).

# === Model 2: Deeper Dense NN with Dropout and EarlyStopping ===

In [7]:
model2 = Sequential([
    Input(shape=(len(features),)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1)
])

model2.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [8]:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Train the model with early stopping
history = model2.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)
# Evaluate performance
train_mse2, train_mae2 = model2.evaluate(X_train, y_train, verbose=0)
val_mse2, val_mae2 = model2.evaluate(X_val, y_val, verbose=0)

Epoch 1/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 23.0534 - mae: 4.5232 - val_loss: 1.8077 - val_mae: 1.0971
Epoch 2/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.0602 - mae: 1.3422 - val_loss: 2.1140 - val_mae: 1.2081
Epoch 3/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.7009 - mae: 1.2643 - val_loss: 1.4328 - val_mae: 0.9886
Epoch 4/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.3667 - mae: 1.1961 - val_loss: 1.4081 - val_mae: 0.9809
Epoch 5/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.3507 - mae: 1.2003 - val_loss: 1.3961 - val_mae: 0.9789
Epoch 6/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.8948 - mae: 1.1131 - val_loss: 1.1841 - val_mae: 0.9007
Epoch 7/200
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 

In [9]:
ukraine_2024 = df[(df['Country'] == 'Ukraine') & (df['Year'] == 2024)]
ukraine_input_scaled = scaler.transform(ukraine_2024[features])
ukraine_pred2 = model2.predict(ukraine_input_scaled)[0][0]
actual_ukraine = ukraine_2024[target].values[0]

print(f"Train MSE: {train_mse2:.4f}, MAE: {train_mae2:.4f}")
print(f"Validation MSE: {val_mse2:.4f}, MAE: {val_mae2:.4f}")
print(f"Neural Network Prediction for Ukraine (2024): {ukraine_pred2:.3f}")
print(f"Actual Happiness Score for Ukraine (2024): {actual_ukraine}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
Train MSE: 0.9028, MAE: 0.7467
Validation MSE: 0.7967, MAE: 0.7019
Neural Network Prediction for Ukraine (2024): 5.670
Actual Happiness Score for Ukraine (2024): 4.68


### Model 2: Deeper NN + Dropout
- ***Logic***: The model is deeper (more layers), with added Dropout to combat overfitting and EarlyStopping.
- ***Goal***: Reduce overfitting and improve generalization.
- ***Result***:
Slight improvement in Val MAE: 0.702 (only marginal),
Ukraine prediction worse — Abs Error: 0.99
- ***Conclusion***: A deeper model doesn’t guarantee better results, possibly due to limited data or the model being too complex.
- ***Possible Issue***: The model does not take into account the context of time — it is unaware that in 2024 Ukraine is at war. Economic and social indicators might look better on paper than the actual subjective happiness.

# === Model 3: Add 'Year' as feature ===

In [10]:
features = ['Year', 'GDP', 'SocialSupport', 'LifeExpectancy', 'Freedom', 'Generosity', 'Corruption']
target = 'HappinessScore'

X = df[features]
y = df[target]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [11]:
model3 = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])

model3.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [12]:
history = model3.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100, batch_size=32, verbose=1
)

train_mse3, train_mae3 = model3.evaluate(X_train, y_train, verbose=0)
val_mse3, val_mae3 = model3.evaluate(X_val, y_val, verbose=0)

Epoch 1/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 23.8108 - mae: 4.7203 - val_loss: 12.2146 - val_mae: 3.2646
Epoch 2/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 8.8594 - mae: 2.6510 - val_loss: 2.6407 - val_mae: 1.3202
Epoch 3/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.2758 - mae: 1.1578 - val_loss: 1.6406 - val_mae: 1.0735
Epoch 4/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.8736 - mae: 1.0229 - val_loss: 1.5514 - val_mae: 1.0385
Epoch 5/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.1115 - mae: 1.0848 - val_loss: 1.4926 - val_mae: 1.0260
Epoch 6/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.8418 - mae: 1.0275 - val_loss: 1.4284 - val_mae: 1.0024
Epoch 7/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss:

In [13]:
# Прогноз для України 2024
ukraine_2024 = df[(df["Year"] == 2024) & (df["Country"] == "Ukraine")]
ukraine_features = scaler.transform(ukraine_2024[features])
ukraine_pred3 = model3.predict(ukraine_features, verbose=0)[0][0]
ukraine_actual = ukraine_2024["HappinessScore"].values[0]

# Вивід результатів
print(f"Train MSE: {train_mse3:.4f}, MAE: {train_mae3:.4f}")
print(f"Validation MSE: {val_mse3:.4f}, MAE: {val_mae3:.4f}")
print(f"Prediction for Ukraine (2024): {ukraine_pred3:.3f}")
print(f"Actual Score: {ukraine_actual}")

Train MSE: 0.8201, MAE: 0.6802
Validation MSE: 0.7907, MAE: 0.6802
Prediction for Ukraine (2024): 5.537
Actual Score: 4.68


### Model 3: + Year
- ***Logic***: Added Year as a feature to capture time dynamics (economic, social changes, etc.).
- ***Goal***: Allow the model to “see” temporal trends without LSTM.
- ***Result***:
Lowest Train MAE among NN models: 0.680,
But Ukraine prediction did not improve (Abs Error: 0.857)
- ***Conclusion***: Adding year slightly improves overall quality but does not help with country-specific accuracy. The model may have generalized more over time but lost local precision.

# === Model 4: Stratified split by year group ===

In [14]:
df['year_cat'] = pd.cut(df['Year'], bins=[2010,2015,2018,2020,2022,2024], labels=False)

split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, val_index in split.split(df, df['year_cat']):
    train_df = df.iloc[train_index]
    val_df = df.iloc[val_index]

scaler = StandardScaler()
X_train = scaler.fit_transform(train_df[features])
y_train = train_df[target].values
X_val = scaler.transform(val_df[features])
y_val = val_df[target].values

In [15]:
# Побудова моделі (зменшена глибина та кількість нейронів)
model4 = keras.Sequential([
    layers.Input(shape=(len(features),)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(1)
])

model4.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [16]:
# Навчання
history = model4.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)]
)

# Оцінка
train_mse4, train_mae4 = model4.evaluate(X_train, y_train, verbose=0)
val_mse4, val_mae4 = model4.evaluate(X_val, y_val, verbose=0)

Epoch 1/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 26.8755 - mae: 5.0696 - val_loss: 23.4823 - val_mae: 4.6887
Epoch 2/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 20.5247 - mae: 4.3375 - val_loss: 15.3133 - val_mae: 3.5952
Epoch 3/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 12.1497 - mae: 3.1204 - val_loss: 6.5184 - val_mae: 2.1617
Epoch 4/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.8996 - mae: 1.8245 - val_loss: 2.3878 - val_mae: 1.2344
Epoch 5/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.0835 - mae: 1.1377 - val_loss: 1.8670 - val_mae: 1.1059
Epoch 6/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.0239 - mae: 1.0876 - val_loss: 1.7643 - val_mae: 1.0798
Epoch 7/100
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - lo

In [17]:
ukraine_2024 = df[(df["Year"] == 2024) & (df["Country"] == "Ukraine")]
ukraine_features = scaler.transform(ukraine_2024[features])
ukraine_pred4 = model4.predict(ukraine_features, verbose=0)[0][0]
ukraine_actual = ukraine_2024[target].values[0]

print(f"Train MSE: {train_mse4:.4f}, MAE: {train_mae4:.4f}")
print(f"Validation MSE: {val_mse4:.4f}, MAE: {val_mae4:.4f}")
print(f"Prediction for Ukraine (2024): {ukraine_pred4:.3f}")
print(f"Actual Score for Ukraine (2024): {ukraine_actual}")

Train MSE: 0.8255, MAE: 0.7079
Validation MSE: 0.8438, MAE: 0.7106
Prediction for Ukraine (2024): 5.388
Actual Score for Ukraine (2024): 4.68


### Model 4: + Stratified Split
- ***Logic***: Train/test split stratified by years — important for time-series data to avoid data leakage.
- ***Goal***: Improve generalization and reduce dependence on random splitting.
- ***Result***:
Slightly worse Val MAE: 0.711,
Ukraine error 0.708 — better than models 2 and 3.
- ***Conclusion***: More stable model with correct splitting approach. Better balance between training fit and generalization.

# === Model 5: LSTM on country sequences ===

In [18]:
# Generate rolling sequences with a window of 3 years
window_size = 3

def create_sequences(data, features, target, window_size):
    Xs, ys = [], []
    countries = data["Country"].unique()
    for country in countries:
        country_data = data[data["Country"] == country].reset_index(drop=True)
        for i in range(len(country_data) - window_size + 1):
            seq = country_data.loc[i:i+window_size-1, features].values
            target_val = country_data.loc[i+window_size-1, target]
            Xs.append(seq)
            ys.append(target_val)
    return np.array(Xs), np.array(ys)

X, y = create_sequences(df, features, target, window_size)

In [19]:
# Scale the features
num_samples, seq_len, num_features = X.shape
X_2d = X.reshape(num_samples * seq_len, num_features)
scaler = StandardScaler()
X_2d_scaled = scaler.fit_transform(X_2d)
X_scaled = X_2d_scaled.reshape(num_samples, seq_len, num_features)

In [20]:
# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [21]:
# Build LSTM model
model5 = models.Sequential([
    layers.Input(shape=(window_size, len(features))),
    layers.LSTM(32, activation='tanh', return_sequences=False),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])

model5.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [22]:
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
# Train
history = model5.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 28.0537 - mae: 5.1828 - val_loss: 21.9658 - val_mae: 4.5782
Epoch 2/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 18.1061 - mae: 4.0540 - val_loss: 5.4776 - val_mae: 1.9755
Epoch 3/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 4.2789 - mae: 1.7212 - val_loss: 1.2887 - val_mae: 0.9179
Epoch 4/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.2964 - mae: 0.9239 - val_loss: 0.9818 - val_mae: 0.7917
Epoch 5/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0244 - mae: 0.7991 - val_loss: 0.8918 - val_mae: 0.7424
Epoch 6/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0029 - mae: 0.7785 - val_loss: 0.8183 - val_mae: 0.7094
Epoch 7/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - los

In [23]:
# Evaluate
train_mse5, train_mae5 = model5.evaluate(X_train, y_train, verbose=0)
val_mse5, val_mae5 = model5.evaluate(X_val, y_val, verbose=0)

print(f"Train MSE: {train_mse5:.4f}, MAE: {train_mae5:.4f}")
print(f"Validation MSE: {val_mse5:.4f}, MAE: {val_mae5:.4f}")

Train MSE: 0.6918, MAE: 0.6092
Validation MSE: 0.6544, MAE: 0.5877


In [24]:
ukraine_data = df[df["Country"] == "Ukraine"].sort_values("Year")
ukraine_seq = ukraine_data[features].iloc[-window_size:].values
ukraine_seq_scaled = scaler.transform(ukraine_seq).reshape(1, window_size, len(features))

ukraine_pred5 = model5.predict(ukraine_seq_scaled, verbose=0)[0][0]
ukraine_actual = ukraine_data[target].iloc[-1]

print(f"Ukraine 2024 Prediction (LSTM): {ukraine_pred5:.3f}")
print(f"Ukraine 2024 Actual: {ukraine_actual}")

Ukraine 2024 Prediction (LSTM): 5.435
Ukraine 2024 Actual: 4.68


### Model 5: LSTM
- ***Logic***: Shift to a sequential model (LSTM) that accounts for temporal dynamics of countries over years. Each sample is a sequence of indicators over 3 years.
- ***Goal***: Capture the time structure of the data, not just “year as a feature” but historical changes over time.
- ***Result***:
Best overall performance: Val MAE = 0.588, Train MAE = 0.609,
Ukraine 2024 Abs Error = 0.755 — not the best, but decent.
- ***Conclusion***: LSTM generalizes best across all countries (lowest MAE) but is less adapted to unusual or abnormal situations (like the war in Ukraine), where historical data may not provide precise predictions.

# === Result ===

In [25]:
results = {
    "Model": ["Model 1: Basic NN", "Model 2: Deeper NN + Dropout", "Model 3: + Year", 
              "Model 4: + Stratified Split", "Model 5: LSTM"],
    "Train MSE": [train_mse1, train_mse2, train_mse3, train_mse4, train_mse5],
    "Train MAE": [train_mae1, train_mae2, train_mae3, train_mae4, train_mae5],
    "Val MSE": [val_mse1, val_mse2, val_mse3, val_mse4, val_mse5],
    "Val MAE": [val_mae1, val_mae2, val_mae3, val_mae4, val_mae5],
    "Ukraine 2024 Prediction": [ukraine_pred1, ukraine_pred2, ukraine_pred3, ukraine_pred4, ukraine_pred5],
    "Ukraine 2024 Actual": [real, actual_ukraine, ukraine_actual, ukraine_actual, ukraine_actual],
    "Abs Error (Ukraine 2024)": [
        abs(ukraine_pred1 - real),
        abs(ukraine_pred2 - actual_ukraine),
        abs(ukraine_pred3 - ukraine_actual),
        abs(ukraine_pred4 - ukraine_actual),
        abs(ukraine_pred5 - ukraine_actual)
    ]
}


results_df = pd.DataFrame(results)
results_df = results_df.round(3)
results_df

Unnamed: 0,Model,Train MSE,Train MAE,Val MSE,Val MAE,Ukraine 2024 Prediction,Ukraine 2024 Actual,Abs Error (Ukraine 2024)
0,Model 1: Basic NN,0.853,0.7,0.826,0.704,5.243,4.68,0.563
1,Model 2: Deeper NN + Dropout,0.903,0.747,0.797,0.702,5.67,4.68,0.99
2,Model 3: + Year,0.82,0.68,0.791,0.68,5.537,4.68,0.857
3,Model 4: + Stratified Split,0.826,0.708,0.844,0.711,5.388,4.68,0.708
4,Model 5: LSTM,0.692,0.609,0.654,0.588,5.435,4.68,0.755


**Model 5**, based on the Long Short-Term Memory (LSTM) architecture, was chosen because it effectively captures the temporal dynamics and sequential patterns in the World Happiness dataset across multiple years. Unlike traditional feedforward networks that treat each year as an independent sample, LSTM leverages historical data over a 3-year window for each country, allowing the model to learn how changes in features evolve over time and influence the happiness score.

This temporal awareness leads to the best overall validation performance, reflected in the lowest validation mean absolute error (**MAE = 0.588**) and mean squared error (MSE), indicating superior generalization ability across countries and years.

Although the Ukraine 2024 prediction error is not the absolute lowest among all models, the LSTM’s strength in modeling trends and sequences makes it the most robust and promising model for forecasting happiness, especially when working with multi-year panel data.

Therefore, Model 5 is preferred for its ability to incorporate the sequential structure of the data, improving predictive accuracy and offering a more nuanced understanding of happiness trends over time.