In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras import layers, models, callbacks
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [18]:
# Load the World Happiness Report dataset
df = pd.read_csv("/Users/admin/Desktop/Projects/HappyLens_NN/data/happiness_data.csv")
df

Unnamed: 0,Year,Rank,Country,HappinessScore,GDP,SocialSupport,LifeExpectancy,Freedom,Generosity,Corruption
0,2024,1,Finland,7.736,1.7490,1.783000,0.8240,0.986000,0.110000,0.502000
1,2024,2,Denmark,7.521,1.8250,1.748000,0.8200,0.955000,0.150000,0.488000
2,2024,3,Iceland,7.515,1.7990,1.840000,0.8730,0.971000,0.201000,0.173000
3,2024,4,Sweden,7.345,1.7830,1.698000,0.8890,0.952000,0.170000,0.467000
4,2024,5,Netherlands,7.306,1.8220,1.667000,0.8440,0.860000,0.186000,0.344000
...,...,...,...,...,...,...,...,...,...,...
1951,2011,152,Burundi,3.678,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1952,2011,153,Sierra Leone,3.586,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1953,2011,154,Central African Republic,3.568,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1954,2011,155,Benin,3.493,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473


# === Model 5: LSTM on country sequences ===

In [20]:
features = ['Year', 'GDP', 'SocialSupport', 'LifeExpectancy', 'Freedom', 'Generosity', 'Corruption']
target = 'HappinessScore'

In [21]:
# Create LSTM sequences with a 3-year window

window_size = 3

def create_sequences(data, features, target, window_size):
    Xs, ys = [], []
    countries = data["Country"].unique()
    for country in countries:
        country_data = data[data["Country"] == country].reset_index(drop=True)
        for i in range(len(country_data) - window_size + 1):
            seq = country_data.loc[i:i+window_size-1, features].values
            target_val = country_data.loc[i+window_size-1, target]
            Xs.append(seq)
            ys.append(target_val)
    return np.array(Xs), np.array(ys)

X, y = create_sequences(df, features, target, window_size)


In [22]:
# Reshape and scale features
num_samples, seq_len, num_features = X.shape
X_2d = X.reshape(num_samples * seq_len, num_features)
# Standardize features
scaler = StandardScaler()
X_2d_scaled = scaler.fit_transform(X_2d)
X_scaled = X_2d_scaled.reshape(num_samples, seq_len, num_features)

In [23]:
# Train-validation split for sequences
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [24]:
# Build and compile the LSTM model
model5 = models.Sequential([
    layers.Input(shape=(window_size, len(features))),
    layers.LSTM(32, activation='tanh', return_sequences=False),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])

model5.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [25]:
# Train with early stopping
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

history = model5.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 28.4217 - mae: 5.2161 - val_loss: 22.6791 - val_mae: 4.6616
Epoch 2/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 18.5836 - mae: 4.1519 - val_loss: 6.8868 - val_mae: 2.3715
Epoch 3/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4.5210 - mae: 1.7657 - val_loss: 1.5166 - val_mae: 1.0229
Epoch 4/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.5067 - mae: 1.0131 - val_loss: 1.0110 - val_mae: 0.8076
Epoch 5/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0388 - mae: 0.8107 - val_loss: 0.8861 - val_mae: 0.7363
Epoch 6/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9051 - mae: 0.7293 - val_loss: 0.8391 - val_mae: 0.7110
Epoch 7/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - los

In [26]:
# Evaluate the model
train_mse5, train_mae5 = model5.evaluate(X_train, y_train, verbose=0)
val_mse5, val_mae5 = model5.evaluate(X_val, y_val, verbose=0)

print(f"Train MSE: {train_mse5:.4f}, MAE: {train_mae5:.4f}")
print(f"Validation MSE: {val_mse5:.4f}, MAE: {val_mae5:.4f}")


Train MSE: 0.7053, MAE: 0.6191
Validation MSE: 0.6675, MAE: 0.6050


In [27]:
# Predict Ukraine's Happiness Score in 2024 using last 3 years

ukraine_data = df[df["Country"] == "Ukraine"].sort_values("Year")
ukraine_seq = ukraine_data[features].iloc[-window_size:].values
ukraine_seq_scaled = scaler.transform(ukraine_seq).reshape(1, window_size, len(features))

ukraine_pred5 = model5.predict(ukraine_seq_scaled, verbose=0)[0][0]
ukraine_actual = ukraine_data[target].iloc[-1]

print(f"Ukraine 2024 Prediction (LSTM): {ukraine_pred5:.3f}")
print(f"Ukraine 2024 Actual: {ukraine_actual}")

Ukraine 2024 Prediction (LSTM): 5.697
Ukraine 2024 Actual: 4.68


In [28]:
from openai import OpenAI

In [None]:
# --- Scenario simulation for Ukraine 2024 with 3-year sequence ---

ukraine_seq = df[(df["Country"] == "Ukraine") & (df["Year"].isin([2022, 2023, 2024]))]
ukraine_seq = ukraine_seq.sort_values("Year").reset_index(drop=True)
scenario = ukraine_seq.copy()

# Apply artificial improvements
scenario.loc[scenario["Year"] == 2024, "GDP"] *= 1.10
scenario.loc[scenario["Year"] == 2024, "SocialSupport"] += 0.1
scenario.loc[scenario["Year"] == 2024, "Freedom"] += 0.1

# Prepare data for prediction
X_scenario = scenario[features].values.reshape(1, len(scenario), len(features))
X_scenario_scaled = scaler.transform(X_scenario.reshape(-1, len(features))).reshape(1, len(scenario), len(features))

# Predict new happiness score after simulated improvements
predicted_score = model5.predict(X_scenario_scaled)[0][0]
real_score = ukraine_seq.loc[ukraine_seq["Year"] == 2024, "HappinessScore"].values[0]

# Replace this with your actual OpenAI key
client = OpenAI(api_key="your-api-key") 

# === GenAI Explanation ===

def genai_explanation(gdp, support, freedom, predicted, real):
    prompt = f"""
Country: Ukraine
Year: 2024
The following socio-economic indicators were increased:
- GDP by 10% (to {gdp:.3f})
- Social Support by +0.1 (to {support:.3f})
- Freedom by +0.1 (to {freedom:.3f})

Predicted Happiness Score after intervention: {predicted:.2f}
Actual Happiness Score for 2024: {real:.2f}

Explain how these changes might influence the well-being and life satisfaction of Ukrainians in a short narrative, comparing predicted and actual values.
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a policy analyst and social researcher."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

gdp = scenario.loc[scenario["Year"] == 2024, "GDP"].values[0]
support = scenario.loc[scenario["Year"] == 2024, "SocialSupport"].values[0]
freedom = scenario.loc[scenario["Year"] == 2024, "Freedom"].values[0]

explanation = genai_explanation(gdp, support, freedom, predicted_score, real_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


In [30]:
from IPython.display import display, Markdown

display(Markdown("##  Scenario Simulation: Ukraine 2024\n"))
display(Markdown(f"**- Predicted Happiness Score (with improvements)**: `{predicted_score:.2f}`"))
display(Markdown(f"**- Actual Happiness Score (2024)**: `{real_score:.2f}`"))

error = abs(predicted_score - real_score)
delta_text = f"`{error:.2f}` points difference"
display(Markdown(f"**- Difference**: {delta_text}"))

display(Markdown("---"))
display(Markdown("##  GenAI Explanation:\n"))
display(Markdown(explanation))

##  Scenario Simulation: Ukraine 2024


**- Predicted Happiness Score (with improvements)**: `5.85`

**- Actual Happiness Score (2024)**: `4.68`

**- Difference**: `1.17` points difference

---

##  GenAI Explanation:


The rise in socio-economic indicators for Ukraine points to a generally improving situation for citizens in 2024. A 10% increase in the country's GDP suggests stronger economic performance and potential improvements in employment, wages, and overall living standards. This, in theory, would contribute towards an improved perceived quality of life and satisfaction among Ukrainians.

The enhanced Social Support value indicates potential strides in strengthening social safety nets, accessibility to public services, and interpersonal relationships within communities. This improvement in social support could have a significant impact on individual happiness, as it is known to mitigate stress and offer a sense of security and belonging.

An increase in Freedom ties directly to personal rights, civil liberties and individuals' capacity to make their own life choices. In this context, a growth in the Freedom indicator would theoretically enhance people's satisfaction in life, linked with their ability to exercise their rights and live as per their aspirations.

The predicted Happiness Score of 5.85 suggests these improvements should have considerably boosted the overall happiness and life satisfaction of Ukrainians. However, the actual Happiness Score of 4.68 reveals a significant divergence from anticipated levels of happiness.

This discrepancy might be due to factors not covered by the indicators measured, such as ongoing political tensions, socio-cultural issues, environmental concerns, or public health challenges. It might also reflect that economic improvements and higher social support do not immediately translate into a heightened sense of wellbeing. People's experiences and perceptions can be influenced by a variety of subjective components and it could take a longer time for societal advancements to be assimilated into individual levels of happiness and satisfaction. 

It is also possible that the distribution of the economic gains and social support are unequal, creating disparities across different groups within the Ukrainian population that could diminish overall happiness. Therefore, further research would be needed to understand the specific factors at play in this gap between predicted and actual happiness levels.

In [31]:
# === Simulate individual variable changes separately ===
def simulate_single_feature_change(feature, change_value, df, model, scaler, features, window_size):
    scenario = df[(df["Country"] == "Ukraine") & (df["Year"].isin([2022, 2023, 2024]))].copy()
    scenario = scenario.sort_values("Year").reset_index(drop=True)
    scenario.loc[scenario["Year"] == 2024, feature] += change_value
    X = scenario[features].values.reshape(1, window_size, len(features))
    X_scaled = scaler.transform(X.reshape(-1, len(features))).reshape(1, window_size, len(features))
    pred = model.predict(X_scaled)[0][0]
    return pred


actual_score = df[(df["Country"] == "Ukraine") & (df["Year"] == 2024)]["HappinessScore"].values[0]


gdp_score = simulate_single_feature_change("GDP", df.loc[(df["Country"] == "Ukraine") & (df["Year"] == 2024), "GDP"].values[0] * 0.10, df, model5, scaler, features, window_size)
support_score = simulate_single_feature_change("SocialSupport", 0.1, df, model5, scaler, features, window_size)
freedom_score = simulate_single_feature_change("Freedom", 0.1, df, model5, scaler, features, window_size)

# === GenAI analysis for individual changes ===
def genai_explanation_all(gdp_score, support_score, freedom_score, actual_score):
    prompt = f"""
Country: Ukraine
Year: 2024

Three separate scenario-based happiness predictions were made:
1. GDP increased by 10% → Predicted Happiness: {gdp_score:.2f}
2. Social Support increased by +0.1 → Predicted Happiness: {support_score:.2f}
3. Freedom increased by +0.1 → Predicted Happiness: {freedom_score:.2f}

Actual Happiness Score in 2024: {actual_score:.2f}

Analyze:
- Which indicator had the strongest positive effect?
- Why might the actual score still be lower than the predicted ones?
- What does this imply about the broader determinants of happiness in Ukraine during this period?
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a policy analyst and social researcher."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

explanation = genai_explanation_all(gdp_score, support_score, freedom_score, actual_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


In [32]:
from IPython.display import display, Markdown

def output(gdp_score, support_score, freedom_score, actual_score, explanation):
    md = f"""
## 🇺🇦 Scenario-Based Happiness Predictions for **Ukraine (2024)**

| Indicator Change                  | Predicted Happiness Score |
|----------------------------------|----------------------------|
|  GDP ↑ by 10%                  | **{gdp_score:.2f}**                |
|  Social Support ↑ by +0.1     | **{support_score:.2f}**            |
|  Freedom ↑ by +0.1             | **{freedom_score:.2f}**            |

### 🟡 Actual Reported Happiness Score (2024): **{actual_score:.2f}**

---

### - **GenAI Explanation**
{explanation}
"""
    display(Markdown(md))

output(gdp_score, support_score, freedom_score, actual_score, explanation)



## 🇺🇦 Scenario-Based Happiness Predictions for **Ukraine (2024)**

| Indicator Change                  | Predicted Happiness Score |
|----------------------------------|----------------------------|
|  GDP ↑ by 10%                  | **5.71**                |
|  Social Support ↑ by +0.1     | **5.75**            |
|  Freedom ↑ by +0.1             | **5.78**            |

### 🟡 Actual Reported Happiness Score (2024): **4.68**

---

### - **GenAI Explanation**
- The indicator which had the strongest positive effect on the happiness prediction was the increase in Freedom. A 0.1 increase in this category led to an estimate of 5.78, which was higher than the other two scenarios.

- Several factors might explain why the actual score was lower than the predictions. One possibility is that other key factors that shape happiness, such as the country's political climate, levels of corruption, or social inequalities, worsened during this year. It's also possible that the mechanisms through which GDP, social support, and freedom influence happiness are more complex than assumed in the predictions – perhaps these factors didn't translate into increases in happiness as smoothly or directly as assumed. Lastly, it could be that the models used to make the predictions were based on different cultural or socio-economic contexts, and thus didn't capture the specifics of Ukraine's situation perfectly.

- Despite the increases in GDP, social support, and freedom, the lower actual happiness score suggests that there might be other broader determinants of happiness in Ukraine. These could include elements such as sense of belonging and community, mental health, patterns of work and leisure, and satisfaction with government. Basically, while income, social support, and freedom are certainly important, they are not the only things that matter. This highlights the importance of a holistic, multidimensional approach to understanding and improving happiness and well-being. The government may need to address several areas concurrently to improve the happiness of Ukrainians rather than focusing solely on economic growth or individual freedoms.


In [33]:
import json

lstm_pred_baseline = float(ukraine_pred5)

lstm_pred_gdp = float(gdp_score)
lstm_pred_ss = float(support_score)
lstm_pred_f = float(freedom_score)

lstm_results = {
    'model': 'LSTM',
    'baseline': lstm_pred_baseline,
    'GDP+10%': lstm_pred_gdp,
    'SocialSupport+0.1': lstm_pred_ss,
    'Freedom+0.1': lstm_pred_f
}

with open('../model_what_if/lstm_scenarios.json', 'w') as f:
    json.dump(lstm_results, f)
