In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras import layers, models, callbacks
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [None]:
# Load the World Happiness Report dataset
df = pd.read_csv("/HappyLens-NN/data/happiness_data.csv")
df

Unnamed: 0,Year,Rank,Country,HappinessScore,GDP,SocialSupport,LifeExpectancy,Freedom,Generosity,Corruption
0,2024,1,Finland,7.736,1.7490,1.783000,0.8240,0.986000,0.110000,0.502000
1,2024,2,Denmark,7.521,1.8250,1.748000,0.8200,0.955000,0.150000,0.488000
2,2024,3,Iceland,7.515,1.7990,1.840000,0.8730,0.971000,0.201000,0.173000
3,2024,4,Sweden,7.345,1.7830,1.698000,0.8890,0.952000,0.170000,0.467000
4,2024,5,Netherlands,7.306,1.8220,1.667000,0.8440,0.860000,0.186000,0.344000
...,...,...,...,...,...,...,...,...,...,...
1951,2011,152,Burundi,3.678,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1952,2011,153,Sierra Leone,3.586,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1953,2011,154,Central African Republic,3.568,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473
1954,2011,155,Benin,3.493,0.3674,0.627745,0.3348,0.299345,0.182982,0.132473


# === Model 5: LSTM on country sequences ===

In [6]:
features = ['Year', 'GDP', 'SocialSupport', 'LifeExpectancy', 'Freedom', 'Generosity', 'Corruption']
target = 'HappinessScore'

In [7]:
# Create LSTM sequences with a 3-year window

window_size = 3

def create_sequences(data, features, target, window_size):
    Xs, ys = [], []
    countries = data["Country"].unique()
    for country in countries:
        country_data = data[data["Country"] == country].reset_index(drop=True)
        for i in range(len(country_data) - window_size + 1):
            seq = country_data.loc[i:i+window_size-1, features].values
            target_val = country_data.loc[i+window_size-1, target]
            Xs.append(seq)
            ys.append(target_val)
    return np.array(Xs), np.array(ys)

X, y = create_sequences(df, features, target, window_size)


In [8]:
# Reshape and scale features
num_samples, seq_len, num_features = X.shape
X_2d = X.reshape(num_samples * seq_len, num_features)
# Standardize features
scaler = StandardScaler()
X_2d_scaled = scaler.fit_transform(X_2d)
X_scaled = X_2d_scaled.reshape(num_samples, seq_len, num_features)

In [9]:
# Train-validation split for sequences
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [10]:
# Build and compile the LSTM model
model5 = models.Sequential([
    layers.Input(shape=(window_size, len(features))),
    layers.LSTM(32, activation='tanh', return_sequences=False),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])

model5.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [11]:
# Train with early stopping
early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

history = model5.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 28.7587 - mae: 5.2499 - val_loss: 23.9653 - val_mae: 4.7827
Epoch 2/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 20.8054 - mae: 4.4014 - val_loss: 10.9071 - val_mae: 3.1013
Epoch 3/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 7.6983 - mae: 2.4442 - val_loss: 2.0515 - val_mae: 1.0929
Epoch 4/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.7078 - mae: 1.0374 - val_loss: 1.2535 - val_mae: 0.9189
Epoch 5/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.2322 - mae: 0.9040 - val_loss: 1.0150 - val_mae: 0.8092
Epoch 6/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0601 - mae: 0.8286 - val_loss: 0.8961 - val_mae: 0.7460
Epoch 7/200
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - lo

In [12]:
# Evaluate the model
train_mse5, train_mae5 = model5.evaluate(X_train, y_train, verbose=0)
val_mse5, val_mae5 = model5.evaluate(X_val, y_val, verbose=0)

print(f"Train MSE: {train_mse5:.4f}, MAE: {train_mae5:.4f}")
print(f"Validation MSE: {val_mse5:.4f}, MAE: {val_mae5:.4f}")


Train MSE: 0.6801, MAE: 0.6007
Validation MSE: 0.6599, MAE: 0.5962


In [13]:
# Predict Ukraine's Happiness Score in 2024 using last 3 years

ukraine_data = df[df["Country"] == "Ukraine"].sort_values("Year")
ukraine_seq = ukraine_data[features].iloc[-window_size:].values
ukraine_seq_scaled = scaler.transform(ukraine_seq).reshape(1, window_size, len(features))

ukraine_pred5 = model5.predict(ukraine_seq_scaled, verbose=0)[0][0]
ukraine_actual = ukraine_data[target].iloc[-1]

print(f"Ukraine 2024 Prediction (LSTM): {ukraine_pred5:.3f}")
print(f"Ukraine 2024 Actual: {ukraine_actual}")

Ukraine 2024 Prediction (LSTM): 5.334
Ukraine 2024 Actual: 4.68


In [16]:
from openai import OpenAI

In [None]:
# --- Scenario simulation for Ukraine 2024 with 3-year sequence ---

ukraine_seq = df[(df["Country"] == "Ukraine") & (df["Year"].isin([2022, 2023, 2024]))]
ukraine_seq = ukraine_seq.sort_values("Year").reset_index(drop=True)
scenario = ukraine_seq.copy()

# Apply artificial improvements
scenario.loc[scenario["Year"] == 2024, "GDP"] *= 1.10
scenario.loc[scenario["Year"] == 2024, "SocialSupport"] += 0.1
scenario.loc[scenario["Year"] == 2024, "Freedom"] += 0.1

# Prepare data for prediction
X_scenario = scenario[features].values.reshape(1, len(scenario), len(features))
X_scenario_scaled = scaler.transform(X_scenario.reshape(-1, len(features))).reshape(1, len(scenario), len(features))

# Predict new happiness score after simulated improvements
predicted_score = model5.predict(X_scenario_scaled)[0][0]
real_score = ukraine_seq.loc[ukraine_seq["Year"] == 2024, "HappinessScore"].values[0]

# Replace this with your actual OpenAI key
# client = OpenAI(api_key="your-api-key") 

# === GenAI Explanation ===

def genai_explanation(gdp, support, freedom, predicted, real):
    prompt = f"""
Country: Ukraine
Year: 2024
The following socio-economic indicators were increased:
- GDP by 10% (to {gdp:.3f})
- Social Support by +0.1 (to {support:.3f})
- Freedom by +0.1 (to {freedom:.3f})

Predicted Happiness Score after intervention: {predicted:.2f}
Actual Happiness Score for 2024: {real:.2f}

Explain how these changes might influence the well-being and life satisfaction of Ukrainians in a short narrative, comparing predicted and actual values.
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a policy analyst and social researcher."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

gdp = scenario.loc[scenario["Year"] == 2024, "GDP"].values[0]
support = scenario.loc[scenario["Year"] == 2024, "SocialSupport"].values[0]
freedom = scenario.loc[scenario["Year"] == 2024, "Freedom"].values[0]

explanation = genai_explanation(gdp, support, freedom, predicted_score, real_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step


In [19]:
from IPython.display import display, Markdown

display(Markdown("##  Scenario Simulation: Ukraine 2024\n"))
display(Markdown(f"**- Predicted Happiness Score (with improvements)**: `{predicted_score:.2f}`"))
display(Markdown(f"**- Actual Happiness Score (2024)**: `{real_score:.2f}`"))

error = abs(predicted_score - real_score)
delta_text = f"`{error:.2f}` points difference"
display(Markdown(f"**- Difference**: {delta_text}"))

display(Markdown("---"))
display(Markdown("##  GenAI Explanation:\n"))
display(Markdown(explanation))

##  Scenario Simulation: Ukraine 2024


**- Predicted Happiness Score (with improvements)**: `5.44`

**- Actual Happiness Score (2024)**: `4.68`

**- Difference**: `0.76` points difference

---

##  GenAI Explanation:


An increase in the indicators - GDP, Social Support, and Freedom - in Ukraine in 2024 theoretically suggests an improvement in the overall well-being and life satisfaction of the Ukrainian people. The Gross Domestic Product (GDP) growth of 10% is indicative of an economic upswing and should contribute positively to the standard of living. This growth could mean more job opportunities, higher wages, and a healthier economy overall. 

The increase in Social Support is a strong determinant of well-being. This 0.1 positive shift indicates stronger public services, more robust community support networks, better access to healthcare, and possibly more efficient social safety nets. This could lead to less stress, less financial instability, and an overall higher quality of life for individuals.

Moreover, the augmentation in Freedom by 0.1 signifies more civil liberties, democratic participation, and personal rights in Ukraine. This increase in Freedom tends to lead to higher life satisfaction as it gives individuals the rights to make choices about their lives, providing a sense of control and autonomy.

The predicted Happiness Score, taking into account these positive changes in socio-economic indicators, was 5.44. However, the actual Happiness Score for 2024 was 4.68, lower than predicted. This discrepancy might suggest that while economic conditions, social support, and freedom improved, other important factors influencing happiness may have been overlooked. These could include variables such as health, perceptions of corruption, generosity, and education. Therefore, while the improved indicators are positive and helpful, they may not cover all aspects required for the overall happiness score to increase as predicted.
  
It's also important to consider that the impact of these socio-economic changes may not be immediately observed within the same year, as people's perception and experience of their quality of life and happiness might take time to adjust to these improvements. It could potentially reflect in happiness scores in subsequent years. 

Conclusively, it's crucial for policymakers and social researchers to continue assessing various factors that influence happiness and not only concentrate on economic growth, social support, and freedom, while planning interventions for improving wellbeing and life satisfaction.

In [20]:
# === Simulate individual variable changes separately ===
def simulate_single_feature_change(feature, change_value, df, model, scaler, features, window_size):
    scenario = df[(df["Country"] == "Ukraine") & (df["Year"].isin([2022, 2023, 2024]))].copy()
    scenario = scenario.sort_values("Year").reset_index(drop=True)
    scenario.loc[scenario["Year"] == 2024, feature] += change_value
    X = scenario[features].values.reshape(1, window_size, len(features))
    X_scaled = scaler.transform(X.reshape(-1, len(features))).reshape(1, window_size, len(features))
    pred = model.predict(X_scaled)[0][0]
    return pred


actual_score = df[(df["Country"] == "Ukraine") & (df["Year"] == 2024)]["HappinessScore"].values[0]


gdp_score = simulate_single_feature_change("GDP", df.loc[(df["Country"] == "Ukraine") & (df["Year"] == 2024), "GDP"].values[0] * 0.10, df, model5, scaler, features, window_size)
support_score = simulate_single_feature_change("SocialSupport", 0.1, df, model5, scaler, features, window_size)
freedom_score = simulate_single_feature_change("Freedom", 0.1, df, model5, scaler, features, window_size)

# === GenAI analysis for individual changes ===
def genai_explanation_all(gdp_score, support_score, freedom_score, actual_score):
    prompt = f"""
Country: Ukraine
Year: 2024

Three separate scenario-based happiness predictions were made:
1. GDP increased by 10% → Predicted Happiness: {gdp_score:.2f}
2. Social Support increased by +0.1 → Predicted Happiness: {support_score:.2f}
3. Freedom increased by +0.1 → Predicted Happiness: {freedom_score:.2f}

Actual Happiness Score in 2024: {actual_score:.2f}

Analyze:
- Which indicator had the strongest positive effect?
- Why might the actual score still be lower than the predicted ones?
- What does this imply about the broader determinants of happiness in Ukraine during this period?
"""
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a policy analyst and social researcher."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content

explanation = genai_explanation_all(gdp_score, support_score, freedom_score, actual_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step


In [21]:
from IPython.display import display, Markdown

def output(gdp_score, support_score, freedom_score, actual_score, explanation):
    md = f"""
## 🇺🇦 Scenario-Based Happiness Predictions for **Ukraine (2024)**

| Indicator Change                  | Predicted Happiness Score |
|----------------------------------|----------------------------|
|  GDP ↑ by 10%                  | **{gdp_score:.2f}**                |
|  Social Support ↑ by +0.1     | **{support_score:.2f}**            |
|  Freedom ↑ by +0.1             | **{freedom_score:.2f}**            |

### 🟡 Actual Reported Happiness Score (2024): **{actual_score:.2f}**

---

### - **GenAI Explanation**
{explanation}
"""
    display(Markdown(md))

output(gdp_score, support_score, freedom_score, actual_score, explanation)



## 🇺🇦 Scenario-Based Happiness Predictions for **Ukraine (2024)**

| Indicator Change                  | Predicted Happiness Score |
|----------------------------------|----------------------------|
|  GDP ↑ by 10%                  | **5.35**                |
|  Social Support ↑ by +0.1     | **5.39**            |
|  Freedom ↑ by +0.1             | **5.37**            |

### 🟡 Actual Reported Happiness Score (2024): **4.68**

---

### - **GenAI Explanation**
1. The indicator with the strongest positive effect on the predicted happiness in Ukraine in 2024 is the increase in social support, resulting in a predicted happiness score of 5.39. This is based on the given scenarios and the scores that they yielded.

2. There could be several reasons why the actual happiness score is still lower than the predicted ones:

    - The projections may have neglected other significant factors affecting happiness, such as health, education, corruption perception, job security, and income inequality. Since these areas weren't considered in the three scenarios, their potential adverse impacts might be why the actual score is lower.
    
    - The relationship between the three areas considered (GDP, social support, and freedom) and happiness may not be as linear or direct as assumed in the predictions. For instance, cultural aspects, societal values, or historical events might modulate the impact of these factors on happiness.

    - The predictions could be overestimating the impact of these factors on happiness or could be too simplistic, ignoring potential interactions between these variables.
    
    - There might have been unforeseen negative events or circumstances in the year 2024 that significantly impacted the economy, social support, or freedom, and thereby the happiness score, which were not factored into the predicted scenarios.

3. The gap between the predicted and actual happiness scores suggests that broader socio-economic and sociopolitical determinants play a crucial role in influencing happiness in Ukraine. It signifies that while improving GDP, social support, and freedom are important, these alone are not sufficient to enhance overall happiness. Policies therefore must be holistic and multifaceted, focusing also on improving areas such as health, education, income equality, and tackling corruption. It also implies that understanding cultural, societal, and historical aspects of happiness is essential in making accurate predictions. This should inform future research and policy development in Ukraine. For instance, initiatives to increase financial stability or freedom must be balanced with efforts to improve the social fabric and societal well-being as a whole.


In [22]:
import json

lstm_pred_baseline = float(ukraine_pred5)

lstm_pred_gdp = float(gdp_score)
lstm_pred_ss = float(support_score)
lstm_pred_f = float(freedom_score)

lstm_results = {
    'model': 'LSTM',
    'baseline': lstm_pred_baseline,
    'GDP+10%': lstm_pred_gdp,
    'SocialSupport+0.1': lstm_pred_ss,
    'Freedom+0.1': lstm_pred_f
}

with open('../model_what_if/lstm_scenarios.json', 'w') as f:
    json.dump(lstm_results, f)


In [None]:
# model5.save('/HappyLens-NN/dashboard/lstm_model.h5')
model5.save('/HappyLens-NN/dashboard/lstm_model.keras')