In [5]:
import pandas as pd
from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import StandardScaler

# === 1. Load the aligned health data ===
df = pd.read_csv(r"D:\Users\lenovo\Desktop\VA_Data\2021_2011_health_filled_fixed.csv") 

# === 2. Define the health level ===
health_pct_cols = ['very_good', 'good', 'fair', 'bad', 'very_bad']

# === 3. Construct the input feature: 2011 + 2021 percentage ===
X_cols = [f"{col}_2011" for col in health_pct_cols] + [f"{col}_2021" for col in health_pct_cols]
X = df[X_cols]

# Standardized features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# === 4. Model and predict for each health level 2031 ===
predictions = {'Area code': df['Area code']}

for col in health_pct_cols:
    y_train = df[f"{col}_2021"]  # Fit the trend with the data of 2021
    model = BayesianRidge()
    model.fit(X_scaled, y_train)
    y_pred, y_std = model.predict(X_scaled, return_std=True)

    predictions[f"{col}_2031_pred"] = y_pred
    predictions[f"{col}_2031_lower"] = y_pred - 2 * y_std
    predictions[f"{col}_2031_upper"] = y_pred + 2 * y_std

# === 5. Save as CSV ===
df_2031 = pd.DataFrame(predictions)
df_2031.to_csv(r"D:\Users\lenovo\Desktop\VA_Data\predicted_health_2031_bayesian.csv", index=False)

print("The Bayes prediction is complete and the result has been saved as predicted_health_2031_bayesian.csv")
df_2031

The Bayes prediction is complete and the result has been saved as predicted_health_2031_bayesian.csv


Unnamed: 0,Area code,very_good_2031_pred,very_good_2031_lower,very_good_2031_upper,good_2031_pred,good_2031_lower,good_2031_upper,fair_2031_pred,fair_2031_lower,fair_2031_upper,bad_2031_pred,bad_2031_lower,bad_2031_upper,very_bad_2031_pred,very_bad_2031_lower,very_bad_2031_upper
0,K04000001,0.483699,0.483554,0.483844,0.336360,0.336216,0.336504,0.127471,0.127327,0.127616,0.040477,0.040333,0.040621,0.011991,0.011847,0.012135
1,E92000001,0.484879,0.484734,0.485024,0.337064,0.336919,0.337208,0.126524,0.126380,0.126669,0.039799,0.039655,0.039944,0.011734,0.011590,0.011879
2,E12000001,0.445799,0.445653,0.445944,0.334567,0.334422,0.334711,0.149487,0.149342,0.149632,0.054062,0.053917,0.054207,0.016086,0.015941,0.016230
3,E06000047,0.429625,0.429479,0.429770,0.338998,0.338853,0.339143,0.156348,0.156203,0.156493,0.057784,0.057639,0.057928,0.017247,0.017102,0.017392
4,E06000005,0.452215,0.452068,0.452363,0.346233,0.346086,0.346381,0.142699,0.142552,0.142846,0.046530,0.046383,0.046677,0.012320,0.012173,0.012467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,W06000018,0.437222,0.437075,0.437370,0.320946,0.320799,0.321093,0.157117,0.156970,0.157264,0.064080,0.063933,0.064227,0.020636,0.020489,0.020783
391,W06000019,0.410514,0.410364,0.410663,0.326811,0.326663,0.326960,0.167553,0.167405,0.167702,0.071521,0.071372,0.071669,0.023601,0.023452,0.023749
392,W06000020,0.426274,0.426126,0.426422,0.339210,0.339063,0.339358,0.155403,0.155256,0.155550,0.060514,0.060366,0.060661,0.018597,0.018449,0.018744
393,W06000021,0.464363,0.464217,0.464509,0.343037,0.342892,0.343182,0.136141,0.135996,0.136286,0.043677,0.043532,0.043822,0.012780,0.012635,0.012925


In [9]:
import pandas as pd

# Load the complete prediction results of the upper and lower bounds of the belt
df_full = pd.read_csv(r"D:\Users\lenovo\Desktop\VA_Data\predicted_health_2031_bayesian.csv")

# Retain the Area code and each type of health prediction value
columns_to_keep = ['Area code'] + [col for col in df_full.columns if col.endswith('_2031_pred')]
df_clean = df_full[columns_to_keep]

# Simple file
df_clean.to_csv(r"D:\Users\lenovo\Desktop\VA_Data\predicted_health_2031_clean.csv", index=False)

df_clean

Unnamed: 0,Area code,very_good_2031_pred,good_2031_pred,fair_2031_pred,bad_2031_pred,very_bad_2031_pred
0,K04000001,0.483699,0.336360,0.127471,0.040477,0.011991
1,E92000001,0.484879,0.337064,0.126524,0.039799,0.011734
2,E12000001,0.445799,0.334567,0.149487,0.054062,0.016086
3,E06000047,0.429625,0.338998,0.156348,0.057784,0.017247
4,E06000005,0.452215,0.346233,0.142699,0.046530,0.012320
...,...,...,...,...,...,...
390,W06000018,0.437222,0.320946,0.157117,0.064080,0.020636
391,W06000019,0.410514,0.326811,0.167553,0.071521,0.023601
392,W06000020,0.426274,0.339210,0.155403,0.060514,0.018597
393,W06000021,0.464363,0.343037,0.136141,0.043677,0.012780


In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read data
df_2011_2021 = pd.read_csv(r"D:\Users\lenovo\Desktop\VA_Data\2021_2011_health_filled_fixed.csv")
df_2031 = pd.read_csv(r"D:\Users\lenovo\Desktop\VA_Data\predicted_health_2031_clean.csv")

# Merge data
df_merged = pd.merge(df_2011_2021, df_2031, on="Area code", how="left")

# Calculate the health index for 2031
df_merged["health_index_2031_pred"] = (
    2 * df_merged["very_good_2031_pred"] +
    1 * df_merged["good_2031_pred"] +
    0 * df_merged["fair_2031_pred"] +
    (-1) * df_merged["bad_2031_pred"] +
    (-2) * df_merged["very_bad_2031_pred"]
)

# Normalized health index
scaler = MinMaxScaler(feature_range=(0, 100))
df_merged["health_index_norm_2031_pred"] = scaler.fit_transform(
    df_merged[["health_index_2031_pred"]]
)

# Save
df_merged.to_csv(r"D:\Users\lenovo\Desktop\VA_Data\health_data_with_2021_2031_index.csv", index=False)

print("The merging and calculation completed, and the result has been saved ashealth_data_with_2021_2031_index.csv")


The merging and calculation completed, and the result has been saved ashealth_data_with_2021_2031_index.csv


In [9]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read data
df_health = pd.read_csv(r"D:\Users\lenovo\Desktop\VA_Data\health_data_with_2021_2031_index.csv")
df_2011_cleaned = pd.read_csv(r"D:\Users\lenovo\Desktop\VA_Data\2011_cleaned.csv")

# Standardized column names
df_health.rename(columns={"Area code": "Area_code"}, inplace=True)
df_2011_cleaned.rename(columns={"Area code": "Area_code"}, inplace=True)

# Check whether Area_code matched
unmatched_codes = set(df_2011_cleaned["Area_code"]) - set(df_health["Area_code"])
if unmatched_codes:
    print("There are unmatched AREA_codes, for example:", list(unmatched_codes)[:5])
else:
    print("Area_code complete match, ready to merge.")

# Merge geographic information
df_merged = pd.merge(
    df_health,
    df_2011_cleaned[["Area_code", "Region", "County", "District"]],
    on="Area_code",
    how="left"
)

# Clean up duplicate fields (avoid _x and _y if they have been merged once before)
for col in ["Region_y", "County_y", "District_y"]:
    if col in df_merged.columns:
        df_merged.drop(columns=[col], inplace=True)

df_merged.rename(columns={
    "Region_x": "Region",
    "County_x": "County",
    "District_x": "District",
    "Area_code": "Area code"  # Change it back for Tableau use
}, inplace=True)

df_merged = df_merged.drop(columns=["Region.1", "County.1", "District.1"])
# Save
df_merged.rename(columns={"Area_code": "Area code"}, inplace=True)
df_merged.to_csv(r"D:\Users\lenovo\Desktop\VA_Data\health_data_with_2021_2031_index.csv", index=False)

print("The merge is complete and has been saved as health_data_with_2021_2031_index.csv")


Area_code complete match, ready to merge.
The merge is complete and has been saved as health_data_with_2021_2031_index.csv
