In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# 1. SIMULATE VITAL SIGNS DATA
# One year of synthetic daily readings: a baseline of 75, Gaussian noise
# (standard deviation 5) and a single sine cycle of amplitude 10 over the year.
np.random.seed(42)
days = 365
dates = pd.date_range('2024-01-01', periods=days, freq='D')
heart_rate = (
    75 + np.random.normal(0, 5, size=days) +
    10 * np.sin(np.arange(days) * (2 * np.pi / 365))
)

# Build the frame already indexed by date rather than mutating it in place,
# so re-running the cell always starts from a fresh object.
data = pd.DataFrame({'Date': dates, 'HeartRate': heart_rate}).set_index('Date')

# 2. PLOT: Time Series
plt.figure(figsize=(12,4))
plt.plot(data.index, data['HeartRate'], color='green')
plt.title('Daily Heart Rate Over Time')
plt.xlabel('Date')
plt.ylabel('Heart Rate')
plt.tight_layout()
plt.show()

# 3. NORMALIZE
# Fit the scaler on the leading 80% of rows only, then transform the whole
# series. Fitting on every row would let the min/max of the held-out period
# leak into the training features. The 0.8 mirrors the chronological split
# applied to the windowed samples further down; int(0.8 * n_rows) never exceeds
# the number of rows those training windows cover, so no test target is seen.
# Values from the held-out period may therefore fall slightly outside [0, 1].
n_fit_rows = int(len(data) * 0.8)
scaler = MinMaxScaler()
scaler.fit(data[['HeartRate']].iloc[:n_fit_rows])
scaled = scaler.transform(data[['HeartRate']])

# 4. PREPARE SEQUENCES
def create_dataset(arr, time_step=7):
    """Turn a series into sliding-window samples for one-step-ahead forecasting.

    Sample ``i`` pairs the ``time_step`` consecutive values starting at
    position ``i`` with the value that immediately follows that window.

    Parameters
    ----------
    arr : array-like
        Either a 2-D array of shape ``(n, k)``, of which only column 0 is
        used, or a 1-D sequence of ``n`` values.
    time_step : int, default 7
        Number of past values in each input window. Must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n - time_step, time_step)``: the input windows.
    y : numpy.ndarray
        Shape ``(n - time_step,)``: the value following each window.
        When ``n <= time_step`` the result is empty but keeps these shapes
        (``(0, time_step)`` and ``(0,)``), so callers that read
        ``X.shape[1]`` keep working.

    Raises
    ------
    ValueError
        If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer")

    series = np.asarray(arr)
    if series.ndim == 1:
        # Accept a plain 1-D series as a single-column table.
        series = series.reshape(-1, 1)
    values = series[:, 0]

    n_samples = len(values) - time_step
    if n_samples <= 0:
        # Too short for even one window: return empty arrays with stable shapes.
        return (
            np.empty((0, time_step), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    X = np.stack([values[start:start + time_step] for start in range(n_samples)])
    # Copy so the targets do not alias the caller's array.
    y = values[time_step:].copy()
    return X, y

# Window the scaled series: 7 past days predict the next day.
time_step = 7
X_all, y_all = create_dataset(scaled, time_step)
# The first `time_step` days have no complete look-back window, so each sample
# is stamped with the date of the value it predicts.
sample_dates = data.index[time_step:]
assert len(sample_dates) == len(X_all), "every sample needs exactly one target date"

# Add a trailing feature axis to match the LSTM's input_shape=(time_step, 1).
X_all = X_all.reshape(X_all.shape[0], X_all.shape[1], 1)

# 5. TRAIN/TEST SPLIT
# Chronological 80/20 split: the model is evaluated on the most recent samples.
split = int(len(X_all) * 0.8)
X_train, X_test = X_all[:split], X_all[split:]
y_train, y_test = y_all[:split], y_all[split:]

train_dates = sample_dates[:split]
test_dates  = sample_dates[split:]

# 6. BUILD & TRAIN LSTM
# np.random.seed alone does not control Keras weight initialisation or batch
# shuffling; seed Python, NumPy and the backend together so that a fresh
# Restart & Run All does not start from different random weights each time.
set_random_seed(42)
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(time_step,1)),
    LSTM(50),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=2)

# 7. PREDICT & INVERSE TRANSFORM
# Map both predictions and targets back to the original heart-rate scale so the
# plot and the metric are expressed in the units of the raw data.
y_pred = model.predict(X_test)
y_pred_rescaled = scaler.inverse_transform(y_pred)
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1,1))

# 8. PLOT: Forecast vs Actual
plt.figure(figsize=(12,4))
plt.plot(test_dates, y_test_rescaled,  label='Actual Heart Rate',  color='blue')
plt.plot(test_dates, y_pred_rescaled, label='Predicted Heart Rate', color='red', alpha=0.8)
plt.title('Forecast vs Actual Heart Rate')
plt.xlabel('Date')
plt.ylabel('Heart Rate')
plt.legend()
plt.tight_layout()
plt.show()

# 9. PRINT METRIC
mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
print(f'Test Mean Squared Error: {mse:.3f}')


In [None]:
# 1. SIMULATE ADMISSION DATA
months = 36
# A MonthEnd offset object yields the same month-end timestamps as the 'M'
# string alias, without relying on the alias that recent pandas deprecates.
dates_monthly = pd.date_range('2022-01-01', periods=months, freq=pd.offsets.MonthEnd())

# Poisson counts around 80 per month plus a 12-month sine cycle of amplitude 10.
np.random.seed(42)
emergency_admissions = np.random.poisson(lam=80, size=months) + \
    (10 * np.sin(np.arange(months) * (2 * np.pi / 12)))

data_admission = pd.DataFrame(
    {'Date': dates_monthly, 'EmergencyAdmissions': emergency_admissions}
).set_index('Date')

# 2. PLOT: Admissions Over Time
plt.figure(figsize=(12,4))
plt.plot(data_admission.index, data_admission['EmergencyAdmissions'], color='orange')
plt.title('Monthly Emergency Admissions Over Time')
plt.xlabel('Date')
plt.ylabel('Emergency Admissions')
plt.tight_layout()
plt.show()

# 3. NORMALIZE
# Decide the chronological 80/20 boundary first so the scaler can be fit on
# training rows only; fitting on the full series would leak the range of the
# held-out months into the training features.
time_step = 3
n_windows = len(data_admission) - time_step   # samples create_dataset will emit
split = int(n_windows * 0.8)
n_train_rows = split + time_step              # raw rows covered by training windows and targets

# Dedicated scaler: re-fitting the shared `scaler` here would overwrite the
# heart-rate scaling that the first cell's inverse_transform relies on.
scaler_adm = MinMaxScaler()
scaler_adm.fit(data_admission[['EmergencyAdmissions']].iloc[:n_train_rows])
scaled_admissions = scaler_adm.transform(data_admission[['EmergencyAdmissions']])

# 4. PREPARE SEQUENCES
X_adm, y_adm = create_dataset(scaled_admissions, time_step)
# Each sample is stamped with the month of the value it predicts.
sample_months = data_admission.index[time_step:]
assert len(X_adm) == n_windows == len(sample_months)

X_adm = X_adm.reshape(X_adm.shape[0], X_adm.shape[1], 1)

# 5. TRAIN/TEST SPLIT
X_train_adm, X_test_adm = X_adm[:split], X_adm[split:]
y_train_adm, y_test_adm = y_adm[:split], y_adm[split:]

train_months = sample_months[:split]
test_months = sample_months[split:]

# 6. BUILD & TRAIN MODEL
# Seed Python, NumPy and the Keras backend together; np.random.seed alone does
# not control weight initialisation or batch shuffling.
set_random_seed(42)
model_adm = Sequential([
    LSTM(30, return_sequences=True, input_shape=(time_step,1)),
    LSTM(30),
    Dense(1)
])
model_adm.compile(optimizer='adam', loss='mean_squared_error')
model_adm.fit(X_train_adm, y_train_adm, epochs=10, batch_size=16, verbose=2)

# 7. PREDICT
# Bring predictions and targets back to the original admissions scale.
y_pred_adm = model_adm.predict(X_test_adm)
y_pred_adm_rescaled = scaler_adm.inverse_transform(y_pred_adm)
y_test_adm_rescaled = scaler_adm.inverse_transform(y_test_adm.reshape(-1,1))

# 8. PLOT: Forecast vs Actual
plt.figure(figsize=(12,4))
plt.plot(test_months, y_test_adm_rescaled, label='Actual Admissions', color='blue')
plt.plot(test_months, y_pred_adm_rescaled, label='Predicted Admissions', color='red', alpha=0.8)
plt.title('Forecast vs Actual Emergency Admissions')
plt.xlabel('Date')
plt.ylabel('Admissions')
plt.legend()
plt.tight_layout()
plt.show()

# 9. METRICS
mse_adm = mean_squared_error(y_test_adm_rescaled, y_pred_adm_rescaled)
print(f'Test MSE (Admissions Forecasting): {mse_adm:.3f}')


In [None]:
# 1. SIMULATE RISK SCORE DATA
# NOTE: this cell reuses `months` and `dates_monthly` from the admissions cell,
# so that cell must have been run first.
# Gaussian scores around 50 (standard deviation 10) plus a 12-month cosine
# cycle of amplitude 5.
np.random.seed(42)
risk_scores = np.random.normal(loc=50, scale=10, size=months) + \
    (5 * np.cos(np.arange(months) * (2 * np.pi / 12)))

data_risk = pd.DataFrame({'Date': dates_monthly, 'RiskScore': risk_scores}).set_index('Date')

# 2. PLOT: Risk Scores Over Time
plt.figure(figsize=(12,4))
plt.plot(data_risk.index, data_risk['RiskScore'], color='purple')
plt.title('Monthly Health Risk Scores Over Time')
plt.xlabel('Date')
plt.ylabel('Risk Score')
plt.tight_layout()
plt.show()

# 3. NORMALIZE
# Fix the chronological 80/20 boundary first so the scaler is fit on training
# rows only; fitting on the full series would leak the range of the held-out
# months into the training features.
time_step = 3
n_windows = len(data_risk) - time_step   # samples create_dataset will emit
split = int(n_windows * 0.8)
n_train_rows = split + time_step         # raw rows covered by training windows and targets

# Dedicated scaler: re-fitting the shared `scaler` here would overwrite the
# scaling parameters that earlier cells use for their inverse_transform.
scaler_risk = MinMaxScaler()
scaler_risk.fit(data_risk[['RiskScore']].iloc[:n_train_rows])
scaled_risk = scaler_risk.transform(data_risk[['RiskScore']])

# 4. PREPARE SEQUENCES
X_risk, y_risk = create_dataset(scaled_risk, time_step)
# Each sample is stamped with the month of the value it predicts.
sample_risk_months = data_risk.index[time_step:]
assert len(X_risk) == n_windows == len(sample_risk_months)

X_risk = X_risk.reshape(X_risk.shape[0], X_risk.shape[1], 1)

# 5. TRAIN/TEST SPLIT
X_train_risk, X_test_risk = X_risk[:split], X_risk[split:]
y_train_risk, y_test_risk = y_risk[:split], y_risk[split:]

train_risk_months = sample_risk_months[:split]
test_risk_months = sample_risk_months[split:]

# 6. BUILD & TRAIN MODEL
# Seed Python, NumPy and the Keras backend together; np.random.seed alone does
# not control weight initialisation or batch shuffling.
set_random_seed(42)
model_risk = Sequential([
    LSTM(40, return_sequences=True, input_shape=(time_step,1)),
    LSTM(40),
    Dense(1)
])
model_risk.compile(optimizer='adam', loss='mean_squared_error')
model_risk.fit(X_train_risk, y_train_risk, epochs=10, batch_size=16, verbose=2)

# 7. PREDICT
# Bring predictions and targets back to the original risk-score scale.
y_pred_risk = model_risk.predict(X_test_risk)
y_pred_risk_rescaled = scaler_risk.inverse_transform(y_pred_risk)
y_test_risk_rescaled = scaler_risk.inverse_transform(y_test_risk.reshape(-1,1))

# 8. PLOT: Forecast vs Actual
plt.figure(figsize=(12,4))
plt.plot(test_risk_months, y_test_risk_rescaled, label='Actual Risk', color='blue')
plt.plot(test_risk_months, y_pred_risk_rescaled, label='Predicted Risk', color='red', alpha=0.8)
plt.title('Forecast vs Actual Health Risk')
plt.xlabel('Date')
plt.ylabel('Risk Score')
plt.legend()
plt.tight_layout()
plt.show()

# 9. METRIC
mse_risk = mean_squared_error(y_test_risk_rescaled, y_pred_risk_rescaled)
print(f'Test MSE (Risk Score Forecasting): {mse_risk:.3f}')


In [None]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# 1. SIMULATE LIFESTYLE FEATURES
np.random.seed(42)
patients = 500
features = {
    'HospitalVisitsPerYear': np.random.poisson(5, patients),
    'ExerciseFrequency': np.random.randint(0, 7, patients),
    'SmokingStatus': np.random.choice([0,1], patients), # 0: Non-smoker, 1: Smoker
    'Age': np.random.randint(20, 80, patients)
}

df_patients = pd.DataFrame(features)

# 2. SCALING
# Dedicated scaler: re-fitting the shared `scaler` on these four columns would
# overwrite the single-column scaling the forecasting cells use for their
# inverse_transform calls.
cluster_scaler = MinMaxScaler()
scaled_features = cluster_scaler.fit_transform(df_patients)

# 3. KMeans Clustering
# n_init is pinned because its default differs between scikit-learn releases;
# an explicit value keeps the segmentation the same across versions.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
patient_clusters = kmeans.fit_predict(scaled_features)
df_patients['Cluster'] = patient_clusters

# 4. PCA for 2D visualization
# The projection is used for plotting only; clustering ran on all four scaled features.
pca = PCA(n_components=2)
reduced_features = pca.fit_transform(scaled_features)
df_patients['PCA1'] = reduced_features[:,0]
df_patients['PCA2'] = reduced_features[:,1]

# 5. PLOT CLUSTERS
plt.figure(figsize=(8,6))
for cluster_id in np.unique(patient_clusters):
    subset = df_patients[df_patients['Cluster']==cluster_id]
    plt.scatter(subset['PCA1'], subset['PCA2'], label=f'Cluster {cluster_id}')
plt.title('Patient Segmentation Using KMeans')
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.legend()
plt.tight_layout()
plt.show()

# 6. HEALTH RISK VALUE (Dummy Calculation)
# Illustrative weighted sum of the raw features: more hospital visits, less
# exercise (6 - frequency), smoking and age (in decades) all raise the value.
df_patients['HealthRiskValue'] = (
    0.4 * df_patients['HospitalVisitsPerYear'] +
    0.3 * (6 - df_patients['ExerciseFrequency']) +
    0.2 * df_patients['SmokingStatus'] +
    0.1 * (df_patients['Age']/10)
)

# 7. MEAN HEALTH RISK PER CLUSTER
risk_per_cluster = df_patients.groupby('Cluster')['HealthRiskValue'].mean()
print("\nAverage Health Risk Value per Cluster:")
print(risk_per_cluster)
