# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# --- Step 1: Generate synthetic dataset ---
# Seed NumPy's global RNG so the synthetic data (and everything derived from
# it) is reproducible from run to run.
np.random.seed(42)

# 200 rows of independently drawn features. The draws consume the seeded
# random stream in the order the columns are listed, so reordering the
# columns changes the generated values.
data = pd.DataFrame({
    'day': range(1, 201),
    'temperature': np.random.uniform(15, 40, 200),
    'humidity': np.random.uniform(30, 90, 200),
    'population_density': np.random.uniform(1000, 5000, 200),
    'mobility_index': np.random.uniform(0.2, 1.0, 200),
    'previous_cases': np.random.randint(10, 500, 200)
})

# Target: a linear combination of the features plus Gaussian noise (sd 20).
# With a small previous_cases the noise can push the raw value below zero,
# and a case count cannot be negative, so clip at 0 before the int cast.
data['new_cases'] = (
    0.5 * data['previous_cases'] +
    0.1 * data['temperature'] -
    0.05 * data['humidity'] +
    0.0005 * data['population_density'] +
    10 * data['mobility_index'] +
    np.random.normal(0, 20, 200)
).clip(lower=0).astype(int)

print("Sample Data:\n", data.head())

# --- Step 2: Split data ---
# Predictors are every generated feature except 'day'; the target is the
# 'new_cases' column.
feature_columns = [
    'temperature',
    'humidity',
    'population_density',
    'mobility_index',
    'previous_cases',
]
X = data[feature_columns]
y = data['new_cases']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Step 3: Train model ---
# Build a 100-tree random forest with a fixed seed and fit it on the training
# split in one expression (fit() returns the estimator itself).
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# --- Step 4: Evaluate ---
# Predict on the held-out split, then report mean absolute error and R^2
# against the true targets.
y_pred = model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
print(
    f"\nModel Evaluation:\nMAE: {mae:.2f}\nR2 Score: {r2:.2f}"
)

# --- Step 5: Visualize ---
# Scatter of actual vs. predicted targets for the test split, drawn on an
# explicitly created 8x6 inch figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(x=y_test, y=y_pred, ax=ax)
ax.set(
    xlabel="Actual New Cases",
    ylabel="Predicted New Cases",
    title="Disease Spread Prediction",
)
plt.show()

# --- Step 6: Predict future cases ---
# A single hypothetical observation. The columns use the same names, in the
# same order, as the feature frame the model was fitted on.
sample_input = pd.DataFrame({
    'temperature': [32],
    'humidity': [45],
    'population_density': [3000],
    'mobility_index': [0.8],
    'previous_cases': [220]
})
future_pred = model.predict(sample_input)

# Round to the nearest whole case before printing: int() on its own truncates,
# so a prediction of e.g. 123.9 would be reported as 123.
predicted_cases = int(round(float(future_pred[0])))
print(f"\nPredicted new cases for tomorrow: {predicted_cases}")