In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Step 1: Build a reproducible synthetic housing table.
# The three draws below consume the global legacy NumPy RNG in a fixed order
# (sizes -> bedroom noise -> price noise); reordering them changes the data.
np.random.seed(42)
n_samples = 250

# Floor area: integers drawn from [800, 4000).
sizes = np.random.randint(low=800, high=4000, size=n_samples)

# Bedrooms: roughly one per 600 units of size plus unit-variance noise,
# rounded to a whole number and limited to the range 1..6.
bedroom_noise = np.random.normal(loc=0, scale=1, size=n_samples)
bedrooms_raw = np.round(sizes / 600 + bedroom_noise)
bedrooms = bedrooms_raw.clip(1, 6).astype(int)

# Price: linear in size and bedrooms, plus Gaussian noise (std 20000).
price_noise = np.random.normal(loc=0, scale=20000, size=n_samples)
prices = sizes * 200 + bedrooms * 10000 + price_noise

# Assemble the frame; prices are stored rounded to two decimals.
df = pd.DataFrame(
    {
        'Size': sizes,
        'Bedrooms': bedrooms,
        'Price': np.round(prices, 2),
    }
)

# Step 2: Prepare features and target
# X holds the two predictor columns; y is the price to be predicted.
X = df[['Size', 'Bedrooms']]
y = df['Price']

# Step 3: Train-test split
# Hold out 20% of the rows for evaluation; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Build and train the model
# Seed the estimator explicitly. With random_state=None scikit-learn falls back
# to the global NumPy RNG, so reproducibility would silently depend on whatever
# state np.random happens to be in when fit() is called.
model = HistGradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)

# Step 5: Evaluate the model
# Metrics are computed on the held-out rows only.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Model Evaluation:\nMean Absolute Error: ${mae:,.2f}\nR² Score: {r2:.4f}")

# Step 6: Score every row of the dataset with the fitted model.
# X covers all rows, so this column mixes predictions on rows the model was
# trained on with predictions on the held-out rows.
all_predictions = model.predict(X)
df['Predicted_Price'] = all_predictions

# Step 7: Print a heading followed by the table (pandas truncates long frames).
print("\nPredicted Results for All Entries:", df, sep="\n")

# Step 8: Write the table, including the new prediction column, to CSV
# in the current working directory, without the index column.
output_file = "predicted_housing_prices.csv"
df.to_csv(path_or_buf=output_file, index=False)

# Step 9 (optional): when running in Google Colab, trigger a browser download.
# Outside Colab the import fails and we simply report where the file was saved.
try:
    from google.colab import files
    files.download(output_file)
except ImportError:
    print(f"\nFile saved as: {output_file} (run in Colab to enable download)")

Model Evaluation:
Mean Absolute Error: $20,939.34
R² Score: 0.9815

Predicted Results for All Entries:
     Size  Bedrooms      Price  Predicted_Price
0    3974         6  869639.26    834597.592652
1    1660         3  391533.17    363637.316121
2    2094         3  462804.86    450526.908082
3    1930         4  423354.01    424296.922596
4    1895         5  427527.78    435352.844454
..    ...       ...        ...              ...
245  1093         1  244221.47    224826.855082
246  3127         3  706173.82    677617.584658
247  3731         6  816679.05    834597.592652
248   997         2  207638.26    224826.855082
249  3829         6  823131.98    834597.592652

[250 rows x 4 columns]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>