In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.cluster import KMeans

# --- Data loading ---
# Path of the CSV to read; edit this to point at your own copy of the file.
file_path = 'boston_housing.csv'
df = pd.read_csv(file_path)

# --- Target / feature separation ---
# Name of the column used as the regression target; every other column
# in the frame is treated as a feature.
target_column = 'MEDV'
y = df[target_column]
X = df.drop(columns=[target_column])

# --- Hold-out split ---
# test_size=0.2 reserves 20% of the rows for evaluation; random_state fixes
# the shuffle so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Standardisation ---
# The scaler is fitted on the training rows only and then applied to both
# partitions, so no statistics from the test rows enter the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------- Regression Models -----------

def report_test_mse(label, model, X_fit, y_fit, X_eval, y_eval):
    """Fit a regressor, print its mean squared error on the evaluation set, return the predictions.

    Parameters
    ----------
    label : str
        Human-readable model name; printed as ``"<label> MSE:"``.
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in place.
    X_fit, y_fit
        Features and targets used to fit ``model``.
    X_eval, y_eval
        Features to predict on and the true targets to score against.

    Returns
    -------
    The array returned by ``model.predict(X_eval)``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    print(f"{label} MSE:", mean_squared_error(y_eval, predictions))
    return predictions


# Linear Regression (raw, unscaled features)
lin_reg = LinearRegression()
y_pred_lin = report_test_mse(
    "Linear Regression", lin_reg, X_train, y_train, X_test, y_test
)

# Polynomial Regression (degree=2): expand the raw features, then fit a
# linear model on the expanded matrix. The expansion is fitted on the
# training rows and re-applied unchanged to the test rows.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
poly_reg = LinearRegression()
y_pred_poly = report_test_mse(
    "Polynomial Regression", poly_reg, X_train_poly, y_train, X_test_poly, y_test
)

# Decision Tree Regressor (raw features; random_state fixed for repeatability)
dt_reg = DecisionTreeRegressor(random_state=42)
y_pred_dt = report_test_mse(
    "Decision Tree Regression", dt_reg, X_train, y_train, X_test, y_test
)

# Random Forest Regressor (raw features; random_state fixed for repeatability)
rf_reg = RandomForestRegressor(random_state=42)
y_pred_rf = report_test_mse(
    "Random Forest Regression", rf_reg, X_train, y_train, X_test, y_test
)

# Support Vector Regressor (fitted and evaluated on the standardised features)
svr = SVR()
y_pred_svr = report_test_mse(
    "Support Vector Regression", svr, X_train_scaled, y_train, X_test_scaled, y_test
)

# KNN Regressor (fitted and evaluated on the standardised features)
knn_reg = KNeighborsRegressor()
y_pred_knn = report_test_mse(
    "KNN Regression", knn_reg, X_train_scaled, y_train, X_test_scaled, y_test
)

# ----------- K-Means Clustering (unsupervised) -----------
# Cluster the standardised training features into 3 groups, then assign each
# test row to its nearest learned centroid.
#
# n_init is passed explicitly: when omitted, the number of centroid
# re-initialisations depends on the installed scikit-learn version (10 in
# older releases, 'auto' from 1.4 on, with a FutureWarning in between), so
# the resulting labels could differ between environments.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X_train_scaled)
clusters = kmeans.predict(X_test_scaled)
print("KMeans Cluster Labels (first 10):", clusters[:10])


Linear Regression MSE: 24.291119474973602
Polynomial Regression MSE: 14.257338169801294
Decision Tree Regression MSE: 10.416078431372549
Random Forest Regression MSE: 7.901513892156864
Support Vector Regression MSE: 25.66853967839604
KNN Regression MSE: 20.60552941176471
KMeans Cluster Labels (first 10): [1 2 0 1 0 1 1 0 0 1]


