In [110]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error

In [111]:
# Original housing dataset
data = {
    "Size (sqft)": [2000, 1500, 1800, 1200, 2500],
    "Bedrooms": [3, 2, 3, 2, 4],
    "Bathrooms": [2, 1, 2, 1, 3],
    "Location Score": [8, 7, 9, 6, 10],
    "Price ($)": [300000, 200000, 250000, 180000, 500000]
}

In [112]:
# Convert to Dataframe

df = pd.DataFrame(data)

In [113]:
# Separate features and target
X = df[["Size (sqft)", "Bedrooms", "Bathrooms", "Location Score"]]
y = df["Price ($)"]


In [114]:
print(X)

   Size (sqft)  Bedrooms  Bathrooms  Location Score
0         2000         3          2               8
1         1500         2          1               7
2         1800         3          2               9
3         1200         2          1               6
4         2500         4          3              10


In [115]:
# Split the dataset

X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [116]:
# Apply log transformation
X_train_log = X_train.copy()
X_train_log["Size (sqft)"] = np.log(X_train_log["Size (sqft)"]) # Log transformation

In [117]:
# Apply Square root transformation
X_train_sqrt = X_train.copy()
X_train_sqrt["Size (sqft)"] = np.log(X_train_sqrt["Size (sqft)"]) # Square root transformation

In [118]:
# Min max scaling
scaler = MinMaxScaler()
X_train_Minmax = scaler.fit_transform(X_train) # Min max scaling

In [119]:
# Train Linear Regression Models
lr = LinearRegression() 

In [120]:
# Original

lr.fit(X_train,y_train)
mse_original = mean_squared_error(y_test, lr.predict(X_test))

In [121]:
# Log transformation

lr.fit(X_train_log,y_train)
mse_log = mean_squared_error(y_test,lr.predict(X_test))

In [122]:
# Square root transformation

lr.fit(X_train_sqrt,y_train)
mse_sqrt = mean_squared_error(y_test,lr.predict(X_test))

In [123]:
# Min-max scaling 

lr.fit(X_train_Minmax,y_train)
mse_minmax = mean_squared_error(y_test,lr.predict(scaler.transform(X_test)))

In [124]:
# Print results
print(f"Original Data MSE: {mse_original:.2f}")
print(f"Log Transformation MSE: {mse_log:.2f}")
print(f"Square Root Transformation MSE: {mse_sqrt:.2f}")
print(f"Min-Max Scaling MSE: {mse_minmax:.2f}")

Original Data MSE: 41344444444.42
Log Transformation MSE: 172436545978148512.00
Square Root Transformation MSE: 172436545978148512.00
Min-Max Scaling MSE: 41344444444.44
