In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
# Load the housing dataset (read from housing1.csv in the working directory)
data = pd.read_csv('housing1.csv')

# Inspect the dataset
print(data.head())

# Separate features and target variable
X = data[['SquareFootage', 'Bedrooms']]  # Features
y = data['Price']  # Target

# Split BEFORE any preprocessing: if the imputer/scaler were fit on the full
# dataset, statistics of the held-out rows (means, variances) would leak into
# the training features and make the test error optimistically biased.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the preprocessing steps on the training rows only ...
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_train = scaler.fit_transform(imputer.fit_transform(X_train))

# ... and re-use the already-fitted transformers (transform, not
# fit_transform) on the test rows.
X_test = scaler.transform(imputer.transform(X_test))

# Keep X as the fully preprocessed feature matrix, as it was before this
# change, but derived from training-only statistics.
X = scaler.transform(imputer.transform(X))

# Fit both candidate regressors on the same training split.
# (fit() returns the estimator itself, so construction and fitting can be chained.)
linear_model = LinearRegression().fit(X_train, y_train)
tree_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Score the Linear Regression model on the held-out test split
y_pred_linear = linear_model.predict(X_test)
mse_linear = mean_squared_error(y_true=y_test, y_pred=y_pred_linear)
print(f"Linear Regression Mean Squared Error: {mse_linear}")

# Score the Decision Tree model on the same test split
y_pred_tree = tree_model.predict(X_test)
mse_tree = mean_squared_error(y_true=y_test, y_pred=y_pred_tree)
print(f"Decision Tree Mean Squared Error: {mse_tree}")

# Report which model has the lower test MSE. The comparison is strict, so a
# tie is reported in favour of the Decision Tree Regressor.
print(
    "Linear Regression performs better."
    if mse_linear < mse_tree
    else "Decision Tree Regressor performs better."
)


   SquareFootage  Bedrooms   Price
0           1500         3  300000
1           2000         4  400000
2           1800         3  350000
3           2500         4  500000
4           1700         2  280000
Linear Regression Mean Squared Error: 537438318.2900218
Decision Tree Mean Squared Error: 1450000000.0
Linear Regression performs better.


# New Section