In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# Settings for this cell: column selection and train/test split parameters.
# ---------------------------------------------------------------------------
FEATURE_COLUMNS = [
    'sqft_living',
    'grade',
    'bathrooms',
    'sqft_above',
    'sqft_lot',
    'condition',
    'floors',
    # NOTE(review): 'price_per_sqft' looks like it may be derived from the
    # target 'price'. If so, this is target leakage and the reported R² will
    # be optimistic -- confirm how the column was built.
    'price_per_sqft',
    'bath_bed_ratio',
]
TARGET_COLUMN = 'price'
TEST_FRACTION = 0.2   # 20% of the rows are held out for evaluation
SPLIT_SEED = 42       # fixed seed so the split is the same on every run

# ---------------------------------------------------------------------------
# Read the CSV and show a preview.
# NOTE(review): this is an absolute, machine-specific path; the cell will only
# run where this exact file exists.
# ---------------------------------------------------------------------------
file_path = r"C:\Users\User\OneDrive\Desktop\New folder (2)\Dataset 2 (House Sales) (2)\transformed_house_sales2.csv"
house_sales_df = pd.read_csv(file_path)

print("First few rows of the dataset:")
print(house_sales_df.head())

# ---------------------------------------------------------------------------
# Build the design matrix (X) and the target vector (y), then split them.
# ---------------------------------------------------------------------------
y = house_sales_df[TARGET_COLUMN]
X = house_sales_df[FEATURE_COLUMNS]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# ---------------------------------------------------------------------------
# Fit ordinary least squares on the training rows and predict the test rows.
# `fit` returns the estimator itself, so construction and training are chained.
# ---------------------------------------------------------------------------
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# ---------------------------------------------------------------------------
# Score the predictions against the held-out test targets.
# ---------------------------------------------------------------------------
r2_new = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("\nModel Evaluation Results:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"New R-squared (R²) with added features: {r2_new:.4f}")

# ---------------------------------------------------------------------------
# Report the fitted parameters: one coefficient per feature, then the intercept.
# ---------------------------------------------------------------------------
coefficients = pd.DataFrame(
    {
        'Feature': X.columns,
        'Coefficient': model.coef_,
    }
)
print("\nModel Coefficients:")
print(coefficients)

print(f"\nIntercept: {model.intercept_:.4f}")


First few rows of the dataset:
           id        date      price  bedrooms  bathrooms  sqft_living  \
0  7129300520  2014-10-13   221900.0       3.0        1.0       1180.0   
1  5631500400  2015-02-25   180000.0       2.0        1.0        770.0   
2  2487200875  2014-12-09   604000.0       4.0        3.0       1960.0   
3  1954400510  2015-02-18   510000.0       3.0        2.0       1680.0   
4  7237550310  2014-05-12  1225000.0       4.0        4.5       5420.0   

   sqft_lot  floors  waterfront  view  ...  yr_built  yr_renovated  zipcode  \
0    5650.0     1.0           0     0  ...      1955             0    98178   
1   10000.0     1.0           0     0  ...      1933             0    98028   
2    5000.0     1.0           0     0  ...      1965             0    98136   
3    8080.0     1.0           0     0  ...      1987             0    98074   
4   18298.5     1.0           0     0  ...      2001             0    98053   

       lat     long  sqft_living15  sqft_lot15  b