In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [25]:
# Relative path to the listings CSV; resolved against the kernel's working
# directory.
file_path = 'housePrice.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first ten rows to check the columns and their values.
data.head(n=10)

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1850000000.0,61666.67
1,60,1,True,True,True,Shahran,1850000000.0,61666.67
2,79,2,True,True,True,Pardis,550000000.0,18333.33
3,95,2,True,True,True,Shahrake Qods,902500000.0,30083.33
4,123,2,True,True,True,Shahrake Gharb,7000000000.0,233333.33
5,70,2,True,True,False,North Program Organization,2050000000.0,68333.33
6,87,2,True,True,True,Pardis,600000000.0,20000.0
7,59,1,True,True,True,Shahran,2150000000.0,71666.67
8,54,2,True,True,False,Andisheh,493000000.0,16433.33
9,71,1,True,True,True,West Ferdows Boulevard,2370000000.0,79000.0


In [22]:
# Exclusive bounds on the Area values that are kept; rows outside this open
# interval are dropped.
MIN_AREA = 0
MAX_AREA = 1000

# Coerce Area to numeric. Entries that cannot be parsed become NaN and are
# removed by the range check below, because comparisons against NaN are False.
data['Area'] = pd.to_numeric(data['Area'], errors='coerce')
valid_area_condition = (data['Area'] > MIN_AREA) & (data['Area'] < MAX_AREA)

# Keep only rows that have an Address value.
valid_address_condition = data['Address'].notna()

# Take an explicit copy so that later column assignments on filtered_data
# operate on an independent frame and do not trigger SettingWithCopyWarning.
filtered_data = data[valid_area_condition & valid_address_condition].copy()

In [30]:
# Feature matrix: select the five predictor columns and cast the three
# True/False amenity columns to integers in a single astype call (astype
# returns a new frame, so filtered_data is left untouched).
X = filtered_data[['Area', 'Room', 'Parking', 'Warehouse', 'Elevator']].astype(
    {'Parking': int, 'Warehouse': int, 'Elevator': int}
)

# Target vector: the Price(USD) column of the same filtered rows.
y = filtered_data['Price(USD)']

In [31]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [32]:
# Linear regression with default settings, fitted on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [33]:
# Predict the target for the held-out rows; compared against y_test below.
y_pred = model.predict(X=X_test)

In [34]:
# Score the held-out predictions with three regression metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report each metric on its own line, label first.
for label, value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("R-squared (R2):", r2),
):
    print(label, value)

Mean Absolute Error (MAE): 96843.77912206911
Mean Squared Error (MSE): 39343660714.720436
R-squared (R2): 0.5319739993784479


In [35]:
# Tabulate each feature name next to its fitted coefficient. model.coef_ has
# one entry per training column, in the same order as X.columns.
coefficients = pd.DataFrame(
    dict(Feature=X.columns, Coefficient=model.coef_)
)
print(coefficients)

     Feature   Coefficient
0       Area   2423.592871
1       Room  45005.858633
2    Parking  -9165.621483
3  Warehouse  44676.190074
4   Elevator  33319.600502
