**Step 1: Load the Data**

In [1]:
import pandas as pd

# Read the housing CSV into a DataFrame and print its first rows
# (head() with no argument shows the first five) as a quick sanity check.
data = pd.read_csv('/content/Housing.csv')
print(data.head())


      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  


**Step 2: Preprocess the Data and Split into Train and Test Sets**

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
# Convert 'yes/no' columns to binary values (0/1).
# The mapping is applied only to columns that still hold the raw text labels:
# re-running this cell on already-encoded (numeric) columns would otherwise
# turn every 0/1 into NaN, because neither value is a key of the mapping.
yes_no_columns = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]
yes_no_to_binary = {'yes': 1, 'no': 0}

for column in yes_no_columns:
    if not pd.api.types.is_numeric_dtype(data[column]):
        data[column] = data[column].map(yes_no_to_binary)


In [4]:
# One-hot encode 'furnishingstatus'. drop_first=True omits the first category
# level, so only the 'semi-furnished' and 'unfurnished' indicator columns are
# added and 'furnished' is the implicit baseline (both indicators False).
data = pd.get_dummies(data, columns=['furnishingstatus'], drop_first=True)


In [5]:
# Separate the regression target from the predictors.
y = data['price']                 # value the model learns to predict
X = data.drop(columns=['price'])  # every remaining column is a feature


In [6]:
# Train-test split: hold out 20% of the rows for evaluation.
# The fixed random_state makes the split reproducible across runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Count missing values: one count per feature column, then a single total for the target.
print(X_train.isna().sum())
print(y_train.isna().sum())

area                               0
bedrooms                           0
bathrooms                          0
stories                            0
mainroad                           0
guestroom                          0
basement                           0
hotwaterheating                    0
airconditioning                    0
parking                            0
prefarea                           0
furnishingstatus_semi-furnished    0
furnishingstatus_unfurnished       0
dtype: int64
0


In [8]:
# Show the dimensions of the training features and target, one per line.
print(X_train.shape, y_train.shape, sep='\n')


(436, 13)
(436,)


In [9]:
# Show the dtype of each feature column, followed by the dtype of the target.
print(X_train.dtypes, y_train.dtype, sep='\n')


area                               int64
bedrooms                           int64
bathrooms                          int64
stories                            int64
mainroad                           int64
guestroom                          int64
basement                           int64
hotwaterheating                    int64
airconditioning                    int64
parking                            int64
prefarea                           int64
furnishingstatus_semi-furnished     bool
furnishingstatus_unfurnished        bool
dtype: object
int64


In [10]:
# Replace any missing training feature values with 0. The null counts printed
# earlier were all zero, so on this data the call leaves X_train unchanged.
X_train = X_train.fillna(0)


In [11]:
# Fill missing values with 0 in three of the binary test-set columns only;
# missing values in any other column are left untouched by this cell.
zero_filled_columns = ['mainroad', 'guestroom', 'basement']
X_test[zero_filled_columns] = X_test[zero_filled_columns].fillna(0)


In [12]:
# Drop test rows with any missing values, then keep only the matching targets.
# Without the second step, y_test would keep rows that X_test no longer has,
# so the metric calls comparing y_test with the predictions would receive
# inputs of different lengths whenever a row is actually removed.
X_test = X_test.dropna()
y_test = y_test.loc[X_test.index]


**Step 3: Train Regression Models**

In [13]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split (fit() returns the fitted
# estimator), then predict prices for the held-out test rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)


**Step 4: Evaluate Model**

In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [15]:
# Score the test-set predictions with three regression metrics, in order:
# mean absolute error, mean squared error, and the R² score.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R² Score: {r2}')


Mean Absolute Error: 970043.4039201637
Mean Squared Error: 1754318687330.6633
R² Score: 0.6529242642153185


**Step 5: Make Predictions**

In [16]:
# Describe one hypothetical house as a single-row frame. The keys are the
# feature columns used for training, in the same order.
new_data = pd.DataFrame([{
    'area': 8000,
    'bedrooms': 3,
    'bathrooms': 2,
    'stories': 2,
    'mainroad': 1,  # 1 for yes, 0 for no
    'guestroom': 0,
    'basement': 0,
    'hotwaterheating': 0,
    'airconditioning': 1,
    'parking': 2,
    'prefarea': 1,
    'furnishingstatus_semi-furnished': 0,
    'furnishingstatus_unfurnished': 1,
}]).fillna(0)  # Fill missing values (none in this hand-written row)

# Predict with the fitted model and report the single resulting price.
new_prediction = model.predict(new_data)
print(f'Predicted Price: {new_prediction[0]}')


Predicted Price: 7207237.67417388
