# Import libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib as  plt
import seaborn as sns
import pickle

# Generate random values for size, price, and bedroom

In [24]:
def generate_house_data(n_samples=100, seed=50):
    """Create a reproducible synthetic housing dataset.

    Parameters
    ----------
    n_samples : int, default 100
        Number of rows to generate.
    seed : int, default 50
        Seed for the function's private random generator. The default
        reproduces the values previously obtained via ``np.random.seed(50)``.

    Returns
    -------
    pandas.DataFrame
        One row per sample with float columns ``size``, ``price`` and
        ``bedroom``.

    Notes
    -----
    * ``price`` is ``size * 50`` plus Gaussian noise (std 50).
    * ``bedroom`` is drawn from a normal distribution (mean 1, std 10), so it
      is continuous, can be negative, and does not influence ``price``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # would, but leaves NumPy's global RNG untouched for the rest of the
    # notebook.
    rng = np.random.RandomState(seed)

    # The draw order (size -> price noise -> bedroom) determines the values;
    # keep it stable so results stay reproducible.
    size = rng.normal(1400, 50, n_samples)
    price = size * 50 + rng.normal(0, 50, n_samples)
    bedroom = rng.normal(1, 10, n_samples)
    return pd.DataFrame({"size": size, "price": price, "bedroom": bedroom})


In [13]:
# Build the synthetic dataset once at notebook level; the following cell inspects `df`.
df = generate_house_data(n_samples=100)
df  # bare last expression -> rich DataFrame display

Unnamed: 0,size,price,bedroom
0,1321.982395,66086.212282,6.650218
1,1398.451120,69924.913799,1.572648
2,1368.953579,68426.325683,3.415748
3,1326.770976,66332.487139,5.861726
4,1470.597306,73521.436577,-2.634334
...,...,...,...
95,1334.661329,66821.650808,5.411463
96,1355.511585,67727.300704,-9.834500
97,1400.263387,70081.687066,-5.209275
98,1443.321244,72137.698251,3.956314


In [14]:
# Print column dtypes and non-null counts to check the frame for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   size     100 non-null    float64
 1   price    100 non-null    float64
 2   bedroom  100 non-null    float64
dtypes: float64(3)
memory usage: 2.5 KB


# Train the model using linear regression

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

In [20]:
def train_model(data=None, test_size=0.2, random_state=42):
    """Fit a linear regression predicting ``price`` from ``size`` and ``bedroom``.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame containing ``size``, ``bedroom`` and ``price`` columns. When
        omitted, a fresh 100-row dataset is built with
        ``generate_house_data`` (the original behaviour).
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so the split is reproducible.

    Returns
    -------
    tuple
        ``(model, x_test, y_test, y_pred)``: the fitted ``LinearRegression``,
        the held-out features and targets, and the predictions for them.

    Side effects
    ------------
    Prints MSE, RMSE, the ``size`` coefficient and the intercept.
    """
    if data is None:
        data = generate_house_data(n_samples=100)

    # Double brackets keep the features as a 2-D frame, as sklearn expects.
    features = data[["size", "bedroom"]]
    target = data["price"]

    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # Train model
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Make predictions on the held-out set
    y_pred = model.predict(x_test)

    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    print("Model Performance:")
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"Root Mean Squared Error: {rmse:.2f}")
    # coef_[0] belongs to "size" because it is the first feature column.
    print(f"Model Coefficient (price per sq ft): ${model.coef_[0]:.2f}")
    print(f"Model Intercept: ${model.intercept_:.2f}")

    return model, x_test, y_test, y_pred

In [21]:
# Train the model; also keep the held-out features/targets and their predictions.
model, x_test, y_test, y_pred = train_model()

Model Performance:
Mean Squared Error: 2807.75
Root Mean Squared Error: 52.99
Model Coefficient (price per sq ft): $49.81
Model Intercept: $255.78


In [22]:
# Serialize the fitted model to house_price_model.pkl (pickle requires binary mode).
# Only load this file back from a trusted location: unpickling can execute arbitrary code.
with open("house_price_model.pkl", "wb") as f:
    pickle.dump(model, f)