In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle

# Small hand-entered sample dataset for a house price prediction demo.
# Each key is a column; every list holds one value per house (10 houses).
data = {
    # Predictors
    'Square_Feet': [850, 900, 1200, 1500, 1800, 2200, 2500, 2800, 3000, 3500],
    'Bedrooms': [2, 2, 3, 3, 4, 4, 5, 5, 6, 6],
    'Bathrooms': [1, 1, 2, 2, 3, 3, 3, 4, 4, 5],
    # Distance in miles
    'Distance_to_City': [10, 8, 7, 6, 5, 4, 3, 2, 1, 1],
    # Age of the house in years
    'House_Age': [20, 18, 15, 10, 8, 6, 5, 3, 2, 1],
    # Target variable in $
    'Price': [150000, 160000, 210000, 260000, 320000, 400000, 450000, 500000, 550000, 600000],
}

# Tabular view of the sample: one row per house, one column per dict key.
df = pd.DataFrame.from_dict(data)

# 'Price' is the regression target; every other column is a predictor.
y = df['Price']
X = df.drop('Price', axis=1)

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit a linear regression on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)




In [5]:
# Persist the fitted estimator to disk (binary pickle) so it can be reloaded later.
with open('house_price_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

print("Model trained and saved as 'house_price_model.pkl'")

# Sanity check: R² of the model on the held-out test rows.
score = model.score(X_test, y_test)
print(f"Model R² Score on Test Data: {score:.4f}")

Model trained and saved as 'house_price_model.pkl'
Model R² Score on Test Data: 0.9963


In [6]:

# Load the trained model back from disk.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# unpickle files you produced yourself or otherwise trust.
with open('house_price_model.pkl', 'rb') as f:
    model = pickle.load(f)

# New house record to price. The model was fitted on a DataFrame, so the input
# is passed as a one-row DataFrame with the same column names in the same order
# as the training features. This keeps each value tied to a named feature and
# avoids scikit-learn's "X does not have valid feature names" warning that a
# bare NumPy array triggers.
new_house = pd.DataFrame(
    [[2000, 3, 2, 5, 10]],  # Example house
    columns=['Square_Feet', 'Bedrooms', 'Bathrooms', 'Distance_to_City', 'House_Age'],
)

# Predict the price; predict() returns one value per input row.
predicted_price = model.predict(new_house)

print(f"Predicted House Price: ${predicted_price[0]:,.2f}")


Predicted House Price: $355,327.47


