In [1]:
# Step 1: Dataset Selection
from sklearn.datasets import load_boston

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the Boston housing dataset
boston = load_boston()
X, y = boston.data, boston.target

In [4]:
# Step 2: Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Initialize StandardScaler to scale features
scaler = StandardScaler()

In [7]:
# Fit scaler to training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Step 3: Pipeline Setup
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

In [9]:
# Define the pipeline with preprocessing steps and model
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', RandomForestRegressor(random_state=42))
])

In [10]:
# Step 4: Model Training
pipeline.fit(X_train, y_train)

Pipeline(steps=[('scaler', StandardScaler()),
                ('regressor', RandomForestRegressor(random_state=42))])

In [11]:
# Step 5: Model Evaluation
from sklearn.metrics import mean_squared_error, r2_score

In [12]:
# Predict on the testing data
y_pred = pipeline.predict(X_test)

In [13]:
# Calculate evaluation metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 7.912745333333333
R-squared: 0.8920995891343227


In [14]:
# Step 6: Saving the Model
import joblib

# Save the trained model to disk
joblib.dump(pipeline, 'housing_price_model.pkl')

['housing_price_model.pkl']

In [15]:
# For real-time use, you can load the pre-trained model and use it to make predictions on new data
# Here, we'll simulate real-time prediction using a subset of the testing data

# Load the pre-trained model
loaded_model = joblib.load('housing_price_model.pkl')

In [15]:
# Simulate new data for prediction (subset of testing data)
new_data = X_test[:5]  # Assuming new_data represents new housing listings

In [16]:
# Preprocess the new data using the same preprocessing steps as during training
new_data_scaled = scaler.transform(new_data)

In [18]:
# Use the pre-trained model to make predictions on the new data
predictions = loaded_model.predict(new_data_scaled)
print("Predictions for new data:")
print(predictions)

Predictions for new data:
[41.565 41.565 41.642 41.565 42.349]
