In [12]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [13]:
# Read the Ontario housing listings CSV into a DataFrame.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# previously saved index -- confirm whether it should be read as the index instead.
file_path = 'ontario.csv'
housing_data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows to get a feel for the columns and their values.
housing_data.head(n=5)

Unnamed: 0,City,Price,Address,Number_Beds,Number_Baths,Province,Population,Latitude,Longitude,Median_Family_Income
0,Barrie,799000.0,70 RED OAK Drive,3,4,Ontario,154676,44.3711,-79.6769,97000
1,Barrie,1049000.0,104 DURHAM AVE W,4,4,Ontario,154676,44.3711,-79.6769,97000
2,Barrie,589900.0,#301 -100 DEAN AVE,3,2,Ontario,154676,44.3711,-79.6769,97000
3,Barrie,869900.0,#1,5,3,Ontario,154676,44.3711,-79.6769,97000
4,Barrie,689900.0,105 LAIDLAW Drive,3,2,Ontario,154676,44.3711,-79.6769,97000


In [14]:
# Model inputs: listing price, coordinates, and bedroom/bathroom counts.
features = housing_data.loc[
    :,
    ['Price', 'Latitude', 'Longitude', 'Number_Beds', 'Number_Baths'],
]

In [15]:
# Build a hypothetical 'fast selling' label derived purely from prices.
# Assumption: a listing is 'fast selling' (1) when its price lies within 10% of
# the mean price of comparable listings -- those sharing its city, bedroom count
# and bathroom count -- and not fast selling (0) otherwise.
comparable_keys = ['City', 'Number_Beds', 'Number_Baths']
average_prices = housing_data.groupby(comparable_keys)['Price'].transform('mean')
price_tolerance = 0.1 * average_prices

# Both ends of the band are inclusive; rows whose band is NaN are labelled 0.
within_band = housing_data['Price'].between(average_prices - price_tolerance,
                                            average_prices + price_tolerance)
housing_data['Fast_Selling'] = np.where(within_band, 1, 0)

In [16]:
# Labels the classifier will learn to predict.
target = housing_data['Fast_Selling']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=42,
)

In [17]:
# Fit a 100-tree random forest on the training split (seeded for repeatable results).
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X_train, y_train)

In [18]:
# Classify the held-out rows, then compare the predictions with the true labels.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [19]:
# Show the accuracy measured on the held-out test set.
# A bare expression on the last line lets the notebook render the value.
accuracy

0.949346016646849

In [26]:
import ipywidgets as widgets
from IPython.display import display

# Widgets for user input: one field per feature the model was trained on.
price_widget = widgets.FloatText(value=0, description='Price:')
latitude_widget = widgets.FloatText(value=0, description='Latitude:')
longitude_widget = widgets.FloatText(value=0, description='Longitude:')
bedrooms_widget = widgets.IntText(value=0, description='Bedrooms:')
bathrooms_widget = widgets.IntText(value=0, description='Bathrooms:')
predict_button = widgets.Button(description='Predict')

# Dedicated output area for the callback. Text printed from a widget callback
# is not reliably shown in the cell's output area by every Jupyter front end;
# printing inside this Output widget makes the result appear with the controls.
prediction_output = widgets.Output()

# Display widgets
display(price_widget, latitude_widget, longitude_widget, bedrooms_widget,
        bathrooms_widget, predict_button, prediction_output)


def on_predict_button_clicked(b):
    """Predict whether the listing described by the input widgets sells fast.

    Reads the current widget values, builds a one-row DataFrame, runs it
    through the trained classifier ``clf`` and prints a human-readable verdict
    into ``prediction_output``.

    Parameters
    ----------
    b : ipywidgets.Button
        The button that was clicked (supplied by ``on_click``; unused).
    """
    with prediction_output:
        # Column names and order must match the columns used to fit ``clf``.
        input_features = pd.DataFrame({
            'Price': [price_widget.value],
            'Latitude': [latitude_widget.value],
            'Longitude': [longitude_widget.value],
            'Number_Beds': [bedrooms_widget.value],
            'Number_Baths': [bathrooms_widget.value]
        })

        # Prediction logic (using the trained model 'clf')
        prediction = clf.predict(input_features)

        # Display prediction: class 1 is the 'fast selling' label.
        prediction_text = "Likely to Sell Fast" if prediction[0] == 1 else "Not Likely to Sell Fast"
        print(prediction_text)


# Link the button to the function
predict_button.on_click(on_predict_button_clicked)


FloatText(value=0.0, description='Price:')

FloatText(value=0.0, description='Latitude:')

FloatText(value=0.0, description='Longitude:')

IntText(value=0, description='Bedrooms:')

IntText(value=0, description='Bathrooms:')

Button(description='Predict', style=ButtonStyle())

Likely to Sell Fast
Not Likely to Sell Fast
Not Likely to Sell Fast
Likely to Sell Fast
Likely to Sell Fast
Not Likely to Sell Fast
