### House Price Prediction using SVR
● Predicting the selling price of houses based on features such as square
footage, number of bedrooms and bathrooms, location, amenities, etc

In [9]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv('train.csv')


# Preprocess the data
X = data[['beds', 'baths', 'size', 'lot_size', 'zip_code']]
y = data['price']

# Fill missing values with mean column values
X.fillna(X.mean(), inplace=True)


# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Create and train the SVR model
svr_model = SVR(kernel='rbf', C=1000, gamma=0.1)
svr_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svr_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
print(f"Root Mean Squared Error: {rmse}")

# Define a function to predict house price
def predict_house_price(beds, baths, size, lot_size, zip_code):
    input_features = scaler.transform([[beds, baths, size, lot_size, zip_code]])
    predicted_price = svr_model.predict(input_features)
    return predicted_price[0]

predicted_price = predict_house_price(3, 2, 1800, 5000, 98122)
print(f"Predicted Price: ${predicted_price}")    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(X.mean(), inplace=True)


Root Mean Squared Error: 533149.8145855789
Predicted Price: $833095.6089431777


## Model On Test Data


In [10]:
# Load the test dataset
test_data = pd.read_csv('test.csv')

# Preprocess the test data
X_test = test_data[['beds', 'baths', 'size', 'lot_size', 'zip_code']]
y_test = test_data['price']

# Fill missing values with mean column values in the test set
X_test.fillna(X_test.mean(), inplace=True)

# Scale the test features
X_test_scaled = scaler.transform(X_test)

# Make predictions on the test set
y_pred_test = svr_model.predict(X_test_scaled)

# Evaluate the model on the test set
mse_test = mean_squared_error(y_test, y_pred_test)
rmse_test = mse_test ** 0.5
print(f"Root Mean Squared Error on Test Set: {rmse_test}")

Root Mean Squared Error on Test Set: 581336.1839685004


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test.fillna(X_test.mean(), inplace=True)
