<a href="https://colab.research.google.com/github/venomiteh/House_Price_Prediction/blob/main/HousePricingPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch as tr
import numpy as np
import pandas as pn
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib

# Step 1) Load the data
file = pn.read_csv('/content/USA Housing Dataset.csv')
#print(file.head())

print("Info:\n")
# Optional diagnostics -- uncomment while exploring the dataset.
#print(file.info())
#print("Describe:\n")
#print(file.describe())
#print("Null counter:\n")
#print(file.isnull().sum())
#print("After\n")

# Step 2) Filter out columns that are not used as model inputs (street, city, etc.)
file = file.drop(columns=["street", "city", "yr_renovated", "country", "date", "sqft_above", "sqft_basement", "view", "statezip"])
#print(file.columns)
#print(file.head())

# Step 3) Handle missing values (if any) by dropping incomplete rows
file = file.dropna()

# Step 4) Separate Features and Target Variable
# Keep only numeric feature columns: neither the scaler nor the linear layer
# can consume text columns.
x = file.drop(columns=["price"]).select_dtypes(include=[np.number])  # Features
y = file["price"].values.reshape(-1, 1)  # Target as a 2-D column for StandardScaler

# Step 5) Split into Training and Testing Data
# Split BEFORE scaling so that statistics of the held-out rows never leak into
# the scalers (and therefore into training).
x_train_raw, x_test_raw, y_train_raw, y_test_raw = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Step 6) Normalize features and target
# The scalers are fitted on the training rows only; the test rows are merely
# transformed with the training mean / standard deviation.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

x_train_scaled = scaler_X.fit_transform(x_train_raw)
x_test_scaled = scaler_X.transform(x_test_raw)
y_train_scaled = scaler_y.fit_transform(y_train_raw)
y_test_scaled = scaler_y.transform(y_test_raw)

# Step 7) Convert to PyTorch tensors
X_train = tr.tensor(x_train_scaled, dtype=tr.float32)
X_test = tr.tensor(x_test_scaled, dtype=tr.float32)
y_train = tr.tensor(y_train_scaled, dtype=tr.float32)
y_test = tr.tensor(y_test_scaled, dtype=tr.float32)

# Step 8) Create the Linear Regression model
class LinearRegression(nn.Module):
    """Linear regression model: one fully connected layer with a single output."""

    def __init__(self, input_size):
        """Create the layer mapping ``input_size`` features to one value."""
        super().__init__()
        # The attribute name ``fc`` determines the state_dict keys
        # ("fc.weight", "fc.bias") used when the model is saved and loaded.
        self.fc = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.fc(x)
        return prediction

# Step 9) Create the model (one input per feature column)
model = LinearRegression(X_train.shape[1])

# Step 10) Define Loss and Optimizer
criterion = nn.MSELoss()  # Mean Squared Error Loss for regression
# Each step below uses the whole training set, so this is full-batch gradient descent.
optimizer = tr.optim.SGD(model.parameters(), lr=0.1)

# Step 11) Train the model
epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()  # Clear gradients accumulated by the previous step

    y_pred = model(X_train)  # Forward pass
    loss = criterion(y_pred, y_train)  # Compute loss
    loss.backward()  # Backward pass
    optimizer.step()  # Update weights

    if (epoch + 1) % 50 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

# Step 12) Evaluate the model
model.eval()
# No gradients are needed for evaluation; skipping autograd avoids building a
# graph and lets the outputs be converted to NumPy directly.
with tr.no_grad():
    y_pred_test = model(X_test)
    # Evaluate in normalized space
    test_loss = criterion(y_pred_test, y_test)
print(f"Test Loss (Normalized): {test_loss.item():.4f}")

# Convert to original scale for readable output
y_pred_test_orig = scaler_y.inverse_transform(y_pred_test.numpy())
y_test_orig = scaler_y.inverse_transform(y_test.numpy())

# Show up to five sample predictions (slicing is safe even when the test set
# holds fewer than five rows).
for actual, predicted in zip(y_test_orig[:5], y_pred_test_orig[:5]):
    print(f"Actual: ${actual[0]:,.2f} - Predicted: ${predicted[0]:,.2f}")

# Persist everything needed for inference: both fitted scalers and the model
# weights (state_dict only, so the class must be re-declared when loading).
joblib.dump(scaler_X, 'scaler_X.pkl')
joblib.dump(scaler_y, 'scaler_y.pkl')
tr.save(model.state_dict(), 'linear_regression_model.pth')


Info:



NameError: name 'tratin_test_split' is not defined

In [None]:
import torch as tr
import numpy as np
import torch.nn as nn
import pandas as pd
import joblib  # To load the saved scalers
from sklearn.preprocessing import StandardScaler

# Define the same model structure as during training
class LinearRegression(nn.Module):
    """Linear regression model: one fully connected layer with a single output.

    The layer is stored as ``fc`` so that the parameter names are
    "fc.weight" and "fc.bias", the keys ``load_state_dict`` looks for.
    """

    def __init__(self, input_size):
        """Create the layer mapping ``input_size`` features to one value."""
        super().__init__()
        self.fc = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.fc(x)
        return prediction

# Step 1: Load the saved scalers (from training)
# NOTE: joblib.load unpickles arbitrary Python objects -- only load files that
# were produced by your own training run, never files from an untrusted source.
scaler_X = joblib.load('/content/scaler_X.pkl')
scaler_y = joblib.load('/content/scaler_y.pkl')

# Step 2: Rebuild the model and load the trained weights
# The input width is taken from the feature scaler instead of being hard-coded,
# so it always matches the number of columns the model was trained on.
model = LinearRegression(scaler_X.n_features_in_)
# map_location="cpu" lets the checkpoint load on a CPU-only runtime even if it
# was saved from a GPU session.
model.load_state_dict(
    tr.load('/content/linear_regression_model.pth', map_location="cpu")
)
model.eval()  # Inference mode

# Step 3: Get user input
def get_user_input():
    """Ask for each house attribute on stdin and return them as one row.

    Returns:
        A single-row ``pandas.DataFrame`` whose columns are, in order:
        bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront,
        condition, yr_built. Every value is parsed with ``float``.

    Raises:
        ValueError: if an answer cannot be parsed as a float.
    """
    # (column name, prompt text) pairs; the order defines both the order of
    # the questions and the column order of the returned frame.
    questions = (
        ('bedrooms', "Number of bedrooms: "),
        ('bathrooms', "Number of bathrooms: "),
        ('sqft_living', "Square footage of the house (sqft_living): "),
        ('sqft_lot', "Lot size in square feet (sqft_lot): "),
        ('floors', "Number of floors: "),
        ('waterfront', "Waterfront (1 if yes, 0 if no): "),
        ('condition', "Condition rating (1-5): "),
        ('yr_built', "Year built: "),
    )

    print("Enter the following details about the house:")

    answers = {}
    for column, prompt in questions:
        # Each column holds a one-element list so the frame has exactly one row.
        answers[column] = [float(input(prompt))]

    return pd.DataFrame(answers)

# Step 4: Predict price
def predict_price():
    """Read house details from the user and print the predicted price.

    Uses the module-level ``model``, ``scaler_X`` and ``scaler_y``: the input
    row is scaled with ``scaler_X``, passed through ``model``, and the result
    is mapped back to dollars with ``scaler_y``.
    """
    features = get_user_input()

    # Apply the scaler loaded from training, then hand the row to the model.
    standardized = tr.tensor(scaler_X.transform(features), dtype=tr.float32)

    # Inference only, so gradient tracking is switched off.
    with tr.no_grad():
        scaled_price = model(standardized).item()

    # inverse_transform expects a 2-D input, hence the nested list; [0][0]
    # unwraps the single value again.
    dollars = scaler_y.inverse_transform([[scaled_price]])[0][0]

    print(f"\nPredicted house price: ${dollars:,.2f}")


# Run the prediction
predict_price()


Enter the following details about the house:
Number of bedrooms: 3
Number of bathrooms: 3
Square footage of the house (sqft_living): 1700
Lot size in square feet (sqft_lot): 2000
Number of floors: 2
Waterfront (1 if yes, 0 if no): 0
Condition rating (1-5): 4
Year built: 2008

Predicted house price: $455,814.95
