In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Column roles: five numeric predictors, seven categorical predictors,
# and 'price' as the regression target.
numeric_features = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
categorical_features = [
    'mainroad', 'guestroom', 'basement', 'hotwaterheating',
    'airconditioning', 'prefarea', 'furnishingstatus',
]
numeric_columns = numeric_features + ['price']

# Read the raw table, then coerce every numeric column (target included) to
# a numeric dtype; cells that cannot be parsed become NaN.
data = pd.read_csv('Housing new one.csv')
data[numeric_columns] = data[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Discard every row that has a NaN in any column.
data = data.dropna()

# Target vector and feature matrix.
y = data['price']
X = data.drop(columns='price')

# Hold out 20% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Numeric columns go through untouched; categorical columns are one-hot
# encoded, and categories not seen during fit are ignored at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

# Chain preprocessing and a 100-tree random forest so that fit/predict
# accept the raw DataFrame directly.
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)

# Train on the training portion only.
model.fit(X_train, y_train)

# Function to get user input for prediction
def get_user_input():
    """Interactively collect one house description and return it as a DataFrame.

    Each attribute is read from standard input with ``input()``. Answers are
    validated as they are entered and the same question is asked again until
    the reply is acceptable:

    * numeric attributes must parse as a finite, non-negative float;
    * categorical attributes are stripped and lower-cased, then must match one
      of the options named in the prompt.

    Validation matters because the downstream encoder is configured with
    ``handle_unknown='ignore'``: an unrecognised label such as ``"Yes"`` would
    otherwise be encoded as all zeros and silently skew the prediction.

    Returns:
        pandas.DataFrame: a single row with the columns ``area``, ``bedrooms``,
        ``bathrooms``, ``stories``, ``mainroad``, ``guestroom``, ``basement``,
        ``hotwaterheating``, ``airconditioning``, ``parking``, ``prefarea`` and
        ``furnishingstatus`` (numeric ones as floats, the rest as strings).

    Raises:
        EOFError: if standard input is exhausted before all answers are given.
    """
    import math  # function-scope import keeps this block self-contained

    # NOTE(review): assumes the training CSV stores these labels in lower case,
    # as the prompts suggest; confirm against the data file.
    yes_no = ('yes', 'no')
    furnishing = ('furnished', 'semi-furnished', 'unfurnished')

    def ask_number(prompt):
        """Ask until the reply parses as a finite, non-negative float."""
        while True:
            reply = input(prompt)
            try:
                value = float(reply)
            except ValueError:
                print(f"  '{reply.strip()}' is not a number; please try again.")
                continue
            # float() also accepts 'nan' and 'inf', which are not usable here.
            if math.isfinite(value) and value >= 0:
                return value
            print("  Please enter a finite, non-negative number.")

    def ask_choice(prompt, allowed):
        """Ask until the normalised reply is one of ``allowed``."""
        while True:
            reply = input(prompt).strip().lower()
            if reply in allowed:
                return reply
            print(f"  Please answer one of: {', '.join(allowed)}.")

    # Dict values are evaluated in key order, so the questions are asked in
    # exactly the order the columns appear below.
    return pd.DataFrame({
        'area': [ask_number("Total Area (sqft): ")],
        'bedrooms': [ask_number("Number of Bedrooms: ")],
        'bathrooms': [ask_number("Number of Bathrooms: ")],
        'stories': [ask_number("Number of Stories: ")],
        'mainroad': [ask_choice("Mainroad (yes/no): ", yes_no)],
        'guestroom': [ask_choice("Guestroom (yes/no): ", yes_no)],
        'basement': [ask_choice("Basement (yes/no): ", yes_no)],
        'hotwaterheating': [ask_choice("Hot Water Heating (yes/no): ", yes_no)],
        'airconditioning': [ask_choice("Air Conditioning (yes/no): ", yes_no)],
        'parking': [ask_number("Number of Parking Spaces: ")],
        'prefarea': [ask_choice("Preferred Area (yes/no): ", yes_no)],
        'furnishingstatus': [ask_choice(
            "Furnishing Status (furnished/semi-furnished/unfurnished): ",
            furnishing,
        )],
    })

# Predict the price for a new house
# get_user_input() prompts on stdin and returns a one-row DataFrame whose
# column names match the feature lists given to the ColumnTransformer.
new_house = get_user_input()

# Use the pipeline's predict method to handle both preprocessing and prediction
predicted_price = model.predict(new_house)
# Only one row was passed in, so the single prediction is element 0. The
# ' .2f' format spec prints two decimals and reserves a leading space where a
# minus sign would go, which is why two spaces follow the colon in the output.
print(f"\nPredicted Price: {predicted_price[0]: .2f}")


Total Area (sqft): 1500
Number of Bedrooms: 2
Number of Bathrooms: 2
Number of Stories: 3
Mainroad (yes/no): yes
Guestroom (yes/no): no
Basement (yes/no): no
Hot Water Heating (yes/no): no
Air Conditioning (yes/no): yes
Number of Parking Spaces: 2
Preferred Area (yes/no): yes
Furnishing Status (furnished/semi-furnished/unfurnished): furnished

Predicted Price:  5095790.00
