# House Price Prediction using Random Forest

In [None]:

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder

# Read both datasets. 'Id' is only a row identifier, so it is removed from the
# feature frames; the test ids are kept aside for the submission file.
train_df = pd.read_csv("train.csv").drop(columns=["Id"])
test_df = pd.read_csv("test.csv")
test_ids = test_df.pop("Id")  # removes the column from test_df and returns it

# Target (y) and feature matrix (X) for training
y = train_df["SalePrice"]
X = train_df.drop(columns="SalePrice")

# Partition the feature columns by dtype: object columns are treated as
# categorical, everything else as numerical.
cat_cols = X.select_dtypes(include="object").columns
num_cols = X.select_dtypes(exclude="object").columns

# Fill missing values: median for numerical columns, most frequent value for
# categorical ones. Each imputer learns its statistics from the training
# features only and is then re-used unchanged on the test features.
num_imputer = SimpleImputer(strategy="median")
cat_imputer = SimpleImputer(strategy="most_frequent")

for imputer, cols in ((num_imputer, num_cols), (cat_imputer, cat_cols)):
    X[cols] = imputer.fit_transform(X[cols])
    test_df[cols] = imputer.transform(test_df[cols])

# Encode categorical variables as integer codes.
# OrdinalEncoder is scikit-learn's encoder for feature columns (LabelEncoder is
# intended for target labels). Like LabelEncoder it numbers each column's
# sorted unique training values 0..n-1, but a category that appears only in the
# test data is mapped to -1 instead of raising ValueError at transform time.
encoder = OrdinalEncoder(
    handle_unknown="use_encoded_value",
    unknown_value=-1,
    dtype=int,
)
# The encoder cannot be fitted on zero columns, so skip the step entirely when
# the data has no categorical features.
if len(cat_cols) > 0:
    X[cat_cols] = encoder.fit_transform(X[cat_cols])
    test_df[cat_cols] = encoder.transform(test_df[cat_cols])

# Hold out 20% of the training rows for validation (fixed seed so the split is
# reproducible).
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree Random Forest on the training portion
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Score the held-out rows and report the Mean Absolute Error
y_pred = model.predict(X_valid)
mae = mean_absolute_error(y_valid, y_pred)
print(f"Mean Absolute Error: {mae}")


In [None]:

# Run the fitted model on the preprocessed test features
test_predictions = model.predict(test_df)

# Pair every test Id with its predicted SalePrice and write the result to
# submission.csv without the DataFrame index.
submission = test_ids.to_frame(name="Id").assign(SalePrice=test_predictions)
submission.to_csv("submission.csv", index=False)
print("Submission file created successfully!")
