In [None]:
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

In [None]:
# Start a Spark session for this notebook, or reuse one that already exists.
# NOTE(review): no other cell visible here references `spark`; the data is
# loaded with pandas. Confirm the session is still needed.
spark = (
    SparkSession.builder
    .appName("RegresssionModel")
    .getOrCreate()
)

In [None]:
# Load the house-price dataset from the working directory into a pandas DataFrame.
DATASET_CSV = "house_price_regression_dataset.csv"
df = pd.read_csv(DATASET_CSV)

In [None]:
# Show the loaded DataFrame as this cell's output for a quick visual check.
df

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# --- Data preparation ---------------------------------------------------------
# Feature engineering: replace Year_Built with House_Age (years before
# current_year). This cell rebinds `df`, so on a second execution Year_Built is
# already gone; the guard keeps the cell re-runnable instead of raising KeyError.
current_year = 2025  # fixed reference year so House_Age is reproducible
if 'Year_Built' in df.columns:
    df = (
        df.assign(House_Age=current_year - df['Year_Built'])
          .drop(columns=['Year_Built'])
    )
elif 'House_Age' not in df.columns:
    raise KeyError("df must contain either 'Year_Built' or 'House_Age'")

# Rows without a target cannot supervise training, so drop them rather than
# inventing a median price for them.
labelled = df.dropna(subset=['House_Price'])

# Split features and target
X = labelled.drop(columns=['House_Price'])
y = labelled['House_Price']

# Train-test split (fixed random_state gives the same partition on every run)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Impute missing feature values with medians learned from the training rows
# only, so no statistic of the test rows leaks into preprocessing.
# numeric_only=True is required because DataFrame.median() raises TypeError on
# non-numeric columns in pandas >= 2.0.
train_medians = X_train.median(numeric_only=True)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Scale numerical features: fit on the training rows, reuse the same
# mean/variance for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# that weight initialisation and training are repeatable across runs (some
# hardware ops may still be nondeterministic). This seeds the global RNGs.
tf.keras.utils.set_random_seed(42)

# Build TensorFlow Keras model: two ReLU hidden layers (64 and 32 units)
# feeding a single linear unit that outputs the predicted price.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train_scaled.shape[1],)),  # one input per feature column
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Regression output
])

# Compile the model: optimise mean squared error with Adam and also report
# mean absolute error.
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

# Fit the network on the scaled training features. A fifth of the training
# data is reserved for validation, and `history` keeps the per-epoch metrics.
training_options = {
    "validation_split": 0.2,
    "epochs": 100,
    "batch_size": 32,
    "verbose": 1,
}
history = model.fit(X_train_scaled, y_train, **training_options)

# Score the held-out test set; evaluate() yields the compiled loss (MSE)
# followed by the compiled metric (MAE).
test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=1)
print(f"Test MAE: {test_mae:.2f}")


In [None]:
# Display the (unscaled) training feature frame as this cell's output.
X_train

In [None]:
# Attach the model's predictions and the true prices to the training features.
# The network ends in a single-unit layer, so predict() returns an (n, 1)
# array; ravel() flattens it to one value per row.
# assign() builds a new frame instead of writing into the frame returned by
# train_test_split, which can trigger pandas' SettingWithCopyWarning.
X_train = X_train.assign(
    prediction=model.predict(X_train_scaled).ravel(),
    actual=y_train,  # aligned to the rows by index
)

In [None]:
# Write the training frame to CSV, leaving out the row index.
X_train.to_csv(path_or_buf="train_predictions.csv", index=False)

In [None]:
# Attach the model's predictions and the true prices to the test features.
# The network ends in a single-unit layer, so predict() returns an (n, 1)
# array; ravel() flattens it to one value per row.
# assign() builds a new frame instead of writing into the frame returned by
# train_test_split, which can trigger pandas' SettingWithCopyWarning.
X_test = X_test.assign(
    prediction=model.predict(X_test_scaled).ravel(),
    actual=y_test,  # aligned to the rows by index
)

# Write the test frame to CSV, leaving out the row index.
X_test.to_csv("test_predictions.csv", index=False)