In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf

# Read the housing table; pandas infers the zip compression from the file suffix.
housing_data = pd.read_csv('housing.csv.zip')

# Discard every row that contains at least one NaN. The result is a new frame,
# so the raw `housing_data` stays available for inspection.
housing_data_cleaned = housing_data.dropna()

# Model inputs: eight numeric columns followed by the single categorical one.
feature_columns = [
    'longitude',
    'latitude',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
    'ocean_proximity',
]
# Regression target.
target_column = 'median_house_value'

X = housing_data_cleaned[feature_columns]
y = housing_data_cleaned[target_column]

# Column groups: each group is routed to its own transformer below.
numeric_features = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                    'total_bedrooms', 'population', 'households', 'median_income']
categorical_features = ['ocean_proximity']

# Numeric columns are standardised; the categorical column is one-hot encoded.
numeric_transformer = StandardScaler()
# handle_unknown='ignore': a category that was not present when the encoder was
# fitted is encoded as an all-zero row at transform time instead of raising.
# Without it, a rare category missing from the training split (or a new value at
# inference time) aborts `transform`.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    # Always return a dense array. With the default threshold the output type
    # depends on the overall density of the stacked result, which would change
    # if the number of categories grew; the Keras model below is fed dense input.
    sparse_threshold=0,
)

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the transformers on the training split only, then reuse the fitted
# statistics/categories on the test split so no test information leaks into them.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Width of the preprocessed feature matrix (scaled numerics + one-hot columns).
n_features = X_train_processed.shape[1]

# Small fully connected regressor: two ReLU hidden layers and one linear output unit.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` argument on the first Dense layer, which current Keras deprecates.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1),  # Output layer for regression
])

# Optimise mean squared error; also report mean absolute error as a metric.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Train for 50 epochs in batches of 32; validation_split=0.2 makes Keras hold out
# part of the training arrays for per-epoch validation.
model.fit(X_train_processed, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Score on the untouched test split; evaluate returns the loss followed by the compiled metric.
test_loss, test_mae = model.evaluate(X_test_processed, y_test)
print(f"Test MAE: {test_mae}")


In [None]:
import pickle

# Persist the preprocessor so the same scaling and encoding can be reapplied
# outside this session. It is expected to have been fitted on the training data
# before this cell runs.
# Note: only unpickle this file from a trusted location; loading a pickle executes code.
with open('preprocessor.pkl', 'wb') as pkl_file:
    pickle.dump(preprocessor, pkl_file)


In [None]:
# One example listing, expressed as a single record. The keys are the same
# feature columns that are passed to the preprocessor during training.
new_house_record = {
    'longitude': -122.23,
    'latitude': 37.88,
    'housing_median_age': 41,
    'total_rooms': 880,
    'total_bedrooms': 129,
    'population': 322,
    'households': 126,
    'median_income': 8.3252,
    'ocean_proximity': 'NEAR BAY',  # categorical value
}
# Wrap the record in a one-row DataFrame so the column-based preprocessor can select by name.
new_house_data = pd.DataFrame([new_house_record])

# Apply the already-fitted preprocessor (transform only - never refit on new data).
new_house_processed = preprocessor.transform(new_house_data)

# The model returns an array with one row per input and one output column.
predicted_price = model.predict(new_house_processed)
print(f"Predicted House Price: {predicted_price[0][0]}")


In [None]:
# Write the trained network to disk; the `.h5` suffix selects Keras's HDF5 file format.
model.save(filepath='house_price_model.h5')

In [None]:
# Restore the saved network from disk for inference.
# compile=False skips rebuilding the optimizer, loss and metrics from the file:
# prediction does not need them, and restoring the training configuration from a
# legacy HDF5 file is a common point of failure across Keras versions.
# Call `loaded_model.compile(...)` first if the model is to be trained or evaluated again.
loaded_model = tf.keras.models.load_model('house_price_model.h5', compile=False)

# Predict with the restored model on the example row that was preprocessed earlier.
predicted_price = loaded_model.predict(new_house_processed)
print(f"Predicted House Price: {predicted_price[0][0]}")
