In [None]:
import pandas as pd

# Load the housing dataset into a DataFrame. The file name is relative, so
# 'Housing.csv' must sit in the notebook's working directory.
df = pd.read_csv('Housing.csv')

In [None]:
# Preview the first rows of the raw data to check that the load worked.
df.head()

In [None]:
# Per-column count of null entries in the raw frame.
missing_values = df.isna().sum()

# Show the counts as the cell output.
missing_values


In [None]:
# List the distinct categories present in the 'furnishingstatus' column.
df['furnishingstatus'].unique()

In [None]:
# Count how many rows fall into each 'furnishingstatus' category.
df['furnishingstatus'].value_counts()

In [None]:
# Work on a copy so that the preprocessing applied to `df2` in later cells
# leaves the raw frame `df` unchanged.
df2 = df.copy()

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Two-valued categorical columns to turn into integer codes
# (presumably yes/no flags -- confirm against the dataset).
binary_columns = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
encoder = LabelEncoder()

# Re-fit the same encoder on each column in turn and swap the column for its codes.
# After this statement `encoder` holds the fit for the last column in the list.
df2 = df2.assign(**{name: encoder.fit_transform(df2[name]) for name in binary_columns})


In [None]:
# Inspect the frame after the binary columns have been label-encoded.
df2.head()

In [None]:
# One-hot encode 'furnishingstatus'; drop_first=True omits the first category's dummy column,
# so that category becomes the implicit baseline.
# dtype=int forces 0/1 integer dummies. pandas >= 2.0 defaults to bool dummies, and a frame
# that mixes bool with numeric columns converts to an object array, which Keras cannot
# turn into a tensor when the network is trained later in the notebook.
df2 = pd.get_dummies(df2, columns=['furnishingstatus'], drop_first=True, dtype=int)

In [None]:
# Inspect the frame after 'furnishingstatus' has been replaced by dummy columns.
df2.head()

In [None]:
# Standardize the count/size columns with a StandardScaler.
# NOTE(review): the scaler is fit on every row of df2, i.e. before the train/test split
# done in a later cell, so test rows contribute to the scaling statistics.
numeric_features = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
scaler = StandardScaler().fit(df2[numeric_features])
df2[numeric_features] = scaler.transform(df2[numeric_features])

In [None]:
# Inspect the frame after the numeric features have been standardized.
df2.head()

In [None]:
from sklearn.model_selection import train_test_split

# Target: the 'price' column. Features: every remaining column of df2.
y = df2['price']
X = df2.drop(columns=['price'])


In [None]:
# Preview the feature matrix (all columns except 'price').
X.head()

In [None]:
# Preview the target series ('price').
y.head()

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Shapes of the four pieces, in the order train/test features then train/test target.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Create a linear regression model with scikit-learn's default settings.
# NOTE: a later cell rebinds the name `model` to a Keras network.
model = LinearRegression()


In [None]:
# Fit the linear model on the training split only; the test split stays unseen.
model.fit(X_train, y_train)

In [None]:
# Predict prices for the held-out test features with the fitted linear model.
y_pred = model.predict(X_test)

In [None]:
# Score the linear model's test-set predictions: mean absolute error,
# mean squared error and the R^2 coefficient of determination.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r_squared = r2_score(y_true=y_test, y_pred=y_pred)

# Cell output: the three scores as one tuple.
mae, mse, r_squared

In [None]:
# Side-by-side table of true test prices and the linear model's predictions,
# indexed like y_test.
actual_vs_predicted = (
    y_test.to_frame(name='Actual Price')
    .assign(**{'Predicted Price': y_pred})
)

actual_vs_predicted

In [None]:
# Convert the predicted prices to whole numbers.
# Round first: a bare astype(int) truncates toward zero, so e.g. 4999999.9 would
# become 4999999 instead of 5000000.
actual_vs_predicted['Predicted Price'] = (
    actual_vs_predicted['Predicted Price'].round().astype(int)
)

# Display the first rows of the updated comparison table.
actual_vs_predicted.head()


In [None]:
# Pair each feature name with its fitted linear-regression weight and rank
# them from largest to smallest coefficient.
# NOTE: relies on `model` still being the LinearRegression estimator; a later cell
# rebinds `model` to a Keras network, after which `model.coef_` is unavailable.
coefficients = pd.DataFrame(
    list(zip(X.columns, model.coef_)),
    columns=['Feature', 'Coefficient'],
).sort_values(by='Coefficient', ascending=False)

# Show the ranked table.
coefficients


In [None]:
import matplotlib.pyplot as plt

# 2x2 overview figure: the price distribution plus price plotted against three
# features (which are standardized at this point in the notebook).
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Top-left panel: histogram of the target, with axis labels so the panel
# can be read on its own.
hist_ax = axes[0, 0]
hist_ax.hist(df2['price'], bins=20, color='green', alpha=0.7)
hist_ax.set_title('Distribution of House Prices')
hist_ax.set_xlabel('Price')
hist_ax.set_ylabel('Number of Houses')

# Remaining panels: one scatter plot per feature, driven by a table instead of
# three copy-pasted blocks. Each entry is (axes, column, title, x-axis label).
scatter_panels = [
    (axes[0, 1], 'area', 'House Price vs Area', 'Area (Standardized)'),
    (axes[1, 0], 'bathrooms', 'House Price vs Number of Bathrooms', 'Bathrooms (Standardized)'),
    (axes[1, 1], 'stories', 'House Price vs Number of Stories', 'Stories (Standardized)'),
]
for ax, feature, title, xlabel in scatter_panels:
    ax.scatter(df2[feature], df2['price'], alpha=0.5)
    ax.set(title=title, xlabel=xlabel, ylabel='Price')

fig.tight_layout()
plt.show()


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlations between all columns of the preprocessed frame.
correlation_matrix = df2.corr()

# Draw the matrix as an annotated heatmap on an explicitly created Axes.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=heatmap_ax)
heatmap_ax.set_title("Correlation Matrix")
plt.show()


In [None]:
import tensorflow as tf
from tensorflow import keras

# Build and train a small feed-forward regression network on the training split.
# NOTE: this rebinds `model`, which earlier cells bound to the fitted LinearRegression;
# cells that read `model.coef_` must not be re-run after this one.

# Seed Python, NumPy and the backend so that weight initialisation and batch shuffling
# are repeatable across Restart & Run All.
keras.utils.set_random_seed(42)

# Keras needs a homogeneous numeric array. With pandas >= 2.0 the dummy columns are bool,
# and a frame mixing bool with numeric columns converts to an object array, so cast
# explicitly instead of relying on the frame's dtypes.
X_train_nn = X_train.to_numpy(dtype='float32')
y_train_nn = y_train.to_numpy(dtype='float32')

model = keras.Sequential([
    keras.Input(shape=(X_train_nn.shape[1],)),  # explicit Input layer instead of the `input_dim` argument, which Keras 3 warns against
    keras.layers.Dense(64, activation='relu'),  # Hidden layer with 64 neurons
    keras.layers.Dense(32, activation='relu'),  # Hidden layer with 32 neurons
    keras.layers.Dense(1),  # Output layer with 1 neuron (for regression)
])

# Mean squared error is the training loss; mean absolute error is tracked as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train; the last 20% of the training rows are held out by Keras for validation.
history = model.fit(X_train_nn, y_train_nn, epochs=500, batch_size=8, validation_split=0.2, verbose=1)

In [None]:
# Cast the held-out features to a float32 array once and reuse it below. A frame that
# mixes bool dummy columns (pandas >= 2.0) with numeric columns would otherwise convert
# to an object array that Keras cannot turn into a tensor.
X_test_nn = X_test.to_numpy(dtype='float32')

# Evaluate the network on the test set; evaluate() returns the loss followed by
# the compiled metric (MAE).
test_loss, test_mae = model.evaluate(X_test_nn, y_test)
print("\nTest Mean Absolute Error:", test_mae)

# Predict test-set prices; the output has one column, flattened here for printing.
predictions = model.predict(X_test_nn)
print("\nPredicted Prices:", predictions.flatten())

In [None]:
# Rebuild the comparison table, this time pairing the true test prices with the
# neural network's (flattened) predictions.
nn_predicted = predictions.flatten()
actual_vs_predicted = pd.DataFrame({'Actual Price': y_test}).assign(
    **{'Predicted Price': nn_predicted}
)

In [None]:
# Show the first rows of the actual-vs-predicted comparison for the neural network.
actual_vs_predicted.head()