In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt

# Load the dataset
train_data = pd.read_csv("C:/Users/admin/Documents/ADS_Assignments/train.csv")
test_data = pd.read_csv("C:/Users/admin/Documents/ADS_Assignments/test.csv")

# Preprocess the data
train_data.fillna(-1, inplace=True)
test_data.fillna(-1, inplace=True)

# Convert categorical features to strings
cat_cols = train_data.select_dtypes(include=['object']).columns
train_data[cat_cols] = train_data[cat_cols].astype(str)
test_data[cat_cols] = test_data[cat_cols].astype(str)

# Encode categorical features
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
encoded_features_train = one_hot_encoder.fit_transform(train_data[cat_cols]).toarray()
encoded_features_test = one_hot_encoder.transform(test_data[cat_cols]).toarray()

# Combine encoded features with numeric features
numeric_cols = train_data.select_dtypes(include=['int64', 'float64']).drop(['Id', 'SalePrice'], axis=1).columns
X_train = pd.concat([pd.DataFrame(encoded_features_train), train_data[numeric_cols].reset_index(drop=True)], axis=1)
X_test = pd.concat([pd.DataFrame(encoded_features_test), test_data[numeric_cols].reset_index(drop=True)], axis=1)
y_train = train_data['SalePrice']

# Convert feature names to strings
X_train.columns = X_train.columns.astype(str)
X_test.columns = X_test.columns.astype(str)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Create the model
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(1, activation="linear"))
model.compile(loss="mean_squared_error", optimizer="adam")

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model using cross-validation
cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
cv_rmse_scores = np.sqrt(-cv_scores)
print(f"Cross-validated RMSE: {np.mean(cv_rmse_scores)}")

# Evaluate the model using a separate validation set
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)
y_val_pred = model.predict(X_val)

mse = mean_squared_error(y_val, y_val_pred)
mae = mean_absolute_error(y_val, y_val_pred)
r2 = r2_score(y_val, y_val_pred)

print(f"Validation Mean Squared Error: {mse}, Mean Absolute Error: {mae}, R-squared: {r2}")

#Visualize training loss over epochs
history = model.history.history
plt.plot(history['loss'])
plt.title('Training Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Mean Squared Error')
plt.show()






TypeError: Cannot clone object '<keras.src.engine.sequential.Sequential object at 0x0000024FCE7BF890>' (type <class 'keras.src.engine.sequential.Sequential'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.