In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras import layers


In [None]:
# Load the training data.
df = pd.read_csv('train.csv')

# Separate features from the target. The submission cell treats 'id' as a row
# identifier and removes it from the test features, so it is removed here as
# well (when present) to keep the train and test column sets aligned.
X = df.drop(columns='price_doc').drop(columns='id', errors='ignore')

# The model is trained on log1p(price); predictions therefore have to be
# mapped back with np.expm1 before they are reported or submitted.
y = np.log1p(df['price_doc'])

# Column groups: object-dtype columns are treated as categorical, every other
# dtype as numeric.
categorical_features = X.select_dtypes(include=['object']).columns
numeric_features = X.select_dtypes(exclude=['object']).columns

# Preprocessing.
# * Missing values are imputed first: StandardScaler keeps NaNs in its output,
#   and a single NaN input is enough to turn the network's loss into NaN.
# * sparse_threshold=0 forces a dense ndarray. With the default threshold the
#   one-hot block can make the stacked result a scipy sparse matrix, which
#   tf.data.Dataset.from_tensor_slices cannot consume.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline([
            ('impute', SimpleImputer(strategy='median')),
            ('scale', StandardScaler()),
        ]), numeric_features),
        ('cat', Pipeline([
            ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
            ('encode', OneHotEncoder(handle_unknown='ignore')),
        ]), categorical_features),
    ],
    sparse_threshold=0,
)

# Split BEFORE fitting so that imputation values, scaling statistics and the
# category vocabulary are learned from the training rows only (no leakage of
# validation data into the preprocessing).
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = preprocessor.fit_transform(X_train_raw)
X_val = preprocessor.transform(X_val_raw)


In [None]:
# Convert to tf.data.Dataset
BATCH_SIZE = 32

# Keras ignores the `shuffle` argument of model.fit when it is given a
# Dataset, so the training examples are reshuffled here before batching;
# otherwise every epoch would see identical batches in identical order.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=X_train.shape[0], seed=42)
    .batch(BATCH_SIZE)
)

# The validation set is deliberately NOT shuffled: the evaluation cell compares
# model.predict(val_dataset) against y_val row by row.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(BATCH_SIZE)


In [None]:
# Define the model
# Seed TensorFlow's global generator so that a fresh Restart & Run All starts
# from the same initial weights.
tf.random.set_seed(42)

# Fully connected regression network: three ReLU hidden layers (128 -> 64 -> 32)
# with dropout after the first two, and a single linear output unit.
# The input width is declared with an explicit Input object rather than the
# `input_shape` argument on the first Dense layer, which recent Keras versions
# warn against.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])


In [None]:
# Configure training: Adam optimizer, mean squared error as the loss.
# The target was log1p-transformed earlier in the notebook, so this loss is
# measured on the log scale rather than in raw price units.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)


In [None]:
# Early stopping: give up after 10 epochs without an improvement of at least
# 0.001, then roll the model back to the best weights seen during training.
# No `monitor` is passed, so the callback's default quantity is used
# (val_loss per the Keras docs - confirm for your version).
stopping_options = dict(
    patience=10,
    min_delta=0.001,
    restore_best_weights=True,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**stopping_options)

# Fit for at most 100 epochs, scoring on the validation set after each one.
fit_callbacks = [early_stopping]
history = model.fit(
    train_dataset,
    epochs=100,
    validation_data=val_dataset,
    callbacks=fit_callbacks,
)


In [None]:
# Score the model on the validation set.
y_val_pred = model.predict(val_dataset)

# Both the targets and the predictions are on the log1p scale; undo the
# transform so the RMSE is computed on the original scale of the target.
actual_prices = np.expm1(y_val)
predicted_prices = np.expm1(y_val_pred)
rmse = np.sqrt(mean_squared_error(actual_prices, predicted_prices))
print(f'Validation RMSE: {rmse}')


In [None]:
# Load test data. The file is expected next to train.csv; adjust the path if
# your data lives elsewhere.
test_df = pd.read_csv('test.csv')

# Preprocess test data. Select exactly the feature columns used for training
# (X.columns) so the fitted preprocessor receives the schema it was fitted on,
# whether or not 'id' was part of the training features.
X_test = preprocessor.transform(test_df[X.columns])

# Convert test data to tf.data.Dataset (unshuffled, so that the prediction
# order matches the row order of test_df).
test_dataset = tf.data.Dataset.from_tensor_slices(X_test).batch(32)

# Predict on test data. The model was trained on log1p(price_doc), so these
# raw outputs are on the log scale.
y_test_pred = model.predict(test_dataset)

# Prepare submission file. np.expm1 reverts the log1p transform so that
# 'price_doc' is written in original price units, mirroring what the
# validation cell does before computing RMSE.
submission = pd.DataFrame({
    'id': test_df['id'],  # Replace 'id' as per your dataset
    'price_doc': np.expm1(y_test_pred.flatten())
})
submission.to_csv('neural_network_submission.csv', index=False)
