In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint



In [None]:
# Load the training data; 'price_doc' is the regression target.
df = pd.read_csv('train.csv')

# Separate the predictors from the target.
X = df.drop('price_doc', axis=1)
# The network is trained on log1p(price); predictions must be mapped back
# to the price scale with np.expm1 before they are reported.
y = np.log1p(df['price_doc'])

# Object-dtype columns are one-hot encoded, all remaining columns are standardised.
categorical_features = X.select_dtypes(include=['object']).columns
numeric_features = X.select_dtypes(exclude=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])


def _as_dense(matrix):
    """Return `matrix` as a dense ndarray.

    ColumnTransformer returns a SciPy sparse matrix or a plain ndarray
    depending on the density of its stacked output, so `.toarray()` cannot
    be called unconditionally.
    """
    return matrix.toarray() if hasattr(matrix, 'toarray') else np.asarray(matrix)


# Split the raw rows *before* fitting the preprocessor so that the scaling
# statistics and the one-hot vocabulary are learned from training rows only
# (fitting on all rows leaks validation information into the features).
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = _as_dense(preprocessor.fit_transform(X_train_raw))
# Categories that appear only in the validation rows are encoded as all
# zeros because the encoder uses handle_unknown='ignore'.
X_val = _as_dense(preprocessor.transform(X_val_raw))

# Fail fast: NaN/inf features would propagate through the dense layers and
# turn the training loss into NaN without any other error being raised.
assert np.isfinite(X_train).all() and np.isfinite(X_val).all(), (
    'Processed features contain NaN/inf; impute or drop missing values before training.'
)


In [None]:
# Keras does not shuffle tf.data.Dataset inputs itself, so shuffle the
# training rows explicitly; otherwise every epoch sees the same batches in
# the same order. The buffer spans the whole training set for a full shuffle.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=len(X_train), seed=42)
    .batch(32)
)
# Validation order must stay fixed: predictions made from this dataset are
# compared positionally against y_val later on.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)

In [None]:
# Fully connected regression network: 256 -> 128 -> 64 -> 1.
# The first two hidden layers carry an L2 weight penalty and are each
# followed by dropout; the final layer is a single unit with no activation.
n_features = X_train.shape[1]

model = tf.keras.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=[n_features],
                       kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(128, activation='relu',
                       kernel_regularizer=regularizers.l2(0.001)))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))

In [None]:
# RMSprop with a 1e-3 learning rate, minimising mean squared error
# (computed on the log1p-transformed target the model is fitted to).
optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-3)
model.compile(loss='mean_squared_error', optimizer=optimizer)

In [None]:
# Stop once val_loss has not improved by at least 0.001 for 20 epochs and
# roll the in-memory model back to its best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=20, min_delta=0.001, restore_best_weights=True)

# Keras 3 only accepts whole-model checkpoint paths ending in '.keras' (or
# '.h5'); an extension-less path is rejected by ModelCheckpoint. The
# '.keras' suffix is also understood by earlier tf.keras releases.
checkpoint_filepath = '/tmp/checkpoint.keras'
model_checkpoint = ModelCheckpoint(filepath=checkpoint_filepath, save_best_only=True, monitor='val_loss', mode='min')

# Train for at most 200 epochs; the callbacks above end training early and
# keep the best-scoring model on disk.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=200,
    callbacks=[early_stopping, model_checkpoint]
)

In [None]:
# Reload the best checkpoint written during training and score it on the
# validation split.
best_model = tf.keras.models.load_model(checkpoint_filepath)
y_val_pred = best_model.predict(val_dataset)

# Targets and predictions are on the log1p scale; undo the transform so
# the RMSE is expressed in the original price units.
actual_prices = np.expm1(y_val)
predicted_prices = np.expm1(y_val_pred)
rmse = np.sqrt(mean_squared_error(actual_prices, predicted_prices))
print(f'Validation RMSE: {rmse}')


In [None]:
# Load test data
# NOTE(review): '/path/to/your/test.csv' is a placeholder path — point it at
# the real test file before running.
test_df = pd.read_csv('/path/to/your/test.csv')

# Apply the preprocessing fitted on the training data.
# NOTE(review): this assumes the training frame had no 'row ID' column; if
# it did, the fitted transformer will expect that column here — confirm.
X_test = preprocessor.transform(test_df.drop(['row ID'], axis=1))
# The transformer may return either a sparse matrix or a dense ndarray, so
# only densify when the result actually is sparse.
if hasattr(X_test, 'toarray'):
    X_test = X_test.toarray()
test_dataset = tf.data.Dataset.from_tensor_slices(X_test).batch(32)

# Predict on test data using the best model (outputs are on the log1p scale).
y_test_pred = best_model.predict(test_dataset)

# Prepare submission file.
# The model was trained on log1p(price_doc), so predictions are mapped back
# to prices with expm1 — the same inverse used for the validation RMSE.
submission = pd.DataFrame({
    'row ID': test_df['row ID'],
    'price_doc': np.expm1(y_test_pred).flatten()
})
submission.to_csv('neural_network_submission.csv', index=False)


