In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier

# Load the labelled training data and the unlabelled test data from the
# working directory.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Columns used as model inputs, and the name of the label column.
features = ['date', 'hour', 'bc_price', 'bc_demand', 'ab_price', 'ab_demand', 'transfer']
target = 'bc_price_evo'

X = train[features]

# Encode the target as 0/1. Series.map leaves NaN for every value that is not
# a key of the dict (including labels that are already missing), and those NaN
# labels would otherwise propagate into the stratified split and the model
# fit. Stop here with a message that names the offending values instead.
y = train[target].map({'UP': 1, 'DOWN': 0})
unmapped = y.isna()
if unmapped.any():
    unexpected = train.loc[unmapped, target].unique().tolist()
    raise ValueError(
        f"Column '{target}' contains values other than 'UP'/'DOWN': {unexpected}"
    )

X_test = test[features]

# Hold out 20% of the labelled rows for local validation. The split is
# stratified on y and seeded so that it is repeatable.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

# Standardise the features: the scaler is fitted on the training part only
# and then applied unchanged to the training, validation and test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(part) for part in (X_train, X_val, X_test)
)

# --- Gradient Boosting Classifier configured with the "best params" ---
# The values are hard-coded; presumably they come from an earlier
# hyperparameter search that is not part of this cell (TODO confirm).
gb_params = {
    'n_estimators': 491,
    'learning_rate': 0.10877874303668693,
    'max_depth': 9,
    'min_samples_split': 9,
    'min_samples_leaf': 4,
    'subsample': 0.9396893641976711,
    'random_state': 42,  # fixed seed for the estimator
}
gb_model = GradientBoostingClassifier(**gb_params)

# --- Fit on the scaled training split, then predict the hold-out split ---
# fit() returns the estimator itself, so the prediction can be chained.
y_val_pred = gb_model.fit(X_train_scaled, y_train).predict(X_val_scaled)

# --- Report hold-out accuracy ---
val_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy: {val_acc:.4f}")

# --- Predict the test rows and turn the 0/1 output back into label strings ---
y_test_pred = gb_model.predict(X_test_scaled)
y_test_pred_labels = np.where(y_test_pred != 1, 'DOWN', 'UP')

# --- Assemble the submission: the test ids next to the predicted labels ---
submission = pd.DataFrame({'id': test['id']})
submission['bc_price_evo'] = y_test_pred_labels

# --- Write it out without the index column ---
submission.to_csv('submission.csv', index=False)
print("Submission file saved as submission.csv")

KeyboardInterrupt: 