In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier

# =============================
# 1. Load and Prepare Data
# =============================

# Read the labelled training rows and the unlabelled rows to be predicted.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Model inputs and the column holding the UP/DOWN label.
features = ['date', 'hour', 'bc_price', 'bc_demand', 'ab_price', 'ab_demand', 'transfer']
target = 'bc_price_evo'

X = train[features]
y = train[target].map({'UP': 1, 'DOWN': 0})  # Encode UP/DOWN to 1/0
X_test = test[features]

# Series.map turns every value that is missing from the mapping (typos, other
# casing, blanks) into NaN without complaint. Stop here with a clear message
# instead of letting NaN labels reach the stratified split and the model.
unmapped_labels = train.loc[y.isna(), target].unique().tolist()
if unmapped_labels:
    raise ValueError(
        f"Column '{target}' contains values other than 'UP'/'DOWN': {unmapped_labels}"
    )

# Hold out 20% of the labelled rows for validation; stratify=y keeps the
# UP/DOWN ratio the same in both parts and random_state fixes the shuffle.
# NOTE(review): the split is random although 'date' and 'hour' are features;
# if the rows are time-ordered a chronological hold-out may be a more honest
# estimate — confirm.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# =============================
# 2. Feature Scaling
# =============================

# Learn the per-column mean/std from the training split only; validation and
# test rows are transformed with those same statistics.
scaler = StandardScaler().fit(X_train)

# Wrap each scaled array back into a DataFrame so column names and row index
# survive the transformation.
X_train_scaled, X_val_scaled, X_test_scaled = (
    pd.DataFrame(scaler.transform(frame), columns=frame.columns, index=frame.index)
    for frame in (X_train, X_val, X_test)
)

# =============================
# 3. Define Gradient Boosting Model
# =============================

# Hyperparameters for the gradient-boosting classifier, collected in one place.
# NOTE(review): the high-precision values look like the output of an automated
# search — confirm where they came from.
gb_params = {
    'n_estimators': 972,
    'learning_rate': 0.12489462263598237,
    'max_depth': 9,
    'min_samples_split': 4,
    'min_samples_leaf': 8,
    'subsample': 0.9002007699061254,
    'random_state': 42,
}
gb_model = GradientBoostingClassifier(**gb_params)

# =============================
# 4. Train & Evaluate
# =============================

# fit() returns the fitted estimator, so training and validation prediction
# can be chained.
y_val_pred = gb_model.fit(X_train_scaled, y_train).predict(X_val_scaled)

# Share of validation rows whose predicted class equals the true class.
val_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy (Gradient Boosting): {val_acc:.4f}")

# =============================
# 5. Predict on Test Set
# =============================

y_test_pred = gb_model.predict(X_test_scaled)

# Decode the 0/1 predictions back to text: position 1 -> 'UP', anything
# else -> 'DOWN'.
label_lookup = np.array(['DOWN', 'UP'])
y_test_pred_labels = label_lookup[(y_test_pred == 1).astype(int)]

# =============================
# 6. Create Submission File
# =============================

# One row per test id with its predicted direction.
submission_columns = {
    'id': test['id'],
    'bc_price_evo': y_test_pred_labels,
}
submission = pd.DataFrame(submission_columns)

# Written without the DataFrame index; an existing submission.csv is replaced.
submission.to_csv("submission.csv", index=False)
print("✅ Submission file saved as submission.csv")


Validation Accuracy (Gradient Boosting): 0.9272
✅ Submission file saved as submission.csv


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import VotingClassifier, RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

# =============================
# 1. Load and Prepare Data
# =============================

# Read the labelled training rows and the unlabelled rows to be predicted.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Model inputs and the column holding the UP/DOWN label.
features = ['date', 'hour', 'bc_price', 'bc_demand', 'ab_price', 'ab_demand', 'transfer']
target = 'bc_price_evo'

X = train[features]
y = train[target].map({'UP': 1, 'DOWN': 0})  # Encode UP/DOWN to 1/0
X_test = test[features]

# Series.map turns every value that is missing from the mapping (typos, other
# casing, blanks) into NaN without complaint. Stop here with a clear message
# instead of letting NaN labels reach the stratified split and the ensemble.
unmapped_labels = train.loc[y.isna(), target].unique().tolist()
if unmapped_labels:
    raise ValueError(
        f"Column '{target}' contains values other than 'UP'/'DOWN': {unmapped_labels}"
    )

# Hold out 20% of the labelled rows for validation; stratify=y keeps the
# UP/DOWN ratio the same in both parts and random_state fixes the shuffle.
# NOTE(review): the split is random although 'date' and 'hour' are features;
# if the rows are time-ordered a chronological hold-out may be a more honest
# estimate — confirm.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# =============================
# 2. Feature Scaling (keep names)
# =============================

# Standardize using statistics learned from the training split only.
scaler = StandardScaler()
scaler.fit(X_train)

# Rebuild DataFrames around the scaled arrays so feature names and the
# original row index are kept.
X_train_scaled, X_val_scaled, X_test_scaled = [
    pd.DataFrame(scaler.transform(part), index=part.index, columns=part.columns)
    for part in (X_train, X_val, X_test)
]

# =============================
# 3. Define Models
# =============================

# Hyperparameters for the three ensemble members, one dict per model.
# NOTE(review): the high-precision values look like the output of an automated
# search — confirm where they came from.

# Random forest (n_jobs=-1: use all available cores).
rf_params = {
    'n_estimators': 445,
    'max_depth': 19,
    'max_features': 0.9744427686266666,
    'min_samples_split': 3,
    'min_samples_leaf': 2,
    'random_state': 42,
    'n_jobs': -1,
}
rf_model = RandomForestClassifier(**rf_params)

# XGBoost, evaluated with log-loss.
xgb_params = {
    'n_estimators': 499,
    'learning_rate': 0.13999278615555305,
    'max_depth': 9,
    'gamma': 0.3193180715867101,
    'subsample': 0.954646859580264,
    'colsample_bytree': 0.8006065178748798,
    'reg_lambda': 1.5076853103939694,
    'eval_metric': 'logloss',
    'random_state': 42,
    'n_jobs': -1,
}
xgb_model = XGBClassifier(**xgb_params)

# scikit-learn gradient boosting.
gb_params = {
    'n_estimators': 972,
    'learning_rate': 0.12489462263598237,
    'max_depth': 9,
    'min_samples_split': 4,
    'min_samples_leaf': 8,
    'subsample': 0.9002007699061254,
    'random_state': 42,
}
gb_model = GradientBoostingClassifier(**gb_params)

# =============================
# 4. Combine in Voting Classifier
# =============================

# (name, estimator) pairs that take part in the vote.
ensemble_members = [
    ('rf', rf_model),
    ('xgb', xgb_model),
    ('gb', gb_model),
]

# voting='soft' combines the members' predicted probabilities rather than
# counting their hard class votes.
voting_model = VotingClassifier(
    estimators=ensemble_members,
    voting='soft',
    n_jobs=-1,
)

# =============================
# 5. Train & Evaluate
# =============================

# fit() returns the fitted ensemble, so training and validation prediction
# can be chained.
y_val_pred = voting_model.fit(X_train_scaled, y_train).predict(X_val_scaled)

# Share of validation rows whose predicted class equals the true class.
val_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy: {val_acc:.4f}")

# =============================
# 6. Predict on Test Set
# =============================

y_test_pred = voting_model.predict(X_test_scaled)

# Decode the 0/1 predictions back to text: 1 -> 'UP', anything else -> 'DOWN'.
predicted_up = y_test_pred == 1
y_test_pred_labels = np.where(predicted_up, 'UP', 'DOWN')

# =============================
# 7. Create Submission File
# =============================

# One row per test id with its predicted direction.
submission_columns = {
    'id': test['id'],
    'bc_price_evo': y_test_pred_labels,
}
submission = pd.DataFrame(submission_columns)

# Written without the DataFrame index; an existing submission.csv is replaced.
submission.to_csv("submission.csv", index=False)
print("✅ Submission file saved as submission.csv")


Validation Accuracy: 0.9161
✅ Submission file saved as submission.csv


In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import (
    StackingClassifier,
    RandomForestClassifier,
    GradientBoostingClassifier
)
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

# =============================
# 1. Load and Prepare Data
# =============================

# Read the labelled training rows and the unlabelled rows to be predicted.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Model inputs and the column holding the UP/DOWN label.
features = ['date', 'hour', 'bc_price', 'bc_demand', 'ab_price', 'ab_demand', 'transfer']
target = 'bc_price_evo'

X = train[features]
y = train[target].map({'UP': 1, 'DOWN': 0})  # Encode UP/DOWN to 1/0
X_test = test[features]

# Series.map turns every value that is missing from the mapping (typos, other
# casing, blanks) into NaN without complaint. Stop here with a clear message
# instead of letting NaN labels reach the stratified split and the stack.
unmapped_labels = train.loc[y.isna(), target].unique().tolist()
if unmapped_labels:
    raise ValueError(
        f"Column '{target}' contains values other than 'UP'/'DOWN': {unmapped_labels}"
    )

# Hold out 20% of the labelled rows for validation; stratify=y keeps the
# UP/DOWN ratio the same in both parts and random_state fixes the shuffle.
# NOTE(review): the split is random although 'date' and 'hour' are features;
# if the rows are time-ordered a chronological hold-out may be a more honest
# estimate — confirm.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# =============================
# 2. Feature Scaling (keep names)
# =============================

scaler = StandardScaler()

# Training split: learn mean/std and scale in one step.
train_scaled_values = scaler.fit_transform(X_train)
X_train_scaled = pd.DataFrame(
    train_scaled_values, index=X_train.index, columns=X_train.columns
)

# Validation and test splits reuse the statistics learned above.
val_scaled_values = scaler.transform(X_val)
X_val_scaled = pd.DataFrame(
    val_scaled_values, index=X_val.index, columns=X_val.columns
)

test_scaled_values = scaler.transform(X_test)
X_test_scaled = pd.DataFrame(
    test_scaled_values, index=X_test.index, columns=X_test.columns
)

# =============================
# 3. Define Base Models
# =============================

# Hyperparameters for the three first-level learners, one dict per model.
# NOTE(review): the high-precision values look like the output of an automated
# search — confirm where they came from.

# Random forest (n_jobs=-1: use all available cores).
rf_params = {
    'n_estimators': 445,
    'max_depth': 19,
    'max_features': 0.9744427686266666,
    'min_samples_split': 3,
    'min_samples_leaf': 2,
    'random_state': 42,
    'n_jobs': -1,
}
rf_model = RandomForestClassifier(**rf_params)

# XGBoost, evaluated with log-loss.
xgb_params = {
    'n_estimators': 537,
    'learning_rate': 0.0906017446075922,
    'max_depth': 9,
    'min_child_weight': 2,
    'gamma': 0.02344823389560996,
    'subsample': 0.940872600127416,
    'colsample_bytree': 0.9027996528688915,
    'reg_alpha': 0.7723183917356393,
    'reg_lambda': 1.5403270022239868,
    'eval_metric': 'logloss',
    'random_state': 42,
    'n_jobs': -1,
}
xgb_model = XGBClassifier(**xgb_params)

# scikit-learn gradient boosting.
gb_params = {
    'n_estimators': 972,
    'learning_rate': 0.12489462263598237,
    'max_depth': 9,
    'min_samples_split': 4,
    'min_samples_leaf': 8,
    'subsample': 0.9002007699061254,
    'random_state': 42,
}
gb_model = GradientBoostingClassifier(**gb_params)

# =============================
# 4. Define Meta-Model (Blender)
# =============================

# Second-level learner: a small, slowly-learning gradient-boosting model.
meta_params = {
    'n_estimators': 200,    # number of boosting stages (tunable)
    'learning_rate': 0.01,  # small step size per stage
    'max_depth': 3,         # shallow trees keep the blender simple
    'subsample': 0.8,       # stochastic boosting: each stage sees a row subsample
    'random_state': 42,
}
meta_model = GradientBoostingClassifier(**meta_params)
# Inactive alternative blender, kept for reference:
# meta_model = LogisticRegression(max_iter=5000, random_state=42)

# =============================
# 5. Combine in Stacking Classifier
# =============================

# (name, estimator) pairs whose outputs feed the meta-model.
base_learners = [
    ('rf', rf_model),
    ('xgb', xgb_model),
    ('gb', gb_model),
]

stacking_model = StackingClassifier(
    estimators=base_learners,
    final_estimator=meta_model,
    cv=3,                          # internal cross-validation used to build meta-features
    stack_method='predict_proba',  # base models pass class probabilities to the meta-model
    n_jobs=-1,
)

# =============================
# 6. Train & Evaluate
# =============================

# fit() returns the fitted stack, so training and validation prediction can
# be chained.
y_val_pred = stacking_model.fit(X_train_scaled, y_train).predict(X_val_scaled)

# Share of validation rows whose predicted class equals the true class.
val_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy (Stacking): {val_acc:.4f}")

# =============================
# 7. Predict on Test Set
# =============================

y_test_pred = stacking_model.predict(X_test_scaled)

# Decode the 0/1 predictions back to text: position 1 -> 'UP', anything
# else -> 'DOWN'.
label_lookup = np.array(['DOWN', 'UP'])
y_test_pred_labels = label_lookup[(y_test_pred == 1).astype(int)]

# =============================
# 8. Create Submission File
# =============================

# One row per test id with its predicted direction.
submission_columns = {
    'id': test['id'],
    'bc_price_evo': y_test_pred_labels,
}
submission = pd.DataFrame(submission_columns)

# Written without the DataFrame index; an existing submission.csv is replaced.
submission.to_csv("submission.csv", index=False)
print("✅ Submission file saved as submission.csv")


Validation Accuracy (Stacking): 0.9286
✅ Submission file saved as submission.csv
