In [None]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.base import clone
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Load the raw train/test tables (paths are relative to the notebook's directory).
train = pd.read_csv("../data/flight_delays_train.csv")
test = pd.read_csv("../data/flight_delays_test.csv")

# Binary target: "Y" -> 1, "N" -> 0 (any other label would map to NaN).
y_train = train["dep_delayed_15min"].map({"Y": 1, "N": 0}).values
train_df = train.drop("dep_delayed_15min", axis=1)

# Stack train and test so each categorical column is encoded with one shared
# vocabulary. ignore_index=True gives the combined frame a unique 0..n-1 index
# instead of repeating the row labels of both inputs.
full_df = pd.concat([train_df, test], ignore_index=True)
full_df["Route"] = full_df["Origin"] + "-" + full_df["Dest"]

cat_features = ["Month", "DayofMonth", "DayOfWeek", "UniqueCarrier", "Origin", "Dest", "Route"]

# Integer-encode every categorical column from its string representation.
# A fresh encoder per column keeps the vocabularies independent.
for col in cat_features:
    le = LabelEncoder()
    full_df[col] = le.fit_transform(full_df[col].astype(str))

# Split back by position: the first n_train rows came from train, the rest from test.
n_train = len(train_df)
X_train = full_df.iloc[:n_train].values
X_test = full_df.iloc[n_train:].values

In [9]:
# Hold out 30% of the training rows for validation; the fixed seed keeps the
# split repeatable between runs.
X_train_part, X_valid, y_train_part, y_valid = train_test_split(
    X_train,
    y_train,
    random_state=17,
    test_size=0.3,
)

# XGBoost configuration used for the hold-out evaluation.
xgb_params = {
    "n_estimators": 500,
    "learning_rate": 0.1,
    "max_depth": 6,
    "random_state": 17,
    "n_jobs": -1,
}
xgb_model = XGBClassifier(**xgb_params)
xgb_model.fit(X_train_part, y_train_part)

# Keep only the second predict_proba column (the score for the class at index 1).
xgb_valid_pred = xgb_model.predict_proba(X_valid)[:, 1]
xgb_valid_auc = roc_auc_score(y_valid, xgb_valid_pred)

print("XGBoost Validation AUC:", xgb_valid_auc)

XGBoost Validation AUC: 0.7394486201449284


In [11]:
# CatBoost baseline evaluated on the existing hold-out split
# (X_train_part / X_valid), so its AUC is measured on the same validation rows.
cb_model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    random_seed=17,
    verbose=0,  # silence the per-iteration training log
)

cb_model.fit(X_train_part, y_train_part)

# Keep only the second predict_proba column (the score for the class at index 1).
cb_valid_pred = cb_model.predict_proba(X_valid)[:, 1]

print("CatBoost Validation AUC:", roc_auc_score(y_valid, cb_valid_pred))

CatBoost Validation AUC: 0.7359501377415965


In [12]:
# Refit on the full training set with exactly the hyperparameters that were
# validated on the hold-out split: clone() returns a fresh, unfitted estimator
# carrying xgb_model's parameters, so the submitted configuration cannot drift
# from the validated one through a copy-paste edit.
xgb_final = clone(xgb_model)
xgb_final.fit(X_train, y_train)

# Keep only the second predict_proba column (the score for the class at index 1).
xgb_test_pred = xgb_final.predict_proba(X_test)[:, 1]

# Write the submission: the positional index becomes the "id" column and the
# scores go under the target column name.
pd.Series(xgb_test_pred, name="dep_delayed_15min").to_csv(
    "xgb_final_submission.csv", index_label="id", header=True
)

print("Файл xgb_final_submission.csv успешно сохранен.")

Файл xgb_final_submission.csv успешно сохранен.
