In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import ndcg_score, roc_auc_score
from scipy import sparse
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder,  minmax_scale
#import torch
#import torch.nn as nn
#from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, GroupShuffleSplit
import itertools
#from torch.utils.data import DataLoader
#import torch
from catboost import CatBoostRanker, Pool,  cv
import optuna
from optuna.integration import CatBoostPruningCallback
from catboost.utils import eval_metric
from optuna.pruners import SuccessiveHalvingPruner

In [2]:
# read the files changes path to where the data is stored
test = pd.read_csv("./test_set_VU_DM.csv")
train = pd.read_csv("./training_set_VU_DM.csv")
sample = pd.read_csv("./submission_sample.csv")

In [3]:
train['mean_pos']=train.groupby('prop_id')['position'].transform('mean')
test['mean_pos']=test['prop_id'].map(train.groupby('prop_id')['position'].mean())

In [4]:
def missing_values_table(df):
    """Returns a DataFrame with missing counts and percent missing for each column."""
    n = len(df)
    missing_count   = df.isna().sum()
    missing_percent = 100 * missing_count / n
    missing_df = (
        pd.DataFrame({
            'missing_count':   missing_count,
            'missing_percent': missing_percent
        })
        .sort_values('missing_percent', ascending=False)
    )
    return missing_df

In [5]:
########################## some beginning on the feature engineering
def missing_values_table(df):
    """Returns a DataFrame with missing counts and percent missing for each column."""
    n = len(df)
    missing_count   = df.isna().sum()
    missing_percent = 100 * missing_count / n
    missing_df = (
        pd.DataFrame({
            'missing_count':   missing_count,
            'missing_percent': missing_percent
        })
        .sort_values('missing_percent', ascending=False)
    )
    return missing_df
    
def preprocess_missing_and_competitors(train_df, test_df):
    # 1) Drop features with >93% missing or that leak the target
    drop_cols = [
        # competitor 1,4,6,7 are ~97–98% missing → too sparse to learn
        *[f'comp{i}_{t}' for i in [1,4,6,7] for t in ['rate','inv','rate_percent_diff']],
        'gross_bookings_usd'  # only in train, leaks booking price
    ]
    train_df.drop(columns=drop_cols, errors='ignore', inplace=True)
    test_df.drop(columns=drop_cols, errors='ignore', inplace=True)

    # 2) Impute & flag user history features
    #    Missing means “no prior purchases” → keep with sentinel + flag
    for df in (train_df, test_df):
        # visitor_hist_starrating
        df['hist_star_na'] = df['visitor_hist_starrating'].isna().astype(int)
        # fill with median starrating across users
        star_med = train_df['visitor_hist_starrating'].median()
        df['visitor_hist_starrating'] = df['visitor_hist_starrating'].fillna(star_med)

        # visitor_hist_adr_usd (avg USD spend)
        df['hist_adr_na'] = df['visitor_hist_adr_usd'].isna().astype(int)
        adr_med = train_df['visitor_hist_adr_usd'].median()
        df['visitor_hist_adr_usd'] = df['visitor_hist_adr_usd'].fillna(adr_med)

    # 3) Impute & flag affinity score
    #    Null means “hotel never seen” → fill with global minimum and flag
    affinity_min = train_df['srch_query_affinity_score'].min(skipna=True)
    for df in (train_df, test_df):
        df['affinity_na'] = df['srch_query_affinity_score'].isna().astype(int)
        df['srch_query_affinity_score'] = (
            df['srch_query_affinity_score']
            .fillna(affinity_min)
        )

    # 4) Keep & impute competitor 2,3,5,8 features (~50–90% missing)
    #    Null → “no data” sentinel (for categorical) or 0 (for percent diff), plus flag
    keep_comps = [2,3,5,8]
    for i in keep_comps:
        # availability flag
        inv_col = f'comp{i}_inv'
        flag_col = f'comp{i}_inv_na'
        for df in (train_df, test_df):
            df[flag_col] = df[inv_col].isna().astype(int)
            # fill null with 2 (new category: 0=no avail,1=avail,2=no data)
            df[inv_col] = df[inv_col].fillna(2).astype(int)

        # price‐compare flag
        rate_col = f'comp{i}_rate'
        rate_flag = f'comp{i}_rate_na'
        for df in (train_df, test_df):
            df[rate_flag] = df[rate_col].isna().astype(int)
            # fill null as “no data” = 2
            df[rate_col] = df[rate_col].fillna(2).astype(int)

        # percent_diff
        pdiff_col = f'comp{i}_rate_percent_diff'
        pdiff_flag = f'comp{i}_pdiff_na'
        for df in (train_df, test_df):
            df[pdiff_flag] = df[pdiff_col].isna().astype(int)
            # fill null as 0% diff (no info)
            df[pdiff_col] = df[pdiff_col].fillna(0.0)

    # 5) Bucket orig_destination_distance
    #    Missing → sentinel bucket + flag
    for df in (train_df, test_df):
        df['dist_na'] = df['orig_destination_distance'].isna().astype(int)
        df['orig_destination_distance'] = (
            df['orig_destination_distance'].fillna(-1)
        )
        # define bins (in km)
        bins = [-1, 0, 10, 50, 200, np.inf]
        labels = ['missing','0-10km','10-50km','50-200km','200km+']
        df['dist_bucket'] = pd.cut(
            df['orig_destination_distance'],
            bins=bins, labels=labels
        )

    return train_df, test_df

def create_base_features(df):
    """1) Parse datetime & basic price/historical features."""
    df = df.copy()
    # --- Date/time splits ---
    df['date_time']   = pd.to_datetime(df['date_time'])
    df['search_year'] = df['date_time'].dt.year
    df['search_month']= df['date_time'].dt.month
    df['search_day']  = df['date_time'].dt.day
    df['search_hour'] = df['date_time'].dt.hour

    # --- Price per night & hist price devation ---
    df['price_per_night'] = df['price_usd'] / df['srch_length_of_stay']
    df['price_vs_historical'] = df['price_usd'] - df['prop_log_historical_price']
    df['price_vs_historical'].fillna(0, inplace=True)
    return df

def add_destination_stats(train_df, test_df):
    """6) Dest‑level total searches & booking rate."""
    dest = (
        train_df
        .groupby('srch_destination_id')
        .agg(dest_searches=('srch_id','count'),
             dest_bookings=('booking_bool','sum'))
        .assign(dest_booking_rate=lambda x: x.dest_bookings / x.dest_searches)
        .reset_index()
    )
    # Reassign merge result back to each DataFrame
    train_df = train_df.merge(
        dest[['srch_destination_id','dest_searches','dest_booking_rate']],
        on='srch_destination_id', how='left'
    )
    test_df = test_df.merge(
        dest[['srch_destination_id','dest_searches','dest_booking_rate']],
        on='srch_destination_id', how='left'
    )
    return train_df, test_df

def add_within_search_features(df):
    """7) Z‑scores & deltas in each search group."""
    grp = df.groupby('srch_id')
    # price
    df['price_mean_srch'] = grp['price_usd'].transform('mean')
    df['price_std_srch']  = grp['price_usd'].transform('std').fillna(1)
    df['price_zscore']    = (df['price_usd'] - df['price_mean_srch']) / df['price_std_srch']
    # stars
    df['star_mean_srch']  = grp['prop_starrating'].transform('mean')
    df['star_delta_srch'] = df['prop_starrating'] - df['star_mean_srch']
    # user delta
    df['star_delta_user'] = df['prop_starrating'] - df['visitor_hist_starrating']
    # distance
    df['dist_mean_srch']  = grp['orig_destination_distance'].transform('mean')
    df['dist_std_srch']   = grp['orig_destination_distance'].transform('std').fillna(1)
    df['dist_zscore']     = (df['orig_destination_distance'] - df['dist_mean_srch']) / df['dist_std_srch']
    return df

def add_temporal_features(df):
    """8) Weekday/weekend & check‑in weekend flags."""
    # day‑of‑week for search
    df['search_dow'] = df['date_time'].dt.weekday  # 0=Mon…6=Sun
    df['is_search_weekend'] = df['search_dow'].isin([5,6]).astype(int)
    # approximate check‑in day
    checkin = df['date_time'] + pd.to_timedelta(df['srch_booking_window'], 'D')
    df['checkin_dow'] = checkin.dt.weekday
    df['is_checkin_weekend'] = df['checkin_dow'].isin([5,6]).astype(int)
    return df

def add_ranks(df):
    """9) Dense ranks of price, star & distance within each search."""
    df['price_rank'] = df.groupby('srch_id')['price_usd'].rank('dense', ascending=True)
    df['star_rank']  = df.groupby('srch_id')['prop_starrating'].rank('dense', ascending=False)
    df['dist_rank']  = df.groupby('srch_id')['orig_destination_distance'].rank('dense', ascending=True)
    return df

def one_hot_encode_columns(train_df, test_df, columns_to_encode):
    """
    Converts specified string columns in train and test DataFrames to one-hot encoded features.

    Args:
        train_df (pd.DataFrame): The training DataFrame.
        test_df (pd.DataFrame): The testing DataFrame.
        columns_to_encode (list): A list of column names (strings) to be one-hot encoded.

    Returns:
        tuple: A tuple containing the modified training and testing DataFrames with one-hot encoded columns.
    """
    train_processed = train_df.copy()
    test_processed = test_df.copy()

    for col in columns_to_encode:
        if col in train_processed.columns and col in test_processed.columns:
            # Get unique values from both train and test to ensure consistent encoding
            all_unique_values = pd.concat([train_processed[col], test_processed[col]]).unique()

            for value in all_unique_values:
                train_processed[f'{col}_{value}'] = (train_processed[col] == value).astype(int)
                test_processed[f'{col}_{value}'] = (test_processed[col] == value).astype(int)

            # Drop the original categorical column
            train_processed.drop(columns=[col], inplace=True)
            test_processed.drop(columns=[col], inplace=True)
        else:
            print(f"Warning: Column '{col}' not found in both train and test DataFrames. Skipping one-hot encoding for this column.")

    return train_processed, test_processed

def minmax(arr):
    return (arr - arr.min()) / (arr.max() - arr.min() + 1e-8)

def ensemble_predictions(pred1, pred2, weights):
    w1, w2 = weights
    return w1 * minmax(pred1) + w2 * minmax(pred2)

def preprocess_missing_and_competitors_fit(train_df):
    stats = {}
    # 1) Which cols to drop
    stats['drop_cols'] = [
        *[f'comp{i}_{t}' for i in [1,4,6,7] 
          for t in ['rate','inv','rate_percent_diff']],
        'gross_bookings_usd'
    ]
    # 2) Medians & mins for imputation
    stats['star_med']     = train_df['visitor_hist_starrating'].median()
    stats['adr_med']      = train_df['visitor_hist_adr_usd'].median()
    stats['affinity_min'] = train_df['srch_query_affinity_score'].min(skipna=True)
    return stats

def preprocess_missing_and_competitors_transform(df, stats):
    df = df.copy()
    df.drop(columns=stats['drop_cols'], errors='ignore', inplace=True)

    # visitor history
    df['hist_star_na'] = df['visitor_hist_starrating'].isna().astype(int)
    df['visitor_hist_starrating'] = (
        df['visitor_hist_starrating']
          .fillna(stats['star_med'])
    )
    df['hist_adr_na'] = df['visitor_hist_adr_usd'].isna().astype(int)
    df['visitor_hist_adr_usd'] = (
        df['visitor_hist_adr_usd']
          .fillna(stats['adr_med'])
    )

    # affinity
    df['affinity_na'] = df['srch_query_affinity_score'].isna().astype(int)
    df['srch_query_affinity_score'] = (
        df['srch_query_affinity_score']
          .fillna(stats['affinity_min'])
    )

    # keep comps 2,3,5,8
    for i in [2,3,5,8]:
        for col, fill in [
            (f'comp{i}_inv',               2),
            (f'comp{i}_rate',              2),
            (f'comp{i}_rate_percent_diff', 0.0),
        ]:
            df[f'{col}_na'] = df[col].isna().astype(int)
            df[col] = df[col].fillna(fill)

    # distance bucket
    df['dist_na'] = df['orig_destination_distance'].isna().astype(int)
    df['orig_destination_distance'] = (
        df['orig_destination_distance'].fillna(-1)
    )
    bins  = [-1,0,10,50,200,np.inf]
    lbls  = ['missing','0-10km','10-50km','50-200km','200km+']
    df['dist_bucket'] = pd.cut(
        df['orig_destination_distance'], bins=bins, labels=lbls
    )
    return df

def add_destination_stats_fit(train_df):
    dest = (
        train_df
        .groupby('srch_destination_id')
        .agg(dest_searches=('srch_id','count'),
             dest_bookings=('booking_bool','sum'))
    )
    dest['dest_booking_rate'] = dest['dest_bookings'] / dest['dest_searches']
    return dest

def add_destination_stats_transform(df, dest_stats):
    df = df.copy()
    return df.merge(
        dest_stats[['dest_searches','dest_booking_rate']],
        on='srch_destination_id', how='left'
    )
def run_pipeline_on(train_df, other_df, steps):
    df1, df2 = train_df.copy(), other_df.copy()
    for fn in steps:
        df1, df2 = fn(df1, df2)
    return df1, df2

In [6]:
# 0) Define target
y = train['booking_bool'] * 5 + train['click_bool']

In [7]:
# 1) Feature‐Engineering Pipeline
train_feat, test_feat = train.copy(), test.copy()
steps = [
    preprocess_missing_and_competitors,
    add_destination_stats,
    lambda tr, te: one_hot_encode_columns(tr, te, ['dist_bucket']),
    lambda tr, te: (create_base_features(tr), create_base_features(te)),
    lambda tr, te: (add_within_search_features(tr), add_within_search_features(te)),
    lambda tr, te: (add_temporal_features(tr), add_temporal_features(te)),
    lambda tr, te: (add_ranks(tr), add_ranks(te)),
]
for fn in steps:
    train_feat, test_feat = fn(train_feat, test_feat)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['price_vs_historical'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['price_vs_historical'].fillna(0, inplace=True)


In [8]:
# 2) Train/Validation Split + Preprocessing


# a) split on search IDs
train_ids, valid_ids = train_test_split(
    train_feat['srch_id'].unique(), 
    test_size=0.2, 
    random_state=22
)
mask_tr   = train_feat['srch_id'].isin(train_ids)
mask_va   = ~mask_tr
mask_full = mask_tr | mask_va  # full = all rows

# b) pick your features
drop = ['date_time','gross_bookings_usd','position',
        'click_bool','booking_bool','srch_id','prop_id']
features = [c for c in train_feat.columns if c not in drop]

# c) pull out X, y
X       = train_feat[features]
X_test  = test_feat[features]
y       = train['booking_bool'] * 5 + train['click_bool']  # as before

# d) replace infinities/nans by train median
for df in (X, X_test):
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
for col in features:
    med = X[col].median()
    X[col].fillna(med, inplace=True)
    X_test[col].fillna(med, inplace=True)

# e) scale
scaler = StandardScaler()
X_tr    = scaler.fit_transform(X[mask_tr])
X_va    = scaler.transform(X[mask_va])
X_full  = scaler.transform(X)         # entire dataset
X_test  = scaler.transform(X_test)

y_tr    = y[mask_tr].values
y_va    = y[mask_va].values
y_full  = y.values                     # entire dataset

# 3) Build grouping arrays / session‐ids
id_tr   = train_feat.loc[mask_tr, 'srch_id'].values
id_va   = train_feat.loc[mask_va, 'srch_id'].values
id_full = train_feat['srch_id'].values




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace([np.inf, -np.inf], np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X[col].fillna(med, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col].fillna(med, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work becaus

In [9]:
# 1) Compute per-query group sizes
grp_tr   = train_feat.loc[mask_tr, 'srch_id'].value_counts(sort=False).values
grp_va   = train_feat.loc[mask_va, 'srch_id'].value_counts(sort=False).values
grp_full = train_feat['srch_id'].value_counts(sort=False).values

# 2) Build the LGB Datasets
lgb_train = lgb.Dataset(X_tr,    label=y_tr,   group=grp_tr)
lgb_val   = lgb.Dataset(X_va,    label=y_va,   group=grp_va)
lgb_full  = lgb.Dataset(X_full,  label=y_full, group=grp_full)


In [10]:
# 3) Build grouping arrays

train_pool = Pool(X_tr,   label=y_tr,   group_id=id_tr)
val_pool   = Pool(X_va,   label=y_va,   group_id=id_va)
full_pool  = Pool(X_full, label=y_full, group_id=id_full)


In [11]:
# 1) Define your search space

param_dist = {
    'learning_rate':    [0.03,0.05,0.08],
    'num_leaves':       [32,64,96],
    'min_data_in_leaf': [80,90,100],
    'feature_fraction': [0.85,0.9, 1.0],
    'bagging_fraction': [0.85,0.9, 1.0],
    'bagging_freq':     [5,10,20],
}

n_iter = 35
best_score, score = 0,0
best_params,params = None,0

In [12]:
# 1) Your random search stays the same…
for i in range(1, n_iter + 1):
    print(i)
    params = {
        'objective':         'lambdarank',
        'metric':            'ndcg',
        'ndcg_eval_at':      [5],
        'verbose':           1,
        'feature_pre_filter': False,      # ← disable the one-shot pre-filter
    }
    # 2) then overlay your random hyper‐params
    params.update({k: np.random.choice(v) for k, v in param_dist.items()})
    
    model = lgb.train(
        params,
        train_set=lgb_train,
        num_boost_round=1000,
        valid_sets=lgb_val,
        valid_names=['valid'],
        callbacks=[lgb.early_stopping(stopping_rounds=50)]
    )
    
    score = model.best_score['valid']['ndcg@5']
    print(f"    → ndcg@5 = {score:.4f}, rounds = {model.best_iteration}")
    
    if score > best_score:
        best_score_ = score
        best_params = params.copy()
        best_iter =  model.best_iteration

1
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.231556 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7128
[LightGBM] [Info] Number of data points in the train set: 3968534, number of used features: 80
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[396]	valid's ndcg@5: 0.39817
    → ndcg@5 = 0.3982, rounds = 396
2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.191514 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7128
[LightGBM] [Info] Number of data points in the train set: 3968534, number of used features: 80
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[683]	valid's ndcg@5: 0.4003

In [13]:
print(best_score, best_params, best_iter)

0 {'objective': 'lambdarank', 'metric': 'ndcg', 'ndcg_eval_at': [5], 'verbose': 1, 'feature_pre_filter': False, 'learning_rate': 0.03, 'num_leaves': 96, 'min_data_in_leaf': 80, 'feature_fraction': 1.0, 'bagging_fraction': 0.9, 'bagging_freq': 10} 706


In [14]:
# 3) Retrain on all data
model_lgb_full = lgb.train(
    best_params,
    lgb_full,
    num_boost_round=best_iter # can still be ajdusted to best best_iter
)

# 4) Predict on test
lgb_test_preds = model_lgb_full.predict(X_test)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.312885 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 7138
[LightGBM] [Info] Number of data points in the train set: 4958347, number of used features: 80


In [15]:
def objective(trial):
    params_cat = {
        'loss_function': 'YetiRank',
        'eval_metric': 'NDCG:top=5',
        'random_seed': 22,
        'od_type': 'Iter',
        'od_wait': 20,
        'learning_rate': trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        'depth': trial.suggest_int("depth", 6, 12),
        'l2_leaf_reg': trial.suggest_float("l2_leaf_reg", 1e-3, 10.0, log=True),
        'random_strength': trial.suggest_float("random_strength", 1e-3, 10.0, log=True),
        'bagging_temperature': trial.suggest_float("bagging_temperature", 0.0, 1.0),
        'border_count': trial.suggest_int("border_count", 32, 255),
        'iterations': 400,
        'use_best_model': True,
        'verbose': 100,
    }

    model_cat = CatBoostRanker(**params_cat)

    # Pruner callback for PFound
    pruning_callback = CatBoostPruningCallback(trial, 'NDCG:top=5;type=Base')


    model_cat.fit(
        train_pool,
        eval_set=val_pool,
        early_stopping_rounds=50,
        callbacks=[pruning_callback],
    )

    # Manually trigger pruning
    pruning_callback.check_pruned()

     # 5) Grab the best‐iteration score
    hist = model_cat.get_evals_result()['validation']['NDCG:top=5;type=Base']
    return max(hist) 




study = optuna.create_study(
    direction="maximize",
    study_name="catboost_yetirank",
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=5)
)
study.optimize(objective, n_trials=50, timeout=10000)

print(f"Best PFound: {study.best_trial.value}")
print("Best hyperparameters:")
for k, v in study.best_trial.params.items():
    print(f"  {k}: {v}")

[I 2025-05-16 01:08:41,393] A new study created in memory with name: catboost_yetirank
  pruning_callback = CatBoostPruningCallback(trial, 'NDCG:top=5;type=Base')


0:	test: 0.3128857	best: 0.3128857 (0)	total: 2.96s	remaining: 19m 40s
100:	test: 0.3873578	best: 0.3874665 (98)	total: 3m 22s	remaining: 9m 59s
200:	test: 0.3933890	best: 0.3933890 (200)	total: 6m 35s	remaining: 6m 31s
300:	test: 0.3961590	best: 0.3962093 (299)	total: 9m 47s	remaining: 3m 13s


[I 2025-05-16 01:21:46,205] Trial 0 finished with value: 0.39859924419192555 and parameters: {'learning_rate': 0.08115798052942344, 'depth': 11, 'l2_leaf_reg': 4.365984979409287, 'random_strength': 1.2724461034600165, 'bagging_temperature': 0.35447016253489705, 'border_count': 39}. Best is trial 0 with value: 0.39859924419192555.


399:	test: 0.3984495	best: 0.3985992 (398)	total: 12m 58s	remaining: 0us

bestTest = 0.3985992442
bestIteration = 398

Shrink model to first 399 iterations.
0:	test: 0.3017700	best: 0.3017700 (0)	total: 2.37s	remaining: 15m 43s
100:	test: 0.3810750	best: 0.3810750 (100)	total: 3m 24s	remaining: 10m 4s
200:	test: 0.3886841	best: 0.3886841 (200)	total: 6m 42s	remaining: 6m 38s
300:	test: 0.3923123	best: 0.3924060 (299)	total: 9m 58s	remaining: 3m 16s


[I 2025-05-16 01:35:02,793] Trial 1 finished with value: 0.39424577982101805 and parameters: {'learning_rate': 0.03379972948244641, 'depth': 11, 'l2_leaf_reg': 0.0016015514498101495, 'random_strength': 0.00868564899810549, 'bagging_temperature': 0.2843013196310744, 'border_count': 67}. Best is trial 0 with value: 0.39859924419192555.


399:	test: 0.3942458	best: 0.3942458 (399)	total: 13m 13s	remaining: 0us

bestTest = 0.3942457798
bestIteration = 399

0:	test: 0.2560430	best: 0.2560430 (0)	total: 1.31s	remaining: 8m 41s
100:	test: 0.3831681	best: 0.3831681 (100)	total: 2m 6s	remaining: 6m 14s
200:	test: 0.3893467	best: 0.3893467 (200)	total: 4m 11s	remaining: 4m 8s
300:	test: 0.3931101	best: 0.3931101 (300)	total: 6m 14s	remaining: 2m 3s


[I 2025-05-16 01:43:21,917] Trial 2 finished with value: 0.39509974658457636 and parameters: {'learning_rate': 0.09137352506412875, 'depth': 6, 'l2_leaf_reg': 0.010361546750932168, 'random_strength': 0.2619704240949627, 'bagging_temperature': 0.34682000328207396, 'border_count': 230}. Best is trial 0 with value: 0.39859924419192555.


399:	test: 0.3950077	best: 0.3950997 (389)	total: 8m 16s	remaining: 0us

bestTest = 0.3950997466
bestIteration = 389

Shrink model to first 390 iterations.
0:	test: 0.2645525	best: 0.2645525 (0)	total: 1.4s	remaining: 9m 16s
100:	test: 0.3842992	best: 0.3842992 (100)	total: 2m 9s	remaining: 6m 24s
200:	test: 0.3909487	best: 0.3909755 (199)	total: 4m 21s	remaining: 4m 19s
300:	test: 0.3942346	best: 0.3942453 (299)	total: 6m 28s	remaining: 2m 7s


[I 2025-05-16 01:51:58,696] Trial 3 finished with value: 0.39648784413232696 and parameters: {'learning_rate': 0.08570310155536492, 'depth': 7, 'l2_leaf_reg': 0.013531731299897933, 'random_strength': 0.10272194993875035, 'bagging_temperature': 0.10746148003140243, 'border_count': 222}. Best is trial 0 with value: 0.39859924419192555.


399:	test: 0.3964785	best: 0.3964878 (398)	total: 8m 34s	remaining: 0us

bestTest = 0.3964878441
bestIteration = 398

Shrink model to first 399 iterations.
0:	test: 0.3018309	best: 0.3018309 (0)	total: 2.56s	remaining: 17m 1s
100:	test: 0.3879837	best: 0.3880695 (99)	total: 3m 44s	remaining: 11m 5s
200:	test: 0.3936960	best: 0.3937362 (199)	total: 7m 17s	remaining: 7m 13s
300:	test: 0.3966767	best: 0.3966767 (300)	total: 10m 52s	remaining: 3m 34s


[I 2025-05-16 02:06:26,482] Trial 4 finished with value: 0.39874119395399443 and parameters: {'learning_rate': 0.07714031806517758, 'depth': 11, 'l2_leaf_reg': 6.283026147398587, 'random_strength': 0.3456940983125714, 'bagging_temperature': 0.28284103079597067, 'border_count': 239}. Best is trial 4 with value: 0.39874119395399443.


399:	test: 0.3986592	best: 0.3987412 (393)	total: 14m 25s	remaining: 0us

bestTest = 0.398741194
bestIteration = 393

Shrink model to first 394 iterations.
0:	test: 0.2965233	best: 0.2965233 (0)	total: 2.1s	remaining: 13m 57s


[I 2025-05-16 02:06:41,126] Trial 5 pruned. Trial was pruned at iteration 5.



bestTest = 0.315739417
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3125094	best: 0.3125094 (0)	total: 2.49s	remaining: 16m 34s
100:	test: 0.3888285	best: 0.3888285 (100)	total: 4m 2s	remaining: 11m 59s
200:	test: 0.3944491	best: 0.3944491 (200)	total: 7m 52s	remaining: 7m 48s
300:	test: 0.3977955	best: 0.3980139 (299)	total: 11m 46s	remaining: 3m 52s


[I 2025-05-16 02:22:23,480] Trial 6 finished with value: 0.40022295038525013 and parameters: {'learning_rate': 0.06838901258399085, 'depth': 12, 'l2_leaf_reg': 1.6882516888744639, 'random_strength': 0.185823675147253, 'bagging_temperature': 0.4923294314873814, 'border_count': 158}. Best is trial 6 with value: 0.40022295038525013.


399:	test: 0.4002230	best: 0.4002230 (399)	total: 15m 38s	remaining: 0us

bestTest = 0.4002229504
bestIteration = 399

0:	test: 0.2911280	best: 0.2911280 (0)	total: 2.01s	remaining: 13m 21s


[I 2025-05-16 02:22:37,903] Trial 7 pruned. Trial was pruned at iteration 5.



bestTest = 0.3347573925
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.2555037	best: 0.2555037 (0)	total: 1.39s	remaining: 9m 15s


[I 2025-05-16 02:22:47,662] Trial 8 pruned. Trial was pruned at iteration 5.



bestTest = 0.3027411877
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.2733042	best: 0.2733042 (0)	total: 1.6s	remaining: 10m 37s


[I 2025-05-16 02:22:59,183] Trial 9 pruned. Trial was pruned at iteration 5.



bestTest = 0.3231791248
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3062524	best: 0.3062524 (0)	total: 2.62s	remaining: 17m 27s


[I 2025-05-16 02:23:58,811] Trial 10 pruned. Trial was pruned at iteration 23.



bestTest = 0.3667069006
bestIteration = 23

Shrink model to first 24 iterations.
0:	test: 0.3107411	best: 0.3107411 (0)	total: 2.89s	remaining: 19m 12s


[I 2025-05-16 02:24:30,614] Trial 11 pruned. Trial was pruned at iteration 11.



bestTest = 0.35599329
bestIteration = 11

Shrink model to first 12 iterations.
0:	test: 0.2829340	best: 0.2829340 (0)	total: 1.71s	remaining: 11m 21s


[I 2025-05-16 02:24:44,139] Trial 12 pruned. Trial was pruned at iteration 7.



bestTest = 0.346320588
bestIteration = 7

Shrink model to first 8 iterations.
0:	test: 0.3106512	best: 0.3106512 (0)	total: 2.66s	remaining: 17m 42s


[I 2025-05-16 02:25:03,799] Trial 13 pruned. Trial was pruned at iteration 6.



bestTest = 0.3433788643
bestIteration = 6

Shrink model to first 7 iterations.
0:	test: 0.3018905	best: 0.3018905 (0)	total: 2.71s	remaining: 18m 1s


[I 2025-05-16 02:26:05,813] Trial 14 pruned. Trial was pruned at iteration 24.



bestTest = 0.3674096328
bestIteration = 24

Shrink model to first 25 iterations.
0:	test: 0.2999757	best: 0.2999757 (0)	total: 2.33s	remaining: 15m 31s


[I 2025-05-16 02:27:25,873] Trial 15 pruned. Trial was pruned at iteration 37.



bestTest = 0.3742446701
bestIteration = 37

Shrink model to first 38 iterations.
0:	test: 0.2810369	best: 0.2810369 (0)	total: 1.45s	remaining: 9m 37s
100:	test: 0.3896776	best: 0.3896776 (100)	total: 2m 18s	remaining: 6m 49s
200:	test: 0.3950736	best: 0.3950736 (200)	total: 4m 34s	remaining: 4m 31s
300:	test: 0.3971765	best: 0.3974169 (299)	total: 6m 49s	remaining: 2m 14s


[I 2025-05-16 02:36:06,655] Trial 16 finished with value: 0.3983432429944145 and parameters: {'learning_rate': 0.09900849563228, 'depth': 9, 'l2_leaf_reg': 0.1134025187680777, 'random_strength': 0.7351811381885495, 'bagging_temperature': 0.4274699723809353, 'border_count': 156}. Best is trial 6 with value: 0.40022295038525013.


Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.398343243
bestIteration = 330

Shrink model to first 331 iterations.
0:	test: 0.3018065	best: 0.3018065 (0)	total: 2.32s	remaining: 15m 25s


[I 2025-05-16 02:36:22,734] Trial 17 pruned. Trial was pruned at iteration 5.



bestTest = 0.3386637366
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3063153	best: 0.3063153 (0)	total: 2.3s	remaining: 15m 17s


[I 2025-05-16 02:36:38,747] Trial 18 pruned. Trial was pruned at iteration 5.



bestTest = 0.3283572493
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3028284	best: 0.3028284 (0)	total: 2.21s	remaining: 14m 40s


[I 2025-05-16 02:36:53,397] Trial 19 pruned. Trial was pruned at iteration 5.



bestTest = 0.3445778007
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.2750139	best: 0.2750139 (0)	total: 1.57s	remaining: 10m 24s


[I 2025-05-16 02:37:03,975] Trial 20 pruned. Trial was pruned at iteration 5.



bestTest = 0.3186919261
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3031022	best: 0.3031022 (0)	total: 2.48s	remaining: 16m 28s


[I 2025-05-16 02:37:28,369] Trial 21 pruned. Trial was pruned at iteration 10.



bestTest = 0.3589417465
bestIteration = 10

Shrink model to first 11 iterations.
0:	test: 0.3067763	best: 0.3067763 (0)	total: 2.27s	remaining: 15m 4s


[I 2025-05-16 02:37:42,569] Trial 22 pruned. Trial was pruned at iteration 5.



bestTest = 0.340821202
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3113857	best: 0.3113857 (0)	total: 2.56s	remaining: 17m


[I 2025-05-16 02:38:43,879] Trial 23 pruned. Trial was pruned at iteration 23.



bestTest = 0.3693684689
bestIteration = 23

Shrink model to first 24 iterations.
0:	test: 0.3071430	best: 0.3071430 (0)	total: 1.97s	remaining: 13m 5s
100:	test: 0.3884330	best: 0.3884330 (100)	total: 3m 15s	remaining: 9m 37s
200:	test: 0.3936304	best: 0.3936304 (200)	total: 6m 26s	remaining: 6m 22s


[I 2025-05-16 02:45:27,490] Trial 24 pruned. Trial was pruned at iteration 208.



bestTest = 0.3938154991
bestIteration = 206

Shrink model to first 207 iterations.
0:	test: 0.2996527	best: 0.2996527 (0)	total: 2s	remaining: 13m 20s


[I 2025-05-16 02:45:41,502] Trial 25 pruned. Trial was pruned at iteration 5.



bestTest = 0.3285896666
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3153254	best: 0.3153254 (0)	total: 2.72s	remaining: 18m 5s


[I 2025-05-16 02:45:59,617] Trial 26 pruned. Trial was pruned at iteration 5.



bestTest = 0.3441221472
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3077845	best: 0.3077845 (0)	total: 2.44s	remaining: 16m 14s
100:	test: 0.3884804	best: 0.3884804 (100)	total: 3m 36s	remaining: 10m 39s
200:	test: 0.3943978	best: 0.3944777 (199)	total: 7m 3s	remaining: 6m 59s
300:	test: 0.3972099	best: 0.3974172 (297)	total: 10m 31s	remaining: 3m 27s


[I 2025-05-16 03:00:01,900] Trial 27 finished with value: 0.39925047662254987 and parameters: {'learning_rate': 0.09537282722595875, 'depth': 11, 'l2_leaf_reg': 9.758875166860346, 'random_strength': 0.4683469350761278, 'bagging_temperature': 0.6894660735656242, 'border_count': 173}. Best is trial 6 with value: 0.40022295038525013.


399:	test: 0.3991691	best: 0.3992505 (391)	total: 13m 59s	remaining: 0us

bestTest = 0.3992504766
bestIteration = 391

Shrink model to first 392 iterations.
0:	test: 0.2927725	best: 0.2927725 (0)	total: 1.4s	remaining: 9m 16s


[I 2025-05-16 03:00:12,211] Trial 28 pruned. Trial was pruned at iteration 5.



bestTest = 0.3455866123
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3111343	best: 0.3111343 (0)	total: 2.93s	remaining: 19m 30s
100:	test: 0.3887946	best: 0.3887946 (100)	total: 4m 14s	remaining: 12m 32s
200:	test: 0.3948677	best: 0.3948677 (200)	total: 8m 15s	remaining: 8m 10s
300:	test: 0.3976128	best: 0.3976310 (286)	total: 12m 19s	remaining: 4m 3s


[I 2025-05-16 03:16:33,814] Trial 29 finished with value: 0.39990307103906114 and parameters: {'learning_rate': 0.07299365219549724, 'depth': 12, 'l2_leaf_reg': 1.9158609514635259, 'random_strength': 0.4774629579529207, 'bagging_temperature': 0.8541618523931782, 'border_count': 237}. Best is trial 6 with value: 0.40022295038525013.


399:	test: 0.3997913	best: 0.3999031 (397)	total: 16m 18s	remaining: 0us

bestTest = 0.399903071
bestIteration = 397

Shrink model to first 398 iterations.
0:	test: 0.3133744	best: 0.3133744 (0)	total: 2.89s	remaining: 19m 12s


[I 2025-05-16 03:16:52,011] Trial 30 pruned. Trial was pruned at iteration 5.



bestTest = 0.3402953032
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3099347	best: 0.3099347 (0)	total: 2.74s	remaining: 18m 14s


[I 2025-05-16 03:17:15,914] Trial 31 pruned. Trial was pruned at iteration 7.



bestTest = 0.3511792729
bestIteration = 7

Shrink model to first 8 iterations.
0:	test: 0.3013516	best: 0.3013516 (0)	total: 2.75s	remaining: 18m 19s


[I 2025-05-16 03:17:32,150] Trial 32 pruned. Trial was pruned at iteration 5.



bestTest = 0.3414897515
bestIteration = 5

Shrink model to first 6 iterations.
0:	test: 0.3020611	best: 0.3020611 (0)	total: 2.48s	remaining: 16m 28s
100:	test: 0.3896272	best: 0.3898336 (98)	total: 3m 42s	remaining: 10m 58s
200:	test: 0.3950366	best: 0.3950370 (199)	total: 7m 15s	remaining: 7m 11s
300:	test: 0.3976788	best: 0.3979828 (289)	total: 10m 52s	remaining: 3m 34s


[I 2025-05-16 03:31:57,410] Trial 33 finished with value: 0.3998253151968382 and parameters: {'learning_rate': 0.08601842196892498, 'depth': 11, 'l2_leaf_reg': 2.2645774637097236, 'random_strength': 0.18425504171294013, 'bagging_temperature': 0.6628665776577807, 'border_count': 213}. Best is trial 6 with value: 0.40022295038525013.


399:	test: 0.3998253	best: 0.3998253 (399)	total: 14m 22s	remaining: 0us

bestTest = 0.3998253152
bestIteration = 399

0:	test: 0.3009680	best: 0.3009680 (0)	total: 2.81s	remaining: 18m 40s
100:	test: 0.3898163	best: 0.3898690 (99)	total: 3m 47s	remaining: 11m 12s
200:	test: 0.3952769	best: 0.3957069 (191)	total: 7m 20s	remaining: 7m 16s
300:	test: 0.3978623	best: 0.3979615 (298)	total: 10m 56s	remaining: 3m 35s


[I 2025-05-16 03:46:31,940] Trial 34 finished with value: 0.39951492261945243 and parameters: {'learning_rate': 0.08414526842055725, 'depth': 11, 'l2_leaf_reg': 2.16803181222979, 'random_strength': 0.17161242294608234, 'bagging_temperature': 0.664501480898066, 'border_count': 222}. Best is trial 6 with value: 0.40022295038525013.


399:	test: 0.3994313	best: 0.3995149 (398)	total: 14m 31s	remaining: 0us

bestTest = 0.3995149226
bestIteration = 398

Shrink model to first 399 iterations.
0:	test: 0.3059095	best: 0.3059095 (0)	total: 2.62s	remaining: 17m 25s
100:	test: 0.3904070	best: 0.3904967 (98)	total: 4m 11s	remaining: 12m 25s
200:	test: 0.3946239	best: 0.3946239 (200)	total: 8m 12s	remaining: 8m 7s


[I 2025-05-16 03:56:29,054] Trial 35 pruned. Trial was pruned at iteration 242.



bestTest = 0.3957765094
bestIteration = 234

Shrink model to first 235 iterations.
Best PFound: 0.40022295038525013
Best hyperparameters:
  learning_rate: 0.06838901258399085
  depth: 12
  l2_leaf_reg: 1.6882516888744639
  random_strength: 0.185823675147253
  bagging_temperature: 0.4923294314873814
  border_count: 158


In [16]:

best_params_cat = study.best_trial.params # Corrected line
print("Best hyperparameters:")
for key, value in best_params_cat.items():
    print(f"  {key}: {value}")

final_params_cat= {**best_params_cat, 'iterations': 1000, 'verbose': 100} #added verbose
final_cat = CatBoostRanker(**final_params_cat)
final_cat.fit(
    full_pool,
    early_stopping_rounds=100
)



Best hyperparameters:
  learning_rate: 0.06838901258399085
  depth: 12
  l2_leaf_reg: 1.6882516888744639
  random_strength: 0.185823675147253
  bagging_temperature: 0.4923294314873814
  border_count: 158
0:	total: 3.96s	remaining: 1h 5m 55s
100:	total: 5m 9s	remaining: 45m 54s
200:	total: 10m 2s	remaining: 39m 53s
300:	total: 14m 57s	remaining: 34m 44s
400:	total: 19m 54s	remaining: 29m 44s
500:	total: 24m 51s	remaining: 24m 45s
600:	total: 29m 48s	remaining: 19m 47s
700:	total: 34m 47s	remaining: 14m 50s
800:	total: 39m 48s	remaining: 9m 53s
900:	total: 44m 48s	remaining: 4m 55s
999:	total: 49m 45s	remaining: 0us


<catboost.core.CatBoostRanker at 0x1b8eb539ac0>

In [17]:
# 4) Predict on test set
cat_test_preds = final_cat.predict(X_test)


In [18]:
# 2) Put them into your submission

#can use cat_test_pred or lgb_test_preds

sample['score'] = lgb_test_preds
submission = (
    sample
    .sort_values(['srch_id', 'score'], ascending=[True, False])
    [['srch_id', 'prop_id']]
)

# 3) Write to CSV
submission.to_csv('submission0.csv', index=False)
print("submission.csv written using lgb only!") 

submission.csv written using lgb only!


In [19]:
"""params = {
    'loss_function':       'YetiRank',
    'eval_metric':         'NDCG:top=5',
    'random_seed':         22,
    'od_type':             'Iter',
    'od_wait':             20,
    'learning_rate':       0.05,
    'depth':               8,
    'l2_leaf_reg':         3.0,
    'random_strength':     1.0,
    'bagging_temperature': 0.2,
    'border_count':        128,
    'iterations':          1000,
    'use_best_model':      True,
    'verbose':             100,
}

# 3) Train
model = CatBoostRanker(**params)
model.fit(
    train_pool,
    eval_set=val_pool,
    early_stopping_rounds=50,
    verbose=False,            # or True to see each iteration
    verbose_eval=100,         # log every 100 iters
)

# 4) Best iteration
best_iter = model.get_best_iteration()
print("Best iteration:", best_iter)

# 5) Validation NDCG@5 history
ndcg_history = model.get_evals_result()['validation']['NDCG:top=5;type=Base']
print(f"Validation NDCG@5 @ best_iter: {ndcg_history[best_iter]:.4f}")

# 6) Retrain on full data if desired
# full_pool = Pool(X_full, label=y_full, group_id=id_full)
# model_full = CatBoostRanker(**params)
# model_full.fit(full_pool, use_best_model=True)

# 7) Predict on test
# test_pool = Pool(X_test)  # note: no label needed
# test_preds = model_full.predict(test_pool)"""

'params = {\n    \'loss_function\':       \'YetiRank\',\n    \'eval_metric\':         \'NDCG:top=5\',\n    \'random_seed\':         22,\n    \'od_type\':             \'Iter\',\n    \'od_wait\':             20,\n    \'learning_rate\':       0.05,\n    \'depth\':               8,\n    \'l2_leaf_reg\':         3.0,\n    \'random_strength\':     1.0,\n    \'bagging_temperature\': 0.2,\n    \'border_count\':        128,\n    \'iterations\':          1000,\n    \'use_best_model\':      True,\n    \'verbose\':             100,\n}\n\n# 3) Train\nmodel = CatBoostRanker(**params)\nmodel.fit(\n    train_pool,\n    eval_set=val_pool,\n    early_stopping_rounds=50,\n    verbose=False,            # or True to see each iteration\n    verbose_eval=100,         # log every 100 iters\n)\n\n# 4) Best iteration\nbest_iter = model.get_best_iteration()\nprint("Best iteration:", best_iter)\n\n# 5) Validation NDCG@5 history\nndcg_history = model.get_evals_result()[\'validation\'][\'NDCG:top=5;type=Base\']\npr

In [20]:
# 1. Predict raw scores for the validation set
cat_val_preds = final_cat.predict(X_va)
lgb_val_preds = model_lgb_full.predict(X_va, num_iteration=best_it)

# 2. Compute per-session NDCG@5 manually
ndcgs_cat = []
ndcgs_lgb = []

for q in np.unique(id_va):
    idx = np.where(id_va == q)[0]
    if len(idx) > 1:  # skip sessions with only 1 result
        true_rels = y_va[idx]
        ndcgs_cat.append(ndcg_score([true_rels], [cat_val_preds[idx]], k=5))
        ndcgs_lgb.append(ndcg_score([true_rels], [lgb_val_preds[idx]], k=5))

# 3. Average NDCG@5 for both models
catboost_ndcg5 = np.mean(ndcgs_cat)
lgb_ndcg5 = np.mean(ndcgs_lgb)

print(f"\nCatBoost Val NDCG@5: {catboost_ndcg5:.4f}")
print(f"\nLightGBM Val NDCG@5: {lgb_ndcg5:.4f}")


NameError: name 'best_it' is not defined

In [None]:


# 1) Scale just your two tree-based model preds on validation
cb_s  = minmax(cat_val_preds)   # your CatBoost val preds
lgb_s = minmax(lgb_val_preds)   # your LightGBM val preds

# 2) Sweep over blends of just CB + LGB
best, best_w = 0, None
for w_cb in np.linspace(0, 1, 11):
    w_lgb = 1 - w_cb
    sc = w_cb * cb_s + w_lgb * lgb_s

    # compute NDCG@5 on val
    ndcgs = []
    for q in np.unique(id_va):
        idx = np.where(id_va == q)[0]
        if len(idx) > 1:
            true = y_va[idx]
            ndcgs.append(ndcg_score([true], [sc[idx]], k=5))
    mean_ndcg = np.mean(ndcgs)

    print(f"w_cb={w_cb:.1f}, w_lgb={w_lgb:.1f} → Val NDCG@5: {mean_ndcg:.4f}")
    if mean_ndcg > best:
        best, best_w = mean_ndcg, (w_cb, w_lgb)

print(f"\n Best blend (CB+LGB): w_cb={best_w[0]:.2f}, w_lgb={best_w[1]:.2f} → NDCG@5={best:.4f}")


In [21]:
# --- Ensemble the test predictions with your optimal blend ---
best_w = [0.5,0.5]

ensemble_final_preds = ensemble_predictions(
    cat_test_preds,
    lgb_test_preds,
    weights=[best_w[0], best_w[1]]
)

# Put them into submission
sample['score'] = ensemble_final_preds
submission = (
    sample
    .sort_values(['srch_id','score'], ascending=[True, False])
    [['srch_id','prop_id']]
)
submission.to_csv('submission.csv', index=False)
print(f"Submission.csv written using w_cb={best_w[0]:.2f}, w_lgb={best_w[1]:.2f}!") 


Submission.csv written using w_cb=0.50, w_lgb=0.50!


In [22]:
import pickle
with open('cat_preds.pkl', 'wb') as file:
    pickle.dump(cat_test_preds, file)

with open('lgb_preds.pkl', 'wb') as file:
    pickle.dump(lgb_test_preds, file)

In [None]:
"""
# --- 1) ListNet loss (unchanged) ---
def listnet_loss(scores, labels, group_ids):
    loss, count = 0.0, 0
    for q in np.unique(group_ids):
        idx = np.where(group_ids == q)[0]
        if len(idx) < 2:
            continue
        s_q, y_q = scores[idx], labels[idx].float()
        P, P_hat = torch.softmax(y_q, 0), torch.softmax(s_q, 0)
        loss += -torch.sum(P * torch.log(P_hat + 1e-8))
        count += 1
    return loss / max(count, 1)

param_grid = {
    'lr':           [1e-3, 2e-3],
    'batch_size':   [512, 1024],
    'dropout':      [0.1, 0.2],
    'weight_decay': [0.0, 1e-4]
}

best = {'ndcg': 0.0, 'cfg': None}

for lr, bs, drop, wd in itertools.product(*param_grid.values()):
    tr_loader = DataLoader(ExpediaDataset(X_tr, y_tr.values),
                           batch_size=bs, shuffle=True)
    va_loader = DataLoader(ExpediaDataset(X_va, y_va.values),
                           batch_size=bs, shuffle=False)

    mdl = DeepRecommender(X_tr.shape[1]).to(device)
    for m in mdl.modules():
        if isinstance(m, nn.Dropout):
            m.p = drop

    opt = torch.optim.Adam(
        mdl.parameters(),
        lr=lr,
        weight_decay=wd
    )
    sched = torch.optim.lr_scheduler.OneCycleLR(
        opt,
        max_lr=lr * 10,
        steps_per_epoch=len(tr_loader),
        epochs=10
    )

    best_ndcg, stale = 0.0, 0
    for epoch in range(1, 8):
        # — train —
        mdl.train()
        for Xb, yb in tr_loader:
            Xb, yb = Xb.to(device), yb.to(device)
            opt.zero_grad()
            loss = listnet_loss(mdl(Xb), yb, id_tr[:len(Xb)])
            loss.backward()
            opt.step()

        # — validate —
        mdl.eval()
        preds = []
        with torch.no_grad():
            for Xb, _ in va_loader:
                preds.extend(mdl(Xb.to(device)).cpu().numpy())
        preds = np.array(preds)

        # compute mean NDCG@5 using the same y_va array
        ndcgs = []
        for q in np.unique(id_va):
            idx = np.where(id_va == q)[0]
            if len(idx) > 1:
                true = y_va.values[idx]
                score = preds[idx]
                ndcgs.append(ndcg_score([true], [score], k=5))
        mean_ndcg = np.mean(ndcgs)
        sched.step(mean_ndcg)

        if mean_ndcg > best_ndcg + 1e-4:
            best_ndcg, stale = mean_ndcg, 0
        else:
            stale += 1
        if stale >= 5:
            break

    print(f"cfg lr={lr}, bs={bs}, drop={drop} → Val NDCG@5: {best_ndcg:.4f}")
    if best_ndcg > best['ndcg']:
        best.update({'ndcg': best_ndcg, 'cfg': (lr, bs, drop)})

print("🏆 Best config:", best)
"""




In [None]:
"""# unpack your best cfg and tweak
lr, bs, drop, wd = 1e-3, 128, 0.1, 1e-5

# rebuild full-training loader
full_loader = DataLoader(
    ExpediaDatasetGrouped(X_full, y_full, id_full),
    batch_size=bs, shuffle=True
)

# 1) Slightly larger network at the top
class BiggerRecommender(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 512), nn.ReLU(), nn.BatchNorm1d(512), nn.Dropout(drop),
            nn.Linear(512, 256), nn.ReLU(), nn.BatchNorm1d(256), nn.Dropout(drop),
            nn.Linear(256, 128), nn.ReLU(), nn.BatchNorm1d(128), nn.Dropout(drop),
            nn.Linear(128, 64),  nn.ReLU(), nn.Dropout(drop),
            nn.Linear(64, 1)
        )
    def forward(self, x):
        return self.net(x).squeeze(1)

final_nn = BiggerRecommender(X_full.shape[1]).to(device)

# 2) Optimizer with very light weight decay
opt = torch.optim.Adam(final_nn.parameters(), lr=lr, weight_decay=wd)

# 3) Simpler stepLR scheduler (cuts LR by 0.5 every 10 epochs)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.5)

# 4) Train for 30 epochs with gradient clipping
for epoch in range(1, 31):
    final_nn.train()
    total_loss = 0.0
    for Xb, yb, gb in full_loader:
        Xb, yb = Xb.to(device), yb.to(device)

        opt.zero_grad()
        logits = final_nn(Xb)
        loss   = listnet_loss(logits, yb, gb)  
        loss.backward()
        torch.nn.utils.clip_grad_norm_(final_nn.parameters(), 5.0)
        opt.step()

        total_loss += loss.item() * Xb.size(0)

    sched.step()
    avg_loss = total_loss / len(full_loader.dataset)
    print(f"Epoch {epoch:02d}/30 — Avg ListNet Loss: {avg_loss:.4f} — LR: {opt.param_groups[0]['lr']:.1e}")
"""

In [None]:
"""# --- 4) Produce final test preds ---
test_loader = DataLoader(
    ExpediaDataset(X_test, np.zeros(len(X_test))),
    batch_size=bs,
    shuffle=False
)

nn_test_preds = []
final_nn.eval()
with torch.no_grad():
    for Xb, _ in test_loader:           # <-- unpack both X and dummy y
        Xb = Xb.to(device)
        nn_test_preds.extend(
            torch.sigmoid(final_nn(Xb))
                 .cpu()
                 .numpy()
        )
nn_test_preds = np.array(nn_test_preds)

print("NN preds:", nn_test_preds.min(), nn_test_preds.max())
"""

In [None]:
"""

# 1) Get NN probabilities on the validation fold
nn_val_probs = []
final_nn.eval()
with torch.no_grad():
    for Xb, _ in va_loader:
        nn_val_probs.extend(
            torch.sigmoid(final_nn(Xb.to(device)))
            .cpu()
            .numpy()
        )
nn_val_probs = np.array(nn_val_probs)  # shape = (n_val,)

# 2) Compute per-session NDCG@5
ndcgs = []
for q in np.unique(id_va):
    idx = np.where(id_va == q)[0]
    if len(idx) > 1:
        true_rels = y_val_nn[idx]
        scores    = nn_val_probs[idx]
        ndcgs.append(ndcg_score([true_rels], [scores], k=5))

nn_val_ndcg5 = np.mean(ndcgs)
print(f"NN  Val NDCG@5: {nn_val_ndcg5:.4f}")
"""

In [None]:
"""# --- Ensemble the predictions ---
ensemble_final_preds = ensemble_predictions(nn_test_preds, model_lgb_pred, weights=[0.5, 0.5]) # Adjust weights

# Predict relevance scores for each test row
preds_ens = ensemble_final_preds

# Insert those scores into the sample submission DataFrame
sample['score'] = preds_ens

# Sort by search session (ascending) and score (descending)
#   so that for each srch_id, the most relevant prop_id comes first
submission = sample.sort_values(
    ['srch_id', 'score'],
    ascending=[True, False]
)

# keep only the required columns and write to CSV
#   Kaggle expects: srch_id, prop_id (in ranked order)
submission[['srch_id', 'prop_id']].to_csv(
    'submission.csv',
    index=False
)
print("Submission.csv adjusted with new scores!")"""

In [None]:
"""
# 1) LightGBM val set predictions
# Replace `model_lgb` with whatever variable you named your trained LightGBM model
val_preds_lgb = model_lgb.predict(X_va)   # shape = (n_val,)

# 2) NN val set predictions
final_nn.eval()
nn_val_preds = []
with torch.no_grad():
    for Xb, _ in va_loader:  # va_loader from your NN split on X_va/y_va
        nn_val_preds.extend(
            torch.sigmoid(final_nn(Xb.to(device))).cpu().numpy()
        )
nn_val_preds = np.array(nn_val_preds)      # shape = (n_val,)

# 3) Blend them
w_nn, w_lgb = 0.4, 0.6
ensemble_val = w_nn * nn_val_preds + w_lgb * val_preds_lgb


ndcgs = []
for q in np.unique(id_va):
    idx = np.where(id_va == q)[0]
    if len(idx) > 1:
        true_rel  = y_val_nn[idx]    # your val labels array
        score_rel = ensemble_val[idx]
        ndcgs.append(ndcg_score([true_rel], [score_rel], k=5))

mean_ndcg5 = np.mean(ndcgs)
print(f"Ensembled Val NDCG@5: {mean_ndcg5:.4f}")
"""

In [None]:
"""

# 1) Min–max scale each prediction array into [0,1]
def minmax(arr):
    return (arr - arr.min()) / (arr.max() - arr.min() + 1e-8)

nn_scaled  = minmax(nn_test_preds)
lgb_scaled = minmax(model_lgb_pred)   # or whatever your LGB test‐preds variable is

# 2) Do the two blends
ens1 = ensemble_predictions(nn_scaled, lgb_scaled, [0.5, 0.5])
ens2 = ensemble_predictions(nn_scaled, lgb_scaled, [0.0, 1.0])

# 3) Compare their sorted‐order permutations
order1 = np.argsort(ens1)
order2 = np.argsort(ens2)

# 4) Compute fraction of positions that differ
fraction_changed = np.mean(order1 != order2)
print(f"Fraction of test‐rows whose position changes: {fraction_changed:.4%}")
"""

In [None]:
"""

# 0) Scale both NN & LGB validation predictions into [0,1]
nn_scaled_val  = minmax_scale(nn_val_preds)      # shape = (n_val,)
lgb_scaled_val = minmax_scale(val_preds_lgb)     # shape = (n_val,)

best_w, best_score = None, 0.0
for w_nn in np.linspace(0, 1, 11):
    w_lgb = 1 - w_nn

    # blended on the same [0,1] scale
    blended = w_nn * nn_scaled_val + w_lgb * lgb_scaled_val

    # compute NDCG@5 per session
    ndcgs = []
    for q in np.unique(id_va):
        idx = np.where(id_va == q)[0]
        if len(idx) > 1:
            true   = y_val_nn[idx]
            scores = blended[idx]
            ndcgs.append(ndcg_score([true], [scores], k=5))
    mean_ndcg = np.mean(ndcgs)

    print(f"w_nn={w_nn:.1f}, w_lgb={w_lgb:.1f} → Val NDCG@5: {mean_ndcg:.4f}")
    if mean_ndcg > best_score:
        best_score, best_w = mean_ndcg, w_nn

print(f"\n🏆 Best blend: w_nn={best_w:.2f}, w_lgb={1-best_w:.2f} → NDCG@5={best_score:.4f}")
"""

In [None]:
"""df = sample[['srch_id']].copy()
df['nn_rank']  = pd.DataFrame({'score': nn_test_preds,  'srch': sample['srch_id']}) \
                    .groupby('srch')['score'] \
                    .rank(method='dense', ascending=False)
df['lgb_rank'] = pd.DataFrame({'score': model_lgb_pred,'srch': sample['srch_id']}) \
                    .groupby('srch')['score'] \
                    .rank(method='dense', ascending=False)

# weighted rank
w_nn, w_lgb = best_w, (1-best_w)
df['ensemble_rank'] = w_nn * df['nn_rank'] + w_lgb * df['lgb_rank']

# use that to sort
submission = sample.assign(_rank=df['ensemble_rank']) \
    .sort_values(['srch_id','_rank'], ascending=[True,True]) \
    [['srch_id','prop_id']]

# keep only the required columns and write to CSV
#   Kaggle expects: srch_id, prop_id (in ranked order)
submission[['srch_id', 'prop_id']].to_csv(
    'submission.csv',
    index=False
)
print("Submission.csv adjusted with new scores!")"""

In [None]:
"""df_sub = sample[['srch_id']].copy()
df_sub['nn_rank']  = pd.Series(nn_test_preds).groupby(sample['srch_id']).rank("dense", ascending=False)
df_sub['lgb_rank'] = pd.Series(model_lgb_pred).groupby(sample['srch_id']).rank("dense", ascending=False)

# weighted rank blend
df_sub['ensemble_rank'] = 0.2*df_sub['nn_rank'] + 0.8*df_sub['lgb_rank']

submission = (
    sample.assign(_rank=df_sub['ensemble_rank'])
          .sort_values(['srch_id','_rank'], ascending=[True,True])
          [['srch_id','prop_id']]
)
# keep only the required columns and write to CSV
#   Kaggle expects: srch_id, prop_id (in ranked order)
submission[['srch_id', 'prop_id']].to_csv(
    'submission.csv',
    index=False
)
print("Submission.csv adjusted with new scores!")"""