Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for specifying position using the position_column parameter #6825

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
ruff format test
  • Loading branch information
NProkoptsev committed Feb 27, 2025
commit 93ba7e18caea0efd3bf370cfdee6ac743c7f75eb
24 changes: 6 additions & 18 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
@@ -849,9 +849,7 @@ def test_ranking_with_position_information_with_dataset_constructor(tmp_path):


# Appends queries and positions to the dataset file
def append_queries_and_positions_to_file(
file_dataset_in, file_query_in, positions, out_path
):
def append_queries_and_positions_to_file(file_dataset_in, file_query_in, positions, out_path):
queries = []
query_id = 0
with open(file_query_in, "r") as f:
@@ -916,9 +914,7 @@ def test_ranking_with_position_and_group_information_in_single_file(tmp_path):

lgb_train = lgb.Dataset(str(tmp_path / "rank.train"), params=params)
lgb_valid = [lgb_train.create_valid(str(tmp_path / "rank.test"), params=params)]
gbm_unbiased_with_single_file = lgb.train(
params, lgb_train, valid_sets=lgb_valid, num_boost_round=50
)
gbm_unbiased_with_single_file = lgb.train(params, lgb_train, valid_sets=lgb_valid, num_boost_round=50)

# Training with query files and list of positions
params = {
@@ -933,19 +929,11 @@ def test_ranking_with_position_and_group_information_in_single_file(tmp_path):
# ignore position and group column
"ignore_column": "301,302",
}
copyfile(
str(rank_example_dir / "rank.train.query"), str(tmp_path / "rank.train.query")
)
copyfile(
str(rank_example_dir / "rank.test.query"), str(tmp_path / "rank.test.query")
)
lgb_train = lgb.Dataset(
str(tmp_path / "rank.train"), params=params, position=positions
)
copyfile(str(rank_example_dir / "rank.train.query"), str(tmp_path / "rank.train.query"))
copyfile(str(rank_example_dir / "rank.test.query"), str(tmp_path / "rank.test.query"))
lgb_train = lgb.Dataset(str(tmp_path / "rank.train"), params=params, position=positions)
lgb_valid = [lgb_train.create_valid(str(tmp_path / "rank.test"))]
gbm_unbiased_with_multiple_files = lgb.train(
params, lgb_train, valid_sets=lgb_valid, num_boost_round=50
)
gbm_unbiased_with_multiple_files = lgb.train(params, lgb_train, valid_sets=lgb_valid, num_boost_round=50)
# The unbiased LambdaMART trained from query files plus a list of positions should perform the same as the unbiased LambdaMART trained from a single file containing group and position columns.
assert (
gbm_unbiased_with_multiple_files.best_score["valid_0"]["ndcg@3"]
Loading
Oops, something went wrong.