Skip to content

Commit

Permalink
Add a bias term
Browse files Browse the repository at this point in the history
In effect, a bias value allows you to shift the activation function
to the left or right, which may be critical for successful learning.
  • Loading branch information
VaibhavKansagara authored and ojwb committed Jul 1, 2022
1 parent 90b307f commit 2a43250
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 6 deletions.
4 changes: 3 additions & 1 deletion xapian-letor/api/featurelist.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ FeatureList::normalise(std::vector<FeatureVector>& fvec) const
std::vector<FeatureVector>
FeatureList::create_feature_vectors(const Xapian::MSet & mset,
const Xapian::Query & letor_query,
const Xapian::Database & letor_db) const
const Xapian::Database& letor_db,
bool flag, double bias) const
{
LOGCALL(API, std::vector<FeatureVector>, "FeatureList::create_feature_vectors", mset | letor_query | letor_db);
if (mset.empty())
Expand All @@ -121,6 +122,7 @@ FeatureList::create_feature_vectors(const Xapian::MSet & mset,
// Append feature values
fvals.insert(fvals.end(), values.begin(), values.end());
}
if (flag) fvals.push_back(bias);
double wt = i.get_weight();
// Weight is added as a feature by default.
fvals.push_back(wt);
Expand Down
3 changes: 2 additions & 1 deletion xapian-letor/include/xapian-letor/featurelist.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ class XAPIAN_VISIBILITY_DEFAULT FeatureList {
std::vector<Xapian::FeatureVector>
create_feature_vectors(const Xapian::MSet & mset,
const Xapian::Query & letor_query,
const Xapian::Database & letor_db) const;
const Xapian::Database& letor_db, bool flag = false,
double bias = 1.0) const;

private:
/// Perform query-level normalisation of FeatureVectors.
Expand Down
3 changes: 2 additions & 1 deletion xapian-letor/include/xapian-letor/ranker.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ prepare_training_file(const std::string & db_path,
const std::string & qrel_file,
Xapian::doccount msetsize,
const std::string & filename,
const Xapian::FeatureList & flist = FeatureList());
const Xapian::FeatureList& flist = FeatureList(),
bool flag = false, double bias = 1.0);

class XAPIAN_VISIBILITY_DEFAULT Ranker : public Xapian::Internal::intrusive_base {
/// Path to Xapian::Database instance to be used.
Expand Down
11 changes: 8 additions & 3 deletions xapian-letor/ranker/ranker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,11 @@ initialise_queryparser(const Xapian::Database & db)

void
Xapian::prepare_training_file(const string & db_path, const string & queryfile,
const string & qrel_file, Xapian::doccount msetsize,
const string & filename, const FeatureList & flist)
const string& qrel_file,
Xapian::doccount msetsize,
const string& filename,
const FeatureList& flist,
bool flag, double bias)
{
// Set db
Xapian::Database letor_db(db_path);
Expand Down Expand Up @@ -298,7 +301,9 @@ Xapian::prepare_training_file(const string & db_path, const string & queryfile,

vector<FeatureVector> fvv_mset = flist.create_feature_vectors(mset,
query,
letor_db);
letor_db,
flag,
bias);
vector<FeatureVector> fvv_qrel;
int k = 0;
for (Xapian::MSetIterator i = mset.begin(); i != mset.end(); ++i) {
Expand Down
1 change: 1 addition & 0 deletions xapian-letor/tests/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@
/ndcg_score_output.txt
/training_output.txt
/training_output1.txt
/training_output_data_bias.txt
/training_output_three_correct.txt
2 changes: 2 additions & 0 deletions xapian-letor/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ EXTRA_DIST +=\
testdata/querythree.txt \
testdata/score_qrel.txt \
testdata/training_data.txt \
testdata/training_data_bias.txt \
testdata/training_data_ndcg.txt \
testdata/training_data_one_document.txt \
testdata/training_data_three_correct.txt \
Expand All @@ -117,6 +118,7 @@ CLEANFILES +=\
ndcg_output_listnet_3.txt \
ndcg_score_output.txt \
ndcg_score_test.txt \
training_output_data_bias.txt \
training_output_data_one_doc.txt \
training_output_empty.txt \
training_output_three_correct.txt \
Expand Down
53 changes: 53 additions & 0 deletions xapian-letor/tests/api_letor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,59 @@ DEFINE_TESTCASE(preparetrainingfileonedb, generated && path && writable)
unlink("training_output_data_one_doc.txt");
}

// Check that prepare_training_file() writes the expected training file when
// the bias term is requested.
//
// The trailing `true` in the call below is the `flag` argument; `bias` is
// left at its declared default.  The generated file is then compared, token
// by token, against testdata/training_data_bias.txt.
DEFINE_TESTCASE(preparetrainingfile_with_bias, generated && path && writable)
{
// Features to compute; handed to prepare_training_file() as its flist
// argument.
vector<Xapian::Feature*> flist;
flist.push_back(new Xapian::TfFeature());
flist.push_back(new Xapian::TfDoclenFeature());
flist.push_back(new Xapian::IdfFeature());
flist.push_back(new Xapian::CollTfCollLenFeature());
flist.push_back(new Xapian::TfIdfDoclenFeature());
flist.push_back(new Xapian::TfDoclenCollTfCollLenFeature());
string db_path = get_database_path("apitest_listnet_ranker1",
db_index_one_document);
string data_directory = test_driver::get_srcdir() + "/testdata/";
string query = data_directory + "queryone.txt";
string qrel = data_directory + "qrelone.txt";
// Expected output, shipped with the test data.
string training_data = data_directory + "training_data_bias.txt";
// Remove any existing output first, so the file_exists() check below can
// only pass if this run created the file.
unlink("training_output_data_bias.txt");
Xapian::prepare_training_file(db_path, query, qrel, 10,
"training_output_data_bias.txt", flist, true);
TEST(file_exists("training_output_data_bias.txt"));
ifstream if1(training_data);
ifstream if2("training_output_data_bias.txt");
string line1;
string line2;
// Every expected line must have a corresponding generated line.
while (getline(if1, line1)) {
TEST(getline(if2, line2));
istringstream iss1(line1);
istringstream iss2(line2);
string temp1;
string temp2;
// Index of the whitespace-separated token currently being compared.
int i = 0;
while ((iss1 >> temp1) && (iss2 >> temp2)) {
// Tokens 0, 1 and 22 are plain strings and can be compared directly;
// in the expected data they look like "4", "qid:110001" and
// "#docid=1" respectively.  Every other token is an "index:value"
// pair whose value is a double, so it has to be compared with
// TEST_EQUAL_DOUBLE() rather than as a string.
if (i == 0 || i == 1 || i == 22) {
TEST_EQUAL(temp1, temp2);
} else {
// Compare only the numeric value after the ':'; the index before it
// is not checked.
size_t t1 = temp1.find_first_of(':');
size_t t2 = temp2.find_first_of(':');
TEST_EQUAL_DOUBLE(stod(temp1.substr(t1 + 1)),
stod(temp2.substr(t2 + 1)));
}
i++;
}
// Exactly 23 tokens must have been matched on this line, and the
// generated line must have nothing left over.
TEST_REL(i, ==, 23);
TEST(!(iss2 >> temp2));
}
// The generated file must not contain lines beyond the expected ones.
TEST(!getline(if2, line2));
unlink("training_output_data_bias.txt");
}

#define TEST_PARSE_EXCEPTION(TESTFILE) TEST_EXCEPTION(Xapian::LetorParseError,\
Xapian::prepare_training_file(db_path,\
data_directory + TESTFILE, qrel, 10,\
Expand Down
1 change: 1 addition & 0 deletions xapian-letor/tests/testdata/training_data_bias.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4 qid:110001 1:1 2:1 3:1 4:1 5:1 6:1 7:-0.155541 8:-0.311083 9:-0.466624 10:1 11:1 12:1 13:-0.0269673 14:-0.00573887 15:-0.0063153 16:1 17:1 18:1 19:1 20:1 #docid=1

0 comments on commit 2a43250

Please sign in to comment.