Skip to content

Commit

Permalink
Merge branch 'master' of github.com:zeyiwen/thundergbm
Browse files Browse the repository at this point in the history
update docs
  • Loading branch information
zeyiwen committed Jan 20, 2019
2 parents 26df79a + 0e322a0 commit 200a97e
Show file tree
Hide file tree
Showing 32 changed files with 702 additions and 665 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Expand Up @@ -11,12 +11,12 @@ tags
.settings
Debug
Release
dataset
gbdt
dataset*
*~
core
*.DS_Store
*.o
*.swp
2myrun.sh
*build*
!*build*.cu
!*build*.h
65 changes: 65 additions & 0 deletions include/thundergbm/booster.h
@@ -0,0 +1,65 @@
//
// Created by ss on 19-1-17.
//

#ifndef THUNDERGBM_BOOSTER_H
#define THUNDERGBM_BOOSTER_H

#include <thundergbm/objective/objective_function.h>
#include <thundergbm/metric/metric.h>
#include <thundergbm/updater/function_builder.h>
#include <thundergbm/updater/hist_tree_builder.h>
#include <thundergbm/updater/exact_tree_builder.h>
#include "thundergbm/common.h"
#include "syncarray.h"
#include "tree.h"

// Drives gradient boosting training: owns the objective function, the
// evaluation metric and the tree builder, and produces one boosting round
// of trees per boost() call.
class Booster {
public:
    // Creates the builder/objective/metric from `param` and replicates the
    // labels of `dataSet` onto every device.
    void init(const DataSet &dataSet, const GBMParam &param);

    // Performs one boosting round and appends the new trees to
    // `boosted_model` (one inner vector per round).
    void boost(vector<vector<Tree>> &boosted_model);

private:
    MSyncArray<GHPair> gradients;               // per-device gradient/hessian pairs
    std::unique_ptr<ObjectiveFunction> obj;     // loss; supplies gradients
    std::unique_ptr<Metric> metric;             // training metric matching the objective
    MSyncArray<float_type> y;                   // per-device copy of the labels
    std::unique_ptr<FunctionBuilder> fbuilder;  // concrete tree builder (exact/hist)
    GBMParam param;
    int n_devices;                              // number of GPUs used
};

// Initializes the booster: instantiates the tree builder selected by
// param.tree_method, the objective selected by param.objective, and the
// metric matching that objective; then sizes the per-device gradient and
// label buffers and copies the labels onto every device.
//
// NOTE(review): `inline` added — this definition lives in a header, and
// without inline, including booster.h from more than one translation unit
// produces duplicate-symbol linker errors (ODR violation).
inline void Booster::init(const DataSet &dataSet, const GBMParam &param) {
    this->param = param;
    fbuilder.reset(FunctionBuilder::create(param.tree_method));
    fbuilder->init(dataSet, param);
    obj.reset(ObjectiveFunction::create(param.objective));
    obj->configure(param, dataSet);
    // the metric is derived from the objective (e.g. rmse for regression)
    metric.reset(Metric::create(obj->default_metric_name()));
    metric->configure(param, dataSet);

    n_devices = param.n_device;
    // one gradient/hessian pair per (class, instance)
    int n_outputs = param.num_class * dataSet.n_instances();
    gradients = MSyncArray<GHPair>(n_devices, n_outputs);
    y = MSyncArray<float_type>(n_devices, dataSet.n_instances());

    // replicate the labels onto each device
    DO_ON_MULTI_DEVICES(n_devices, [&](int device_id) {
        y[device_id].copy_from(dataSet.y.data(), dataSet.n_instances());
    });
}

// Performs one boosting round: recomputes gradients from the current
// predictions on every device, builds the next set of trees, and logs the
// training metric.
//
// NOTE(review): `inline` added — this definition lives in a header, and
// without inline, including booster.h from more than one translation unit
// produces duplicate-symbol linker errors (ODR violation).
inline void Booster::boost(vector<vector<Tree>> &boosted_model) {
    // update gradients from the labels and the builder's current predictions
    DO_ON_MULTI_DEVICES(n_devices, [&](int device_id) {
        obj->get_gradient(y[device_id], fbuilder->get_y_predict()[device_id], gradients[device_id]);
    });

    // build the next approximate function (one tree per class) and record it
    boosted_model.push_back(fbuilder->build_approximate(gradients));

    // report the training metric on device 0's predictions
    LOG(INFO) << metric->get_name() << " = " << metric->get_score(fbuilder->get_y_predict().front());
}

#endif //THUNDERGBM_BOOSTER_H
2 changes: 1 addition & 1 deletion include/thundergbm/hist_cut.h
Expand Up @@ -31,7 +31,7 @@ class HistCut {
}

void get_cut_points(SparseColumns &columns, InsStat &stats, int max_num_bins, int n_instances);
void get_cut_points2(SparseColumns &columns, InsStat &stats, int max_num_bins, int n_instances);
void get_cut_points2(SparseColumns &columns, int max_num_bins, int n_instances);
};

#endif //THUNDERGBM_HIST_CUT_H
8 changes: 0 additions & 8 deletions include/thundergbm/metric/pointwise_metric.h
Expand Up @@ -14,12 +14,4 @@ class RMSE : public Metric {
string get_name() const override { return "RMSE"; }
};

class Accuracy: public Metric {
public:
float_type get_score(const SyncArray<float_type> &y_p) const override;

string get_name() const override;
};


#endif //THUNDERGBM_POINTWISE_METRIC_H
2 changes: 1 addition & 1 deletion include/thundergbm/objective/multiclass_obj.h
Expand Up @@ -17,7 +17,7 @@ class Softmax : public ObjectiveFunction {

void configure(GBMParam param, const DataSet &dataset) override;

string default_metric() override { return "macc"; }
string default_metric_name() override { return "macc"; }

~Softmax() override = default;

Expand Down
2 changes: 1 addition & 1 deletion include/thundergbm/objective/objective_function.h
Expand Up @@ -16,7 +16,7 @@ class ObjectiveFunction {
virtual void
predict_transform(SyncArray<float_type> &y){};
virtual void configure(GBMParam param, const DataSet &dataset) = 0;
virtual string default_metric() = 0;
virtual string default_metric_name() = 0;

static ObjectiveFunction* create(string name);
static bool need_load_group_file(string name);
Expand Down
4 changes: 2 additions & 2 deletions include/thundergbm/objective/ranking_obj.h
Expand Up @@ -19,7 +19,7 @@ class LambdaRank : public ObjectiveFunction {

void configure(GBMParam param, const DataSet &dataset) override;

string default_metric() override;
string default_metric_name() override;

~LambdaRank() override = default;

Expand All @@ -36,7 +36,7 @@ class LambdaRankNDCG : public LambdaRank {
public:
void configure(GBMParam param, const DataSet &dataset) override;

string default_metric() override;
string default_metric_name() override;

protected:
float_type get_delta_z(float_type labelI, float_type labelJ, int rankI, int rankJ, int group_id) override;
Expand Down
18 changes: 14 additions & 4 deletions include/thundergbm/objective/regression_obj.h
Expand Up @@ -34,7 +34,7 @@ class RegressionObj : public ObjectiveFunction {

~RegressionObj() override = default;

string default_metric() override {
string default_metric_name() override {
return "rmse";
}
};
Expand All @@ -56,16 +56,26 @@ struct LogisticLoss {

template<>
struct LogisticLoss<float> {
    // First/second derivative of the logistic loss w.r.t. the raw score y_p.
    // The hessian p*(1-p) is clamped away from zero so the Newton step stays
    // well-defined when the probability saturates at 0 or 1.
    HOST_DEVICE static GHPair gradient(float y, float y_p) {
        float p = sigmoid(y_p);
        return GHPair(p - y, fmaxf(p * (1 - p), 1e-16f));
    }

    // Maps a raw score to a probability for prediction output.
    HOST_DEVICE static float predict_transform(float y) { return sigmoid(y); }

    // BUG FIX: marked HOST_DEVICE — sigmoid is called from the HOST_DEVICE
    // functions above; without the qualifier, device-side compilation of
    // gradient()/predict_transform() fails (host function called from device).
    HOST_DEVICE static float sigmoid(float x) { return 1 / (1 + expf(-x)); }
};

template<>
struct LogisticLoss<double> {
    // First/second derivative of the logistic loss w.r.t. the raw score y_p.
    // The hessian p*(1-p) is clamped away from zero so the Newton step stays
    // well-defined when the probability saturates at 0 or 1.
    HOST_DEVICE static GHPair gradient(double y, double y_p) {
        double p = sigmoid(y_p);
        return GHPair(p - y, fmax(p * (1 - p), 1e-16));
    }

    // Maps a raw score to a probability for prediction output.
    HOST_DEVICE static double predict_transform(double x) { return sigmoid(x); }

    // BUG FIX: marked HOST_DEVICE — sigmoid is called from the HOST_DEVICE
    // functions above; without the qualifier, device-side compilation of
    // gradient()/predict_transform() fails (host function called from device).
    HOST_DEVICE static double sigmoid(double x) { return 1 / (1 + exp(-x)); }
};

#endif //THUNDERGBM_REGRESSION_OBJ_H
55 changes: 49 additions & 6 deletions include/thundergbm/syncarray.h
Expand Up @@ -125,30 +125,73 @@ class SyncArray : public el::Loggable {
void log(el::base::type::ostream_t &ostream) const override {
int i;
ostream << "[";
const T *data = host_data();
for (i = 0; i < size() - 1 && i < el::base::consts::kMaxLogPerContainer - 1; ++i) {
// for (i = 0; i < size() - 1; ++i) {
ostream << host_data()[i] << ",";
ostream << data[i] << ",";
}
ostream << host_data()[i];
if (size() < el::base::consts::kMaxLogPerContainer - 1) {
if (size() <= el::base::consts::kMaxLogPerContainer) {
ostream << "]";
} else {
ostream << "...";
ostream << ", ...(" << size() - el::base::consts::kMaxLogPerContainer << " more)";
}
};

int get_owner_id() const {
return mem->get_owner_id();
}

private:
//move constructor
SyncArray(SyncArray<T> &&rhs) noexcept : mem(rhs.mem), size_(rhs.size_) {
rhs.mem = nullptr;
rhs.size_ = 0;
}

//move assign
SyncArray &operator=(SyncArray<T> &&rhs) noexcept {
delete mem;
mem = rhs.mem;
size_ = rhs.size_;

rhs.mem = nullptr;
rhs.size_ = 0;
return *this;
}

SyncArray(const SyncArray<T> &);
SyncArray(const SyncArray<T> &) = delete;

SyncArray &operator=(const SyncArray<T> &);
SyncArray &operator=(const SyncArray<T> &) = delete;

private:
SyncMem *mem;
size_t size_;
};

// A per-device collection of SyncArrays: element i lives on device i.
// Move-only, because SyncArray itself is move-only.
template<typename T>
class MSyncArray : public vector<SyncArray<T>> {
public:
    // n_device empty SyncArrays (one slot per device).
    explicit MSyncArray(size_t n_device) : base_class(n_device) {};

    // BUG FIX: the base vector must hold n_device elements, each a
    // SyncArray of `size`; it was previously constructed with `size`
    // elements, so at(i) read out of bounds whenever size < n_device and
    // allocated the wrong number of arrays otherwise.
    explicit MSyncArray(size_t n_device, size_t size) : base_class(n_device) {
        for (size_t i = 0; i < n_device; ++i) {
            this->at(i) = SyncArray<T>(size);
        }
    };

    MSyncArray() : base_class() {};

    //move constructor and assign
    MSyncArray(MSyncArray<T> &&) noexcept = default;

    MSyncArray &operator=(MSyncArray<T> &&) noexcept = default;

    MSyncArray(const MSyncArray<T> &) = delete;

    MSyncArray &operator=(const MSyncArray<T> &) = delete;

private:
    typedef vector<SyncArray<T>> base_class;
};

#endif //THUNDERGBM_SYNCDATA_H
9 changes: 4 additions & 5 deletions include/thundergbm/trainer.h
Expand Up @@ -11,11 +11,10 @@

class TreeTrainer{
public:
float_type train(GBMParam &param);
float_type train_exact(GBMParam &param);
float_type train_hist(GBMParam &param);

float_type compute_rmse(const InsStat &stats);
void train(GBMParam &param);
// float_type train(GBMParam &param);
// float_type train_exact(GBMParam &param);
// float_type train_hist(GBMParam &param);

void save_trees(GBMParam &param, vector<Tree> &trees);
private:
Expand Down
48 changes: 48 additions & 0 deletions include/thundergbm/updater/exact_tree_builder.h
@@ -0,0 +1,48 @@
//
// Created by ss on 19-1-19.
//

#ifndef THUNDERGBM_EXACT_TREE_BUILDER_H
#define THUNDERGBM_EXACT_TREE_BUILDER_H

#include <thundergbm/util/multi_device.h>
#include "thundergbm/common.h"
#include "function_builder.h"
#include "shard.h"


class ExactTreeBuilder : public FunctionBuilder {
public:
vector<Tree> build_approximate(const MSyncArray<GHPair> &gradients) override;

void init(const DataSet &dataset, const GBMParam &param) override;

const MSyncArray<float_type>& get_y_predict() override;

struct InternalShard: public Shard {
int rank;

void find_split(int level);

void update_ins2node_id();
};

template<typename L>
static void for_each_shard(vector<InternalShard> &shards, L lambda) {
DO_ON_MULTI_DEVICES(shards.size(), [&](int device_id) {
lambda(shards[device_id]);
});
}

vector<InternalShard> shards;

void split_point_all_reduce(int depth, vector<InternalShard> &shards);

void ins2node_id_all_reduce(vector<InternalShard> &shards, int depth);
GBMParam param;
private:
MSyncArray<float_type> y_predict;
};


#endif //THUNDERGBM_EXACT_TREE_BUILDER_H
47 changes: 0 additions & 47 deletions include/thundergbm/updater/exact_updater.h

This file was deleted.

23 changes: 23 additions & 0 deletions include/thundergbm/updater/function_builder.h
@@ -0,0 +1,23 @@
//
// Created by ss on 19-1-17.
//

#ifndef THUNDERGBM_FUNCTION_BUILDER_H
#define THUNDERGBM_FUNCTION_BUILDER_H

#include <thundergbm/tree.h>
#include "thundergbm/common.h"
#include "thundergbm/sparse_columns.h"

// Abstract interface for tree builders (exact / histogram based). Booster
// owns concrete builders through std::unique_ptr<FunctionBuilder>.
class FunctionBuilder {
public:
    // Builds one boosting round of trees from the per-device gradients.
    virtual vector<Tree> build_approximate(const MSyncArray<GHPair> &gradients) = 0;

    virtual void init(const DataSet &dataset, const GBMParam &param) {};

    // Current per-device predictions for the training instances.
    virtual const MSyncArray<float_type> &get_y_predict() = 0;

    // BUG FIX: virtual destructor added — instances are deleted through a
    // FunctionBuilder pointer (std::unique_ptr<FunctionBuilder> in Booster),
    // which is undefined behavior when the base destructor is not virtual.
    virtual ~FunctionBuilder() = default;

    // Factory: maps a tree_method name to a concrete builder instance.
    static FunctionBuilder *create(std::string name);
};

#endif //THUNDERGBM_FUNCTION_BUILDER_H

0 comments on commit 200a97e

Please sign in to comment.