Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/zeyiwen/thundersvm
Browse files Browse the repository at this point in the history
  • Loading branch information
QinbinLi committed Nov 24, 2017
2 parents 59dc69b + a1f6f03 commit cc5d6c2
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ EXTRACT_ALL = NO
# be included in the documentation.
# The default value is: NO.

EXTRACT_PRIVATE = YES
EXTRACT_PRIVATE = NO

# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
# scope will be included in the documentation.
Expand Down
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,19 @@ mkdir build && cd build && cmake -DUSE_CUDA=OFF -DUSE_EIGEN=ON .. && make -j
```
Now ThunderSVM will work solely on CPUs and does not rely on CUDA.

## How to cite ThunderSVM
If you use ThunderSVM in your paper, please cite our work.
```
@article{wenthundersvm17,
author = {Wen, Zeyi and Shi, Jiashuai and He, Bingsheng and Li, Qinbin and Chen, Jian},
title = {{ThunderSVM}: A Fast SVM library for SVMs on GPUs and CPUs},
journal = {To appear in arXiv},
year = {2017}
}
```
## Related websites
* [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) | [SVM<sup>light</sup>](http://svmlight.joachims.org/) | [OHD-SVM](https://github.com/OrcusCZ/OHD-SVM) | [NVIDIA Machine Learning](http://www.nvidia.com/object/machine-learning.html)

## TODO
- integrate with interfaces

## Acknowledgement
* We acknowledge NVIDIA for their hardware donations.
* This project is hosted by NUS, collaborating with Prof. Jian Chen (South China University of Technology). Initial work of this project was done when Zeyi Wen worked at The University of Melbourne.
20 changes: 13 additions & 7 deletions include/thundersvm/cmdparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,25 @@

#include "svmparam.h"

/**
* @brief Command-line parser
*/
class CMDParser{
public:
CMDParser() : do_cross_validation(false), nr_fold(0), gpu_id(0) {};
CMDParser() : do_cross_validation(false), nr_fold(0), gpu_id(0) {};

void parse_command_line(int argc, char **argv);
void parse_python(int argc, char **argv);
SvmParam param_cmd;

void parse_python(int argc, char **argv);

SvmParam param_cmd;
bool do_cross_validation;
int nr_fold;
int gpu_id;
int gpu_id;
char svmtrain_input_file_name[1024];
char svmpredict_input_file[1024];
char svmpredict_output_file[1024];
char svmpredict_model_file_name[1024];
char svmpredict_input_file[1024];
char svmpredict_output_file[1024];
char svmpredict_model_file_name[1024];
char model_file_name[1024];
};

Expand Down
34 changes: 34 additions & 0 deletions include/thundersvm/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,71 @@

#include "thundersvm.h"
#include "syncdata.h"

/**
* @brief Dataset reader
*/
class DataSet {
public:
struct node{
node(int index, float_type value) : index(index), value(value) {}

int index;
float_type value;
};

typedef vector<vector<DataSet::node>> node2d;

DataSet();

/**
* construct a dataset using given instances
* @param instances given instances
* @param n_features the number of features of given instances
* @param y the label of each instances
*/
DataSet(const DataSet::node2d &instances, int n_features, const vector<float_type> &y);

///load dataset from file
void load_from_file(string file_name);

///load dataset from python
void load_from_python(float *y, char **x, int len);

///group instances in same class
void group_classes(bool classification = true);

size_t n_instances() const;

size_t n_features() const;

size_t n_classes() const;

///the number of instances for each class
const vector<int> &count() const;

///the start position of instances for each class
const vector<int> &start() const;

///mapping logical label (0,1,2,3,...) to real label (maybe 2,4,5,6,...)
const vector<int> &label() const;

///label for each instances, the instances are arranged as they are in file
const vector<float_type> &y() const;

const node2d & instances() const;

///instances of class \f$y_i\f$
const node2d instances(int y_i) const;

///instances of class \f$y_i\f$ and \f$y_j\f$
const node2d instances(int y_i, int y_j) const;

///mapping instance index (after grouped) to the original index (in file)
const vector<int> original_index() const;

const vector<int> original_index(int y_i) const;

const vector<int> original_index(int y_i, int y_j) const;

private:
Expand Down
28 changes: 26 additions & 2 deletions include/thundersvm/kernelmatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,42 @@
#include "dataset.h"
#include "svmparam.h"

/**
* @brief The management class of kernel values.
*/
class KernelMatrix{
public:
/**
* Create KernelMatrix with given instances (training data or support vectors).
* @param instances the instances, either are training instances for training, or are support vectors for prediction.
* @param param kernel_type in parm is used
*/
explicit KernelMatrix(const DataSet::node2d &instances, SvmParam param);

/**
* return specific rows in kernel matrix
* @param [in] idx the indices of the rows
* @param [out] kernel_rows
*/
void get_rows(const SyncData<int> &idx, SyncData<float_type> &kernel_rows) const;

/**
* return kernel values of each given instance and each instance stored in KernelMatrix
* @param [in] instances the given instances
* @param [out] kernel_rows
*/
void get_rows(const DataSet::node2d &instances, SyncData<float_type> &kernel_rows) const;

///return the diagonal elements of kernel matrix
const SyncData<float_type> &diag() const;

size_t n_instances() const { return n_instances_; };//number of instances
size_t n_features() const { return n_features_; };//number of features
///the number of instances in KernelMatrix
size_t n_instances() const { return n_instances_; };

///the maximum number of features of instances
size_t n_features() const { return n_features_; }

///the number of non-zero features of all instances
size_t nnz() const {return nnz_;};//number of nonzero
private:
KernelMatrix &operator=(const KernelMatrix &) const;
Expand Down
2 changes: 1 addition & 1 deletion include/thundersvm/model/nusvr.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "svr.h"

/**
* @brief-Support Vector Machine for regression
* @brief Support Vector Machine for regression
*/
class NuSVR : public SVR {
public:
Expand Down
2 changes: 1 addition & 1 deletion include/thundersvm/model/svr.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
using std::map;

/**
* Support Vector Machine for regression
* @brief Support Vector Machine for regression
*/
class SVR : public SvmModel {
public:
Expand Down
49 changes: 30 additions & 19 deletions include/thundersvm/svmparam.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

#include "thundersvm.h"

/**
* @brief params for ThunderSVM
*/
struct SvmParam {
SvmParam() {
svm_type = C_SVC;
Expand All @@ -20,32 +23,40 @@ struct SvmParam {
nr_weight = 0;
}

/// SVM type
enum SVM_TYPE {
C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR
}; /* svm_type */
};
/// kernel function type
enum KERNEL_TYPE {
LINEAR, POLY, RBF, SIGMOID/*, PRECOMPUTED*/
}; /* kernel_type */
};
SVM_TYPE svm_type;
KERNEL_TYPE kernel_type;

float_type C; //for regularization
float_type gamma; //for rbf kernel
float_type p; //for regression
float_type nu; //for nu-SVM
float_type epsilon; //stopping criteria
int degree; //degree for polynomial kernel

float_type coef0; /* for poly/sigmoid */

/* these are for training only */
// double cache_size; /* in MB */
int nr_weight; /* for C_SVC */
int *weight_label; /* for C_SVC */
float_type *weight; /* for C_SVC */
// int shrinking; /* use the shrinking heuristics */
int probability; /* do probability estimates */
///regularization parameter
float_type C;
///for RBF kernel
float_type gamma;
///for regression
float_type p;
///for \f$\nu\f$-SVM
float_type nu;
///stopping criteria
float_type epsilon;
///degree for polynomial kernel
int degree;
///for polynomial/sigmoid kernel
float_type coef0;
///for SVC
int nr_weight;
///for SVC
int *weight_label;
///for SVC
float_type *weight;
///do probability estimates
int probability;
static const char *kernel_type_name[6];
static const char *svm_type_name[6]; /* svm_type */
static const char *svm_type_name[6];
};
#endif //THUNDERSVM_SVMPARAM_H
48 changes: 41 additions & 7 deletions include/thundersvm/syncdata.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,64 +8,98 @@
#include "thundersvm.h"
#include "syncmem.h"

/**
* @brief Wrapper of SyncMem with a type
* @tparam T type of element
*/
template<typename T>
class SyncData : public el::Loggable {
public:
/**
* initialize class that can store given count of elements
* @param count the given count
*/
explicit SyncData(size_t count);

SyncData() : mem(nullptr), size_(0) {};

~SyncData();

const T *host_data() const;

const T *device_data() const;

T *host_data();

T *device_data();

void set_host_data(T *host_ptr){
mem->set_host_data(host_ptr);
}

void set_device_data(T *device_ptr){
mem->set_device_data(device_ptr);
mem->set_device_data(device_ptr);
}

void to_host() const{
mem->to_host();
mem->to_host();
}

void to_device() const{
mem->to_device();
mem->to_device();
}

/**
* random access operator
* @param index the index of the elements
* @return **host** element at the index
*/
const T &operator[](int index) const{
return host_data()[index];
return host_data()[index];
}

T &operator[](int index){
return host_data()[index];
return host_data()[index];
}

/**
* copy device data. This will call to_device() implicitly.
* @param source source device data pointer
* @param count the count of elements
*/
void copy_from(const T *source, size_t count);

void copy_from(const SyncData<T> &source);

/**
* set all elements to the given value. This method will set device data.
* @param value
*/
void mem_set(const T &value);

/**
* resize to a new size. This will also clear all data.
* @param count
*/
void resize(size_t count);

size_t mem_size() const {//number of bytes
return mem->size();
return mem->size();
}

size_t size() const {//number of values
return size_;
}

SyncMem::HEAD head() const{
return mem->head();
return mem->head();
}

void log(el::base::type::ostream_t &ostream) const override;

private:
SyncData<T> &operator=(const SyncData<T> &);

SyncData(const SyncData<T>&);

SyncMem *mem;
Expand Down

0 comments on commit cc5d6c2

Please sign in to comment.