Skip to content

Commit

Permalink
Merge pull request #23 from GODqinbin/master
Browse files Browse the repository at this point in the history
Improve docs. Add python interface.
  • Loading branch information
shijiashuai committed Nov 17, 2017
2 parents 654e2a5 + 48687f7 commit 2434965
Show file tree
Hide file tree
Showing 4 changed files with 209 additions and 3 deletions.
22 changes: 20 additions & 2 deletions docs/get-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Here we provide a quick start tutorial for users to install and test ThunderSVM.
## Installation
* Clone ThunderSVM repository
```bash
git clone git@github.com:zeyiwen/mascot.git
git clone --recursive git@github.com:zeyiwen/mascot.git
```
* Download testing datasets
```bash
Expand All @@ -25,6 +25,16 @@ make -j runtest
```
Make sure all the test cases pass.

## Set running mode
We provide two ways to run ThunderSVM: with or without a GPU. By modifying CMakeLists.txt in the thundersvm directory, you can choose whether or not to use a GPU to speed up ThunderSVM. To run ThunderSVM with a GPU, turn USE_CUDA on in CMakeLists.txt.
```bash
set(USE_CUDA ON CACHE BOOL "Compile with CUDA")
```
To run ThunderSVM without a GPU, turn USE_CUDA off.
```bash
set(USE_CUDA OFF CACHE BOOL "Compile with CUDA")
```

## Training SVMs
We show some concrete examples of using ThunderSVM. ThunderSVM uses the same command line options as LibSVM, so existing users of LibSVM can use ThunderSVM quickly. For new users of SVMs, the [user guide](http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf) provided on the LibSVM website is also helpful.

Expand Down Expand Up @@ -64,4 +74,12 @@ Instructions available in [How To](how-to.md) page.
```bash
./thundersvm -s 3 -t 2 -g 3.8 -c 1000 abalone_scale svm.model
```
The meaning of each option can be found in the [parameters](parameters.md) page.
The meaning of each option can be found in the [parameters](parameters.md) page.

### Python Interface
ThunderSVM provides a Python interface. Go to the python subdirectory and start the Python interpreter. Put your dataset file in the dataset subdirectory. Here is an example of calling ThunderSVM functions.
```bash
>>>from svm import *
>>>svm_train('-s 1 -t 2 -g 0.5 -c 100 -n 0.1 -e 0.001 test_dataset.txt test_dataset.txt.model')
>>>svm_predict('test_dataset.txt test_dataset.txt.model test_dataset.txt.out')
```
26 changes: 26 additions & 0 deletions python/svm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python

from ctypes import *
from ctypes.util import find_library
from os import path
import sys


dirname = path.dirname(path.abspath(__file__))
libsvm = CDLL(path.join(dirname, '../build/lib/libthundersvm-lib.so'))

def svm_train(param):
    """Run ThunderSVM training with a LibSVM-style option string.

    ``param`` is split on whitespace, so individual arguments (including
    file names) cannot contain spaces. The program name
    ``'thundersvm-train'`` is prepended as ``argv[0]`` and the resulting
    argc/argv pair is passed to ``thundersvm_train`` in the shared library.

    :param param: option string, e.g. ``'-s 1 -t 2 train.txt train.model'``
    :return: None
    """
    args = ['thundersvm-train'] + param.split()
    # c_char_p only accepts bytes on Python 3; on Python 2 ``str`` is already
    # bytes, so this is a no-op there apart from encoding unicode input.
    encoded = [a if isinstance(a, bytes) else a.encode('utf-8') for a in args]
    param_array = (c_char_p * len(encoded))()
    param_array[:] = encoded
    libsvm.thundersvm_train(len(encoded), param_array)

def svm_predict(param):
    """Run ThunderSVM prediction with a LibSVM-style argument string.

    ``param`` is split on whitespace, so individual arguments (including
    file names) cannot contain spaces. The program name
    ``'thundersvm-predict'`` is prepended as ``argv[0]`` and the resulting
    argc/argv pair is passed to ``thundersvm_predict`` in the shared library.

    :param param: argument string, e.g. ``'test.txt test.model test.out'``
    :return: None
    """
    args = ['thundersvm-predict'] + param.split()
    # c_char_p only accepts bytes on Python 3; on Python 2 ``str`` is already
    # bytes, so this is a no-op there apart from encoding unicode input.
    encoded = [a if isinstance(a, bytes) else a.encode('utf-8') for a in args]
    param_array = (c_char_p * len(encoded))()
    param_array[:] = encoded
    libsvm.thundersvm_predict(len(encoded), param_array)

#libsvm.thundersvm_train(15, "./thundersvm-train -s 1 -t 2 -g 0.5 -c 100 -n 0.1 -e 0.001 dataset/test_dataset.txt dataset/test_dataset.txt.model");
3 changes: 2 additions & 1 deletion src/thundersvm/cmdparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ void CMDParser::parse_command_line(int argc, char **argv) {
}
if (i >= argc - 2)
HelpInfo_svmpredict();

/*
input = fopen(argv[i], "r");
if (input == NULL) {
fprintf(stderr, "can't open input file %s\n", argv[i]);
Expand All @@ -175,6 +175,7 @@ void CMDParser::parse_command_line(int argc, char **argv) {
fprintf(stderr, "can't open output file %s\n", argv[i + 2]);
exit(1);
}
*/
strcpy(svmpredict_input_file, argv[i]);
strcpy(svmpredict_output_file, argv[i + 2]);
strcpy(svmpredict_model_file_name, argv[i + 1]);
Expand Down
161 changes: 161 additions & 0 deletions src/thundersvm/svm_python_interface.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
//functions for python interface

#include <thundersvm/util/log.h>
#include <thundersvm/model/svc.h>
#include <thundersvm/model/svr.h>
#include <thundersvm/model/oneclass_svc.h>
#include <thundersvm/model/nusvc.h>
#include <thundersvm/model/nusvr.h>
#include <thundersvm/util/metric.h>
#include "thundersvm/cmdparser.h"
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>

INITIALIZE_EASYLOGGINGPP
extern "C" {
void thundersvm_train(int argc, char **argv) {
CMDParser parser;
parser.parse_command_line(argc, argv);
/*
parser.param_cmd.svm_type = SvmParam::NU_SVC;
parser.param_cmd.kernel_type = SvmParam::RBF;
parser.param_cmd.C = 100;
parser.param_cmd.gamma = 0;
parser.param_cmd.nu = 0.1;
parser.param_cmd.epsilon = 0.001;
*/

DataSet train_dataset;
char input_file_path[1024] = DATASET_DIR;
char model_file_path[1024] = DATASET_DIR;
strcat(input_file_path, parser.svmtrain_input_file_name);
strcat(model_file_path, parser.model_file_name);
train_dataset.load_from_file(input_file_path);
SvmModel *model = nullptr;
switch (parser.param_cmd.svm_type) {
case SvmParam::C_SVC:
model = new SVC();
break;
case SvmParam::NU_SVC:
model = new NuSVC();
break;
case SvmParam::ONE_CLASS:
model = new OneClassSVC();
break;
case SvmParam::EPSILON_SVR:
model = new SVR();
break;
case SvmParam::NU_SVR:
model = new NuSVR();
break;
}

//todo add this to check_parameter method
if (parser.param_cmd.svm_type == SvmParam::NU_SVC) {
train_dataset.group_classes();
for (int i = 0; i < train_dataset.n_classes(); ++i) {
int n1 = train_dataset.count()[i];
for (int j = i + 1; j < train_dataset.n_classes(); ++j) {
int n2 = train_dataset.count()[j];
if (parser.param_cmd.nu * (n1 + n2) / 2 > min(n1, n2)) {
printf("specified nu is infeasible\n");
return;
}
}
}
}

#ifdef USE_CUDA
CUDA_CHECK(cudaSetDevice(parser.gpu_id));
#endif

vector<float_type> predict_y, test_y;
if (parser.do_cross_validation) {
vector<float_type> test_predict = model->cross_validation(train_dataset, parser.param_cmd, parser.nr_fold);
uint dataset_size = test_predict.size() / 2;
test_y.insert(test_y.end(), test_predict.begin(), test_predict.begin() + dataset_size);
predict_y.insert(predict_y.end(), test_predict.begin() + dataset_size, test_predict.end());
} else {
model->train(train_dataset, parser.param_cmd);
model->save_to_file(model_file_path);
//predict_y = model->predict(train_dataset.instances(), 10000);
//test_y = train_dataset.y();
}
/*
//perform svm testing
Metric *metric = nullptr;
switch (parser.param_cmd.svm_type) {
case SvmParam::C_SVC:
case SvmParam::NU_SVC: {
metric = new Accuracy();
break;
}
case SvmParam::EPSILON_SVR:
case SvmParam::NU_SVR: {
metric = new MSE();
break;
}
case SvmParam::ONE_CLASS: {
}
}
if (metric) {
LOG(INFO) << metric->name() << " = " << metric->score(predict_y, test_y);
}
*/
return;
}

void thundersvm_predict(int argc, char **argv){
CMDParser parser;
parser.parse_command_line(argc, argv);

char model_file_path[1024] = DATASET_DIR;
char predict_file_path[1024] = DATASET_DIR;
char output_file_path[1024] = DATASET_DIR;
strcat(model_file_path, parser.svmpredict_model_file_name);
strcat(predict_file_path, parser.svmpredict_input_file);
strcat(output_file_path, parser.svmpredict_output_file);
std::fstream file;
file.open(model_file_path, std::fstream::in);
string feature, svm_type;
file >> feature >> svm_type;
CHECK_EQ(feature, "svm_type");
SvmModel *model = nullptr;
Metric *metric = nullptr;
if (svm_type == "c_svc") {
model = new SVC();
metric = new Accuracy();
} else if (svm_type == "nu_svc") {
model = new NuSVC();
metric = new Accuracy();
} else if (svm_type == "one_class") {
model = new OneClassSVC();
//todo determine a metric
} else if (svm_type == "epsilon_svr") {
model = new SVR();
metric = new MSE();
} else if (svm_type == "nu_svr") {
model = new NuSVR();
metric = new MSE();
}

#ifdef USE_CUDA
CUDA_CHECK(cudaSetDevice(parser.gpu_id));
#endif

model->load_from_file(model_file_path);
file.close();
file.open(output_file_path);
DataSet predict_dataset;
predict_dataset.load_from_file(predict_file_path);
vector<float_type> predict_y;
predict_y = model->predict(predict_dataset.instances(), 10000);
for (int i = 0; i < predict_y.size(); ++i) {
file << predict_y[i] << std::endl;
}
file.close();

if (metric) {
LOG(INFO) << metric->name() << " = " << metric->score(predict_y, predict_dataset.y());
}
}
}

0 comments on commit 2434965

Please sign in to comment.