diff --git a/Makefile b/Makefile index 1bc5d4e5..a49b9ad5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CXX ?= g++ CFLAGS = -Wall -Wconversion -O3 -fPIC -SHVER = 4 +SHVER = 5 OS = $(shell uname) ifeq ($(OS),Darwin) SHARED_LIB_FLAG = -dynamiclib -Wl,-install_name,libsvm.so.$(SHVER) diff --git a/matlab/svmtrain.c b/matlab/svmtrain.c index f55c0648..213906d9 100644 --- a/matlab/svmtrain.c +++ b/matlab/svmtrain.c @@ -46,6 +46,9 @@ void exit_with_help() "-e epsilon : set tolerance of termination criterion (default 0.001)\n" "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" + "-f floatprecision : set the floating-point precision of kernel values (default 0)\n" + " 0 -- float\n" + " 1 -- double\n" "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" "-v n: n-fold cross validation mode\n" "-q : quiet mode (no outputs)\n" @@ -125,6 +128,7 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name) param.p = 0.1; param.shrinking = 1; param.probability = 0; + param.use_double_precision_kernel_values = 0; param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL; @@ -187,6 +191,9 @@ int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name) case 'b': param.probability = atoi(argv[i]); break; + case 'f': + param.use_double_precision_kernel_values = atoi(argv[i]); + break; case 'q': print_func = &print_null; i--; diff --git a/python/libsvm/svm.py b/python/libsvm/svm.py index 7304ec76..030c5500 100644 --- a/python/libsvm/svm.py +++ b/python/libsvm/svm.py @@ -240,10 +240,10 @@ def __init__(self, y, x, isKernel=False): class svm_parameter(Structure): _names = ["svm_type", "kernel_type", "degree", "gamma", "coef0", "cache_size", "eps", "C", "nr_weight", "weight_label", "weight", - "nu", "p", "shrinking", "probability"] + "nu", "p", "shrinking", "probability", "use_double_precision_kernel_values"] _types = [c_int, c_int, c_int, c_double, c_double, c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), - c_double, c_double, c_int, c_int] + c_double, c_double, c_int, c_int, c_int] _fields_ = genFields(_names, _types) def __init__(self, options = None): @@ -274,6 +274,7 @@ def set_to_default_values(self): self.p = 0.1 self.shrinking = 1 self.probability = 0 + self.use_double_precision_kernel_values = 0 self.nr_weight = 0 self.weight_label = None self.weight = None @@ -331,6 +332,9 @@ def parse_options(self, options): elif argv[i] == "-b": i = i + 1 self.probability = int(argv[i]) + elif argv[i] == '-f': + i = i + 1 + self.use_double_precision_kernel_values = int(argv[i]) elif argv[i] == "-q": self.print_func = PRINT_STRING_FUN(print_null) elif argv[i] == "-v": diff --git a/python/libsvm/svmutil.py b/python/libsvm/svmutil.py index 923d26ca..cc26315f 100644 --- a/python/libsvm/svmutil.py +++ b/python/libsvm/svmutil.py @@ -84,6 +84,9 @@ def svm_train(arg1, arg2=None, arg3=None): -e epsilon : set tolerance of termination criterion (default 0.001) -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1) -b probability_estimates : whether to train a model for probability estimates, 0 or 1 (default 0) + -f floatprecision : set the floating-point precision of kernel values (default 0) + 0 -- float + 1 -- double -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1) -v n: n-fold cross validation mode -q : quiet mode (no outputs) diff --git a/svm-train.c b/svm-train.c index b6ce987d..f4cc0899 100644 --- a/svm-train.c +++ b/svm-train.c @@ -35,6 +35,9 @@ void exit_with_help() "-e epsilon : set tolerance of termination criterion (default 0.001)\n" "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" + "-f floatprecision : set the floating-point precision of kernel values (default 0)\n" + " 0 -- float\n" + " 1 -- double\n" "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" "-v n: n-fold cross validation mode\n" "-q : quiet mode (no outputs)\n" @@ -176,6 +179,7 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode param.p = 0.1; param.shrinking = 1; param.probability = 0; + param.use_double_precision_kernel_values = 0; param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL; @@ -225,6 +229,9 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode case 'b': param.probability = atoi(argv[i]); break; + case 'f': + param.use_double_precision_kernel_values = atoi(argv[i]); + break; case 'q': print_func = &print_null; i--; diff --git a/svm.cpp b/svm.cpp index 01e1b5ed..2e710c6e 100644 --- a/svm.cpp +++ b/svm.cpp @@ -13,7 +13,6 @@ #endif int libsvm_version = LIBSVM_VERSION; -typedef float Qfloat; typedef signed char schar; #ifndef min template static inline T min(T x,T y) { return (x class Cache { public: @@ -95,7 +95,8 @@ class Cache void lru_insert(head_t *h); }; -Cache::Cache(int l_,size_t size_):l(l_),size(size_) +template +Cache::Cache(int l_,size_t size_):l(l_),size(size_) { head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 size /= sizeof(Qfloat); @@ -104,21 +105,24 @@ Cache::Cache(int l_,size_t size_):l(l_),size(size_) lru_head.next = lru_head.prev = &lru_head; } -Cache::~Cache() +template +Cache::~Cache() { for(head_t *h = lru_head.next; h != &lru_head; h=h->next) free(h->data); free(head); } -void Cache::lru_delete(head_t *h) +template +void Cache::lru_delete(head_t *h) { // delete from current location h->prev->next = h->next; h->next->prev = h->prev; } -void Cache::lru_insert(head_t *h) +template +void Cache::lru_insert(head_t *h) { // insert to last position h->next = &lru_head; @@ -127,7 +131,8 @@ void Cache::lru_insert(head_t *h) h->next->prev = h; } -int Cache::get_data(const int index, Qfloat **data, int len) +template +int Cache::get_data(const int index, Qfloat **data, int len) { head_t *h = &head[index]; if(h->len) lru_delete(h); @@ -157,7 +162,8 @@ int Cache::get_data(const int index, Qfloat **data, int len) return len; } -void Cache::swap_index(int i, int j) +template +void Cache::swap_index(int i, int j) { if(i==j) return; @@ -195,15 +201,17 @@ void Cache::swap_index(int i, int j) // the constructor of Kernel prepares to calculate the l*l kernel matrix // the member function get_Q is for getting one column from the Q Matrix // +template class QMatrix { public: virtual Qfloat *get_Q(int column, int len) const = 0; - virtual double *get_QD() const = 0; + virtual Qfloat *get_QD() const = 0; virtual void swap_index(int i, int j) const = 0; virtual ~QMatrix() {} }; -class Kernel: public QMatrix { +template +class Kernel: public QMatrix { public: Kernel(int l, svm_node * const * x, const svm_parameter& param); virtual ~Kernel(); @@ -211,7 +219,7 @@ class Kernel: public QMatrix { static double k_function(const svm_node *x, const svm_node *y, const svm_parameter& param); virtual Qfloat *get_Q(int column, int len) const = 0; - virtual double *get_QD() const = 0; + virtual Qfloat *get_QD() const = 0; virtual void swap_index(int i, int j) const // no so const... { swap(x[i],x[j]); @@ -254,7 +262,8 @@ class Kernel: public QMatrix { } }; -Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) +template +Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) :kernel_type(param.kernel_type), degree(param.degree), gamma(param.gamma), coef0(param.coef0) { @@ -289,13 +298,15 @@ Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) x_square = 0; } -Kernel::~Kernel() +template +Kernel::~Kernel() { delete[] x; delete[] x_square; } -double Kernel::dot(const svm_node *px, const svm_node *py) +template +double Kernel::dot(const svm_node *px, const svm_node *py) { double sum = 0; while(px->index != -1 && py->index != -1) @@ -317,7 +328,8 @@ double Kernel::dot(const svm_node *px, const svm_node *py) return sum; } -double Kernel::k_function(const svm_node *x, const svm_node *y, +template +double Kernel::k_function(const svm_node *x, const svm_node *y, const svm_parameter& param) { switch(param.kernel_type) @@ -376,6 +388,14 @@ double Kernel::k_function(const svm_node *x, const svm_node *y, } } +struct SolutionInfo { + double obj; + double rho; + double upper_bound_p; + double upper_bound_n; + double r; // for Solver_NU +}; + // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918 // Solves: // @@ -394,20 +414,13 @@ double Kernel::k_function(const svm_node *x, const svm_node *y, // // solution will be put in \alpha, objective value will be put in obj // +template class Solver { public: Solver() {}; virtual ~Solver() {}; - struct SolutionInfo { - double obj; - double rho; - double upper_bound_p; - double upper_bound_n; - double r; // for Solver_NU - }; - - void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, + void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking); protected: @@ -417,8 +430,8 @@ class Solver { enum { LOWER_BOUND, UPPER_BOUND, FREE }; char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE double *alpha; - const QMatrix *Q; - const double *QD; + const QMatrix *Q; + const Qfloat *QD; double eps; double Cp,Cn; double *p; @@ -451,7 +464,8 @@ class Solver { bool be_shrunk(int i, double Gmax1, double Gmax2); }; -void Solver::swap_index(int i, int j) +template +void Solver::swap_index(int i, int j) { Q->swap_index(i,j); swap(y[i],y[j]); @@ -463,7 +477,8 @@ void Solver::swap_index(int i, int j) swap(G_bar[i],G_bar[j]); } -void Solver::reconstruct_gradient() +template +void Solver::reconstruct_gradient() { // reconstruct inactive elements of G from G_bar and free variables @@ -505,7 +520,8 @@ void Solver::reconstruct_gradient() } } -void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, +template +void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking) { @@ -787,7 +803,8 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, } // return 1 if already optimal, return 0 otherwise -int Solver::select_working_set(int &out_i, int &out_j) +template +int Solver::select_working_set(int &out_i, int &out_j) { // return i,j such that // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) @@ -886,7 +903,8 @@ int Solver::select_working_set(int &out_i, int &out_j) return 0; } -bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) +template +bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) { if(is_upper_bound(i)) { @@ -906,7 +924,8 @@ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) return(false); } -void Solver::do_shrinking() +template +void Solver::do_shrinking() { int i; double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } @@ -967,7 +986,8 @@ void Solver::do_shrinking() } } -double Solver::calculate_rho() +template +double Solver::calculate_rho() { double r; int nr_free = 0; @@ -1010,16 +1030,17 @@ double Solver::calculate_rho() // // additional constraint: e^T \alpha = constant // -class Solver_NU: public Solver +template +class Solver_NU: public Solver { public: Solver_NU() {} - void Solve(int l, const QMatrix& Q, const double *p, const schar *y, + void Solve(int l, const QMatrix& Q, const double *p, const schar *y, double *alpha, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking) { this->si = si; - Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking); + Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking); } private: SolutionInfo *si; @@ -1027,10 +1048,25 @@ class Solver_NU: public Solver double calculate_rho(); bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); void do_shrinking(); + + using Solver::active_size; + using Solver::y; + using Solver::G; + using Solver::Q; + using Solver::QD; + using Solver::eps; + using Solver::l; + using Solver::unshrink; + + using Solver::is_upper_bound; + using Solver::is_lower_bound; + using Solver::swap_index; + using Solver::reconstruct_gradient; }; // return 1 if already optimal, return 0 otherwise -int Solver_NU::select_working_set(int &out_i, int &out_j) +template +int Solver_NU::select_working_set(int &out_i, int &out_j) { // return i,j such that y_i = y_j and // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) @@ -1142,7 +1178,8 @@ int Solver_NU::select_working_set(int &out_i, int &out_j) return 0; } -bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) +template +bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) { if(is_upper_bound(i)) { @@ -1162,7 +1199,8 @@ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, doubl return(false); } -void Solver_NU::do_shrinking() +template +void Solver_NU::do_shrinking() { double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } @@ -1214,7 +1252,8 @@ void Solver_NU::do_shrinking() } } -double Solver_NU::calculate_rho() +template +double Solver_NU::calculate_rho() { int nr_free1 = 0,nr_free2 = 0; double ub1 = INF, ub2 = INF; @@ -1267,15 +1306,16 @@ double Solver_NU::calculate_rho() // // Q matrices for various formulations // -class SVC_Q: public Kernel +template +class SVC_Q: public Kernel { public: SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) - :Kernel(prob.l, prob.x, param) + : Kernel(prob.l, prob.x, param) { clone(y,y_,prob.l); - cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20))); - QD = new double[prob.l]; + cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20))); + QD = new Qfloat[prob.l]; for(int i=0;i*kernel_function)(i,i); } @@ -1295,7 +1335,7 @@ class SVC_Q: public Kernel return data; } - double *get_QD() const + Qfloat *get_QD() const { return QD; } @@ -1303,7 +1343,7 @@ class SVC_Q: public Kernel void swap_index(int i, int j) const { cache->swap_index(i,j); - Kernel::swap_index(i,j); + Kernel::swap_index(i,j); swap(y[i],y[j]); swap(QD[i],QD[j]); } @@ -1316,18 +1356,21 @@ class SVC_Q: public Kernel } private: schar *y; - Cache *cache; - double *QD; + Cache *cache; + Qfloat *QD; + + using Kernel::kernel_function; }; -class ONE_CLASS_Q: public Kernel +template +class ONE_CLASS_Q: public Kernel { public: ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) - :Kernel(prob.l, prob.x, param) + : Kernel(prob.l, prob.x, param) { - cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20))); - QD = new double[prob.l]; + cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20))); + QD = new Qfloat[prob.l]; for(int i=0;i*kernel_function)(i,i); } @@ -1344,7 +1387,7 @@ class ONE_CLASS_Q: public Kernel return data; } - double *get_QD() const + Qfloat *get_QD() const { return QD; } @@ -1352,7 +1395,7 @@ class ONE_CLASS_Q: public Kernel void swap_index(int i, int j) const { cache->swap_index(i,j); - Kernel::swap_index(i,j); + Kernel::swap_index(i,j); swap(QD[i],QD[j]); } @@ -1362,19 +1405,22 @@ class ONE_CLASS_Q: public Kernel delete[] QD; } private: - Cache *cache; - double *QD; + Cache *cache; + Qfloat *QD; + + using Kernel::kernel_function; }; -class SVR_Q: public Kernel +template +class SVR_Q: public Kernel { public: SVR_Q(const svm_problem& prob, const svm_parameter& param) - :Kernel(prob.l, prob.x, param) + : Kernel(prob.l, prob.x, param) { l = prob.l; - cache = new Cache(l,(size_t)(param.cache_size*(1<<20))); - QD = new double[2*l]; + cache = new Cache(l,(size_t)(param.cache_size*(1<<20))); + QD = new Qfloat[2*l]; sign = new schar[2*l]; index = new int[2*l]; for(int k=0;k *cache; schar *sign; int *index; mutable int next_buffer; Qfloat *buffer[2]; - double *QD; + Qfloat *QD; + + using Kernel::kernel_function; }; // @@ -1449,7 +1497,7 @@ class SVR_Q: public Kernel // static void solve_c_svc( const svm_problem *prob, const svm_parameter* param, - double *alpha, Solver::SolutionInfo* si, double Cp, double Cn) + double *alpha, SolutionInfo* si, double Cp, double Cn) { int l = prob->l; double *minus_ones = new double[l]; @@ -1464,9 +1512,15 @@ static void solve_c_svc( if(prob->y[i] > 0) y[i] = +1; else y[i] = -1; } - Solver s; - s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, - alpha, Cp, Cn, param->eps, si, param->shrinking); + if (param->use_double_precision_kernel_values) { + Solver s; + s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, + alpha, Cp, Cn, param->eps, si, param->shrinking); + } else { + Solver s; + s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, + alpha, Cp, Cn, param->eps, si, param->shrinking); + } double sum_alpha=0; for(i=0;il; @@ -1518,9 +1572,15 @@ static void solve_nu_svc( for(i=0;ieps, si, param->shrinking); + if (param->use_double_precision_kernel_values) { + Solver_NU s; + s.Solve(l, SVC_Q(*prob,*param,y), zeros, y, + alpha, 1.0, 1.0, param->eps, si, param->shrinking); + } else { + Solver_NU s; + s.Solve(l, SVC_Q(*prob,*param,y), zeros, y, + alpha, 1.0, 1.0, param->eps, si, param->shrinking); + } double r = si->r; info("C = %f\n",1/r); @@ -1539,7 +1599,7 @@ static void solve_nu_svc( static void solve_one_class( const svm_problem *prob, const svm_parameter *param, - double *alpha, Solver::SolutionInfo* si) + double *alpha, SolutionInfo* si) { int l = prob->l; double *zeros = new double[l]; @@ -1561,9 +1621,15 @@ static void solve_one_class( ones[i] = 1; } - Solver s; - s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, - alpha, 1.0, 1.0, param->eps, si, param->shrinking); + if (param->use_double_precision_kernel_values) { + Solver s; + s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, + alpha, 1.0, 1.0, param->eps, si, param->shrinking); + } else { + Solver s; + s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, + alpha, 1.0, 1.0, param->eps, si, param->shrinking); + } delete[] zeros; delete[] ones; @@ -1571,7 +1637,7 @@ static void solve_one_class( static void solve_epsilon_svr( const svm_problem *prob, const svm_parameter *param, - double *alpha, Solver::SolutionInfo* si) + double *alpha, SolutionInfo* si) { int l = prob->l; double *alpha2 = new double[2*l]; @@ -1590,9 +1656,15 @@ static void solve_epsilon_svr( y[i+l] = -1; } - Solver s; - s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, - alpha2, param->C, param->C, param->eps, si, param->shrinking); + if (param->use_double_precision_kernel_values) { + Solver s; + s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, + alpha2, param->C, param->C, param->eps, si, param->shrinking); + } else { + Solver s; + s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, + alpha2, param->C, param->C, param->eps, si, param->shrinking); + } double sum_alpha = 0; for(i=0;il; double C = param->C; @@ -1631,9 +1703,15 @@ static void solve_nu_svr( y[i+l] = -1; } - Solver_NU s; - s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, - alpha2, C, C, param->eps, si, param->shrinking); + if (param->use_double_precision_kernel_values) { + Solver_NU s; + s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, + alpha2, C, C, param->eps, si, param->shrinking); + } else { + Solver_NU s; + s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, + alpha2, C, C, param->eps, si, param->shrinking); + } info("epsilon = %f\n",-si->r); @@ -1659,7 +1737,7 @@ static decision_function svm_train_one( double Cp, double Cn) { double *alpha = Malloc(double,prob->l); - Solver::SolutionInfo si; + SolutionInfo si; switch(param->svm_type) { case C_SVC: @@ -2610,7 +2688,7 @@ double svm_predict_values(const svm_model *model, const svm_node *x, double* dec #pragma omp parallel for private(i) reduction(+:sum) schedule(guided) #endif for(i=0;il;i++) - sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param); + sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param); sum -= model->rho[0]; *dec_values = sum; @@ -2629,7 +2707,7 @@ double svm_predict_values(const svm_model *model, const svm_node *x, double* dec #pragma omp parallel for private(i) schedule(guided) #endif for(i=0;iSV[i],model->param); + kvalue[i] = Kernel::k_function(x,model->SV[i],model->param); int *start = Malloc(int,nr_class); start[0] = 0; diff --git a/svm.h b/svm.h index de5145dc..de75ab62 100644 --- a/svm.h +++ b/svm.h @@ -44,6 +44,7 @@ struct svm_parameter double p; /* for EPSILON_SVR */ int shrinking; /* use the shrinking heuristics */ int probability; /* do probability estimates */ + int use_double_precision_kernel_values; }; //