// Exported library version constant and a shorthand for signed char,
// used throughout for +1/-1 class labels.
// NOTE(review): every line in this file carries a numeric prefix fused in by
// a garbled extraction, and many interior lines are missing; code tokens are
// preserved verbatim below.
9 int libsvm_version = LIBSVM_VERSION;
11 typedef signed char schar;
// Return the smaller of two values; on ties the second argument is returned.
template <class T> static inline T min(T x, T y)
{
	if (x < y)
		return x;
	return y;
}
// Return the larger of two values; on ties the second argument is returned.
template <class T> static inline T max(T x, T y)
{
	if (x > y)
		return x;
	return y;
}
// Exchange the contents of x and y through a temporary copy.
template <class T> static inline void swap(T& x, T& y)
{
	T saved = x;
	x = y;
	y = saved;
}
// clone: allocate a length-n array of T into dst and byte-copy src into it.
// NOTE(review): the `dst = new T[n];` line and braces are missing from this
// extraction — verify against the full source before editing.
19 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
22 memcpy((void *)dst,(void *)src,sizeof(T)*n);
// powi: integer power by repeated squaring (O(log times) multiplies);
// interior loop body lines are missing here.
24 static inline double powi(double base, int times)
26 double tmp = base, ret = 1.0;
28 for(int t=times; t>0; t/=2)
// Allocation shorthand used throughout the file.
37 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
// Default output sink (stdout) and the indirection pointer that lets callers
// redirect all library messages. info() is the printf-style wrapper; the
// second, empty definition is the non-verbose build variant (the #if/#else
// lines are missing from this extraction).
39 static void print_string_stdout(const char *s)
44 static void (*svm_print_string) (const char *) = &print_string_stdout;
46 static void info(const char *fmt,...)
53 (*svm_print_string)(buf);
56 static void info(const char *fmt,...) {}
// Cache: LRU cache of kernel-matrix columns, keyed by row index.
// Stores Qfloat column fragments in a doubly-linked circular LRU list.
62 // l is the number of total data items
63 // size is the cache size limit in bytes
68 Cache(int l,long int size);
71 // request data [0,len)
72 // return some position p where [p,len) need to be filled
73 // (p >= len if nothing needs to be filled)
74 int get_data(const int index, Qfloat **data, int len);
75 void swap_index(int i, int j);
// head_t: per-index bookkeeping node; len counts cached Qfloats.
81 head_t *prev, *next; // a circular list
83 int len; // data[0,len) is cached in this entry
88 void lru_delete(head_t *h);
89 void lru_insert(head_t *h);
// Constructor: convert the byte budget into a Qfloat budget, subtract the
// bookkeeping overhead, and enforce a floor of two full columns.
92 Cache::Cache(int l_,long int size_):l(l_),size(size_)
94 head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
95 size /= sizeof(Qfloat);
96 size -= l * sizeof(head_t) / sizeof(Qfloat);
97 size = max(size, 2 * (long int) l); // cache must be large enough for two columns
98 lru_head.next = lru_head.prev = &lru_head;
// Destructor fragment: walks the LRU ring freeing each entry's data.
103 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
// lru_delete: unlink h from the circular LRU list (h keeps its own pointers).
108 void Cache::lru_delete(head_t *h)
110 // delete from current location
111 h->prev->next = h->next;
112 h->next->prev = h->prev;
// lru_insert: relink h just before the sentinel, i.e. most-recently used.
115 void Cache::lru_insert(head_t *h)
117 // insert to last position
119 h->prev = lru_head.prev;
// get_data: return column `index`; caller must fill positions [return, len).
// NOTE(review): the eviction loop that frees `old` entries until the budget
// fits is missing between these lines — do not assume realloc is the only
// allocation path.
124 int Cache::get_data(const int index, Qfloat **data, int len)
126 head_t *h = &head[index];
127 if(h->len) lru_delete(h);
128 int more = len - h->len;
135 head_t *old = lru_head.next;
143 // allocate new space
144 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
// swap_index: keep the cache consistent after rows i and j of the training
// set are exchanged — swap the two cached columns, then fix up the i/j
// entries inside every other cached column.
154 void Cache::swap_index(int i, int j)
158 if(head[i].len) lru_delete(&head[i]);
159 if(head[j].len) lru_delete(&head[j]);
160 swap(head[i].data,head[j].data);
161 swap(head[i].len,head[j].len);
162 if(head[i].len) lru_insert(&head[i]);
163 if(head[j].len) lru_insert(&head[j]);
// Walk all cached entries; columns long enough to contain both i and j get
// their two elements swapped (the length guard lines are missing here).
166 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
171 swap(h->data[i],h->data[j]);
// QMatrix: abstract interface over the l*l kernel/Q matrix; Kernel supplies
// the actual kernel evaluations on top of it.
188 // the static method k_function is for doing single kernel evaluation
189 // the constructor of Kernel prepares to calculate the l*l kernel matrix
190 // the member function get_Q is for getting one column from the Q Matrix
194 virtual Qfloat *get_Q(int column, int len) const = 0;
195 virtual Qfloat *get_QD() const = 0;
196 virtual void swap_index(int i, int j) const = 0;
197 virtual ~QMatrix() {}
200 class Kernel: public QMatrix {
202 Kernel(int l, svm_node * const * x, const svm_parameter& param);
205 static double k_function(const svm_node *x, const svm_node *y,
206 const svm_parameter& param);
207 virtual Qfloat *get_Q(int column, int len) const = 0;
208 virtual Qfloat *get_QD() const = 0;
// swap_index mutates cached state despite being const (see original remark).
209 virtual void swap_index(int i, int j) const // no so const...
212 if(x_square) swap(x_square[i],x_square[j]);
// Member function pointer selected once in the constructor; avoids a switch
// on kernel_type per evaluation.
216 double (Kernel::*kernel_function)(int i, int j) const;
223 const int kernel_type;
228 static double dot(const svm_node *px, const svm_node *py);
// The five kernel forms. kernel_rbf relies on precomputed x_square norms.
229 double kernel_linear(int i, int j) const
231 return dot(x[i],x[j]);
233 double kernel_poly(int i, int j) const
235 return powi(gamma*dot(x[i],x[j])+coef0,degree);
237 double kernel_rbf(int i, int j) const
239 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
241 double kernel_sigmoid(int i, int j) const
243 return tanh(gamma*dot(x[i],x[j])+coef0);
// Precomputed kernel: x[j][0].value holds the column id into row x[i].
245 double kernel_precomputed(int i, int j) const
247 return x[i][(int)(x[j][0].value)].value;
// Kernel constructor: copy parameters, bind kernel_function to the selected
// kernel, clone the data pointers (missing lines), and for RBF precompute
// the squared norms used by kernel_rbf.
251 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
252 :kernel_type(param.kernel_type), degree(param.degree),
253 gamma(param.gamma), coef0(param.coef0)
// switch(kernel_type) dispatch (case labels missing from this extraction):
258 kernel_function = &Kernel::kernel_linear;
261 kernel_function = &Kernel::kernel_poly;
264 kernel_function = &Kernel::kernel_rbf;
267 kernel_function = &Kernel::kernel_sigmoid;
270 kernel_function = &Kernel::kernel_precomputed;
276 if(kernel_type == RBF)
278 x_square = new double[l];
280 x_square[i] = dot(x[i],x[i]);
// dot: sparse dot product over two index-terminated (-1 sentinel) node lists;
// advances whichever pointer has the smaller index on mismatch.
292 double Kernel::dot(const svm_node *px, const svm_node *py)
295 while(px->index != -1 && py->index != -1)
297 if(px->index == py->index)
299 sum += px->value * py->value;
305 if(px->index > py->index)
// k_function: single kernel evaluation used at predict time; RBF computes
// ||x-y||^2 directly over the sparse representations rather than via norms.
314 double Kernel::k_function(const svm_node *x, const svm_node *y,
315 const svm_parameter& param)
317 switch(param.kernel_type)
322 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
// RBF case: merge-walk shared indices, then the tails of each vector.
326 while(x->index != -1 && y->index !=-1)
328 if(x->index == y->index)
330 double d = x->value - y->value;
337 if(x->index > y->index)
339 sum += y->value * y->value;
344 sum += x->value * x->value;
350 while(x->index != -1)
352 sum += x->value * x->value;
356 while(y->index != -1)
358 sum += y->value * y->value;
362 return exp(-param.gamma*sum);
365 return tanh(param.gamma*dot(x,y)+param.coef0);
366 case PRECOMPUTED: //x: test (validation), y: SV
367 return x[(int)(y->value)].value;
369 return 0; // Unreachable
// Solver: SMO-type solver for the dual QP (working-set selection of
// Fan/Chen/Lin, JMLR 2005) with optional shrinking and a column cache.
373 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
376 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
378 // y^T \alpha = \delta
380 // 0 <= alpha_i <= Cp for y_i = 1
381 // 0 <= alpha_i <= Cn for y_i = -1
385 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
386 // l is the size of vectors and matrices
387 // eps is the stopping tolerance
389 // solution will be put in \alpha, objective value will be put in obj
394 virtual ~Solver() {};
396 struct SolutionInfo {
399 double upper_bound_p;
400 double upper_bound_n;
401 double r; // for Solver_NU
404 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
405 double *alpha_, double Cp, double Cn, double eps,
406 SolutionInfo* si, int shrinking);
410 double *G; // gradient of objective function
411 enum { LOWER_BOUND, UPPER_BOUND, FREE };
412 char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
420 double *G_bar; // gradient, if we treat free variables as 0
422 bool unshrink; // XXX
// get_C: per-sample upper bound, Cp for positives, Cn for negatives.
426 return (y[i] > 0)? Cp : Cn;
// Classify alpha[i] against its box constraints.
428 void update_alpha_status(int i)
430 if(alpha[i] >= get_C(i))
431 alpha_status[i] = UPPER_BOUND;
432 else if(alpha[i] <= 0)
433 alpha_status[i] = LOWER_BOUND;
434 else alpha_status[i] = FREE;
436 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
437 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
438 bool is_free(int i) { return alpha_status[i] == FREE; }
439 void swap_index(int i, int j);
440 void reconstruct_gradient();
441 virtual int select_working_set(int &i, int &j);
442 virtual double calculate_rho();
443 virtual void do_shrinking();
445 bool be_shrunk(int i, double Gmax1, double Gmax2);
// swap_index: exchange all per-sample solver state for i and j (the Q-matrix
// swap and swaps of y/G/p are among the lines missing from this extraction).
448 void Solver::swap_index(int i, int j)
453 swap(alpha_status[i],alpha_status[j]);
454 swap(alpha[i],alpha[j]);
456 swap(active_set[i],active_set[j]);
457 swap(G_bar[i],G_bar[j]);
// reconstruct_gradient: after shrinking, rebuild G for inactive variables
// from G_bar plus contributions of free variables; picks whichever of the
// two loop orders touches fewer kernel entries.
460 void Solver::reconstruct_gradient()
462 // reconstruct inactive elements of G from G_bar and free variables
464 if(active_size == l) return;
469 for(j=active_size;j<l;j++)
470 G[j] = G_bar[j] + p[j];
472 for(j=0;j<active_size;j++)
476 if(2*nr_free < active_size)
477 info("\nWarning: using -h 0 may be faster\n");
479 if (nr_free*l > 2*active_size*(l-active_size))
481 for(i=active_size;i<l;i++)
483 const Qfloat *Q_i = Q->get_Q(i,active_size);
484 for(j=0;j<active_size;j++)
486 G[i] += alpha[j] * Q_i[j];
491 for(i=0;i<active_size;i++)
494 const Qfloat *Q_i = Q->get_Q(i,l);
495 double alpha_i = alpha[i];
496 for(j=active_size;j<l;j++)
497 G[j] += alpha_i * Q_i[j];
// Solve: main SMO loop. Initializes state, iterates working-set updates with
// periodic shrinking, then computes rho, the objective, and unswaps the
// solution back into caller order. Many interior lines (iteration guard,
// bound clipping branches) are missing from this extraction.
502 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
503 double *alpha_, double Cp, double Cn, double eps,
504 SolutionInfo* si, int shrinking)
511 clone(alpha,alpha_,l);
517 // initialize alpha_status
519 alpha_status = new char[l];
521 update_alpha_status(i);
524 // initialize active set (for shrinking)
526 active_set = new int[l];
532 // initialize gradient
535 G_bar = new double[l];
// Gradient init: G = p + Q*alpha, and G_bar accumulates bound contributions.
543 if(!is_lower_bound(i))
545 const Qfloat *Q_i = Q.get_Q(i,l);
546 double alpha_i = alpha[i];
549 G[j] += alpha_i*Q_i[j];
550 if(is_upper_bound(i))
552 G_bar[j] += get_C(i) * Q_i[j];
// Optimization loop: every min(l,1000) iterations, shrink and print a dot.
559 int counter = min(l,1000)+1;
563 // show progress and do shrinking
567 counter = min(l,1000);
568 if(shrinking) do_shrinking();
573 if(select_working_set(i,j)!=0)
575 // reconstruct the whole gradient
576 reconstruct_gradient();
577 // reset active set size and check
580 if(select_working_set(i,j)!=0)
583 counter = 1; // do shrinking next iteration
588 // update alpha[i] and alpha[j], handle bounds carefully
590 const Qfloat *Q_i = Q.get_Q(i,active_size);
591 const Qfloat *Q_j = Q.get_Q(j,active_size);
593 double C_i = get_C(i);
594 double C_j = get_C(j);
596 double old_alpha_i = alpha[i];
597 double old_alpha_j = alpha[j];
// Case y[i] != y[j]: optimize along alpha_i - alpha_j = const.
601 double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
604 double delta = (-G[i]-G[j])/quad_coef;
605 double diff = alpha[i] - alpha[j];
630 alpha[j] = C_i - diff;
638 alpha[i] = C_j + diff;
// Case y[i] == y[j]: optimize along alpha_i + alpha_j = const.
644 double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
647 double delta = (G[i]-G[j])/quad_coef;
648 double sum = alpha[i] + alpha[j];
657 alpha[j] = sum - C_i;
673 alpha[i] = sum - C_j;
// Incremental gradient update over the active set.
688 double delta_alpha_i = alpha[i] - old_alpha_i;
689 double delta_alpha_j = alpha[j] - old_alpha_j;
691 for(int k=0;k<active_size;k++)
693 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
696 // update alpha_status and G_bar
699 bool ui = is_upper_bound(i);
700 bool uj = is_upper_bound(j);
701 update_alpha_status(i);
702 update_alpha_status(j);
// G_bar changes only when a variable crosses its upper bound.
704 if(ui != is_upper_bound(i))
709 G_bar[k] -= C_i * Q_i[k];
712 G_bar[k] += C_i * Q_i[k];
715 if(uj != is_upper_bound(j))
720 G_bar[k] -= C_j * Q_j[k];
723 G_bar[k] += C_j * Q_j[k];
730 si->rho = calculate_rho();
732 // calculate objective value
737 v += alpha[i] * (G[i] + p[i]);
742 // put back the solution
745 alpha_[active_set[i]] = alpha[i];
748 // juggle everything back
751 while(active_set[i] != i)
752 swap_index(i,active_set[i]);
753 // or Q.swap_index(i,active_set[i]);
756 si->upper_bound_p = Cp;
757 si->upper_bound_n = Cn;
759 info("\noptimization finished, #iter = %d\n",iter);
764 delete[] alpha_status;
// select_working_set: maximal-violating-pair selection with second-order
// information (WSS2 of Fan et al.); returns 1 when the KKT gap is below eps.
770 // return 1 if already optimal, return 0 otherwise
771 int Solver::select_working_set(int &out_i, int &out_j)
773 // return i,j such that
774 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
775 // j: minimizes the decrease of obj value
776 // (if quadratic coefficeint <= 0, replace it with tau)
777 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
783 double obj_diff_min = INF;
// Pass 1: find i maximizing the violation (branch bodies missing here).
785 for(int t=0;t<active_size;t++)
788 if(!is_upper_bound(t))
797 if(!is_lower_bound(t))
806 const Qfloat *Q_i = NULL;
807 if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
808 Q_i = Q->get_Q(i,active_size);
// Pass 2: pick j minimizing the second-order objective decrease; TAU guards
// against non-positive curvature.
810 for(int j=0;j<active_size;j++)
814 if (!is_lower_bound(j))
816 double grad_diff=Gmax+G[j];
822 double quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
824 obj_diff = -(grad_diff*grad_diff)/quad_coef;
826 obj_diff = -(grad_diff*grad_diff)/TAU;
828 if (obj_diff <= obj_diff_min)
831 obj_diff_min = obj_diff;
838 if (!is_upper_bound(j))
840 double grad_diff= Gmax-G[j];
846 double quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
848 obj_diff = -(grad_diff*grad_diff)/quad_coef;
850 obj_diff = -(grad_diff*grad_diff)/TAU;
852 if (obj_diff <= obj_diff_min)
855 obj_diff_min = obj_diff;
// be_shrunk: true when bounded variable i can be removed from the active set
// (its gradient shows it cannot re-enter the working set). The y[i] sign
// tests between these branches are missing from this extraction.
870 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
872 if(is_upper_bound(i))
875 return(-G[i] > Gmax1);
877 return(-G[i] > Gmax2);
879 else if(is_lower_bound(i))
882 return(G[i] > Gmax2);
884 return(G[i] > Gmax1);
// do_shrinking: compute the current maximal violation, optionally unshrink
// (reconstruct the full gradient) once the gap nears eps*10, then compact
// shrinkable variables to the tail of the active set.
890 void Solver::do_shrinking()
893 double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
894 double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
896 // find maximal violating pair first
897 for(i=0;i<active_size;i++)
901 if(!is_upper_bound(i))
906 if(!is_lower_bound(i))
914 if(!is_upper_bound(i))
919 if(!is_lower_bound(i))
927 if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
930 reconstruct_gradient()
935 for(i=0;i<active_size;i++)
936 if (be_shrunk(i, Gmax1, Gmax2))
// Swap the shrunk variable with the last non-shrunk one from the tail.
939 while (active_size > i)
941 if (!be_shrunk(active_size, Gmax1, Gmax2))
943 swap_index(i,active_size);
// calculate_rho: rho is the average gradient over free variables, or the
// midpoint of the [lb, ub] feasibility interval when none are free.
951 double Solver::calculate_rho()
955 double ub = INF, lb = -INF, sum_free = 0;
956 for(int i=0;i<active_size;i++)
958 double yG = y[i]*G[i];
960 if(is_upper_bound(i))
967 else if(is_lower_bound(i))
982 r = sum_free/nr_free;
// Solver_NU: SMO variant for nu-SVM, which adds the equality constraint
// e^T alpha = const; delegates the main loop to Solver::Solve but overrides
// selection, shrinking and rho.
990 // Solver for nu-svm classification and regression
992 // additional constraint: e^T \alpha = constant
994 class Solver_NU : public Solver
998 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
999 double *alpha, double Cp, double Cn, double eps,
1000 SolutionInfo* si, int shrinking)
1003 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
1007 int select_working_set(int &i, int &j);
1008 double calculate_rho();
1009 bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1010 void do_shrinking();
// Solver_NU::select_working_set: like the base version, but pairs must share
// the same label, so positives (Gmaxp*) and negatives (Gmaxn*) are tracked
// separately.
1013 // return 1 if already optimal, return 0 otherwise
1014 int Solver_NU::select_working_set(int &out_i, int &out_j)
1016 // return i,j such that y_i = y_j and
1017 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1018 // j: minimizes the decrease of obj value
1019 // (if quadratic coefficeint <= 0, replace it with tau)
1020 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1022 double Gmaxp = -INF;
1023 double Gmaxp2 = -INF;
1026 double Gmaxn = -INF;
1027 double Gmaxn2 = -INF;
1031 double obj_diff_min = INF;
// Pass 1: candidate i per class (branch bodies missing from extraction).
1033 for(int t=0;t<active_size;t++)
1036 if(!is_upper_bound(t))
1045 if(!is_lower_bound(t))
1055 const Qfloat *Q_ip = NULL;
1056 const Qfloat *Q_in = NULL;
1057 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1058 Q_ip = Q->get_Q(ip,active_size);
1060 Q_in = Q->get_Q(in,active_size);
// Pass 2: second-order selection of j within each class; TAU guards
// non-positive curvature.
1062 for(int j=0;j<active_size;j++)
1066 if (!is_lower_bound(j))
1068 double grad_diff=Gmaxp+G[j];
1074 double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
1076 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1078 obj_diff = -(grad_diff*grad_diff)/TAU;
1080 if (obj_diff <= obj_diff_min)
1083 obj_diff_min = obj_diff;
1090 if (!is_upper_bound(j))
1092 double grad_diff=Gmaxn-G[j];
1093 if (-G[j] >= Gmaxn2)
1098 double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
1100 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1102 obj_diff = -(grad_diff*grad_diff)/TAU;
1104 if (obj_diff <= obj_diff_min)
1107 obj_diff_min = obj_diff;
// Converged when both per-class violations fall below eps.
1114 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1117 if (y[Gmin_idx] == +1)
// Solver_NU::be_shrunk: four thresholds, one per (bound, label) combination;
// the y[i] sign tests between branches are missing from this extraction.
1126 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1128 if(is_upper_bound(i))
1131 return(-G[i] > Gmax1);
1133 return(-G[i] > Gmax4);
1135 else if(is_lower_bound(i))
1138 return(G[i] > Gmax2);
1140 return(G[i] > Gmax3);
// Solver_NU::do_shrinking: same structure as the base version, but tracks
// the maximal violation separately per class label.
1146 void Solver_NU::do_shrinking()
1148 double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1149 double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1150 double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1151 double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1153 // find maximal violating pair first
1155 for(i=0;i<active_size;i++)
1157 if(!is_upper_bound(i))
1161 if(-G[i] > Gmax1) Gmax1 = -G[i];
1163 else if(-G[i] > Gmax4) Gmax4 = -G[i];
1165 if(!is_lower_bound(i))
1169 if(G[i] > Gmax2) Gmax2 = G[i];
1171 else if(G[i] > Gmax3) Gmax3 = G[i];
// Unshrink once near convergence, then compact shrinkable variables.
1175 if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1178 reconstruct_gradient();
1182 for(i=0;i<active_size;i++)
1183 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1186 while (active_size > i)
1188 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1190 swap_index(i,active_size);
// Solver_NU::calculate_rho: compute r1 (positive class) and r2 (negative
// class) from free-variable averages or interval midpoints; the final
// rho/r assignments are among the lines missing from this extraction.
1198 double Solver_NU::calculate_rho()
1200 int nr_free1 = 0,nr_free2 = 0;
1201 double ub1 = INF, ub2 = INF;
1202 double lb1 = -INF, lb2 = -INF;
1203 double sum_free1 = 0, sum_free2 = 0;
1205 for(int i=0;i<active_size;i++)
1209 if(is_upper_bound(i))
1210 lb1 = max(lb1,G[i]);
1211 else if(is_lower_bound(i))
1212 ub1 = min(ub1,G[i]);
1221 if(is_upper_bound(i))
1222 lb2 = max(lb2,G[i]);
1223 else if(is_lower_bound(i))
1224 ub2 = min(ub2,G[i]);
1235 r1 = sum_free1/nr_free1;
1240 r2 = sum_free2/nr_free2;
// SVC_Q: Q matrix for C-SVC / nu-SVC; Q_ij = y_i*y_j*K(i,j), columns served
// through the LRU cache, diagonal precomputed in QD.
1249 // Q matrices for various formulations
1251 class SVC_Q: public Kernel
1254 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1255 :Kernel(prob.l, prob.x, param)
1258 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1259 QD = new Qfloat[prob.l];
1260 for(int i=0;i<prob.l;i++)
1261 QD[i]= (Qfloat)(this->*kernel_function)(i,i);
// get_Q: fill only the uncached tail [start, len) reported by the cache.
1264 Qfloat *get_Q(int i, int len) const
1268 if((start = cache->get_data(i,&data,len)) < len)
1270 for(j=start;j<len;j++)
1271 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1276 Qfloat *get_QD() const
1281 void swap_index(int i, int j) const
1283 cache->swap_index(i,j);
1284 Kernel::swap_index(i,j);
// ONE_CLASS_Q: Q matrix for one-class SVM; Q_ij = K(i,j) with no label
// signs, otherwise identical caching structure to SVC_Q.
1301 class ONE_CLASS_Q: public Kernel
1304 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1305 :Kernel(prob.l, prob.x, param)
1307 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1308 QD = new Qfloat[prob.l];
1309 for(int i=0;i<prob.l;i++)
1310 QD[i]= (Qfloat)(this->*kernel_function)(i,i);
1313 Qfloat *get_Q(int i, int len) const
1317 if((start = cache->get_data(i,&data,len)) < len)
1319 for(j=start;j<len;j++)
1320 data[j] = (Qfloat)(this->*kernel_function)(i,j);
1325 Qfloat *get_QD() const
1330 void swap_index(int i, int j) const
1332 cache->swap_index(i,j);
1333 Kernel::swap_index(i,j);
// SVR_Q: Q matrix for epsilon/nu-SVR. The 2l dual variables map onto l real
// samples via index[]/sign[]; columns are cached per real index and the
// signed, permuted view is assembled into one of two rotating buffers.
1347 class SVR_Q: public Kernel
1350 SVR_Q(const svm_problem& prob, const svm_parameter& param)
1351 :Kernel(prob.l, prob.x, param)
1354 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1355 QD = new Qfloat[2*l];
1356 sign = new schar[2*l];
1357 index = new int[2*l];
1358 for(int k=0;k<l;k++)
1364 QD[k]= (Qfloat)(this->*kernel_function)(k,k);
1367 buffer[0] = new Qfloat[2*l];
1368 buffer[1] = new Qfloat[2*l];
// swap_index only permutes the mapping arrays; cached kernel columns stay
// keyed by real sample index.
1372 void swap_index(int i, int j) const
1374 swap(sign[i],sign[j]);
1375 swap(index[i],index[j]);
1379 Qfloat *get_Q(int i, int len) const
1382 int j, real_i = index[i];
1383 if(cache->get_data(real_i,&data,l) < l)
1386 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
// Alternate between two buffers so two live columns never alias.
1390 Qfloat *buf = buffer[next_buffer];
1391 next_buffer = 1 - next_buffer;
1394 buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1398 Qfloat *get_QD() const
1417 mutable int next_buffer;
1423 // construct and solve various formulations
1425 static void solve_c_svc(
1426 const svm_problem *prob, const svm_parameter* param,
1427 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1430 double *minus_ones = new double[l];
1431 schar *y = new schar[l];
1439 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
1443 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1444 alpha, Cp, Cn, param->eps, si, param->shrinking);
1448 sum_alpha += alpha[i];
1451 info("nu = %f\n", sum_alpha/(Cp*prob->l));
1456 delete[] minus_ones;
// solve_nu_svc: nu-SVC setup — distribute nu*l/2 of initial alpha mass over
// each class, solve with Solver_NU at C=1, then rescale bounds by 1/r.
1460 static void solve_nu_svc(
1461 const svm_problem *prob, const svm_parameter *param,
1462 double *alpha, Solver::SolutionInfo* si)
1466 double nu = param->nu;
1468 schar *y = new schar[l];
1476 double sum_pos = nu*l/2;
1477 double sum_neg = nu*l/2;
1482 alpha[i] = min(1.0,sum_pos);
1483 sum_pos -= alpha[i];
1487 alpha[i] = min(1.0,sum_neg);
1488 sum_neg -= alpha[i];
1491 double *zeros = new double[l];
1497 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1498 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
// r comes from si->r; the division-by-r rescaling lines are missing here.
1501 info("C = %f\n",1/r);
1508 si->upper_bound_p = 1/r;
1509 si->upper_bound_n = 1/r;
// solve_one_class: nu*l variables start at the upper bound 1, one gets the
// fractional remainder, the rest start at 0; labels are all +1.
1515 static void solve_one_class(
1516 const svm_problem *prob, const svm_parameter *param,
1517 double *alpha, Solver::SolutionInfo* si)
1520 double *zeros = new double[l];
1521 schar *ones = new schar[l];
1524 int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1529 alpha[n] = param->nu * prob->l - n;
1540 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1541 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
// solve_epsilon_svr: 2l-variable dual — first l entries are the "+"
// multipliers (linear term p - y_i), second l the "-" ones (p + y_i);
// final alpha is their difference.
1547 static void solve_epsilon_svr(
1548 const svm_problem *prob, const svm_parameter *param,
1549 double *alpha, Solver::SolutionInfo* si)
1552 double *alpha2 = new double[2*l];
1553 double *linear_term = new double[2*l];
1554 schar *y = new schar[2*l];
1560 linear_term[i] = param->p - prob->y[i];
1564 linear_term[i+l] = param->p + prob->y[i];
1569 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1570 alpha2, param->C, param->C, param->eps, si, param->shrinking);
1572 double sum_alpha = 0;
1575 alpha[i] = alpha2[i] - alpha2[i+l];
1576 sum_alpha += fabs(alpha[i]);
1578 info("nu = %f\n",sum_alpha/(param->C*l));
1581 delete[] linear_term;
// solve_nu_svr: nu-SVR — initial alpha mass C*nu*l/2 spread across pairs,
// linear term is -y_i / +y_i, solved with Solver_NU; epsilon is recovered
// from -si->r.
1585 static void solve_nu_svr(
1586 const svm_problem *prob, const svm_parameter *param,
1587 double *alpha, Solver::SolutionInfo* si)
1590 double C = param->C;
1591 double *alpha2 = new double[2*l];
1592 double *linear_term = new double[2*l];
1593 schar *y = new schar[2*l];
1596 double sum = C * param->nu * l / 2;
1599 alpha2[i] = alpha2[i+l] = min(sum,C);
1602 linear_term[i] = - prob->y[i];
1605 linear_term[i+l] = prob->y[i];
1610 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1611 alpha2, C, C, param->eps, si, param->shrinking);
1613 info("epsilon = %f\n",-si->r);
1616 alpha[i] = alpha2[i] - alpha2[i+l];
1619 delete[] linear_term;
// svm_train_one: dispatch to the per-formulation solver, then count support
// vectors (alpha != 0) and bounded SVs (alpha at its bound) for reporting;
// returns the alpha vector plus rho in a decision_function.
1624 // decision_function
1626 struct decision_function
1632 static decision_function svm_train_one(
1633 const svm_problem *prob, const svm_parameter *param,
1634 double Cp, double Cn)
1636 double *alpha = Malloc(double,prob->l);
1637 Solver::SolutionInfo si;
1638 switch(param->svm_type)
1641 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1644 solve_nu_svc(prob,param,alpha,&si);
1647 solve_one_class(prob,param,alpha,&si);
1650 solve_epsilon_svr(prob,param,alpha,&si);
1653 solve_nu_svr(prob,param,alpha,&si);
1657 info("obj = %f, rho = %f\n",si.obj,si.rho);
// Positive alphas are checked against upper_bound_p, negative against _n.
1663 for(int i=0;i<prob->l;i++)
1665 if(fabs(alpha[i]) > 0)
1670 if(fabs(alpha[i]) >= si.upper_bound_p)
1675 if(fabs(alpha[i]) >= si.upper_bound_n)
1681 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1683 decision_function f;
// svm_model fields: trained-model storage shared by train/predict/save/load.
1694 struct svm_parameter param; /* parameter */
1695 int nr_class; /* number of classes, = 2 in regression/one class svm */
1696 int l; /* total #SV */
1697 struct svm_node **SV; /* SVs (SV[l]) */
1698 double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
1699 double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
1700 double *probA; /* pariwise probability information */
1703 /* for classification only */
1705 int *label; /* label of each class (label[k]) */
1706 int *nSV; /* number of SVs for each class (nSV[k]) */
1707 /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
// free_sv decides whether svm_destroy_model owns the SV node storage.
1709 int free_sv; /* 1 if svm_model is created by svm_load_model*/
1710 /* 0 if svm_model is created by svm_train */
// sigmoid_train: fit Platt's sigmoid P(y=1|f) = 1/(1+exp(A*f+B)) to decision
// values by regularized Newton iteration with backtracking line search
// (Lin/Lin/Weng's numerically robust variant).
1713 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
1714 static void sigmoid_train(
1715 int l, const double *dec_values, const double *labels,
1716 double& A, double& B)
1718 double prior1=0, prior0 = 0;
1722 if (labels[i] > 0) prior1+=1;
1725 int max_iter=100; // Maximal number of iterations
1726 double min_step=1e-10; // Minimal step taken in line search
1727 double sigma=1e-12; // For numerically strict PD of Hessian
1729 double hiTarget=(prior1+1.0)/(prior1+2.0);
1730 double loTarget=1/(prior0+2.0);
1731 double *t=Malloc(double,l);
1732 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1733 double newA,newB,newf,d1,d2;
1736 // Initial Point and Initial Fun Value
1737 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
// Targets use the +1/(N+2) smoothing from Platt; fval branches on fApB sign
// to avoid overflow in exp/log.
1742 if (labels[i]>0) t[i]=hiTarget;
1744 fApB = dec_values[i]*A+B;
1746 fval += t[i]*fApB + log(1+exp(-fApB));
1748 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1750 for (iter=0;iter<max_iter;iter++)
1752 // Update Gradient and Hessian (use H' = H + sigma I)
1753 h11=sigma; // numerically ensures strict PD
1755 h21=0.0;g1=0.0;g2=0.0;
1758 fApB = dec_values[i]*A+B;
1761 p=exp(-fApB)/(1.0+exp(-fApB));
1762 q=1.0/(1.0+exp(-fApB));
1766 p=1.0/(1.0+exp(fApB));
1767 q=exp(fApB)/(1.0+exp(fApB));
1770 h11+=dec_values[i]*dec_values[i]*d2;
1772 h21+=dec_values[i]*d2;
1774 g1+=dec_values[i]*d1;
1778 // Stopping Criteria
1779 if (fabs(g1)<eps && fabs(g2)<eps)
1782 // Finding Newton direction: -inv(H') * g
1783 det=h11*h22-h21*h21;
1784 dA=-(h22*g1 - h21 * g2) / det;
1785 dB=-(-h21*g1+ h11 * g2) / det;
1789 stepsize = 1; // Line Search
1790 while (stepsize >= min_step)
1792 newA = A + stepsize * dA;
1793 newB = B + stepsize * dB;
1795 // New function value
1799 fApB = dec_values[i]*newA+newB;
1801 newf += t[i]*fApB + log(1+exp(-fApB));
1803 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1805 // Check sufficient decrease
1806 if (newf<fval+0.0001*stepsize*gd)
1808 A=newA;B=newB;fval=newf;
1812 stepsize = stepsize / 2.0;
1815 if (stepsize < min_step)
1817 info("Line search fails in two-class probability estimates\n");
1823 info("Reaching maximal iterations in two-class probability estimates\n");
// sigmoid_predict: evaluate the fitted sigmoid 1/(1+exp(A*f+B)); the two
// algebraically equal forms are chosen by fApB's sign to avoid overflow.
1827 static double sigmoid_predict(double decision_value, double A, double B)
1829 double fApB = decision_value*A+B;
1831 return exp(-fApB)/(1.0+exp(-fApB));
1833 return 1.0/(1+exp(fApB)) ;
// multiclass_probability: couple the k*(k-1)/2 pairwise probabilities r into
// class probabilities p via the fixed-point iteration of Wu/Lin/Weng
// (method 2), stopping when max |Qp_t - pQp| < eps.
1836 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1837 static void multiclass_probability(int k, double **r, double *p)
1840 int iter = 0, max_iter=max(100,k);
1841 double **Q=Malloc(double *,k);
1842 double *Qp=Malloc(double,k);
1843 double pQp, eps=0.005/k;
1847 p[t]=1.0/k; // Valid if k = 1
1848 Q[t]=Malloc(double,k);
1852 Q[t][t]+=r[j][t]*r[j][t];
1857 Q[t][t]+=r[j][t]*r[j][t];
1858 Q[t][j]=-r[j][t]*r[t][j];
1861 for (iter=0;iter<max_iter;iter++)
1863 // stopping condition, recalculate QP,pQP for numerical accuracy
1869 Qp[t]+=Q[t][j]*p[j];
1875 double error=fabs(Qp[t]-pQp);
1876 if (error>max_error)
1879 if (max_error<eps) break;
// Update one component and renormalize pQp/Qp incrementally.
1883 double diff=(-Qp[t]+pQp)/Q[t][t];
1885 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1888 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1894 info("Exceeds max_iter in multiclass_prob\n");
1895 for(t=0;t<k;t++) free(Q[t]);
// svm_binary_svc_probability: obtain out-of-fold decision values via manual
// cross-validation (with a random permutation), then fit the Platt sigmoid
// to get (probA, probB) for this binary problem.
1900 // Cross-validation decision values for probability estimates
1901 static void svm_binary_svc_probability(
1902 const svm_problem *prob, const svm_parameter *param,
1903 double Cp, double Cn, double& probA, double& probB)
1907 int *perm = Malloc(int,prob->l);
1908 double *dec_values = Malloc(double,prob->l);
// Fisher-Yates shuffle of the sample order.
1911 for(i=0;i<prob->l;i++) perm[i]=i;
1912 for(i=0;i<prob->l;i++)
1914 int j = i+rand()%(prob->l-i);
1915 swap(perm[i],perm[j]);
1917 for(i=0;i<nr_fold;i++)
1919 int begin = i*prob->l/nr_fold;
1920 int end = (i+1)*prob->l/nr_fold;
1922 struct svm_problem subprob;
1924 subprob.l = prob->l-(end-begin);
1925 subprob.x = Malloc(struct svm_node*,subprob.l);
1926 subprob.y = Malloc(double,subprob.l);
// Training subset = everything outside [begin, end).
1929 for(j=0;j<begin;j++)
1931 subprob.x[k] = prob->x[perm[j]];
1932 subprob.y[k] = prob->y[perm[j]];
1935 for(j=end;j<prob->l;j++)
1937 subprob.x[k] = prob->x[perm[j]];
1938 subprob.y[k] = prob->y[perm[j]];
// Degenerate folds (single-class training data) get constant outputs
// instead of a trained model.
1941 int p_count=0,n_count=0;
1948 if(p_count==0 && n_count==0)
1949 for(j=begin;j<end;j++)
1950 dec_values[perm[j]] = 0;
1951 else if(p_count > 0 && n_count == 0)
1952 for(j=begin;j<end;j++)
1953 dec_values[perm[j]] = 1;
1954 else if(p_count == 0 && n_count > 0)
1955 for(j=begin;j<end;j++)
1956 dec_values[perm[j]] = -1;
1959 svm_parameter subparam = *param;
1960 subparam.probability=0;
1962 subparam.nr_weight=2;
1963 subparam.weight_label = Malloc(int,2);
1964 subparam.weight = Malloc(double,2);
1965 subparam.weight_label[0]=+1;
1966 subparam.weight_label[1]=-1;
1967 subparam.weight[0]=Cp;
1968 subparam.weight[1]=Cn;
1969 struct svm_model *submodel = svm_train(&subprob,&subparam);
1970 for(j=begin;j<end;j++)
1972 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
1973 // ensure +1 -1 order; reason not using CV subroutine
1974 dec_values[perm[j]] *= submodel->label[0];
1976 svm_destroy_model(submodel);
1977 svm_destroy_param(&subparam);
1982 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
// svm_svr_probability: estimate the Laplace scale of SVR residuals via
// cross-validation, discarding residuals beyond 5*std as outliers.
1987 // Return parameter of a Laplace distribution
1988 static double svm_svr_probability(
1989 const svm_problem *prob, const svm_parameter *param)
1993 double *ymv = Malloc(double,prob->l);
1996 svm_parameter newparam = *param;
1997 newparam.probability = 0;
1998 svm_cross_validation(prob,&newparam,nr_fold,ymv);
1999 for(i=0;i<prob->l;i++)
2001 ymv[i]=prob->y[i]-ymv[i];
2002 mae += fabs(ymv[i]);
2005 double std=sqrt(2*mae*mae);
2008 for(i=0;i<prob->l;i++)
2009 if (fabs(ymv[i]) > 5*std)
2013 mae /= (prob->l-count);
2014 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
// svm_group_classes: discover the distinct labels (growing the arrays by
// doubling), count members per class, and fill perm so samples of the same
// class are contiguous; start[] is rebuilt after being consumed as a cursor.
2020 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2021 // perm, length l, must be allocated before calling this subroutine
2022 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2025 int max_nr_class = 16;
2027 int *label = Malloc(int,max_nr_class);
2028 int *count = Malloc(int,max_nr_class);
2029 int *data_label = Malloc(int,l);
2034 int this_label = (int)prob->y[i];
2036 for(j=0;j<nr_class;j++)
2038 if(this_label == label[j])
2047 if(nr_class == max_nr_class)
2050 label = (int *)realloc(label,max_nr_class*sizeof(int));
2051 count = (int *)realloc(count,max_nr_class*sizeof(int));
2053 label[nr_class] = this_label;
2054 count[nr_class] = 1;
2059 int *start = Malloc(int,nr_class);
2061 for(i=1;i<nr_class;i++)
2062 start[i] = start[i-1]+count[i-1];
// start[] is advanced while scattering indices, then recomputed.
2065 perm[start[data_label[i]]] = i;
2066 ++start[data_label[i]];
2069 for(i=1;i<nr_class;i++)
2070 start[i] = start[i-1]+count[i-1];
2072 *nr_class_ret = nr_class;
2080 // Interface functions
2082 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
	// Train an SVM on `prob` with `param` and return a malloc'd model.
	// free_sv==0 records that model->SV aliases prob->x (caller's data),
	// so the model must not outlive the training problem.
2084 	svm_model *model = Malloc(svm_model,1);
2085 	model->param = *param;
2086 	model->free_sv = 0;	// XXX
	// --- Case 1: one-class / regression: a single decision function. ---
2088 	if(param->svm_type == ONE_CLASS ||
2089 		param->svm_type == EPSILON_SVR ||
2090 		param->svm_type == NU_SVR)
2092 		// regression or one-class-svm
2093 		model->nr_class = 2;
2094 		model->label = NULL;
2096 		model->probA = NULL; model->probB = NULL;
2097 		model->sv_coef = Malloc(double *,1);
	// SVR with probability output: estimate the single probability
	// parameter (probA[0]) via svm_svr_probability.
2099 		if(param->probability &&
2100 		   (param->svm_type == EPSILON_SVR ||
2101 		    param->svm_type == NU_SVR))
2103 			model->probA = Malloc(double,1);
2104 			model->probA[0] = svm_svr_probability(prob,param);
2107 		decision_function f = svm_train_one(prob,param,0,0);
2108 		model->rho = Malloc(double,1);
2109 		model->rho[0] = f.rho;
	// Support vectors are exactly the points with nonzero alpha:
	// first count them, then copy pointers and coefficients.
2113 		for(i=0;i<prob->l;i++)
2114 			if(fabs(f.alpha[i]) > 0) ++nSV;
2116 		model->SV = Malloc(svm_node *,nSV);
2117 		model->sv_coef[0] = Malloc(double,nSV);
2119 		for(i=0;i<prob->l;i++)
2120 			if(fabs(f.alpha[i]) > 0)
2122 				model->SV[j] = prob->x[i];
2123 				model->sv_coef[0][j] = f.alpha[i];
	// --- Case 2: classification via one-vs-one binary classifiers. ---
	// Group the data so each class occupies a contiguous range; perm
	// maps the grouped order back to the original indices.
2137 		int *perm = Malloc(int,l);
2139 		// group training data of the same class
2140 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2141 		svm_node **x = Malloc(svm_node *,l);
2144 			x[i] = prob->x[perm[i]];
2146 		// calculate weighted C
	// Per-class C: start from param->C, then scale by any user-supplied
	// class weights (unknown weight labels only produce a warning).
2148 		double *weighted_C = Malloc(double, nr_class);
2149 		for(i=0;i<nr_class;i++)
2150 			weighted_C[i] = param->C;
2151 		for(i=0;i<param->nr_weight;i++)
2154 			for(j=0;j<nr_class;j++)
2155 				if(param->weight_label[i] == label[j])
2158 				fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
2160 				weighted_C[j] *= param->weight[i];
2163 		// train k*(k-1)/2 models
2165 		bool *nonzero = Malloc(bool,l);
2168 		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2170 		double *probA=NULL,*probB=NULL;
2171 		if (param->probability)
2173 			probA=Malloc(double,nr_class*(nr_class-1)/2);
2174 			probB=Malloc(double,nr_class*(nr_class-1)/2);
	// For each class pair (i,j): build a sub-problem with class i
	// relabeled +1 and class j relabeled -1, then train one binary SVM.
2178 		for(i=0;i<nr_class;i++)
2179 			for(int j=i+1;j<nr_class;j++)
2181 				svm_problem sub_prob;
2182 				int si = start[i], sj = start[j];
2183 				int ci = count[i], cj = count[j];
2185 				sub_prob.x = Malloc(svm_node *,sub_prob.l);
2186 				sub_prob.y = Malloc(double,sub_prob.l);
2190 					sub_prob.x[k] = x[si+k];
2195 					sub_prob.x[ci+k] = x[sj+k];
2196 					sub_prob.y[ci+k] = -1;
2199 				if(param->probability)
2200 					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2202 				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
	// Mark every training point that became a support vector in any
	// pairwise classifier (nonzero alpha in either half of sub_prob).
2204 					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2205 						nonzero[si+k] = true;
2207 					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2208 						nonzero[sj+k] = true;
	// --- Assemble the output model: labels, rhos, probabilities. ---
2216 		model->nr_class = nr_class;
2218 		model->label = Malloc(int,nr_class);
2219 		for(i=0;i<nr_class;i++)
2220 			model->label[i] = label[i];
2222 		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2223 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
2224 			model->rho[i] = f[i].rho;
2226 		if(param->probability)
2228 			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2229 			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2230 			for(i=0;i<nr_class*(nr_class-1)/2;i++)
2232 				model->probA[i] = probA[i];
2233 				model->probB[i] = probB[i];
	// Count support vectors per class and collect their pointers;
	// model->SV keeps the grouped (per-class) order.
2243 		int *nz_count = Malloc(int,nr_class);
2244 		model->nSV = Malloc(int,nr_class);
2245 		for(i=0;i<nr_class;i++)
2248 			for(int j=0;j<count[i];j++)
2249 				if(nonzero[start[i]+j])
2254 			model->nSV[i] = nSV;
2258 		info("Total nSV = %d\n",total_sv);
2260 		model->l = total_sv;
2261 		model->SV = Malloc(svm_node *,total_sv);
2264 			if(nonzero[i]) model->SV[p++] = x[i];
	// nz_start[i] = prefix sum of per-class SV counts: where class i's
	// support vectors begin inside model->SV / sv_coef rows.
2266 		int *nz_start = Malloc(int,nr_class);
2268 		for(i=1;i<nr_class;i++)
2269 			nz_start[i] = nz_start[i-1]+nz_count[i-1];
	// sv_coef has nr_class-1 rows: each SV carries one coefficient per
	// pairwise classifier it participates in (one-vs-one layout).
2271 		model->sv_coef = Malloc(double *,nr_class-1);
2272 		for(i=0;i<nr_class-1;i++)
2273 			model->sv_coef[i] = Malloc(double,total_sv);
2276 		for(i=0;i<nr_class;i++)
2277 			for(int j=i+1;j<nr_class;j++)
2279 				// classifier (i,j): coefficients with
2280 				// i are in sv_coef[j-1][nz_start[i]...],
2281 				// j are in sv_coef[i][nz_start[j]...]
2288 				int q = nz_start[i];
2292 						model->sv_coef[j-1][q++] = f[p].alpha[k];
2296 						model->sv_coef[i][q++] = f[p].alpha[ci+k];
	// NOTE(review): per-pair cleanup loop — presumably releases each
	// f[i]'s alpha buffer; loop body is not visible here, confirm.
2309 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
2318 // Stratified cross validation
2319 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
	// k-fold cross validation: fills target[i] with the cross-validated
	// prediction for training point i.  Classification folds are
	// stratified (each fold keeps roughly the class proportions).
2322 	int *fold_start = Malloc(int,nr_fold+1);
2324 	int *perm = Malloc(int,l);
2327 	// stratified cv may not give leave-one-out rate
2328 	// Each class to l folds -> some folds may have zero elements
2329 	if((param->svm_type == C_SVC ||
2330 	    param->svm_type == NU_SVC) && nr_fold < l)
2335 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2337 		// random shuffle and then data grouped by fold using the array perm
2338 		int *fold_count = Malloc(int,nr_fold);
2340 		int *index = Malloc(int,l);
	// Fisher-Yates shuffle within each class block so fold assignment
	// is random but class-balanced.
2343 		for (c=0; c<nr_class; c++)
2344 			for(i=0;i<count[c];i++)
2346 				int j = i+rand()%(count[c]-i);
2347 				swap(index[start[c]+j],index[start[c]+i]);
	// fold_count[i]: total size of fold i, summing each class's share
	// (integer arithmetic distributes remainders evenly).
2349 		for(i=0;i<nr_fold;i++)
2352 			for (c=0; c<nr_class;c++)
2353 				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2356 		for (i=1;i<=nr_fold;i++)
2357 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
	// Scatter each class's shuffled slice into the folds via perm.
2358 		for (c=0; c<nr_class;c++)
2359 			for(i=0;i<nr_fold;i++)
2361 				int begin = start[c]+i*count[c]/nr_fold;
2362 				int end = start[c]+(i+1)*count[c]/nr_fold;
2363 				for(int j=begin;j<end;j++)
2365 					perm[fold_start[i]] = index[j];
	// fold_start was advanced while scattering; rebuild it.
2370 		for (i=1;i<=nr_fold;i++)
2371 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
	// Non-stratified path (regression / one-class / nr_fold==l):
	// plain random permutation, folds are equal contiguous slices.
2380 		for(i=0;i<l;i++) perm[i]=i;
2383 			int j = i+rand()%(l-i);
2384 			swap(perm[i],perm[j]);
2386 		for(i=0;i<=nr_fold;i++)
2387 			fold_start[i]=i*l/nr_fold;
	// For each fold: train on everything outside [begin,end), then
	// predict the held-out points.
2390 	for(i=0;i<nr_fold;i++)
2392 		int begin = fold_start[i];
2393 		int end = fold_start[i+1];
2395 		struct svm_problem subprob;
2397 		subprob.l = l-(end-begin);
2398 		subprob.x = Malloc(struct svm_node*,subprob.l);
2399 		subprob.y = Malloc(double,subprob.l);
2402 		for(j=0;j<begin;j++)
2404 			subprob.x[k] = prob->x[perm[j]];
2405 			subprob.y[k] = prob->y[perm[j]];
2410 			subprob.x[k] = prob->x[perm[j]];
2411 			subprob.y[k] = prob->y[perm[j]];
2414 		struct svm_model *submodel = svm_train(&subprob,param);
	// With probability output enabled for classification, the target
	// is the probability-based prediction; otherwise plain svm_predict.
2415 		if(param->probability &&
2416 		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2418 			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2419 			for(j=begin;j<end;j++)
2420 				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2421 			free(prob_estimates);
2424 			for(j=begin;j<end;j++)
2425 				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2426 		svm_destroy_model(submodel);
2435 int svm_get_svm_type(const svm_model *model)
2437 return model->param.svm_type;
2440 int svm_get_nr_class(const svm_model *model)
2442 return model->nr_class;
2445 void svm_get_labels(const svm_model *model, int* label)
2447 if (model->label != NULL)
2448 for(int i=0;i<model->nr_class;i++)
2449 label[i] = model->label[i];
2452 double svm_get_svr_probability(const svm_model *model)
2454 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2456 return model->probA[0];
2459 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2464 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
	// Compute decision value(s) for x.  One-class/regression: a single
	// kernel expansion minus rho.  Classification: all k*(k-1)/2
	// pairwise decision values plus a majority vote over them.
2466 	if(model->param.svm_type == ONE_CLASS ||
2467 	   model->param.svm_type == EPSILON_SVR ||
2468 	   model->param.svm_type == NU_SVR)
	// sum = SUM_i sv_coef[i] * K(x, SV[i]) - rho[0]
2470 		double *sv_coef = model->sv_coef[0];
2472 		for(int i=0;i<model->l;i++)
2473 			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2474 		sum -= model->rho[0];
	// One-class returns a hard +1/-1 decision, not the raw value.
2477 		if(model->param.svm_type == ONE_CLASS)
2478 			return (sum>0)?1:-1;
	// Classification path: evaluate the kernel against every stored SV
	// once, then reuse those values for all pairwise classifiers.
2485 		int nr_class = model->nr_class;
2488 		double *kvalue = Malloc(double,l);
2490 			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
	// start[i]: offset of class i's support vectors inside model->SV
	// (prefix sums of model->nSV).
2492 		int *start = Malloc(int,nr_class);
2494 		for(i=1;i<nr_class;i++)
2495 			start[i] = start[i-1]+model->nSV[i-1];
2497 		int *vote = Malloc(int,nr_class);
2498 		for(i=0;i<nr_class;i++)
	// For each pair (i,j): decision value uses class i's SVs with the
	// coefficients stored for classifier j-1 and class j's SVs with
	// the coefficients stored for classifier i (one-vs-one layout
	// written by svm_train).
2502 		for(i=0;i<nr_class;i++)
2503 			for(int j=i+1;j<nr_class;j++)
2508 				int ci = model->nSV[i];
2509 				int cj = model->nSV[j];
2512 				double *coef1 = model->sv_coef[j-1];
2513 				double *coef2 = model->sv_coef[i];
2515 					sum += coef1[si+k] * kvalue[si+k];
2517 					sum += coef2[sj+k] * kvalue[sj+k];
2518 				sum -= model->rho[p];
2519 				dec_values[p] = sum;
	// Positive decision value is a vote for class i, else class j.
2521 				if(dec_values[p] > 0)
	// Majority vote decides the predicted label.
2528 		int vote_max_idx = 0;
2529 		for(i=1;i<nr_class;i++)
2530 			if(vote[i] > vote[vote_max_idx])
2536 		return model->label[vote_max_idx];
2540 double svm_predict(const svm_model *model, const svm_node *x)
2542 int nr_class = model->nr_class;
2544 if(model->param.svm_type == ONE_CLASS ||
2545 model->param.svm_type == EPSILON_SVR ||
2546 model->param.svm_type == NU_SVR)
2547 dec_values = Malloc(double, 1);
2549 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2550 double pred_result = svm_predict_values(model, x, dec_values);
2555 double svm_predict_probability(
2556 const svm_model *model, const svm_node *x, double *prob_estimates)
2558 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2559 model->probA!=NULL && model->probB!=NULL)
2562 int nr_class = model->nr_class;
2563 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2564 svm_predict_values(model, x, dec_values);
2566 double min_prob=1e-7;
2567 double **pairwise_prob=Malloc(double *,nr_class);
2568 for(i=0;i<nr_class;i++)
2569 pairwise_prob[i]=Malloc(double,nr_class);
2571 for(i=0;i<nr_class;i++)
2572 for(int j=i+1;j<nr_class;j++)
2574 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2575 pairwise_prob[j][i]=1-pairwise_prob[i][j];
2578 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2580 int prob_max_idx = 0;
2581 for(i=1;i<nr_class;i++)
2582 if(prob_estimates[i] > prob_estimates[prob_max_idx])
2584 for(i=0;i<nr_class;i++)
2585 free(pairwise_prob[i]);
2587 free(pairwise_prob);
2588 return model->label[prob_max_idx];
2591 return svm_predict(model, x);
	// Textual names written to / parsed from model files.  Index order
	// must match the svm_type and kernel_type enum values; the arrays
	// are NULL-terminated so lookup loops know where to stop.
2594 static const char *svm_type_table[] =
2596 	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2599 static const char *kernel_type_table[]=
2601 	"linear","polynomial","rbf","sigmoid","precomputed",NULL
2604 int svm_save_model(const char *model_file_name, const svm_model *model)
	// Serialize `model` to a text file.  Returns 0 on success, -1 on
	// any open/write/close failure.
2606 	FILE *fp = fopen(model_file_name,"w");
2607 	if(fp==NULL) return -1;
2609 	const svm_parameter& param = model->param;
	// Header: svm/kernel type names come from the NULL-terminated
	// tables above; kernel hyper-parameters are written only for the
	// kernels that use them.
2611 	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2612 	fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2614 	if(param.kernel_type == POLY)
2615 		fprintf(fp,"degree %d\n", param.degree);
2617 	if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2618 		fprintf(fp,"gamma %g\n", param.gamma);
2620 	if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2621 		fprintf(fp,"coef0 %g\n", param.coef0);
2623 	int nr_class = model->nr_class;
2625 	fprintf(fp, "nr_class %d\n", nr_class);
2626 	fprintf(fp, "total_sv %d\n",l);
	// One rho per pairwise classifier: k*(k-1)/2 values.
2630 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2631 			fprintf(fp," %g",model->rho[i]);
2637 		fprintf(fp, "label");
2638 		for(int i=0;i<nr_class;i++)
2639 			fprintf(fp," %d",model->label[i]);
2643 	if(model->probA) // regression has probA only
2645 		fprintf(fp, "probA");
2646 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2647 			fprintf(fp," %g",model->probA[i]);
2652 		fprintf(fp, "probB");
2653 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2654 			fprintf(fp," %g",model->probB[i]);
2660 		fprintf(fp, "nr_sv");
2661 		for(int i=0;i<nr_class;i++)
2662 			fprintf(fp," %d",model->nSV[i]);
	// SV section: one line per support vector — its nr_class-1
	// coefficients (full %.16g precision) followed by the sparse
	// index:value pairs, terminated by the sentinel index -1.
2666 	fprintf(fp, "SV\n");
2667 	const double * const *sv_coef = model->sv_coef;
2668 	const svm_node * const *SV = model->SV;
2670 	for(int i=0;i<l;i++)
2672 		for(int j=0;j<nr_class-1;j++)
2673 			fprintf(fp, "%.16g ",sv_coef[j][i]);
2675 		const svm_node *p = SV[i];
	// Precomputed kernel stores only the sample's serial number.
2677 		if(param.kernel_type == PRECOMPUTED)
2678 			fprintf(fp,"0:%d ",(int)(p->value));
2680 			while(p->index != -1)
2682 				fprintf(fp,"%d:%.8g ",p->index,p->value);
	// Surface buffered-write errors before reporting success.
2687 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	// Shared input buffer for readline(); starts NULL and is allocated
	// by the caller (svm_load_model), growing via realloc as needed.
2691 static char *line = NULL;
2692 static int max_line_len;
2694 static char* readline(FILE *input)
2698 if(fgets(line,max_line_len,input) == NULL)
2701 while(strrchr(line,'\n') == NULL)
2704 line = (char *) realloc(line,max_line_len);
2705 len = (int) strlen(line);
2706 if(fgets(line+len,max_line_len-len,input) == NULL)
2712 svm_model *svm_load_model(const char *model_file_name)
	// Parse a model file written by svm_save_model.  Returns a malloc'd
	// model with free_sv==1 (it owns its SV storage), or NULL on
	// open failure.
2714 	FILE *fp = fopen(model_file_name,"rb");
2715 	if(fp==NULL) return NULL;
2719 	svm_model *model = Malloc(svm_model,1);
2720 	svm_parameter& param = model->param;
	// Optional sections default to absent.
2722 	model->probA = NULL;
2723 	model->probB = NULL;
2724 	model->label = NULL;
	// --- Header loop: read keyword, then its value(s). ---
2730 		fscanf(fp,"%80s",cmd);
2732 		if(strcmp(cmd,"svm_type")==0)
2734 			fscanf(fp,"%80s",cmd);
	// Look the name up in the NULL-terminated table; index = enum value.
2736 			for(i=0;svm_type_table[i];i++)
2738 				if(strcmp(svm_type_table[i],cmd)==0)
2744 			if(svm_type_table[i] == NULL)
2746 				fprintf(stderr,"unknown svm type.\n");
2754 		else if(strcmp(cmd,"kernel_type")==0)
2756 			fscanf(fp,"%80s",cmd);
2758 			for(i=0;kernel_type_table[i];i++)
2760 				if(strcmp(kernel_type_table[i],cmd)==0)
2762 					param.kernel_type=i;
2766 			if(kernel_type_table[i] == NULL)
2768 				fprintf(stderr,"unknown kernel function.\n");
	// Scalar hyper-parameters and model dimensions.
2776 		else if(strcmp(cmd,"degree")==0)
2777 			fscanf(fp,"%d",&param.degree);
2778 		else if(strcmp(cmd,"gamma")==0)
2779 			fscanf(fp,"%lf",&param.gamma);
2780 		else if(strcmp(cmd,"coef0")==0)
2781 			fscanf(fp,"%lf",&param.coef0);
2782 		else if(strcmp(cmd,"nr_class")==0)
2783 			fscanf(fp,"%d",&model->nr_class);
2784 		else if(strcmp(cmd,"total_sv")==0)
2785 			fscanf(fp,"%d",&model->l);
	// Per-pair arrays are sized k*(k-1)/2; per-class arrays sized k.
	// These readers rely on nr_class having been parsed already.
2786 		else if(strcmp(cmd,"rho")==0)
2788 			int n = model->nr_class * (model->nr_class-1)/2;
2789 			model->rho = Malloc(double,n);
2790 			for(int i=0;i<n;i++)
2791 				fscanf(fp,"%lf",&model->rho[i]);
2793 		else if(strcmp(cmd,"label")==0)
2795 			int n = model->nr_class;
2796 			model->label = Malloc(int,n);
2797 			for(int i=0;i<n;i++)
2798 				fscanf(fp,"%d",&model->label[i]);
2800 		else if(strcmp(cmd,"probA")==0)
2802 			int n = model->nr_class * (model->nr_class-1)/2;
2803 			model->probA = Malloc(double,n);
2804 			for(int i=0;i<n;i++)
2805 				fscanf(fp,"%lf",&model->probA[i]);
2807 		else if(strcmp(cmd,"probB")==0)
2809 			int n = model->nr_class * (model->nr_class-1)/2;
2810 			model->probB = Malloc(double,n);
2811 			for(int i=0;i<n;i++)
2812 				fscanf(fp,"%lf",&model->probB[i]);
2814 		else if(strcmp(cmd,"nr_sv")==0)
2816 			int n = model->nr_class;
2817 			model->nSV = Malloc(int,n);
2818 			for(int i=0;i<n;i++)
2819 				fscanf(fp,"%d",&model->nSV[i]);
	// "SV" terminates the header: skip to end of line, then fall
	// through to the support-vector parser below.
2821 		else if(strcmp(cmd,"SV")==0)
2826 			if(c==EOF || c=='\n') break;
2832 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2841 	// read sv_coef and SV
	// --- Pass 1: scan the SV lines once only to count how many
	// svm_node elements are needed, then rewind to `pos`. ---
2844 	long pos = ftell(fp);
2846 	max_line_len = 1024;
2847 	line = Malloc(char,max_line_len);
2848 	char *p,*endptr,*idx,*val;
2850 	while(readline(fp)!=NULL)
2852 		p = strtok(line,":");
2855 			p = strtok(NULL,":");
	// One extra node per SV for the index==-1 terminator.
2861 	elements += model->l;
2863 	fseek(fp,pos,SEEK_SET);
	// --- Pass 2: parse coefficients and nodes for real.  All nodes
	// live in one contiguous x_space block; SV[i] points into it. ---
2865 	int m = model->nr_class - 1;
2867 	model->sv_coef = Malloc(double *,m);
2870 		model->sv_coef[i] = Malloc(double,l);
2871 	model->SV = Malloc(svm_node*,l);
2872 	svm_node *x_space = NULL;
2873 	if(l>0) x_space = Malloc(svm_node,elements);
2879 		model->SV[i] = &x_space[j];
	// Each line: m coefficients, then index:value pairs.
2881 		p = strtok(line, " \t");
2882 		model->sv_coef[0][i] = strtod(p,&endptr);
2883 		for(int k=1;k<m;k++)
2885 			p = strtok(NULL, " \t");
2886 			model->sv_coef[k][i] = strtod(p,&endptr);
2891 			idx = strtok(NULL, ":");
2892 			val = strtok(NULL, " \t");
2896 			x_space[j].index = (int) strtol(idx,&endptr,10);
2897 			x_space[j].value = strtod(val,&endptr);
	// Terminate this SV's node list.
2901 		x_space[j++].index = -1;
2905 	if (ferror(fp) != 0 || fclose(fp) != 0)
	// free_sv==1: destroying this model must free x_space (see
	// svm_destroy_model).
2908 	model->free_sv = 1;	// XXX
2912 void svm_destroy_model(svm_model* model)
	// Free a model's owned storage.  free_sv is 1 only for models built
	// by svm_load_model, whose nodes live in one contiguous block whose
	// base address is SV[0]; models from svm_train alias the caller's
	// training data and must not free it.
2914 	if(model->free_sv && model->l > 0)
2915 		free((void *)(model->SV[0]));
	// sv_coef has nr_class-1 rows (one per one-vs-one classifier set).
2916 	for(int i=0;i<model->nr_class-1;i++)
2917 		free(model->sv_coef[i]);
2919 	free(model->sv_coef);
2928 void svm_destroy_param(svm_parameter* param)
2930 free(param->weight_label);
2931 free(param->weight);
2934 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
	// Validate a parameter set against a problem before training.
	// Returns NULL when everything is acceptable, otherwise a static
	// English string describing the first violation found.
2938 	int svm_type = param->svm_type;
2939 	if(svm_type != C_SVC &&
2940 	   svm_type != NU_SVC &&
2941 	   svm_type != ONE_CLASS &&
2942 	   svm_type != EPSILON_SVR &&
2944 		return "unknown svm type";
2946 	// kernel_type, degree
2948 	int kernel_type = param->kernel_type;
2949 	if(kernel_type != LINEAR &&
2950 	   kernel_type != POLY &&
2951 	   kernel_type != RBF &&
2952 	   kernel_type != SIGMOID &&
2953 	   kernel_type != PRECOMPUTED)
2954 		return "unknown kernel type";
2956 	if(param->gamma < 0)
2959 	if(param->degree < 0)
2960 		return "degree of polynomial kernel < 0";
2962 	// cache_size,eps,C,nu,p,shrinking
2964 	if(param->cache_size <= 0)
2965 		return "cache_size <= 0";
	// C applies to C-SVC and both SVR variants; nu to the nu-
	// formulations and one-class; p (epsilon-tube) to epsilon-SVR only.
2970 	if(svm_type == C_SVC ||
2971 	   svm_type == EPSILON_SVR ||
2976 	if(svm_type == NU_SVC ||
2977 	   svm_type == ONE_CLASS ||
2979 		if(param->nu <= 0 || param->nu > 1)
2980 			return "nu <= 0 or nu > 1";
2982 	if(svm_type == EPSILON_SVR)
2986 	if(param->shrinking != 0 &&
2987 	   param->shrinking != 1)
2988 		return "shrinking != 0 and shrinking != 1";
2990 	if(param->probability != 0 &&
2991 	   param->probability != 1)
2992 		return "probability != 0 and probability != 1";
2994 	if(param->probability == 1 &&
2995 	   svm_type == ONE_CLASS)
2996 		return "one-class SVM probability output not supported yet";
2999 	// check whether nu-svc is feasible
	// nu-SVC feasibility depends on the class sizes, so count the
	// examples of each label (growing the arrays as new labels appear).
3001 	if(svm_type == NU_SVC)
3004 		int max_nr_class = 16;
3006 		int *label = Malloc(int,max_nr_class);
3007 		int *count = Malloc(int,max_nr_class);
3012 			int this_label = (int)prob->y[i];
3014 			for(j=0;j<nr_class;j++)
3015 				if(this_label == label[j])
3022 			if(nr_class == max_nr_class)
3025 				label = (int *)realloc(label,max_nr_class*sizeof(int));
3026 				count = (int *)realloc(count,max_nr_class*sizeof(int));
3028 			label[nr_class] = this_label;
3029 			count[nr_class] = 1;
	// Feasibility condition for every class pair (n1,n2):
	// nu*(n1+n2)/2 must not exceed min(n1,n2).
3034 		for(i=0;i<nr_class;i++)
3037 			for(int j=i+1;j<nr_class;j++)
3040 				if(param->nu*(n1+n2)/2 > min(n1,n2))
3044 					return "specified nu is infeasible";
3055 int svm_check_probability_model(const svm_model *model)
3057 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3058 model->probA!=NULL && model->probB!=NULL) ||
3059 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3060 model->probA!=NULL);
3063 void svm_set_print_string_function(void (*print_func)(const char *))
3065 if(print_func == NULL)
3066 svm_print_string = &print_string_stdout;
3068 svm_print_string = print_func;