Multilabel LibSVM modifications
Still to come
- more detailed description
- full source code package
- interface for Mulan
Instructions
In order to enable LibSVM to handle multilabel data, you essentially only have to make one modification: download the source code of the Java implementation of LibSVM and add the following two methods to svm.java in the package/directory libsvm — that is all.
/**
 * Trains a pairwise (one-vs-one, "round robin") multilabel SVM model as in
 * Eneldo Loza Mencia, Johannes Fürnkranz: Pairwise Learning of Multilabel Classifications with Perceptrons, IJCNN 2008.
 * The resulting classifiers have the same structure as an ordinary pairwise multiclass model,
 * so the normal LibSVM prediction methods can be applied to the returned model.
 * If the calibration like in Johannes Fürnkranz, Eyke Hüllermeier, Eneldo Loza Mencía and Klaus Brinker,
 * Multilabel Classification via Calibrated Label Ranking, Machine Learning Journal, 2008 is used
 * (normal0CLR1BR2 == 1), then the last label will be the virtual calibration label.
 *
 * @param prob Training instances of the problem (only prob.x and prob.l are read here; prob.y is ignored,
 *             the label information comes from yLabels instead)
 * @param param Parameter of SVM and training
 * @param yLabels an array[instanceID][numLabelsForInstance] containing the label indices the instance belongs to, same order as in prob.x and the label index begins at 0
 * @param numLabels number of labels, so that we don't need to count. it's always the same amount, even if CLR or BR is applied (don't count the virtual label)
 * @param normal0CLR1BR2 set 0 for normal pairwise learning, 1 for calibrated pairwise learning (CLR) and 2 for binary relevance (BR)
 * @return the ("multiclass") pairwise model
 */
public static svm_model svm_train_multilabel(svm_problem prob, svm_parameter param, int[][]yLabels, int numLabels, int normal0CLR1BR2)
{
svm_model model = new svm_model();
model.param = param;
// CLR and BR both append an artificial "virtual" label as class index nr_class-1;
// BR is then realised by only training the label-vs-virtual (one-vs-rest) classifiers.
if(normal0CLR1BR2==1 || normal0CLR1BR2==2){
numLabels++; //include the virtual calibration label. for BR this is also achieved via using the calibrated label
}
// classification
int l = prob.l;
int nr_class = numLabels;
// Class labels are stored as 1..nr_class in the model (LibSVM convention of
// externally visible labels), while yLabels uses 0-based indices.
int[] label = new int[nr_class];
for(int i=0;i<nr_class;i++)
label[i]=i+1; //FIXME: this may not be true
svm_node[][] x = prob.x;
int i;
//TODO: actually, C parameter and others should be computed on each sub-problem individually. So we should include parameter search here
// Per-class C, possibly rescaled by user-supplied class weights below.
double[] weighted_C = new double[nr_class];
for(i=0;i<nr_class;i++)
weighted_C[i] = param.C;
for(i=0;i<param.nr_weight;i++)
{
int j;
for(j=0;j<nr_class;j++)
if(param.weight_label[i] == label[j])
break;
if(j == nr_class)
System.err.print("warning: class label "+param.weight_label[i]+" specified in weight is not found\n");
else
weighted_C[j] *= param.weight[i];
}
// train k*(k-1)/2 models
// isSV[c][k] records whether instance k ends up as a support vector for class c
// in at least one of the pairwise problems; used to build the compact SV list later.
boolean[][] isSV = new boolean[nr_class][l];
for(i=0;i<nr_class;i++)
Arrays.fill(isSV[i], false);
decision_function[] f = new decision_function[nr_class*(nr_class-1)/2];
double[] probA=null,probB=null;
if (param.probability == 1)
{
probA=new double[nr_class*(nr_class-1)/2];
probB=new double[nr_class*(nr_class-1)/2];
}
int numSVs[]=new int[nr_class];
Arrays.fill(numSVs, 0);
int nnz = 0;
// SVcoef[c][k][m] collects the dual coefficients of instance k as an SV of class c,
// laid out like LibSVM's model.sv_coef (see the copy loop at the end of this method).
double [][][] SVcoef=new double[nr_class][l][nr_class-1];
int p = 0;
for(i=0;i<nr_class;i++){
for(int j=i+1;j<nr_class;j++)
{
// Build the binary subproblem label i vs. label j: +1 = has i but not j,
// -1 = has j but not i, 0 = not part of this subproblem.
svm_problem sub_prob = new svm_problem();
sub_prob.l=0;
byte binaryLabel[]=new byte[l];
for(int k=0;k<l;k++){
//if not BR, and then if normal mode, or (CLR, but the virtual label is not involved)
if(normal0CLR1BR2!=2 && (normal0CLR1BR2==0 || (normal0CLR1BR2==1 && j!=nr_class-1))){
if(isLabel(i,yLabels[k]) && !isLabel(j,yLabels[k])){
binaryLabel[k]=1;
sub_prob.l++;
}else if(!isLabel(i,yLabels[k]) && isLabel(j,yLabels[k])){
binaryLabel[k]=-1;
sub_prob.l++;
}else{
binaryLabel[k]=0;
}
//clr modus and virtual label is involved. This corresponds to learning BR classifiers
}else if((normal0CLR1BR2==1 || normal0CLR1BR2==2) && j==nr_class-1){
if(isLabel(i,yLabels[k])){
binaryLabel[k]=1;
sub_prob.l++;
}else {//if(!isLabel(i,yLabels[k]))
binaryLabel[k]=-1;
sub_prob.l++;
} //no else, all examples are added, either as positive or negative examples
}
// NOTE(review): in BR mode (normal0CLR1BR2==2) pairs not involving the virtual
// label match neither branch, so their subproblem stays empty (sub_prob.l==0)
// and is still passed to svm_train_one below — confirm this is intended.
}
//build up the binary training set x and y
sub_prob.x = new svm_node[sub_prob.l][];
sub_prob.y = new double[sub_prob.l];
int k2=0;
for(int k=0;k<l;k++){
if(binaryLabel[k]!=0){
//x[k] is the k-th example of the problem, but the k2-th example of the subproblem
sub_prob.x[k2] = x[k];
sub_prob.y[k2] = binaryLabel[k];
k2++;
}
}
if(param.probability == 1)
{
double[] probAB=new double[2];
svm_binary_svc_probability(sub_prob,param,weighted_C[i],weighted_C[j],probAB);
probA[p]=probAB[0];
probB[p]=probAB[1];
}
//train the binary svm classifier
f[p] = svm_train_one(sub_prob,param,weighted_C[i],weighted_C[j]);
//info about the SVs
// Walk the subproblem again in the same order to map each alpha back to the
// original instance index k, recording SV membership and coefficients.
k2=0;
for(int k=0;k<l;k++){
if(binaryLabel[k]!=0){
//x[k] is the k-th example of the problem, but the k2-th example of the subproblem
double alpha=f[p].alpha[k2];
if(Math.abs(alpha)>0){
if(binaryLabel[k]==1){
if(!isSV[i][k]){
isSV[i][k]=true;
numSVs[i]++;
nnz++;
}
// Coefficient slot j-1 for a class-i SV mirrors LibSVM's sv_coef layout.
SVcoef[i][k][j-1]=alpha;
}else{ //if(isLabel(j,prob.y[k]))
if(!isSV[j][k]){
isSV[j][k]=true;
numSVs[j]++;
nnz++;
}
SVcoef[j][k][i]=alpha;
}
}
k2++;
}
}
++p;
}
}
// build output and model
model.nr_class = nr_class;
model.label = new int[nr_class];
for(i=0;i<nr_class;i++)
model.label[i] = label[i];
model.rho = new double[nr_class*(nr_class-1)/2];
for(i=0;i<nr_class*(nr_class-1)/2;i++)
model.rho[i] = f[i].rho;
if(param.probability == 1)
{
model.probA = new double[nr_class*(nr_class-1)/2];
model.probB = new double[nr_class*(nr_class-1)/2];
for(i=0;i<nr_class*(nr_class-1)/2;i++)
{
model.probA[i] = probA[i];
model.probB[i] = probB[i];
}
}
else
{
model.probA=null;
model.probB=null;
}
// int[] nz_count = new int[nr_class];
model.nSV = numSVs;
svm.info("Total nSV = "+nnz+"\n");
model.l = nnz;
// Flatten the per-class SV bookkeeping into the model's compact SV/sv_coef
// arrays, grouped by class (class 0 SVs first, then class 1, ...), exactly
// as svm_predict_values expects via model.nSV offsets.
model.SV = new svm_node[nnz][];
model.sv_coef = new double[nr_class-1][nnz];
p = 0;
for(i=0;i<nr_class;i++){
for(int k=0;k<l;k++){
//go through the whole SV vector (numClass * numInstances) and add the instances as SV if they were SV
if(isSV[i][k]){
// model.SV[p] = SVs[i][k];
model.SV[p] = x[k];
//copy the coefficients too
for(int j=0;j<nr_class-1;j++){
model.sv_coef[j][p]= SVcoef[i][k][j];
}
p++;
}
}
}
return model;
}
/**
 * Checks whether a label index occurs in an instance's label set.
 *
 * @param labelIndex the 0-based label index to look for
 * @param labelArrayWithIndicesBeginningWithZero the 0-based label indices assigned to the instance
 * @return true if labelIndex is contained in the array, false otherwise
 */
public static boolean isLabel(int labelIndex, int[] labelArrayWithIndicesBeginningWithZero) {
    // Linear membership scan; label arrays are small (one entry per assigned label).
    for (int assigned : labelArrayWithIndicesBeginningWithZero) {
        if (assigned == labelIndex) {
            return true;
        }
    }
    return false;
}
During prediction, simply call svm_predict_values(svm_model model, svm_node[] x, double[] dec_values) and you will find the pairwise predictions in dec_values. Below is a small piece of code if you just want to use robust full voting; note that you may, for example, also use the pairwise coupling approach built into LibSVM.
public static double[] svm_predict_votevector(svm_model model, svm_node[] x)
{
if(model.param.svm_type == svm_parameter.ONE_CLASS ||
model.param.svm_type == svm_parameter.EPSILON_SVR ||
model.param.svm_type == svm_parameter.NU_SVR)
{
System.err.println("Wrong modus for multilabel");
return null;
}
else
{
int i;
int nr_class = model.nr_class;
int l = model.l;
double[] kvalue = new double[l];
for(i=0;i<l;i++)
kvalue[i] = Kernel.k_function(x,model.SV[i],model.param);
int[] start = new int[nr_class];
start[0] = 0;
for(i=1;i<nr_class;i++)
start[i] = start[i-1]+model.nSV[i-1];
double[] vote = new double[nr_class];
int p=0;
for(i=0;i<nr_class;i++)
for(int j=i+1;j<nr_class;j++)
{
double sum = 0;
int si = start[i];
int sj = start[j];
int ci = model.nSV[i];
int cj = model.nSV[j];
int k;
double[] coef1 = model.sv_coef[j-1];
double[] coef2 = model.sv_coef[i];
for(k=0;k<ci;k++)
sum += coef1[si+k] * kvalue[si+k];
for(k=0;k<cj;k++)
sum += coef2[sj+k] * kvalue[sj+k];
sum -= model.rho[p];
if(sum > 0)
++vote[i];
else if (sum < 0)
++vote[j];
else{
vote[i]+=0.5
vote[i]+=0.5
}
p++;
}
return vote;
}
}