Multilabel LibSVM modifications
Still to come
- more detailed description
- full source code package
- interface for Mulan
Instructions
In order to enable LibSVM to handle multilabel data, you essentially only have to make one modification: download the source code of the Java implementation of LibSVM and add the following two methods to svm.java in the package/directory libsvm — that is all.
/**
 * Trains a pairwise (one-vs-one, "round robin") multilabel SVM model as in
 * Eneldo Loza Mencia, Johannes Fürnkranz: Pairwise Learning of Multilabel Classifications with Perceptrons, IJCNN 2008.
 * The resulting classifiers have the same structure as an ordinary pairwise multiclass model,
 * so the normal LibSVM prediction methods can be applied to the returned model.
 * If the calibration like in Johannes Fürnkranz, Eyke Hüllermeier, Eneldo Loza Mencía and Klaus Brinker,
 * Multilabel Classification via Calibrated Label Ranking, Machine Learning Journal, 2008 is used
 * (normal0CLR1BR2 == 1), then the last label will be the virtual calibration label.
 *
 * @param prob Training instances of the problem (only prob.x and prob.l are read here; prob.y is ignored,
 *             the label information comes from yLabels instead)
 * @param param Parameter of SVM and training
 * @param yLabels an array[instanceID][numLabelsForInstance] containing the label indices the instance belongs to, same order as in prob.x and the label index begins at 0
 * @param numLabels number of labels, so that we don't need to count. it's always the same amount, even if CLR or BR is applied (don't count the virtual label)
 * @param normal0CLR1BR2 set 0 for normal pairwise learning, 1 for calibrated pairwise learning (CLR) and 2 for binary relevance (BR)
 * @return the ("multiclass") pairwise model
 */
public static svm_model svm_train_multilabel(svm_problem prob, svm_parameter param, int[][]yLabels, int numLabels, int normal0CLR1BR2)
{
svm_model model = new svm_model();
model.param = param;
// CLR and BR both append an artificial "virtual" label as class index nr_class-1;
// BR is then realised by only training the label-vs-virtual (one-vs-rest) classifiers.
if(normal0CLR1BR2==1 || normal0CLR1BR2==2){
numLabels++; //include the virtual calibration label. for BR this is also achieved via using the calibrated label
}
// classification
int l = prob.l;
int nr_class = numLabels;
// Class labels are stored as 1..nr_class in the model (LibSVM convention of
// externally visible labels), while yLabels uses 0-based indices.
int[] label = new int[nr_class];
for(int i=0;i<nr_class;i++)
label[i]=i+1; //FIXME: this may not be true
svm_node[][] x = prob.x;
int i;
//TODO: actually, C parameter and others should be computed on each sub-problem individually. So we should include parameter search here
// Per-class C, possibly rescaled by user-supplied class weights below.
double[] weighted_C = new double[nr_class];
for(i=0;i<nr_class;i++)
weighted_C[i] = param.C;
for(i=0;i<param.nr_weight;i++)
{
int j;
for(j=0;j<nr_class;j++)
if(param.weight_label[i] == label[j])
break;
if(j == nr_class)
System.err.print("warning: class label "+param.weight_label[i]+" specified in weight is not found\n");
else
weighted_C[j] *= param.weight[i];
}
// train k*(k-1)/2 models
// isSV[c][k] records whether instance k ends up as a support vector for class c
// in at least one of the pairwise problems; used to build the compact SV list later.
boolean[][] isSV = new boolean[nr_class][l];
for(i=0;i<nr_class;i++)
Arrays.fill(isSV[i], false);
decision_function[] f = new decision_function[nr_class*(nr_class-1)/2];
double[] probA=null,probB=null;
if (param.probability == 1)
{
probA=new double[nr_class*(nr_class-1)/2];
probB=new double[nr_class*(nr_class-1)/2];
}
int numSVs[]=new int[nr_class];
Arrays.fill(numSVs, 0);
int nnz = 0;
// SVcoef[c][k][m] collects the dual coefficients of instance k as an SV of class c,
// laid out like LibSVM's model.sv_coef (see the copy loop at the end of this method).
double [][][] SVcoef=new double[nr_class][l][nr_class-1];
int p = 0;
for(i=0;i<nr_class;i++){
for(int j=i+1;j<nr_class;j++)
{
// Build the binary subproblem label i vs. label j: +1 = has i but not j,
// -1 = has j but not i, 0 = not part of this subproblem.
svm_problem sub_prob = new svm_problem();
sub_prob.l=0;
byte binaryLabel[]=new byte[l];
for(int k=0;k<l;k++){
//if not BR, and then if normal mode, or (CLR, but the virtual label is not involved)
if(normal0CLR1BR2!=2 && (normal0CLR1BR2==0 || (normal0CLR1BR2==1 && j!=nr_class-1))){
if(isLabel(i,yLabels[k]) && !isLabel(j,yLabels[k])){
binaryLabel[k]=1;
sub_prob.l++;
}else if(!isLabel(i,yLabels[k]) && isLabel(j,yLabels[k])){
binaryLabel[k]=-1;
sub_prob.l++;
}else{
binaryLabel[k]=0;
}
//clr modus and virtual label is involved. This corresponds to learning BR classifiers
}else if((normal0CLR1BR2==1 || normal0CLR1BR2==2) && j==nr_class-1){
if(isLabel(i,yLabels[k])){
binaryLabel[k]=1;
sub_prob.l++;
}else {//if(!isLabel(i,yLabels[k]))
binaryLabel[k]=-1;
sub_prob.l++;
} //no else, all examples are added, either as positive or negative examples
}
// NOTE(review): in BR mode (normal0CLR1BR2==2) pairs not involving the virtual
// label match neither branch, so their subproblem stays empty (sub_prob.l==0)
// and is still passed to svm_train_one below — confirm this is intended.
}
//build up the binary training set x and y
sub_prob.x = new svm_node[sub_prob.l][];
sub_prob.y = new double[sub_prob.l];
int k2=0;
for(int k=0;k<l;k++){
if(binaryLabel[k]!=0){
//x[k] is the k-th example of the problem, but the k2-th example of the subproblem
sub_prob.x[k2] = x[k];
sub_prob.y[k2] = binaryLabel[k];
k2++;
}
}
if(param.probability == 1)
{
double[] probAB=new double[2];
svm_binary_svc_probability(sub_prob,param,weighted_C[i],weighted_C[j],probAB);
probA[p]=probAB[0];
probB[p]=probAB[1];
}
//train the binary svm classifier
f[p] = svm_train_one(sub_prob,param,weighted_C[i],weighted_C[j]);
//info about the SVs
// Walk the subproblem again in the same order to map each alpha back to the
// original instance index k, recording SV membership and coefficients.
k2=0;
for(int k=0;k<l;k++){
if(binaryLabel[k]!=0){
//x[k] is the k-th example of the problem, but the k2-th example of the subproblem
double alpha=f[p].alpha[k2];
if(Math.abs(alpha)>0){
if(binaryLabel[k]==1){
if(!isSV[i][k]){
isSV[i][k]=true;
numSVs[i]++;
nnz++;
}
// Coefficient slot j-1 for a class-i SV mirrors LibSVM's sv_coef layout.
SVcoef[i][k][j-1]=alpha;
}else{ //if(isLabel(j,prob.y[k]))
if(!isSV[j][k]){
isSV[j][k]=true;
numSVs[j]++;
nnz++;
}
SVcoef[j][k][i]=alpha;
}
}
k2++;
}
}
++p;
}
}
// build output and model
model.nr_class = nr_class;
model.label = new int[nr_class];
for(i=0;i<nr_class;i++)
model.label[i] = label[i];
model.rho = new double[nr_class*(nr_class-1)/2];
for(i=0;i<nr_class*(nr_class-1)/2;i++)
model.rho[i] = f[i].rho;
if(param.probability == 1)
{
model.probA = new double[nr_class*(nr_class-1)/2];
model.probB = new double[nr_class*(nr_class-1)/2];
for(i=0;i<nr_class*(nr_class-1)/2;i++)
{
model.probA[i] = probA[i];
model.probB[i] = probB[i];
}
}
else
{
model.probA=null;
model.probB=null;
}
// int[] nz_count = new int[nr_class];
model.nSV = numSVs;
svm.info("Total nSV = "+nnz+"\n");
model.l = nnz;
// Flatten the per-class SV bookkeeping into the model's compact SV/sv_coef
// arrays, grouped by class (class 0 SVs first, then class 1, ...), exactly
// as svm_predict_values expects via model.nSV offsets.
model.SV = new svm_node[nnz][];
model.sv_coef = new double[nr_class-1][nnz];
p = 0;
for(i=0;i<nr_class;i++){
for(int k=0;k<l;k++){
//go through the whole SV vector (numClass * numInstances) and add the instances as SV if they were SV
if(isSV[i][k]){
// model.SV[p] = SVs[i][k];
model.SV[p] = x[k];
//copy the coefficients too
for(int j=0;j<nr_class-1;j++){
model.sv_coef[j][p]= SVcoef[i][k][j];
}
p++;
}
}
}
return model;
}
/**
 * Checks whether a label index occurs in an instance's label set.
 *
 * @param labelIndex the 0-based label index to look for
 * @param labelArrayWithIndicesBeginningWithZero the 0-based label indices assigned to the instance
 * @return true if labelIndex is contained in the array, false otherwise
 */
public static boolean isLabel(int labelIndex, int[] labelArrayWithIndicesBeginningWithZero) {
    // Linear membership scan; label arrays are small (one entry per assigned label).
    for (int assigned : labelArrayWithIndicesBeginningWithZero) {
        if (assigned == labelIndex) {
            return true;
        }
    }
    return false;
}
During prediction, simply call svm_predict_values(svm_model model, svm_node[] x, double[] dec_values) and you will find the pairwise predictions in dec_values. Below is a small piece of code if you just want to use robust full voting; note that you may, for example, also use the pairwise coupling approach built into LibSVM.
public static double[] svm_predict_votevector(svm_model model, svm_node[] x)
{
if(model.param.svm_type == svm_parameter.ONE_CLASS ||
model.param.svm_type == svm_parameter.EPSILON_SVR ||
model.param.svm_type == svm_parameter.NU_SVR)
{
System.err.println("Wrong modus for multilabel");
return null;
}
else
{
int i;
int nr_class = model.nr_class;
int l = model.l;
double[] kvalue = new double[l];
for(i=0;i<l;i++)
kvalue[i] = Kernel.k_function(x,model.SV[i],model.param);
int[] start = new int[nr_class];
start[0] = 0;
for(i=1;i<nr_class;i++)
start[i] = start[i-1]+model.nSV[i-1];
double[] vote = new double[nr_class];
int p=0;
for(i=0;i<nr_class;i++)
for(int j=i+1;j<nr_class;j++)
{
double sum = 0;
int si = start[i];
int sj = start[j];
int ci = model.nSV[i];
int cj = model.nSV[j];
int k;
double[] coef1 = model.sv_coef[j-1];
double[] coef2 = model.sv_coef[i];
for(k=0;k<ci;k++)
sum += coef1[si+k] * kvalue[si+k];
for(k=0;k<cj;k++)
sum += coef2[sj+k] * kvalue[sj+k];
sum -= model.rho[p];
if(sum > 0)
++vote[i];
else if (sum < 0)
++vote[j];
else{
vote[i]+=0.5
vote[i]+=0.5
}
p++;
}
return vote;
}
}