LibsvmFileLoader.cpp

Go to the documentation of this file.
00001 #include "LibsvmFileLoader.h"
00002 #include "KernelMachines.h"
00003 
00004 namespace damina
00005 {
00006 
00012         LibsvmFileLoader::LibsvmFileLoader(string s) {
00013                 this->filename = s;
00014         }
00015         
00020         LibsvmFileLoader::~LibsvmFileLoader() {
00021         }
00022         
00023         
00029         struct svm_problem *LibsvmFileLoader::load(bool precomputedKernel) {
00030                 int elements, max_index, i, j;
00031                 FILE *fp = fopen(this->filename.c_str(),"r");
00032                 struct svm_problem *prob = (struct svm_problem *) malloc(sizeof(struct svm_problem));
00033                 
00034                 if (fp == NULL) {
00035                         fprintf(stderr, "can't open input file %s\n", this->filename.c_str());
00036                         fprintf(stderr, "reading from standard input...");
00037                         fp = stdin;
00038                         //return NULL;
00039                 }
00040         
00041                 prob->l = 0;
00042                 elements = 0;
00043                 while(1)
00044                 {
00045                         int c = fgetc(fp);
00046                         switch(c)
00047                         {
00048                                 case '\n':
00049                                         ++(prob->l);
00050                                         // fall through,
00051                                         // count the '-1' element
00052                                 case ':':
00053                                         ++elements;
00054                                         break;
00055                                 case EOF:
00056                                         goto out;
00057                                 default:
00058                                         ;
00059                         }
00060                 }
00061         out:
00062                 rewind(fp);
00063         
00064                 prob->y = (double *) malloc(sizeof(double) * prob->l);
00065                 prob->x = (struct svm_node **) malloc(sizeof(struct svm_node*) * prob->l);
00066                 struct svm_node *x_space = (struct svm_node *) malloc(sizeof(struct svm_node) * elements);
00067         
00068                 max_index = 0;
00069                 j=0;
00070                 for(i=0;i<prob->l;i++)
00071                 {
00072                         double label;
00073                         prob->x[i] = &x_space[j];
00074                         fscanf(fp,"%lf",&label);
00075                         prob->y[i] = label;
00076         
00077                         while(1)
00078                         {
00079                                 int c;
00080                                 do {
00081                                         c = getc(fp);
00082                                         if(c=='\n') goto out2;
00083                                 } while(isspace(c));
00084                                 ungetc(c,fp);
00085                                 if (fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value)) < 2)
00086                                 {
00087                                         fprintf(stderr,"Wrong input format at line %d\n", i+1);
00088                                         return NULL;
00089                                 }
00090                                 ++j;
00091                         }       
00092         out2:
00093                         if(j>=1 && x_space[j-1].index > max_index)
00094                                 max_index = x_space[j-1].index;
00095                         x_space[j++].index = -1;
00096                 }
00097                 
00098                 
00099         
00100 //              if(param->gamma == 0)
00101 //                      param->gamma = 1.0/max_index; //max_index is the number of attributes in the input data.
00102 //      
00103                 if (precomputedKernel) {
00104                         for (i = 0; i < prob->l; i++)
00105                         {
00106                                 if (prob->x[i][0].index != 0)
00107                                 {
00108                                         fprintf(stderr,"Wrong input format: first column must be 0:sample_serial_number\n");
00109                                         return NULL;
00110                                 }
00111                                 if ((int)prob->x[i][0].value <= 0 || (int)prob->x[i][0].value > max_index)
00112                                 {
00113                                         fprintf(stderr,"Wrong input format: sample_serial_number out of range\n");
00114                                         return NULL;
00115                                 }
00116                         }
00117                 }
00118         
00119                 fclose(fp);
00120                 
00121                 return prob;
00122         }
00123 
00124         struct svm_problem *LibsvmFileLoader::load(bool precomputedKernel, string s) {
00125                 this->filename = s;
00126                 return load(precomputedKernel);
00127         }
00128 }

Generated on Mon Sep 24 22:26:48 2007 for SVClustering by  doxygen 1.5.2