svc.c File Reference

#include <getopt.h>
#include <fcntl.h>
#include <errno.h>
#include <err.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <iostream>
#include <fstream>
#include <string>
#include <boost/timer.hpp>
#include "svm.h"
#include "LibsvmFileLoader.h"
#include "SVClustering.h"
#include "CCLSVClustering.h"
#include "GKWGenerator.h"
#include "KernelMachines.h"
#include "L1Distance.h"
#include "L2Distance.h"

Include dependency graph for svc.c:

Go to the source code of this file.

Classes

struct  svc_options
 The structure encapsulating the SVC runing options. More...

Defines

#define DEFAULT_ALG   0
#define DEFAULT_C   1
#define DEFAULT_EDIST   2
#define DEFAULT_KERNEL   -1
#define DEFAULT_KW   -1
 Support Vector Clustering Test and Benchmark.
#define DEFAULT_L_OFFSET   1
#define DEFAULT_RUNS   100000
#define DEFAULT_SOFTENING   1
#define DEFAULT_VALID_THRESHOLD   2

Enumerations

enum  { CCL = 0, CG = 1 }
 The enumeration for Cluster Labeling algorithm types. More...
enum  { L1 = 1, L2 = 2 }
 The enumeration for Euclidean Distance types. More...
enum  { BSV_CLASSIC = 0, BSV_ENHANCED = 1 }
 The enumeration for the BSV clustering policy. More...

Functions

void computeEvaluatingMeasreusPerClass (vector< int > tp, vector< int > fp, vector< int > tn, vector< int > fn, vector< double > &contamination, vector< double > &specificity, vector< double > &precision, vector< double > &recall, vector< double > &f1, double &ma, double &accuracy)
 For all classes computes the precision/recall/f1 values.
void countRightAndWrongClassificationsPerClass (damina::SVClustering *clust, vector< int > &tp, vector< int > &fp, vector< int > &tn, vector< int > &fn, int nrClasses, int classOffset, vector< unsigned long int > &ppc, unsigned long int valid_t)
 For all classes computes the number of true/false positives and true/false negatives.
int main (int argc, char *argv[])
 The main flow for SVC benchmark.
svc_optionsparseCommandLineArgs (int argc, char **argv)
 Parse the command line arguments and return the options for the SVC running.
void printAllPoints (damina::SVClustering *clust, unsigned long int classOffset, char *filename)
 Print all points with their original classes and cluster assigned.
void printUsedCommandLine (int argc, char *argv[])
 Print the whole command line used to invoke the program.
void usage (char **argv)
 Print the usage on the standard error.


Define Documentation

#define DEFAULT_ALG   0

Definition at line 38 of file svc.c.

Referenced by parseCommandLineArgs(), and usage().

#define DEFAULT_C   1

Definition at line 37 of file svc.c.

Referenced by parseCommandLineArgs(), and usage().

#define DEFAULT_EDIST   2

Definition at line 43 of file svc.c.

Referenced by parseCommandLineArgs().

#define DEFAULT_KERNEL   -1

Definition at line 44 of file svc.c.

Referenced by main(), and parseCommandLineArgs().

#define DEFAULT_KW   -1

Support Vector Clustering Test and Benchmark.

Author:
Vincenzo Russo - vincenzo.russo@neminis.org

Definition at line 36 of file svc.c.

Referenced by parseCommandLineArgs().

#define DEFAULT_L_OFFSET   1

Definition at line 40 of file svc.c.

Referenced by parseCommandLineArgs(), and usage().

#define DEFAULT_RUNS   100000

Definition at line 39 of file svc.c.

Referenced by parseCommandLineArgs(), and usage().

#define DEFAULT_SOFTENING   1

Definition at line 42 of file svc.c.

Referenced by parseCommandLineArgs(), and usage().

#define DEFAULT_VALID_THRESHOLD   2

Definition at line 41 of file svc.c.

Referenced by parseCommandLineArgs(), and usage().


Enumeration Type Documentation

anonymous enum

The enumeration for Cluster Labeling algorithm types.

Enumerator:
CCL 
CG 

Definition at line 61 of file svc.c.

00061 {CCL = 0, CG = 1}; /* alg_type: cluster labeling algorithms types */

anonymous enum

The enumeration for Euclidean Distance types.

Enumerator:
L1 
L2 

Definition at line 67 of file svc.c.

00067 {L1 = 1, L2 = 2};       

anonymous enum

The enumeration for the BSV clustering policy.

Enumerator:
BSV_CLASSIC 
BSV_ENHANCED 

Definition at line 72 of file svc.c.

00072 {BSV_CLASSIC = 0, BSV_ENHANCED = 1};


Function Documentation

void computeEvaluatingMeasreusPerClass ( vector< int >  tp,
vector< int >  fp,
vector< int >  tn,
vector< int >  fn,
vector< double > &  contamination,
vector< double > &  specificity,
vector< double > &  precision,
vector< double > &  recall,
vector< double > &  f1,
double &  ma,
double &  accuracy 
)

For all classes computes the precision/recall/f1 values.

Parameters:
tp The true positives vector for all classes
fp The flase positives vector for all classes
tn The true negatives vector for all classes
fp The flase negatives vector for all classes
contamination The contamination value for all classes
precision The precision value for all classes
recall The recall value for all classes
f1 The f1 value for all classes

Definition at line 396 of file svc.c.

Referenced by main().

00398                                                                                                                              {
00399 
00400         unsigned long int nrClasses = tp.size();
00401         int datasedDim = tp[0] + fp[0] + tn[0] + fn[0];
00402         
00403         precision.clear();
00404         recall.clear();
00405         f1.clear();
00406         
00407         contamination.resize(nrClasses);
00408         precision.resize(nrClasses);
00409         recall.resize(nrClasses);
00410         f1.resize(nrClasses);
00411         specificity.resize(nrClasses);
00412         
00413         ma = 0;
00414         accuracy = 0;
00415         
00416         for (unsigned long int i = 0; i < nrClasses ; i++) {
00417                 accuracy += tp[i];
00418                 precision[i] = ((double)tp[i] / (double)(tp[i] + fp[i])) * 100;
00419                 recall[i] = ((double)tp[i] / (double)(tp[i] + fn[i])) * 100;
00420                 specificity[i] = ((double)tn[i] / (double)(tn[i] + fp[i])) * 100;
00421                 contamination[i] = (double)fp[i] / (double)tp[i];
00422                 if (precision[i] + recall[i] == 0) {
00423                         f1[i] = -1; 
00424                 }
00425                 else {
00426                         f1[i] = (2 * (precision[i] * recall[i])) / (precision[i] + recall[i]);
00427                         ma += f1[i];
00428                 }
00429 
00430         }
00431                 
00432         ma = ma / (double) nrClasses;
00433         accuracy = accuracy / (double)datasedDim * 100;
00434 }

Here is the caller graph for this function:

void countRightAndWrongClassificationsPerClass ( damina::SVClustering clust,
vector< int > &  tp,
vector< int > &  fp,
vector< int > &  tn,
vector< int > &  fn,
int  nrClasses,
int  classOffset,
vector< unsigned long int > &  ppc,
unsigned long int  valid_t 
)

For all classes computes the number of true/false positives and true/false negatives.

Parameters:
clust The involved Support Vector Clustering engine
tp The true positives vector for all classes
fp The flase positives vector for all classes
tn The true negatives vector for all classes
fp The flase negatives vector for all classes
nrClasses The number of the original classes
classOffset The label of the first class in the original classification

Definition at line 349 of file svc.c.

References damina::SVClustering::getClustersAssignment(), damina::SVClustering::getOriginalClasses(), and damina::SVClustering::getProblem().

Referenced by main().

00349                                                                                                                                                                                                                                          {
00350         
00351         std::vector<int> labels = clust->getClustersAssignment();               //      clustering result
00352         std::vector<int> orig_c = clust->getOriginalClasses(classOffset);               //      original classification
00353         
00354         int datasetDimension = clust->getProblem()->l;
00355 
00356         tp.clear();     
00357         fp.clear();     
00358         tn.clear();     
00359         fn.clear();     
00360         
00361         tp.resize(nrClasses);
00362         fp.resize(nrClasses);
00363         tn.resize(nrClasses);
00364         fn.resize(nrClasses);   
00365         
00366         for (unsigned long int i = 0; i < labels.size(); i++) {
00367                 if (orig_c[i] >= nrClasses) continue;
00368                 
00369                 if ((ppc[labels[i]] >= valid_t) && (labels[i] == orig_c[i])) {
00370                         tp[orig_c[i]]++;
00371                 }
00372                 else {
00373                         fp[labels[i]]++;
00374                         fn[orig_c[i]]++;
00375                 }
00376         }
00377         
00378         for (int i = 0; i < nrClasses; i++) {
00379                 tn[i] = datasetDimension - tp[i] - fp[i] - fn[i];
00380         }
00381 }

Here is the call graph for this function:

Here is the caller graph for this function:

int main ( int  argc,
char *  argv[] 
)

The main flow for SVC benchmark.

Load data file in LibSVM format

New SVC instance

Change kernel type, in requested

Kernel generator instance

Change Euclidean Distance, if requested

Perform clustering

Definition at line 97 of file svc.c.

References svc_options::alg_type, svc_options::bsv, svc_options::C, CCL, CG, svc_options::clusters, computeEvaluatingMeasreusPerClass(), countRightAndWrongClassificationsPerClass(), DEFAULT_KERNEL, svc_options::detfile, svc_options::e_dist, svc_options::filename, svc_options::kernel, L1, L2, svc_options::l_offset, parseCommandLineArgs(), printAllPoints(), printUsedCommandLine(), svc_options::q, svc_options::runs, svc_options::softening, usage(), and svc_options::valid_threshold.

00097                                  {
00098 
00099         using namespace std;
00100         using namespace damina;
00101         using namespace boost;
00102         
00103         struct svc_options *opt;
00104         
00105         opt = parseCommandLineArgs(argc, argv); //      parse command line to get parameters
00106         if (opt == NULL) {                                              //      parsing fails
00107                 usage(argv);                                            //      show usage
00108                 exit(1);
00109         }
00110 
00114         LibsvmFileLoader *fl;
00115         string datafile(opt->filename);
00116         struct svm_problem *p = NULL;
00117         cout << "==================================" << endl;
00118         cout << "Loading file" << endl;
00119         cout << "==================================" << endl;
00120         fl = new LibsvmFileLoader(datafile);
00121         timer tfl;
00122         p = fl->load(false);
00123         cout << "File loaded in " << tfl.elapsed() << " secs" << endl;
00124         cout << "Number of objects: " << p->l << endl << endl;
00125         delete fl;
00126 
00127         
00131         SVClustering *svc;                      //      Support Vector Clustering Engine
00132         switch (opt->alg_type) {
00133                 case CG: svc = new SVClustering(opt->C, p); break;              //      classic one
00134                 case CCL: svc = new CCLSVClustering(opt->C, p); break;  //      with CCL
00135                 default:
00136                         svc = new SVClustering(opt->C, p); break;               //      default is classic
00137         }
00138 
00139         if (opt->bsv) {
00140                 svc->useSpecialClusteringPolicyForBSV();
00141         }
00142         else {
00143                 svc->useClassicClusteringPolicyForBSV();
00144         }
00145                 
00149         if (opt->kernel != DEFAULT_KERNEL) {
00150                 svc->setKernelType(opt->kernel);
00151         }
00152 
00156         GKWGenerator *kwg;
00157         if (opt->q == -1) {     //      no kernel width value provided on command line  
00158                 kwg = new GKWGenerator(svc, 0.000000001);
00159         }
00160         else {  //      kernel width value provided on command line
00161                 kwg = new GKWGenerator(svc, 0.000000001, opt->q);
00162         }
00163 
00164         kwg->setGrowthSoftening(opt->softening);
00165 
00169         switch (opt->e_dist) {
00170                 case L1: { svc->setEuclideanDistance(new L1Distance()); kwg->setEuclideanDistance(new L1Distance()); } break;
00171                 case L2: { svc->setEuclideanDistance(new L2Distance()); kwg->setEuclideanDistance(new L2Distance()); } break;
00172                 default: ;
00173         }
00174 
00175         bool target = false;
00176         unsigned long int invalid = 0, runs = 0;
00177         double q, prev_q = -1, prev_C = -1;
00178         
00179         unsigned long int nrClusters, 
00180                                   // nrNonSingletonClusters;
00181                                   nrValidClusters;
00182         
00183         timer partial,  //      timer for intermediate steps
00184                         total;  //      timer for overall time measurement 
00185         
00186         vector<unsigned long int> ppc;
00187         vector<int> tp, fp, tn, fn;
00188         vector<double> precision, recall, f1, contamination, specificity;
00189         double ma, accuracy, overall_time = 0;
00190         
00191         while (true) {
00192                 prev_q = q;
00193                 prev_C = svc->getSoftConstraintEstimate();
00194                 
00195                 runs++;
00196                 if (runs > opt->runs) break;
00197 
00198                 q = kwg->getNextKernelWidthValue();
00199                 if (q == -1) {
00200                         cout << "No more valid kernel width values" << endl << endl;
00201                         break;
00202                 }
00203                 
00204                 if (opt->runs > 1) {
00205                         cout << "==================================" << endl;
00206                         cout << " Finding Kernel Width # " << runs << endl;
00207                         cout << "==================================" << endl;
00208                 }
00209 
00210                 svc->setKernelWidth(q);
00211                 
00215                 total.restart();                                //      start total time counter
00216                 partial.restart();                              //      start domain description process time counter
00217 
00218                 svc->learn();                                   //      domain description process
00219                 cout << "radius = " << svc->getSphereRadius() << endl;
00220                 cout << endl << "Domain description time taken: ";
00221                 cout << partial.elapsed() << " seconds " << endl;               //      stop domain description process time counter
00222 
00223                 partial.restart();                              //      start clustering process time counter
00224 
00225                 svc->clusterize();                              //      cluster labeling process
00226                 cout << "Clustering time taken: ";
00227                 cout << partial.elapsed() << " seconds " << endl;       //      stop clustering process time counter
00228 
00229                 cout << "Total time taken: ";
00230                 overall_time += total.elapsed();
00231                 cout << total.elapsed() << " seconds " << endl << endl;         //      stop total time counter
00232                 
00233                 
00234                 nrClusters = svc->getNumberOfClusters();
00235                 // nrNonSingletonClusters = svc->getNumberOfNonSingletonClusters();
00236                 nrValidClusters = svc->getNumberOfValidClusters(opt->valid_threshold);
00237 
00238                 cout << "Soft Constraint Estimate: " << svc->getSoftConstraintEstimate() << endl;       
00239                 cout << "Current Soft Constraint: " << svc->getSoftConstraint() << endl;
00240                 cout << "Kernel Width: " << q << endl;
00241                 cout << "Number of clusters: " << nrClusters << endl;
00242                 cout << "Number of valid clusters: " << nrValidClusters << endl;
00243                 cout << "Number of non valid clusters (cardinality < " << opt->valid_threshold << "): " << nrClusters - nrValidClusters << endl;
00244                 
00245                 cout << "Points per cluster: " << endl;
00246         
00247                 ppc.clear();    
00248                 ppc = svc->getPointPerCluster();
00249         
00250                 for (unsigned long int i = 0; i < ppc.size(); i++) {
00251                         if (ppc[i] >= opt->valid_threshold) {
00252                                 cout << "\tCluster " << i << ": " << ppc[i] << endl;
00253                         }
00254                 }
00255                 cout << endl << endl;
00256 
00257                 if (opt->detfile != NULL) {
00258                         printAllPoints(svc, opt->l_offset, opt->detfile);
00259                 }
00260 
00261                 countRightAndWrongClassificationsPerClass(svc, tp, fp, tn, fn, opt->clusters, opt->l_offset, ppc, opt->valid_threshold);
00262                 computeEvaluatingMeasreusPerClass(tp, fp, tn, fn, contamination, specificity, precision, recall, f1, ma, accuracy);
00263                 
00264                 for (unsigned long int i = 0; i < tp.size(); i++) {
00265                         cout << "Class " << i << endl << "\tTP: " << tp[i] << "\tFP: " << fp[i] << endl << "\tFN: " << fn[i] << "\tTN: " << tn[i] << endl << endl; 
00266                         cout << "Precision: " << precision[i] << " - Recall: " << recall[i] << " - F1: " << f1[i] << endl;
00267                         cout << "Completeness: " << recall[i] << " - Contamination: " << contamination[i] << endl;
00268                         cout << "Sensitivity: " << recall[i]  << " - Specificity: " << specificity[i] << endl << endl;
00269                 }
00270                 
00271                 if (opt->clusters > 2) {
00272                         cout << "Macroaveraging: " << ma << endl; 
00273                 }
00274                 
00275                 cout << "Accuracy: " << accuracy << endl << endl;
00276 
00277                 // check stop criteria
00278                 if (target) {
00279                         if (nrValidClusters != opt->clusters) {
00280                                 break;
00281                         }
00282                         else {
00283                                 if (nrClusters - nrValidClusters > invalid) {
00284                                         break;
00285                                 }
00286                         }
00287                 }
00288                 else {
00289                         if (nrValidClusters == opt->clusters) {
00290                                 target = true;
00291                                 invalid = nrClusters - nrValidClusters;
00292                         }
00293                 }
00294                         
00295         }
00296 
00297         cout << "==================================" << endl;
00298         cout << "Clustering process finished" << endl;
00299         cout << "==================================" << endl;
00300         if (opt->runs > 1) {
00301                 cout << "Check out the last run and second last run." << endl;
00302                 cout << "Kernel Width Found: " << prev_q << endl;
00303         }   
00304         cout << "Soft Constraint Estimate: " << prev_C << endl;
00305     cout << "Overall time elapsed: " << overall_time << " secs" << endl;
00306 //      cout << "Memory used: " << sizeof(svc) + sizeof(kwg) << endl;
00307         cout << "Original command line: "; 
00308         printUsedCommandLine(argc, argv);
00309 
00310         delete svc;
00311         delete kwg;
00312         free(opt);
00313 }

Here is the call graph for this function:

struct svc_options * parseCommandLineArgs ( int  argc,
char **  argv 
) [read]

Parse the command line arguments and return the options for the SVC running.

Parameters:
argc The number of arguments
argv The arguments
Returns:
The options

Definition at line 446 of file svc.c.

References svc_options::alg_type, svc_options::bsv, BSV_CLASSIC, svc_options::C, svc_options::clusters, DEFAULT_ALG, DEFAULT_C, DEFAULT_EDIST, DEFAULT_KERNEL, DEFAULT_KW, DEFAULT_L_OFFSET, DEFAULT_RUNS, DEFAULT_SOFTENING, DEFAULT_VALID_THRESHOLD, svc_options::detfile, svc_options::e_dist, svc_options::filename, svc_options::kernel, svc_options::l_offset, svc_options::q, svc_options::runs, svc_options::softening, usage(), and svc_options::valid_threshold.

Referenced by main().

00446                                                                 {
00447 
00448         using namespace damina;
00449         
00450         struct svc_options *opt; 
00451         opt = (struct svc_options *)malloc(sizeof(struct svc_options));
00452         
00453         opt->filename = (char *)malloc(2 * sizeof(char));
00454         strcpy(opt->filename, "");
00455         
00456         opt->q = DEFAULT_KW;
00457         opt->C = DEFAULT_C;
00458         opt->alg_type = DEFAULT_ALG;
00459         opt->runs = DEFAULT_RUNS;
00460         opt->l_offset = DEFAULT_L_OFFSET;
00461         opt->valid_threshold = DEFAULT_VALID_THRESHOLD;
00462         opt->softening = DEFAULT_SOFTENING;
00463         opt->detfile = NULL;
00464         opt->e_dist = DEFAULT_EDIST;
00465         opt->kernel = DEFAULT_KERNEL;
00466         opt->bsv = BSV_CLASSIC;
00467 
00468         bool file_f = true;
00469         bool cluster_f = false;
00470         
00471         int ch;
00472         
00473         /* options descriptor */
00474         static struct option longopts[] = {
00475                 { "kernel-width",       required_argument,      NULL,   'q' },
00476                 { "soft-margin",        required_argument,      NULL,   'C' },
00477                 { "num-clusters",       required_argument,      NULL,   'c' },
00478                 { "labeling-alg",       required_argument,      NULL,   'l' },
00479                 { "dataset-file",       required_argument,      NULL,   'f' },
00480                 { "runs",                       required_argument,      NULL,   'r' },
00481                 { "labels-offset",      required_argument,      NULL,   'o' },
00482                 { "valid-threshold",required_argument,  NULL,   't' },
00483                 { "softening",          required_argument,      NULL,   's' },
00484                 { "dump-details",       required_argument,      NULL,   'D' },
00485                 { "euclidean-distance", required_argument,      NULL,   'e' },
00486                 { "kernel-type",        required_argument,      NULL,   'k' },
00487                 { "bsv-clustering",     required_argument,      NULL,   'b' }
00488 
00489         };
00490 
00491         while ((ch = getopt_long(argc, argv, "q:C:c:l:f:r:o:t:s:D:e:k:b:", longopts, NULL)) != -1) {
00492                 switch (ch) {
00493                                 
00494                         case 'b': {
00495                                         opt->bsv = (int)strtol(optarg, (char**)NULL, 10);
00496                                         //if ((errno == EINVAL) || (opt->bsv != (int)BSV_CLASSIC) || (opt->bsv != (int)BSV_ENHANCED))
00497                                         //      err(1, "%s is an invalid value for BSV Clustering Policy. It must be an inter value in [%d,%d].", optarg, BSV_CLASSIC, BSV_ENHANCED);
00498                                         break;
00499                         }
00500 
00501 
00502                         case 'k': {
00503                                         opt->kernel = (int)strtol(optarg, (char**)NULL, 10);
00504                                         //if ((errno == EINVAL) || (opt->kernel != KernelType::gaussian) || (opt->kernel != KernelType::laplacian))
00505                                         //      err(1, "%s is an invalid value for Kernel Type. It must be %d or %d.", optarg, KernelType::gaussian, KernelType::laplacian);
00506                                         break;
00507                         }
00508 
00509                         case 'e': {
00510                                         opt->e_dist = (int)strtol(optarg, (char**)NULL, 10);
00511                                         if ((errno == EINVAL) || (opt->e_dist < 1) || (opt->e_dist > 2))
00512                                                 err(1, "%s is an invalid value for Euclidean Distance type. It must be an inter value in [1,2].", optarg);
00513                                         break;
00514                         }
00515 
00516                         case 'q': {
00517                                         opt->q = (double)strtod(optarg, (char**)NULL);
00518                                         if ((errno == EINVAL) || (opt->q <= 0))
00519                                                 err(1, "%s is an invalid value for Kernel Width. It must be a real value greater than 0.", optarg);
00520                                         break;
00521                         }
00522                         
00523                         case 'C': {
00524                                         opt->C = (double)strtod(optarg, (char**)NULL);
00525                                         if ((errno == EINVAL) || (opt->C <= 0))
00526                                                 err(1, "%s is an invalid value for Soft Margin Constant. It must be a real value greater than 0.", optarg);
00527                                         break;
00528                         }
00529                         
00530                         case 'c': {
00531                                         opt->clusters = (int)strtol(optarg, (char**)NULL, 10);
00532                                         if ((errno == EINVAL) || (opt->clusters <= 0))
00533                                                 err(1, "%s is an invalid value for Number of Clusters. It must be an integer value greater than 0.", optarg);
00534                                         
00535                                         cluster_f = true;
00536                                         break;
00537                         }
00538                         
00539                         case 'r': {
00540                                         opt->runs = (int)strtol(optarg, (char**)NULL, 10);
00541                                         if ((errno == EINVAL) || (opt->runs <= 0))
00542                                                 err(1, "%s is an invalid value for Number of Runs. It must be an integer value greater than 0.", optarg);
00543                                         break;
00544                         }
00545                         
00546                         case 's': {
00547                                         opt->softening = (double)strtod(optarg, (char**)NULL);
00548                                         if ((errno == EINVAL) || (opt->softening <= 0))
00549                                                 err(1, "%s is an invalid value for the Softening Constant. It must be an real value greater than 0.", optarg);
00550                                         break;
00551                         }
00552                 
00553                         case 't': {
00554                                         opt->valid_threshold = (int)strtol(optarg, (char**)NULL, 10);
00555                                         if ((errno == EINVAL) || (opt->valid_threshold <= 0))
00556                                                 err(1, "%s is an invalid value for Minimum Clusters Cardinality. It must be an integer value greater than 0.", optarg);
00557                                         break;
00558                         }
00559 
00560                         case 'o': {
00561                                         opt->l_offset = (int)strtol(optarg, (char**)NULL, 10);
00562                                         if ((errno == EINVAL) || (opt->l_offset < 0))
00563                                                 err(1, "%s is an invalid value for Labels Offset. It must be an integer value greater than or equal to 0.", optarg);
00564                                         break;
00565                         }
00566                         
00567                         case 'l': {
00568                                         opt->alg_type = (int)strtol(optarg, (char**)NULL, 10);
00569                                         if ((errno == EINVAL) || (opt->alg_type > 1) || (opt->alg_type < 0))
00570                                                 err(1, "%s is an invalid value for Cluster Labeling algorithm. It must be 0 or 1.", optarg);
00571                                         break;
00572                         }
00573                         
00574                         case 'f': {
00575                                         if ((open(optarg, O_RDONLY, 0)) == -1) {
00576                         err(1, "Unable to open %s", optarg);
00577                         break;
00578                                         }                       
00579 
00580                                         free(opt->filename);    
00581                                         opt->filename = (char *)malloc((strlen(optarg) + 1) * sizeof(char));
00582                                         strcpy(opt->filename, optarg);
00583                                                                                         
00584                                         file_f = true;
00585                                         break;
00586                         }
00587                         
00588                         case 'D': {
00589                                         free(opt->detfile);
00590                                         opt->detfile = (char *)malloc((strlen(optarg) + 1) * sizeof(char));
00591                                         strcpy(opt->detfile, optarg);
00592                                         break;
00593                         }
00594                         
00595                         default: {
00596                                 usage(argv);
00597                         }
00598                 }
00599         }
00600         argc -= optind;
00601         argv += optind; 
00602 
00603         if ((file_f == false) || (cluster_f == false)) {
00604                 return NULL;
00605         }
00606         return opt;
00607 }

Here is the call graph for this function:

Here is the caller graph for this function:

void printAllPoints ( damina::SVClustering clust,
unsigned long int  classOffset,
char *  filename 
)

Print all points with their original classes and cluster assigned.

Parameters:
clust The involved Support Vector Clustering engine
classOffset The label of the first class in the original classification

Definition at line 322 of file svc.c.

References damina::SVClustering::getClustersAssignment(), and damina::SVClustering::getOriginalClasses().

Referenced by main().

00322                                                                                               {
00323         
00324         filebuf fb;
00325         fb.open(filename, ios::app);
00326         ostream out(&fb);
00327                 
00328         std::vector<int> labels = clust->getClustersAssignment();               //      clustering result
00329         std::vector<int> orig_c = clust->getOriginalClasses((int)classOffset);          //      original classification
00330 
00331         for (unsigned long int i = 0; i < labels.size(); i++) {
00332                 out << "Point #" << i << " - " << "Cluster: " << labels[i] << ", Class: " << orig_c[i] << endl;
00333         }
00334         out << endl;
00335         fb.close();
00336 }

Here is the call graph for this function:

Here is the caller graph for this function:

void printUsedCommandLine ( int  argc,
char *  argv[] 
)

Print the whole command line used to invoke the program.

Parameters:
opt The result of the command line parsing

Definition at line 646 of file svc.c.

Referenced by main().

00646                                                   {
00647         cout << argv[0] << " ";
00648 
00649         for (int i = 1; i <= argc; i++) {
00650                 cout << argv[i] << " ";
00651         }
00652         cout << endl << endl;
00653 }

Here is the caller graph for this function:

void usage ( char **  argv  ) 

Print the usage on the standard error.

Parameters:
argv The argv parameter passed on the comand line

Definition at line 614 of file svc.c.

References BSV_CLASSIC, BSV_ENHANCED, CCL, CG, DEFAULT_ALG, DEFAULT_C, DEFAULT_L_OFFSET, DEFAULT_RUNS, DEFAULT_SOFTENING, DEFAULT_VALID_THRESHOLD, L1, and L2.

Referenced by main(), and parseCommandLineArgs().

00614                         {
00615         using namespace damina;
00616         
00617         cerr << endl << "Usage: " << argv[0] << " -q (0,+inf) -C (0,+inf) -b [" << BSV_CLASSIC << "," << BSV_ENHANCED << "] -k" << KernelType::gaussian << "," << KernelType::laplacian << " -e [1,2] -c [1, N] -l [0-1] -f <dataset-filename> -D <detail-filename> -r [1, N] -o [0, N] -t [1,N] -s (0,1]" << endl << endl;
00618         cerr << "\t -q: the kernel width value must be a real value greater than zero - DEFAULT: auto" << endl; 
00619         cerr << "\t -C: the soft margin constant must be a real value greater than zero - DEFAULT: " << DEFAULT_C << endl;
00620         cerr << "\t -b: the BSV Clustering Policy - DEFAULT: " << BSV_ENHANCED << endl;
00621                 cerr << "\t\t " << BSV_CLASSIC << " for classic policy" << endl;
00622                 cerr << "\t\t " << BSV_ENHANCED << " for enhanced policy (aim to get better results)" << endl;
00623         cerr << "\t -k: the Kernel Type - DEFAULT: " << KernelType::gaussian << endl;
00624                 cerr << "\t\t " << KernelType::gaussian << " for Gaussian Kernel (K(x,y) = exp(-gamma * ||x-y||^2))" << endl;
00625                 cerr << "\t\t " << KernelType::laplacian << " for Laplacian Kernel (K(x,y) = exp(-gamma*|x-y|_1)) " << endl;
00626         cerr << "\t -e: the Euclidean Measure to use in vector-distance measurement - DEFAULT: " << L2 << endl;
00627                 cerr << "\t\t " << L1 << " for L1 distance (sum |x_i - y_i|) " << endl;
00628                 cerr << "\t\t " << L2 << " for L2 distance (sum sqrt(||x_i - y_i||^2)) " << endl;
00629         cerr << "\t -l: the cluster labeling algorithm type - DEFAULT: " << DEFAULT_ALG << endl;
00630                 cerr << "\t\t " << CCL << " for Cone Cluster Labeling" << endl;
00631                 cerr << "\t\t " << CG << " for Complete Graph Cluster Labeling" << endl;
00632         cerr << "\t -c: the number of cluster to request must be an integer value greater than zero" << endl;
00633         cerr << "\t -f: the filename of the dataset file, in LIBSVM format" << endl;
00634         cerr << "\t -D: the filename of the file to store detailed clusters assignment" << endl;
00635         cerr << "\t -r: the number of runs to perform - DEFAULT: " << DEFAULT_RUNS      << endl;
00636         cerr << "\t -o: the labels offset (i.e. the label of the first class) - DEFAULT: " << DEFAULT_L_OFFSET << endl;
00637         cerr << "\t -s: the softening constant for kernel width generation - DEFAULT: " << DEFAULT_SOFTENING << endl;
00638         cerr << "\t -t: the minimum clusters cardinality - DEFAULT: " << DEFAULT_VALID_THRESHOLD << endl << endl;
00639 }

Here is the caller graph for this function:


Generated on Mon Sep 24 22:26:53 2007 for SVClustering by  doxygen 1.5.2