CCLSVClustering.cpp

Go to the documentation of this file.
00001 #include "CCLSVClustering.h"
00002 
00003 namespace damina
00004 {
00005 
00006         
00012         CCLSVClustering::CCLSVClustering(DataSet *train): SVClustering(train) {
00013         }
00014         
00020         CCLSVClustering::CCLSVClustering(struct svm_problem *p): SVClustering(p) {
00021                 
00022         }
00023         
00030         CCLSVClustering::CCLSVClustering(double C, DataSet *train): SVClustering(C, train) {
00031                 
00032         }
00033         
00040         CCLSVClustering::CCLSVClustering(double C, struct svm_problem *p): SVClustering(C, p) {
00041                 
00042         }
00043         
00051         CCLSVClustering::CCLSVClustering(double C, double w, DataSet *train): SVClustering(C, w, train) {
00052                 
00053         }
00054         
00062         CCLSVClustering::CCLSVClustering(double C, double w, struct svm_problem *p): SVClustering(C, w, p) {
00063                 
00064         }
00065         
00066         
00071         CCLSVClustering::~CCLSVClustering()     {
00072                  
00073         }
00074         
00092         void CCLSVClustering::clusterize() {
00093                 this->separateClusters();
00094                 //this->experimentalSeparateClusters();
00095         }
00096 
00097         
00098         /*      PRIVATE METHODS FOLLOW  ************************************************************************/
00099         
00108         void CCLSVClustering::calculateZeta() {
00109                 double R = getSphereRadius();
00110                 double q = getKernelWidth();
00111                 
00112                 //double sqrt_1_R = sqrt(1 - R*R);
00113                 double sqrt_1_R = sqrt(1 - R);
00114                 double ln = log(sqrt_1_R);
00115 
00116                 this->zeta = sqrt(-(ln/q));
00117                 //this->zeta = -(ln/q);
00118         }
00119         
00120         /*      PROTECTED METHODS FOLLOW        ************************************************************************/
00121         
00122                 
00140         void CCLSVClustering::separateClusters() {
00141                 
00142                 // compute Zeta
00143                 calculateZeta();
00144                 
00145                 int i,j;                //      to run over the SVs 
00146                         
00147                 int nSV = this->trainer->getModel()->l - this->trainer->getModel()->lbounded;
00148                 int *SV_index = this->trainer->getModel()->SV_index;
00149                 
00150                 
00151                 /*      original dataset */
00152                 struct svm_node **x = this->trainer->getProblem()->x;
00153                 double dist;
00154                 
00155                 
00156                 /* arithmetic mean of all betas */
00157                 double betas_average = 0;
00158                 double beta_max = -1;
00159                 
00160                 /*      adj matrix only for real SVs */
00161                 UndirectedGraph adj(nSV);
00162                 for (i = 0; i < nSV; i++) {
00163                         betas_average += beta(i, false);                                                        //      sum all betas
00164                         if (beta(i, false) > beta_max) beta_max = beta(i, false);       //      max beta
00165 
00166                         for (j = 0; j < nSV; j++) {
00167                                 
00168                                 //      trivial: each node is connected to itself
00169                                 if (i == j) {
00170                                         add_edge(i, j, adj);
00171                                         continue;
00172                                 }
00173                                 
00174                                 // avoid to set the same edge more than once
00175                                 if (edge(i, j, adj).second) continue;
00176                                 
00177                                 
00178                                 dist = eucDist->calculateDistance(x[SV_index[i]], x[SV_index[j]]);
00179                                 
00180                                 //      zeta is the radius of the hyperspheres in data space centered in the SVs
00181                                 //      (all hyperspheres have the same radius)
00182                                 //      Two SVs are connected if their hyperspheres overlap, i.e. if the 
00183                                 //      distance between two SVs are at most 2*zeta (overlap in 1 point)
00184                                 if (dist <= 2*zeta) {             
00185                                         add_edge(i, j, adj);
00186                                         add_edge(j, i, adj);
00187                                 }
00188                                 else {
00189                                         remove_edge(i, j, adj);
00190                                         remove_edge(j, i, adj);
00191                                 }
00192                         }
00193                 }
00194                 betas_average = betas_average / nSV;    //      divide the sum of all betas by numer of betas => arithmetic mean
00195                 
00196                 
00197                 // clustering SVs finding connected components of the adj matrix just built
00198                 std::vector<int> svLabels(nSV);
00199                 connected_components(adj, &svLabels[0]);
00200                 
00201                 
00202                 // total points
00203                 int N = this->trainer->getProblem()->l;
00204                 
00205                 // total labels
00206                 labels.resize(N);
00207 
00208                 double mindist;
00209                 int minsvindex;
00210                 
00211                 C_estimation = 10.0 * getSoftConstraint() / N;  
00212 
00213                 // clustering points other than SVs
00214                 // for each point in the input data set 
00215                 for (i = 0; i < N; i++) {
00216                         mindist = MAXFLOAT; 
00217                         minsvindex = -1;
00218                         for (j = 0; j < nSV; j++) {     // we find the nearest SV
00219                         
00220                                 if (i == SV_index[j]) { //      trivial if the point is the current SV
00221                                         minsvindex = j;
00222                                         break;
00223                                 }
00224                                 
00225                                 dist = eucDist->calculateDistance(x[SV_index[j]], x[i]);
00226                                 if (dist < mindist) {   //      if the distance between a point and an SV is less or equal than zeta
00227                                                                                 //      and it is also less than the last distance found 
00228                                 //if ((dist < mindist) && (dist <= zeta)) {
00229                                         mindist = dist;
00230                                         minsvindex = j;
00231                                 }  
00232                         }
00233 
00234                         labels[i] = svLabels[minsvindex];
00235                         /*if (minsvindex == -1) {
00236                                 mindist = MAXFLOAT; 
00237                                 minsvindex = -1;
00238                                 for (j = 0; j < N; j++) {
00239                                         if (i != j) {
00240                                                 dist = eucDist->calculateDistance(x[j], x[i]);
00241                                                 if (dist < mindist) {
00242                                                         mindist = dist;
00243                                                         minsvindex = j;
00244                                                 }
00245                                         }       
00246                                 }
00247                                 labels[i] = labels[minsvindex];
00248                         }
00249                         else {
00250                                 labels[i] = svLabels[minsvindex];
00251                         }*/
00252                 }
00253 
00254                 if (usingSpecialClusteringPolicyForBSV()) {
00255                                 // try to cluster BSV points
00256                                 int nBSV = this->trainer->getModel()->lbounded;
00257                                 int *BSV_index = this->trainer->getModel()->BSV_index;
00258                                 
00259                                 for (i = 0; i < nBSV; i++) {
00260                                         mindist = MAXFLOAT; 
00261                                         minsvindex = 0;
00262                                         for (j = 0; j < N; j++) {
00263                                                 if (j == BSV_index[i]) continue;        //      ignore if the point is the current BSV
00264                                                 if (beta(j, true) == 1) continue;       //      ignore all BSVs
00265                                                 dist = eucDist->calculateDistance(x[BSV_index[i]],x[j]);
00266                                                 if (dist < mindist) {
00267                                                         mindist = dist;
00268                                                         minsvindex = j;
00269                                                 }
00270                                         }
00271                                         labels[BSV_index[i]] = labels[minsvindex];
00272                                 }
00273                 }
00274         }
00275         
00276         
00277         void CCLSVClustering::experimentalSeparateClusters() {
00278                 // compute Zeta
00279                 calculateZeta();
00280 
00281                 double ker_zeta = exp(-1 * getKernelWidth() * zeta);
00282                 
00283                 int i,j;                //      to run over the SVs 
00284                         
00285                 int nSV = this->trainer->getModel()->l - this->trainer->getModel()->lbounded;
00286                 int *SV_index = this->trainer->getModel()->SV_index;
00287                 
00288                 
00289                 /*      original dataset */
00290                 struct svm_node **x = this->trainer->getProblem()->x;
00291                 double dist;
00292                 
00293                 
00294                 /* arithmetic mean of all betas */
00295                 double betas_average = 0;
00296                 double beta_max = -1;
00297                 
00298                 /*      adj matrix only for real SVs */
00299                 UndirectedGraph adj(nSV);
00300                 for (i = 0; i < nSV; i++) {
00301                         betas_average += beta(i, false);                                                        //      sum all betas
00302                         if (beta(i, false) > beta_max) beta_max = beta(i, false);       //      max beta
00303 
00304                         for (j = 0; j < nSV; j++) {
00305                                 
00306                                 //      trivial: each node is connected to itself
00307                                 if (i == j) {
00308                                         add_edge(i, j, adj);
00309                                         continue;
00310                                 }
00311                                 
00312                                 // avoid to set the same edge more than once
00313                                 if (edge(i, j, adj).second) continue;
00314                                 
00315                                 
00316                                 //dist = eucDist->calculateDistance(x[SV_index[i]], x[SV_index[j]]);
00317                                 dist = 2 - 2 * kernelfunction(x[SV_index[i]], x[SV_index[j]], *(getParameters()));
00318 
00319                                 //      zeta is the radius of the hyperspheres in data space centered in the SVs
00320                                 //      (all hyperspheres have the same radius)
00321                                 //      Two SVs are connected if their hyperspheres overlap, i.e. if the 
00322                                 //      distance between two SVs are at most 2*zeta (overlap in 1 point)
00323                                 if (dist <=  2 * ker_zeta) {              
00324                                         add_edge(i, j, adj);
00325                                         add_edge(j, i, adj);
00326                                 }
00327                                 else {
00328                                         remove_edge(i, j, adj);
00329                                         remove_edge(j, i, adj);
00330                                 }
00331                         }
00332                 }
00333                 betas_average = betas_average / nSV;    //      divide the sum of all betas by numer of betas => arithmetic mean
00334                 
00335                 
00336                 // clustering SVs finding connected components of the adj matrix just built
00337                 std::vector<int> svLabels(nSV);
00338                 connected_components(adj, &svLabels[0]);
00339                 
00340                 
00341                 // total points
00342                 int N = this->trainer->getProblem()->l;
00343                 
00344                 // total labels
00345                 labels.resize(N);
00346 
00347                 double mindist;
00348                 int minsvindex;
00349                 
00350                 C_estimation = 10.0 * getSoftConstraint() / N;  
00351 
00352                 // clustering points other than SVs
00353                 // for each point in the input data set 
00354                 for (i = 0; i < N; i++) {
00355                         mindist = MAXFLOAT; 
00356                         minsvindex = -1;
00357                         for (j = 0; j < nSV; j++) {     // we find the nearest SV
00358                         
00359                                 if (i == SV_index[j]) { //      trivial if the point is the current SV
00360                                         minsvindex = j;
00361                                         break;
00362                                 }
00363                                 
00364                                 //dist = eucDist->calculateDistance(x[SV_index[j]], x[i]);
00365 
00366                                 dist = 2 - 2 * kernelfunction(x[SV_index[j]], x[i], *(getParameters()));
00367 
00368                                 //if (dist < mindist) { //      if the distance between a point and an SV is less or equal than zeta
00369                                                                                 //      and it is also less than the last distance found 
00370                                 if ((dist < mindist) && (dist <= ker_zeta)) {
00371                                         mindist = dist;
00372                                         minsvindex = j;
00373                                 }  
00374                         }
00375 
00376                         //labels[i] = svLabels[minsvindex];
00377                         if (minsvindex == -1) {
00378                                 mindist = MAXFLOAT; 
00379                                 minsvindex = -1;
00380                                 for (j = 0; j < N; j++) {
00381                                         if (i != j) {
00382                                                 dist = 2 - 2 * kernelfunction(x[i], x[j], *(getParameters()));
00383                                                 //dist = eucDist->calculateDistance(x[j], x[i]);
00384                                                 if (dist < mindist) {
00385                                                         mindist = dist;
00386                                                         minsvindex = j;
00387                                                 }
00388                                         }       
00389                                 }
00390                                 labels[i] = labels[minsvindex];
00391                         }
00392                         else {
00393                                 labels[i] = svLabels[minsvindex];
00394                         }
00395                 }
00396 
00397                 if (usingSpecialClusteringPolicyForBSV()) {
00398                                 // try to cluster BSV points
00399                                 int nBSV = this->trainer->getModel()->lbounded;
00400                                 int *BSV_index = this->trainer->getModel()->BSV_index;
00401                                 
00402                                 for (i = 0; i < nBSV; i++) {
00403                                         mindist = MAXFLOAT; 
00404                                         minsvindex = 0;
00405                                         for (j = 0; j < N; j++) {
00406                                                 if (j == BSV_index[i]) continue;        //      ignore if the point is the current BSV
00407                                                 if (beta(j, true) == 1) continue;       //      ignore all BSVs
00408                                                 dist = eucDist->calculateDistance(x[BSV_index[i]],x[j]);
00409                                                 if (dist < mindist) {
00410                                                         mindist = dist;
00411                                                         minsvindex = j;
00412                                                 }
00413                                         }
00414                                         labels[BSV_index[i]] = labels[minsvindex];
00415                                 }
00416                 }
00417         
00418         }
00419 }

Generated on Mon Sep 24 22:26:48 2007 for SVClustering by  doxygen 1.5.2