00001 #include "CCLSVClustering.h"
00002
00003 namespace damina
00004 {
00005
00006
00012 CCLSVClustering::CCLSVClustering(DataSet *train): SVClustering(train) {
00013 }
00014
00020 CCLSVClustering::CCLSVClustering(struct svm_problem *p): SVClustering(p) {
00021
00022 }
00023
00030 CCLSVClustering::CCLSVClustering(double C, DataSet *train): SVClustering(C, train) {
00031
00032 }
00033
00040 CCLSVClustering::CCLSVClustering(double C, struct svm_problem *p): SVClustering(C, p) {
00041
00042 }
00043
00051 CCLSVClustering::CCLSVClustering(double C, double w, DataSet *train): SVClustering(C, w, train) {
00052
00053 }
00054
00062 CCLSVClustering::CCLSVClustering(double C, double w, struct svm_problem *p): SVClustering(C, w, p) {
00063
00064 }
00065
00066
00071 CCLSVClustering::~CCLSVClustering() {
00072
00073 }
00074
00092 void CCLSVClustering::clusterize() {
00093 this->separateClusters();
00094
00095 }
00096
00097
00098
00099
00108 void CCLSVClustering::calculateZeta() {
00109 double R = getSphereRadius();
00110 double q = getKernelWidth();
00111
00112
00113 double sqrt_1_R = sqrt(1 - R);
00114 double ln = log(sqrt_1_R);
00115
00116 this->zeta = sqrt(-(ln/q));
00117
00118 }
00119
00120
00121
00122
00140 void CCLSVClustering::separateClusters() {
00141
00142
00143 calculateZeta();
00144
00145 int i,j;
00146
00147 int nSV = this->trainer->getModel()->l - this->trainer->getModel()->lbounded;
00148 int *SV_index = this->trainer->getModel()->SV_index;
00149
00150
00151
00152 struct svm_node **x = this->trainer->getProblem()->x;
00153 double dist;
00154
00155
00156
00157 double betas_average = 0;
00158 double beta_max = -1;
00159
00160
00161 UndirectedGraph adj(nSV);
00162 for (i = 0; i < nSV; i++) {
00163 betas_average += beta(i, false);
00164 if (beta(i, false) > beta_max) beta_max = beta(i, false);
00165
00166 for (j = 0; j < nSV; j++) {
00167
00168
00169 if (i == j) {
00170 add_edge(i, j, adj);
00171 continue;
00172 }
00173
00174
00175 if (edge(i, j, adj).second) continue;
00176
00177
00178 dist = eucDist->calculateDistance(x[SV_index[i]], x[SV_index[j]]);
00179
00180
00181
00182
00183
00184 if (dist <= 2*zeta) {
00185 add_edge(i, j, adj);
00186 add_edge(j, i, adj);
00187 }
00188 else {
00189 remove_edge(i, j, adj);
00190 remove_edge(j, i, adj);
00191 }
00192 }
00193 }
00194 betas_average = betas_average / nSV;
00195
00196
00197
00198 std::vector<int> svLabels(nSV);
00199 connected_components(adj, &svLabels[0]);
00200
00201
00202
00203 int N = this->trainer->getProblem()->l;
00204
00205
00206 labels.resize(N);
00207
00208 double mindist;
00209 int minsvindex;
00210
00211 C_estimation = 10.0 * getSoftConstraint() / N;
00212
00213
00214
00215 for (i = 0; i < N; i++) {
00216 mindist = MAXFLOAT;
00217 minsvindex = -1;
00218 for (j = 0; j < nSV; j++) {
00219
00220 if (i == SV_index[j]) {
00221 minsvindex = j;
00222 break;
00223 }
00224
00225 dist = eucDist->calculateDistance(x[SV_index[j]], x[i]);
00226 if (dist < mindist) {
00227
00228
00229 mindist = dist;
00230 minsvindex = j;
00231 }
00232 }
00233
00234 labels[i] = svLabels[minsvindex];
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252 }
00253
00254 if (usingSpecialClusteringPolicyForBSV()) {
00255
00256 int nBSV = this->trainer->getModel()->lbounded;
00257 int *BSV_index = this->trainer->getModel()->BSV_index;
00258
00259 for (i = 0; i < nBSV; i++) {
00260 mindist = MAXFLOAT;
00261 minsvindex = 0;
00262 for (j = 0; j < N; j++) {
00263 if (j == BSV_index[i]) continue;
00264 if (beta(j, true) == 1) continue;
00265 dist = eucDist->calculateDistance(x[BSV_index[i]],x[j]);
00266 if (dist < mindist) {
00267 mindist = dist;
00268 minsvindex = j;
00269 }
00270 }
00271 labels[BSV_index[i]] = labels[minsvindex];
00272 }
00273 }
00274 }
00275
00276
00277 void CCLSVClustering::experimentalSeparateClusters() {
00278
00279 calculateZeta();
00280
00281 double ker_zeta = exp(-1 * getKernelWidth() * zeta);
00282
00283 int i,j;
00284
00285 int nSV = this->trainer->getModel()->l - this->trainer->getModel()->lbounded;
00286 int *SV_index = this->trainer->getModel()->SV_index;
00287
00288
00289
00290 struct svm_node **x = this->trainer->getProblem()->x;
00291 double dist;
00292
00293
00294
00295 double betas_average = 0;
00296 double beta_max = -1;
00297
00298
00299 UndirectedGraph adj(nSV);
00300 for (i = 0; i < nSV; i++) {
00301 betas_average += beta(i, false);
00302 if (beta(i, false) > beta_max) beta_max = beta(i, false);
00303
00304 for (j = 0; j < nSV; j++) {
00305
00306
00307 if (i == j) {
00308 add_edge(i, j, adj);
00309 continue;
00310 }
00311
00312
00313 if (edge(i, j, adj).second) continue;
00314
00315
00316
00317 dist = 2 - 2 * kernelfunction(x[SV_index[i]], x[SV_index[j]], *(getParameters()));
00318
00319
00320
00321
00322
00323 if (dist <= 2 * ker_zeta) {
00324 add_edge(i, j, adj);
00325 add_edge(j, i, adj);
00326 }
00327 else {
00328 remove_edge(i, j, adj);
00329 remove_edge(j, i, adj);
00330 }
00331 }
00332 }
00333 betas_average = betas_average / nSV;
00334
00335
00336
00337 std::vector<int> svLabels(nSV);
00338 connected_components(adj, &svLabels[0]);
00339
00340
00341
00342 int N = this->trainer->getProblem()->l;
00343
00344
00345 labels.resize(N);
00346
00347 double mindist;
00348 int minsvindex;
00349
00350 C_estimation = 10.0 * getSoftConstraint() / N;
00351
00352
00353
00354 for (i = 0; i < N; i++) {
00355 mindist = MAXFLOAT;
00356 minsvindex = -1;
00357 for (j = 0; j < nSV; j++) {
00358
00359 if (i == SV_index[j]) {
00360 minsvindex = j;
00361 break;
00362 }
00363
00364
00365
00366 dist = 2 - 2 * kernelfunction(x[SV_index[j]], x[i], *(getParameters()));
00367
00368
00369
00370 if ((dist < mindist) && (dist <= ker_zeta)) {
00371 mindist = dist;
00372 minsvindex = j;
00373 }
00374 }
00375
00376
00377 if (minsvindex == -1) {
00378 mindist = MAXFLOAT;
00379 minsvindex = -1;
00380 for (j = 0; j < N; j++) {
00381 if (i != j) {
00382 dist = 2 - 2 * kernelfunction(x[i], x[j], *(getParameters()));
00383
00384 if (dist < mindist) {
00385 mindist = dist;
00386 minsvindex = j;
00387 }
00388 }
00389 }
00390 labels[i] = labels[minsvindex];
00391 }
00392 else {
00393 labels[i] = svLabels[minsvindex];
00394 }
00395 }
00396
00397 if (usingSpecialClusteringPolicyForBSV()) {
00398
00399 int nBSV = this->trainer->getModel()->lbounded;
00400 int *BSV_index = this->trainer->getModel()->BSV_index;
00401
00402 for (i = 0; i < nBSV; i++) {
00403 mindist = MAXFLOAT;
00404 minsvindex = 0;
00405 for (j = 0; j < N; j++) {
00406 if (j == BSV_index[i]) continue;
00407 if (beta(j, true) == 1) continue;
00408 dist = eucDist->calculateDistance(x[BSV_index[i]],x[j]);
00409 if (dist < mindist) {
00410 mindist = dist;
00411 minsvindex = j;
00412 }
00413 }
00414 labels[BSV_index[i]] = labels[minsvindex];
00415 }
00416 }
00417
00418 }
00419 }