00001 #include "SVClustering.h"
00002 #include "L2Distance.h"
00003
00004 namespace damina
00005 {
00006
00012 SVClustering::SVClustering(DataSet *train) {
00013 this->eucDist = new L2Distance();
00014 this->trainer = new OneClassSVM();
00015 this->trainer->setKernel(KernelType::gaussian);
00016 this->trainer->setCacheSize(100);
00017 this->trainer->setTolerance(0.000001);
00018 this->trainer->setTrainingSet(train);
00019 this->trainer->setKernelWidth(initialKernelWidth());
00020 this->quadratic = NULL;
00021 this->clusterizeBSV = true;
00022 }
00023
00029 SVClustering::SVClustering(struct svm_problem *p) {
00030 this->eucDist = new L2Distance();
00031 this->trainer = new OneClassSVM();
00032 this->trainer->setKernel(KernelType::gaussian);
00033 this->trainer->setCacheSize(100);
00034 this->trainer->setTolerance(0.000001);
00035 this->trainer->setProblem(p);
00036 this->trainer->setKernelWidth(initialKernelWidth());
00037 this->quadratic = NULL;
00038 this->clusterizeBSV = true;
00039 }
00040
00047 SVClustering::SVClustering(double C, DataSet *train) {
00048 this->eucDist = new L2Distance();
00049 this->trainer = new OneClassSVM();
00050 this->trainer->setKernel(KernelType::gaussian);
00051 this->trainer->setCacheSize(100);
00052 this->trainer->setTolerance(0.000001);
00053
00054 this->trainer->setTrainingSet(train);
00055
00056 this->trainer->setKernelWidth(initialKernelWidth());
00057
00058
00059 this->trainer->setNU((1 / (train->getSize() * C)));
00060 this->C = C;
00061 this->quadratic = NULL;
00062 this->clusterizeBSV = true;
00063 }
00064
00071 SVClustering::SVClustering(double C, struct svm_problem *prob) {
00072 this->eucDist = new L2Distance();
00073 this->trainer = new OneClassSVM();
00074 this->trainer->setKernel(KernelType::gaussian);
00075 this->trainer->setCacheSize(100);
00076 this->trainer->setTolerance(0.000001);
00077
00078 this->trainer->setProblem(prob);
00079
00080 this->trainer->setKernelWidth(initialKernelWidth());
00081
00082 this->trainer->setNU((1 / (prob->l * C)));
00083 this->C = C;
00084 this->quadratic = NULL;
00085 this->clusterizeBSV = true;
00086 }
00087
00095 SVClustering::SVClustering(double C, double w, DataSet *train) {
00096 this->eucDist = new L2Distance();
00097 this->trainer = new OneClassSVM();
00098 this->trainer->setKernel(KernelType::gaussian);
00099 this->trainer->setCacheSize(100);
00100 this->trainer->setTolerance(0.000001);
00101
00102 this->trainer->setTrainingSet(train);
00103
00104 this->trainer->setNU((1 / (train->getSize() * C)));
00105 this->C = C;
00106
00107 this->trainer->setKernelWidth(w);
00108 this->quadratic = NULL;
00109 this->clusterizeBSV = true;
00110 }
00111
00119 SVClustering::SVClustering(double C, double w, struct svm_problem *prob) {
00120 this->eucDist = new L2Distance();
00121 this->trainer = new OneClassSVM();
00122 this->trainer->setKernel(KernelType::gaussian);
00123 this->trainer->setCacheSize(100);
00124 this->trainer->setTolerance(0.000001);
00125
00126 this->trainer->setProblem(prob);
00127
00128 this->trainer->setNU((1 / (prob->l * C)));
00129 this->C = C;
00130
00131 this->trainer->setKernelWidth(w);
00132 this->quadratic = NULL;
00133 this->clusterizeBSV = true;
00134 }
00135
00140 SVClustering::~SVClustering() {
00141 delete trainer;
00142 delete eucDist;
00143
00144 }
00145
00149 void SVClustering::learn() {
00150 free(quadratic);
00151 quadratic = NULL;
00152 this->numberOfClusters = -1;
00153 this->pointPerCluster.clear();
00154 this->labels.clear();
00155 this->trainer->learn();
00156 }
00157
00163 void SVClustering::learn(DataSet *train) {
00164 free(quadratic);
00165 quadratic = NULL;
00166 this->numberOfClusters = -1;
00167 this->pointPerCluster.clear();
00168 this->labels.clear();
00169 this->trainer->learn(train);
00170 }
00171
00177 void SVClustering::learn(struct svm_problem *p) {
00178 free(quadratic);
00179 quadratic = NULL;
00180 this->numberOfClusters = -1;
00181 this->pointPerCluster.clear();
00182 this->labels.clear();
00183 this->trainer->learn(p);
00184 }
00185
00186
00187
00193 double SVClustering::getSphereRadius() {
00194 calculateSphereRadius();
00195 return sphereRadius;
00196 }
00197
00203 void SVClustering::setTrainingSet(DataSet *t) {
00204 this->trainer->setTrainingSet(t);
00205 }
00206
00212 DataSet *SVClustering::getTrainingSet() {
00213 return this->trainer->getTrainingSet();
00214 }
00215
00216
00222 void SVClustering::setProblem(struct svm_problem *p) {
00223 this->trainer->setProblem(p);
00224 }
00225
00231 struct svm_problem *SVClustering::getProblem() {
00232 return this->trainer->getProblem();
00233 }
00234
00235
00241 struct svm_model *SVClustering::getModel() {
00242 return this->trainer->getModel();
00243 }
00244
00250 struct svm_parameter *SVClustering::getParameters() {
00251 return trainer->getParameters();
00252 }
00253
00265 double SVClustering::initialKernelWidth() {
00266 struct svm_problem *p = this->trainer->getProblem();
00267 int N = this->trainer->getProblem()->l;
00268
00269 double max = eucDist->calculateDistance(p->x[0], p->x[1]);
00270 double dist;
00271
00272 for (int i = 0; i < N; i++) {
00273 for (int j = 0; j < N; j++) {
00274 dist = eucDist->calculateDistance(p->x[i], p->x[j]);
00275 if (dist > max) {
00276 max = dist;
00277 }
00278 }
00279 }
00280
00281 this->trainer->setKernelWidth(1/max);
00282
00283 return (1/max);
00284 }
00285
00286
00292 void SVClustering::setKernelWidth(double w) {
00293 this->trainer->setKernelWidth(w);
00294 }
00295
00296
00302 double SVClustering::getKernelWidth() {
00303 return this->trainer->getKernelWidth();
00304 }
00305
00316 void SVClustering::setKernelType(int k) {
00317 this->trainer->setKernel(k);
00318 }
00319
00326 int SVClustering::getKernelType() {
00327 return this->trainer->getKernel();
00328 }
00329
00335 void SVClustering::setSoftConstraint(double C) {
00336 this->C = C;
00337
00338
00339 this->trainer->setNU((1 / (this->trainer->getProblem()->l * C)));
00340 }
00341
00347 double SVClustering::getSoftConstraint() {
00348 return this->C;
00349 }
00350
00351
00357 double SVClustering::getSoftConstraintEstimate() {
00358 return C_estimation;
00359 }
00360
00361
00376 void SVClustering::clusterize() {
00377 this->separateClusters();
00378 }
00379
00387 std::vector<int>& SVClustering::getClustersAssignment() {
00388 return labels;
00389 }
00390
00398 std::vector<unsigned long int>& SVClustering::getPointPerCluster() {
00399 if (numberOfClusters > -1) {
00400 return pointPerCluster;
00401 }
00402
00403
00404
00405
00406
00407 if (labels.size() == 0) {
00408 this->numberOfClusters = 0;
00409 return pointPerCluster;
00410 }
00411
00412 std::vector<unsigned long int>::size_type idx;
00413 this->pointPerCluster.clear();
00414 this->pointPerCluster.push_back(0);
00415 this->numberOfClusters = 1;
00416 for (idx = 0; idx < labels.size(); idx++) {
00417 if ((int)(pointPerCluster.size() - 1) < labels[idx]) {
00418 this->numberOfClusters++;
00419 this->pointPerCluster.push_back(1);
00420 }
00421 else {
00422 this->pointPerCluster[labels[idx]]++;
00423 }
00424 }
00425
00426 return pointPerCluster;
00427 }
00428
00434 unsigned long int SVClustering::getNumberOfClusters() {
00435
00436 if (numberOfClusters > -1)
00437 return numberOfClusters;
00438
00439
00440
00441
00442
00443 if (labels.size() == 0) {
00444 this->numberOfClusters = 0;
00445 return numberOfClusters;
00446 }
00447
00448
00449 std::vector<int>::size_type idx;
00450 this->pointPerCluster.push_back(0);
00451 this->numberOfClusters = 1;
00452 for (idx = 0; idx < labels.size(); idx++) {
00453 if ((int)(pointPerCluster.size() - 1) < labels[idx]) {
00454 this->numberOfClusters++;
00455 this->pointPerCluster.push_back(1);
00456 }
00457 else {
00458 this->pointPerCluster[labels[idx]]++;
00459 }
00460 }
00461
00462 return numberOfClusters;
00463 }
00464
00470 unsigned long int SVClustering::getNumberOfNonSingletonClusters() {
00471 return getNumberOfValidClusters(2);
00472 }
00473
00480 unsigned long int SVClustering::getNumberOfValidClusters(unsigned long int threshold) {
00481 getPointPerCluster();
00482 unsigned long int counter = 0;
00483 for (unsigned long int i = 0; i < pointPerCluster.size(); i++) {
00484 if (pointPerCluster[i] >= threshold) {
00485 counter++;
00486 }
00487 }
00488 return counter;
00489 }
00490
00504 std::vector<int>& SVClustering::getOriginalClasses(int classOffset) {
00505 double *classes = this->trainer->getProblem()->y;
00506 unsigned long int i, N = this->trainer->getProblem()->l;
00507
00508 originalClasses.resize(N);
00509
00510 for (i = 0; i < N; i++) {
00511 originalClasses[i] = (int)classes[i] - classOffset;
00512 }
00513
00514 return originalClasses;
00515 }
00516
00517
00523 void SVClustering::setEuclideanDistance(EuclideanDistance *ed) {
00524 eucDist = ed;
00525 }
00526
00527
00532 void SVClustering::useSpecialClusteringPolicyForBSV() {
00533 this->clusterizeBSV = true;
00534 }
00535
00540 void SVClustering::useClassicClusteringPolicyForBSV() {
00541 this->clusterizeBSV = false;
00542 }
00543
00549 bool SVClustering::usingSpecialClusteringPolicyForBSV() {
00550 return this->clusterizeBSV;
00551 }
00552
00553
00554
00555
00556
00569 double SVClustering::beta(int i) {
00570 return this->trainer->getModel()->sv_coef[0][i] * C;
00571 }
00572
00573
00590 double SVClustering::beta(int i, bool scaled) {
00591 if (scaled) {
00592 return this->trainer->getModel()->sv_coef[0][i];
00593 }
00594 else {
00595 return this->trainer->getModel()->sv_coef[0][i] * getSoftConstraint();
00596 }
00597 }
00598
00599
00609 double SVClustering::rho() {
00610 return this->trainer->getModel()->rho[0] * C;
00611 }
00612
00624 double SVClustering::rho(bool scaled) {
00625 if (scaled) {
00626 return this->trainer->getModel()->rho[0];
00627 }
00628 else {
00629 return this->trainer->getModel()->rho[0] * getSoftConstraint();
00630 }
00631 }
00632
00633
00652 void SVClustering::calculateQuadraticPartOfDistanceFromCenter() {
00653
00654 if (quadratic != NULL) {
00655 return;
00656 }
00657
00658 struct svm_node **SV = this->trainer->getModel()->SV;
00659 int nSV = this->trainer->getModel()->l;
00660 double tmp = 0;
00661
00662
00663 for (int i = 0; i < nSV; i++) {
00664 for (int j = 0; j < nSV; j++) {
00665 tmp += beta(i) * beta(j) * kernelfunction(SV[i], SV[j], this->trainer->getModel()->param);
00666 }
00667 }
00668
00669 this->quadratic = (double *)malloc(sizeof(double));
00670 *(this->quadratic) = tmp;
00671 }
00672
00695 double SVClustering::calculateDistanceFromCenter(struct svm_node *x) {
00696 double sum = 0;
00697
00698 struct svm_node **SV = this->trainer->getModel()->SV;
00699 int nSV = this->trainer->getModel()->l;
00700
00701
00702 for (int i = 0; i < nSV; i++) {
00703 sum += beta(i) * kernelfunction(SV[i], x, this->trainer->getModel()->param);
00704 }
00705
00706
00707 calculateQuadraticPartOfDistanceFromCenter();
00708
00709
00710
00711
00712 return 1 - (2 * sum) + *(this->quadratic);
00713 }
00714
00741 void SVClustering::calculateSphereRadius() {
00742
00743
00744
00745
00746
00747
00748 this->sphereRadius = this->calculateDistanceFromCenter(this->trainer->getModel()->SV[0]);
00749
00750
00751
00752
00753
00754
00755
00756
00757
00758
00759
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769 }
00770
00771
00779 struct svm_node *SVClustering::pointOnThePath(struct svm_node *start, struct svm_node *stop, double interval) {
00780 int start_dim = 0, stop_dim = 0;
00781 struct svm_node *t, *z;
00782
00783 t = start;
00784 while (t->index != -1) {
00785 start_dim++;
00786 t++;
00787 }
00788
00789 t = stop;
00790 while (t->index != -1) {
00791 stop_dim++;
00792 t++;
00793 }
00794
00795 start_dim++;
00796 stop_dim++;
00797
00798
00799 if (start_dim >= stop_dim) {
00800 z = (struct svm_node *) malloc(sizeof(struct svm_node) * start_dim);
00801 }
00802 else {
00803 z = (struct svm_node *) malloc(sizeof(struct svm_node) * stop_dim);
00804 }
00805
00806 t = z;
00807
00808 struct svm_node *x = start;
00809 struct svm_node *y = stop;
00810
00811 while(x->index != -1 && y->index !=-1) {
00812 if (x->index == y->index) {
00813 z->index = x->index;
00814 z->value = ((y->value - x->value) * interval) + x->value;
00815 x++;
00816 y++;
00817 z++;
00818 }
00819 else {
00820 if (x->index > y->index) {
00821 z->index = y->index;
00822 z->value = y->value * interval;
00823 ++y;
00824 ++z;
00825 }
00826 else {
00827 z->index = x->index;
00828 z->value = -(x->value * interval) + x->value;
00829 ++x;
00830 ++z;
00831 }
00832 }
00833 }
00834
00835 while(x->index != -1) {
00836 z->index = x->index;
00837 z->value = -(x->value * interval) + x->value;
00838 ++x;
00839 ++z;
00840 }
00841
00842 while(y->index != -1) {
00843 z->index = y->index;
00844 z->value = y->value * interval;
00845 ++y;
00846 ++z;
00847 }
00848
00849 z->index = -1;
00850 z->value = 0;
00851
00852 return t;
00853 }
00854
00855
00874 void SVClustering::separateClusters() {
00875 static struct svm_node *z;
00876 bool flag;
00877 int i,j;
00878 double zdist;
00879 int N = this->trainer->getProblem()->l;
00880
00881 std::pair<UndirectedGraph::edge_descriptor, bool> temp;
00882
00883 UndirectedGraph aGraph(N);
00884
00885
00886
00887 for (i = 0; i < N; i++) {
00888 for (j = 0; j < N; j++) {
00889
00890
00891 if (j < i) {
00892 temp = edge(i, j, aGraph);
00893 if (temp.second == true) {
00894 for (int k = 0; k < N; k++) {
00895 if (edge(i, k, aGraph).second || edge(j, k, aGraph).second) {
00896 add_edge(i, k, aGraph);
00897 }
00898 else {
00899 remove_edge(i, k, aGraph);
00900 }
00901 }
00902 }
00903 }
00904 else {
00905
00906 if (i == j) {
00907 add_edge(i, j, aGraph);
00908 continue;
00909 }
00910
00911
00912 temp = edge(i, j, aGraph);
00913 if (temp.second == false) {
00914 flag = true;
00915
00916
00917
00918
00919 for (double k = 1; k <= 10; k = k + 1) {
00920 z = pointOnThePath(this->trainer->getProblem()->x[i], this->trainer->getProblem()->x[j], k / 10);
00921
00922 zdist = calculateDistanceFromCenter(z);
00923
00924 if (zdist > this->getSphereRadius()) {
00925 flag = false;
00926 break;
00927 }
00928 }
00929
00930 if (flag) {
00931 add_edge(i, j, aGraph);
00932 add_edge(j, i, aGraph);
00933 }
00934 else {
00935 remove_edge(i, j, aGraph);
00936 remove_edge(j, i, aGraph);
00937 }
00938 }
00939 }
00940 }
00941 }
00942
00943
00944
00945 labels.resize(N);
00946 connected_components(aGraph, &labels[0]);
00947
00948
00949
00950
00951 int nBSV = this->trainer->getModel()->lbounded;
00952 int *BSV_index = this->trainer->getModel()->BSV_index;
00953
00954 int minimizer;
00955 double min_BSV_distance, d;
00956
00957 for (int i = 0; i < nBSV; i++) {
00958 min_BSV_distance = MAXFLOAT;
00959 minimizer = 0;
00960 for (int j = 0; j < (int) labels.size(); j++) {
00961 if (j == BSV_index[i]) continue;
00962 if (beta(j, true) == 1) continue;
00963
00964 d = eucDist->calculateDistance(this->trainer->getProblem()->x[BSV_index[i]], this->trainer->getProblem()->x[j]);
00965 if (d < min_BSV_distance) {
00966 min_BSV_distance = d;
00967 minimizer = i;
00968 }
00969 }
00970 labels[BSV_index[i]] = labels[minimizer];
00971 }
00972
00973 }
00974
00975 }