toolkit/components/places/ClusterLib.js

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 /**
     6  * Class that can run the hierarchical clustering algorithm with the given
     7  * parameters.
     8  *
     9  * @param distance
    10  *        Function that should return the distance between two items.
    11  *        Defaults to clusterlib.euclidean_distance.
    12  * @param merge
    13  *        Function that should take in two items and return a merged one.
    14  *        Defaults to clusterlib.average_linkage.
    15  * @param threshold
    16  *        The maximum distance between two items for which their clusters
    17  *        can be merged.
    18  */
    19 function HierarchicalClustering(distance, merge, threshold) {
    20   this.distance = distance || clusterlib.euclidean_distance;
    21   this.merge = merge || clusterlib.average_linkage;
    22   this.threshold = threshold == undefined ? Infinity : threshold;
    23 }
    25 HierarchicalClustering.prototype = {
    26   /**
    27    * Run the hierarchical clustering algorithm on the given items to produce
    28    * a final set of clusters.  Uses the parameters set in the constructor.
    29    *
    30    * @param items
    31    *        An array of "things" to cluster - this is the domain-specific
    32    *        collection you're trying to cluster (colors, points, etc.)
    33    * @param snapshotGap
    34    *        How many iterations of the clustering algorithm to wait between
    35    *        calling the snapshotCallback
    36    * @param snapshotCallback
    37    *        If provided, will be called as clusters are merged to let you view
    38    *        the progress of the algorithm.  Passed the current array of
    39    *        clusters, cached distances, and cached closest clusters.
    40    *
    41    * @return An array of merged clusters.  The represented item can be
    42    *         found in the "item" property of the cluster.
    43    */
    44   cluster: function HC_cluster(items, snapshotGap, snapshotCallback) {
    45     // array of all remaining clusters
    46     let clusters = [];
    47     // 2D matrix of distances between each pair of clusters, indexed by key
    48     let distances = [];
    49     // closest cluster key for each cluster, indexed by key
    50     let neighbors = [];
    51     // an array of all clusters, but indexed by key
    52     let clustersByKey = [];
    54     // set up clusters from the initial items array
    55     for (let index = 0; index < items.length; index++) {
    56       let cluster = {
    57         // the item this cluster represents
    58         item: items[index],
    59         // a unique key for this cluster, stays constant unless merged itself
    60         key: index,
    61         // index of cluster in clusters array, can change during any merge
    62         index: index,
    63         // how many clusters have been merged into this one
    64         size: 1
    65       };
    66       clusters[index] = cluster;
    67       clustersByKey[index] = cluster;
    68       distances[index] = [];
    69       neighbors[index] = 0;
    70     }
    72     // initialize distance matrix and cached neighbors
    73     for (let i = 0; i < clusters.length; i++) {
    74       for (let j = 0; j <= i; j++) {
    75         var dist = (i == j) ? Infinity :
    76           this.distance(clusters[i].item, clusters[j].item);
    77         distances[i][j] = dist;
    78         distances[j][i] = dist;
    80         if (dist < distances[i][neighbors[i]]) {
    81           neighbors[i] = j;
    82         }
    83       }
    84     }
    86     // merge the next two closest clusters until none of them are close enough
    87     let next = null, i = 0;
    88     for (; next = this.closestClusters(clusters, distances, neighbors); i++) {
    89       if (snapshotCallback && (i % snapshotGap) == 0) {
    90         snapshotCallback(clusters);
    91       }
    92       this.mergeClusters(clusters, distances, neighbors, clustersByKey,
    93                          clustersByKey[next[0]], clustersByKey[next[1]]);
    94     }
    95     return clusters;
    96   },
    98   /**
    99    * Once we decide to merge two clusters in the cluster method, actually
   100    * merge them.  Alters the given state of the algorithm.
   101    *
   102    * @param clusters
   103    *        The array of all remaining clusters
   104    * @param distances
   105    *        Cached distances between pairs of clusters
   106    * @param neighbors
   107    *        Cached closest clusters
   108    * @param clustersByKey
   109    *        Array of all clusters, indexed by key
   110    * @param cluster1
   111    *        First cluster to merge
   112    * @param cluster2
   113    *        Second cluster to merge
   114    */
   115   mergeClusters: function HC_mergeClus(clusters, distances, neighbors,
   116                                        clustersByKey, cluster1, cluster2) {
   117     let merged = { item: this.merge(cluster1.item, cluster2.item),
   118                    left: cluster1,
   119                    right: cluster2,
   120                    key: cluster1.key,
   121                    size: cluster1.size + cluster2.size };
   123     clusters[cluster1.index] = merged;
   124     clusters.splice(cluster2.index, 1);
   125     clustersByKey[cluster1.key] = merged;
   127     // update distances with new merged cluster
   128     for (let i = 0; i < clusters.length; i++) {
   129       var ci = clusters[i];
   130       var dist;
   131       if (cluster1.key == ci.key) {
   132         dist = Infinity;
   133       } else if (this.merge == clusterlib.single_linkage) {
   134         dist = distances[cluster1.key][ci.key];
   135         if (distances[cluster1.key][ci.key] >
   136             distances[cluster2.key][ci.key]) {
   137           dist = distances[cluster2.key][ci.key];
   138         }
   139       } else if (this.merge == clusterlib.complete_linkage) {
   140         dist = distances[cluster1.key][ci.key];
   141         if (distances[cluster1.key][ci.key] <
   142             distances[cluster2.key][ci.key]) {
   143           dist = distances[cluster2.key][ci.key];
   144         }
   145       } else if (this.merge == clusterlib.average_linkage) {
   146         dist = (distances[cluster1.key][ci.key] * cluster1.size
   147               + distances[cluster2.key][ci.key] * cluster2.size)
   148               / (cluster1.size + cluster2.size);
   149       } else {
   150         dist = this.distance(ci.item, cluster1.item);
   151       }
   153       distances[cluster1.key][ci.key] = distances[ci.key][cluster1.key]
   154                                       = dist;
   155     }
   157     // update cached neighbors
   158     for (let i = 0; i < clusters.length; i++) {
   159       var key1 = clusters[i].key;
   160       if (neighbors[key1] == cluster1.key ||
   161           neighbors[key1] == cluster2.key) {
   162         let minKey = key1;
   163         for (let j = 0; j < clusters.length; j++) {
   164           var key2 = clusters[j].key;
   165           if (distances[key1][key2] < distances[key1][minKey]) {
   166             minKey = key2;
   167           }
   168         }
   169         neighbors[key1] = minKey;
   170       }
   171       clusters[i].index = i;
   172     }
   173   },
   175   /**
   176    * Given the current state of the algorithm, return the keys of the two
   177    * clusters that are closest to each other so we know which ones to merge
   178    * next.
   179    *
   180    * @param clusters
   181    *        The array of all remaining clusters
   182    * @param distances
   183    *        Cached distances between pairs of clusters
   184    * @param neighbors
   185    *        Cached closest clusters
   186    *
   187    * @return An array of two keys of clusters to merge, or null if there are
   188    *         no more clusters close enough to merge
   189    */
   190   closestClusters: function HC_closestClus(clusters, distances, neighbors) {
   191     let minKey = 0, minDist = Infinity;
   192     for (let i = 0; i < clusters.length; i++) {
   193       var key = clusters[i].key;
   194       if (distances[key][neighbors[key]] < minDist) {
   195         minKey = key;
   196         minDist = distances[key][neighbors[key]];
   197       }
   198     }
   199     if (minDist < this.threshold) {
   200       return [minKey, neighbors[minKey]];
   201     }
   202     return null;
   203   }
   204 };
   206 let clusterlib = {
   207   hcluster: function hcluster(items, distance, merge, threshold, snapshotGap,
   208                               snapshotCallback) {
   209     return (new HierarchicalClustering(distance, merge, threshold))
   210            .cluster(items, snapshotGap, snapshotCallback);
   211   },
   213   single_linkage: function single_linkage(cluster1, cluster2) {
   214     return cluster1;
   215   },
   217   complete_linkage: function complete_linkage(cluster1, cluster2) {
   218     return cluster1;
   219   },
   221   average_linkage: function average_linkage(cluster1, cluster2) {
   222     return cluster1;
   223   },
   225   euclidean_distance: function euclidean_distance(v1, v2) {
   226     let total = 0;
   227     for (let i = 0; i < v1.length; i++) {
   228       total += Math.pow(v2[i] - v1[i], 2);
   229     }
   230     return Math.sqrt(total);
   231   },
   233   manhattan_distance: function manhattan_distance(v1, v2) {
   234     let total = 0;
   235     for (let i = 0; i < v1.length; i++) {
   236       total += Math.abs(v2[i] - v1[i]);
   237     }
   238     return total;
   239   },
   241   max_distance: function max_distance(v1, v2) {
   242     let max = 0;
   243     for (let i = 0; i < v1.length; i++) {
   244       max = Math.max(max, Math.abs(v2[i] - v1[i]));
   245     }
   246     return max;
   247   }
   248 };

mercurial