toolkit/components/places/ClusterLib.js

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 /**
michael@0 6 * Class that can run the hierarchical clustering algorithm with the given
michael@0 7 * parameters.
michael@0 8 *
michael@0 9 * @param distance
michael@0 10 * Function that should return the distance between two items.
michael@0 11 * Defaults to clusterlib.euclidean_distance.
michael@0 12 * @param merge
michael@0 13 * Function that should take in two items and return a merged one.
michael@0 14 * Defaults to clusterlib.average_linkage.
michael@0 15 * @param threshold
michael@0 16 * The maximum distance between two items for which their clusters
michael@0 17 * can be merged.
michael@0 18 */
michael@0 19 function HierarchicalClustering(distance, merge, threshold) {
michael@0 20 this.distance = distance || clusterlib.euclidean_distance;
michael@0 21 this.merge = merge || clusterlib.average_linkage;
michael@0 22 this.threshold = threshold == undefined ? Infinity : threshold;
michael@0 23 }
michael@0 24
michael@0 25 HierarchicalClustering.prototype = {
michael@0 26 /**
michael@0 27 * Run the hierarchical clustering algorithm on the given items to produce
michael@0 28 * a final set of clusters. Uses the parameters set in the constructor.
michael@0 29 *
michael@0 30 * @param items
michael@0 31 * An array of "things" to cluster - this is the domain-specific
michael@0 32 * collection you're trying to cluster (colors, points, etc.)
michael@0 33 * @param snapshotGap
michael@0 34 * How many iterations of the clustering algorithm to wait between
michael@0 35 * calling the snapshotCallback
michael@0 36 * @param snapshotCallback
michael@0 37 * If provided, will be called as clusters are merged to let you view
michael@0 38 * the progress of the algorithm. Passed the current array of
michael@0 39 * clusters, cached distances, and cached closest clusters.
michael@0 40 *
michael@0 41 * @return An array of merged clusters. The represented item can be
michael@0 42 * found in the "item" property of the cluster.
michael@0 43 */
michael@0 44 cluster: function HC_cluster(items, snapshotGap, snapshotCallback) {
michael@0 45 // array of all remaining clusters
michael@0 46 let clusters = [];
michael@0 47 // 2D matrix of distances between each pair of clusters, indexed by key
michael@0 48 let distances = [];
michael@0 49 // closest cluster key for each cluster, indexed by key
michael@0 50 let neighbors = [];
michael@0 51 // an array of all clusters, but indexed by key
michael@0 52 let clustersByKey = [];
michael@0 53
michael@0 54 // set up clusters from the initial items array
michael@0 55 for (let index = 0; index < items.length; index++) {
michael@0 56 let cluster = {
michael@0 57 // the item this cluster represents
michael@0 58 item: items[index],
michael@0 59 // a unique key for this cluster, stays constant unless merged itself
michael@0 60 key: index,
michael@0 61 // index of cluster in clusters array, can change during any merge
michael@0 62 index: index,
michael@0 63 // how many clusters have been merged into this one
michael@0 64 size: 1
michael@0 65 };
michael@0 66 clusters[index] = cluster;
michael@0 67 clustersByKey[index] = cluster;
michael@0 68 distances[index] = [];
michael@0 69 neighbors[index] = 0;
michael@0 70 }
michael@0 71
michael@0 72 // initialize distance matrix and cached neighbors
michael@0 73 for (let i = 0; i < clusters.length; i++) {
michael@0 74 for (let j = 0; j <= i; j++) {
michael@0 75 var dist = (i == j) ? Infinity :
michael@0 76 this.distance(clusters[i].item, clusters[j].item);
michael@0 77 distances[i][j] = dist;
michael@0 78 distances[j][i] = dist;
michael@0 79
michael@0 80 if (dist < distances[i][neighbors[i]]) {
michael@0 81 neighbors[i] = j;
michael@0 82 }
michael@0 83 }
michael@0 84 }
michael@0 85
michael@0 86 // merge the next two closest clusters until none of them are close enough
michael@0 87 let next = null, i = 0;
michael@0 88 for (; next = this.closestClusters(clusters, distances, neighbors); i++) {
michael@0 89 if (snapshotCallback && (i % snapshotGap) == 0) {
michael@0 90 snapshotCallback(clusters);
michael@0 91 }
michael@0 92 this.mergeClusters(clusters, distances, neighbors, clustersByKey,
michael@0 93 clustersByKey[next[0]], clustersByKey[next[1]]);
michael@0 94 }
michael@0 95 return clusters;
michael@0 96 },
michael@0 97
michael@0 98 /**
michael@0 99 * Once we decide to merge two clusters in the cluster method, actually
michael@0 100 * merge them. Alters the given state of the algorithm.
michael@0 101 *
michael@0 102 * @param clusters
michael@0 103 * The array of all remaining clusters
michael@0 104 * @param distances
michael@0 105 * Cached distances between pairs of clusters
michael@0 106 * @param neighbors
michael@0 107 * Cached closest clusters
michael@0 108 * @param clustersByKey
michael@0 109 * Array of all clusters, indexed by key
michael@0 110 * @param cluster1
michael@0 111 * First cluster to merge
michael@0 112 * @param cluster2
michael@0 113 * Second cluster to merge
michael@0 114 */
michael@0 115 mergeClusters: function HC_mergeClus(clusters, distances, neighbors,
michael@0 116 clustersByKey, cluster1, cluster2) {
michael@0 117 let merged = { item: this.merge(cluster1.item, cluster2.item),
michael@0 118 left: cluster1,
michael@0 119 right: cluster2,
michael@0 120 key: cluster1.key,
michael@0 121 size: cluster1.size + cluster2.size };
michael@0 122
michael@0 123 clusters[cluster1.index] = merged;
michael@0 124 clusters.splice(cluster2.index, 1);
michael@0 125 clustersByKey[cluster1.key] = merged;
michael@0 126
michael@0 127 // update distances with new merged cluster
michael@0 128 for (let i = 0; i < clusters.length; i++) {
michael@0 129 var ci = clusters[i];
michael@0 130 var dist;
michael@0 131 if (cluster1.key == ci.key) {
michael@0 132 dist = Infinity;
michael@0 133 } else if (this.merge == clusterlib.single_linkage) {
michael@0 134 dist = distances[cluster1.key][ci.key];
michael@0 135 if (distances[cluster1.key][ci.key] >
michael@0 136 distances[cluster2.key][ci.key]) {
michael@0 137 dist = distances[cluster2.key][ci.key];
michael@0 138 }
michael@0 139 } else if (this.merge == clusterlib.complete_linkage) {
michael@0 140 dist = distances[cluster1.key][ci.key];
michael@0 141 if (distances[cluster1.key][ci.key] <
michael@0 142 distances[cluster2.key][ci.key]) {
michael@0 143 dist = distances[cluster2.key][ci.key];
michael@0 144 }
michael@0 145 } else if (this.merge == clusterlib.average_linkage) {
michael@0 146 dist = (distances[cluster1.key][ci.key] * cluster1.size
michael@0 147 + distances[cluster2.key][ci.key] * cluster2.size)
michael@0 148 / (cluster1.size + cluster2.size);
michael@0 149 } else {
michael@0 150 dist = this.distance(ci.item, cluster1.item);
michael@0 151 }
michael@0 152
michael@0 153 distances[cluster1.key][ci.key] = distances[ci.key][cluster1.key]
michael@0 154 = dist;
michael@0 155 }
michael@0 156
michael@0 157 // update cached neighbors
michael@0 158 for (let i = 0; i < clusters.length; i++) {
michael@0 159 var key1 = clusters[i].key;
michael@0 160 if (neighbors[key1] == cluster1.key ||
michael@0 161 neighbors[key1] == cluster2.key) {
michael@0 162 let minKey = key1;
michael@0 163 for (let j = 0; j < clusters.length; j++) {
michael@0 164 var key2 = clusters[j].key;
michael@0 165 if (distances[key1][key2] < distances[key1][minKey]) {
michael@0 166 minKey = key2;
michael@0 167 }
michael@0 168 }
michael@0 169 neighbors[key1] = minKey;
michael@0 170 }
michael@0 171 clusters[i].index = i;
michael@0 172 }
michael@0 173 },
michael@0 174
michael@0 175 /**
michael@0 176 * Given the current state of the algorithm, return the keys of the two
michael@0 177 * clusters that are closest to each other so we know which ones to merge
michael@0 178 * next.
michael@0 179 *
michael@0 180 * @param clusters
michael@0 181 * The array of all remaining clusters
michael@0 182 * @param distances
michael@0 183 * Cached distances between pairs of clusters
michael@0 184 * @param neighbors
michael@0 185 * Cached closest clusters
michael@0 186 *
michael@0 187 * @return An array of two keys of clusters to merge, or null if there are
michael@0 188 * no more clusters close enough to merge
michael@0 189 */
michael@0 190 closestClusters: function HC_closestClus(clusters, distances, neighbors) {
michael@0 191 let minKey = 0, minDist = Infinity;
michael@0 192 for (let i = 0; i < clusters.length; i++) {
michael@0 193 var key = clusters[i].key;
michael@0 194 if (distances[key][neighbors[key]] < minDist) {
michael@0 195 minKey = key;
michael@0 196 minDist = distances[key][neighbors[key]];
michael@0 197 }
michael@0 198 }
michael@0 199 if (minDist < this.threshold) {
michael@0 200 return [minKey, neighbors[minKey]];
michael@0 201 }
michael@0 202 return null;
michael@0 203 }
michael@0 204 };
michael@0 205
michael@0 206 let clusterlib = {
michael@0 207 hcluster: function hcluster(items, distance, merge, threshold, snapshotGap,
michael@0 208 snapshotCallback) {
michael@0 209 return (new HierarchicalClustering(distance, merge, threshold))
michael@0 210 .cluster(items, snapshotGap, snapshotCallback);
michael@0 211 },
michael@0 212
michael@0 213 single_linkage: function single_linkage(cluster1, cluster2) {
michael@0 214 return cluster1;
michael@0 215 },
michael@0 216
michael@0 217 complete_linkage: function complete_linkage(cluster1, cluster2) {
michael@0 218 return cluster1;
michael@0 219 },
michael@0 220
michael@0 221 average_linkage: function average_linkage(cluster1, cluster2) {
michael@0 222 return cluster1;
michael@0 223 },
michael@0 224
michael@0 225 euclidean_distance: function euclidean_distance(v1, v2) {
michael@0 226 let total = 0;
michael@0 227 for (let i = 0; i < v1.length; i++) {
michael@0 228 total += Math.pow(v2[i] - v1[i], 2);
michael@0 229 }
michael@0 230 return Math.sqrt(total);
michael@0 231 },
michael@0 232
michael@0 233 manhattan_distance: function manhattan_distance(v1, v2) {
michael@0 234 let total = 0;
michael@0 235 for (let i = 0; i < v1.length; i++) {
michael@0 236 total += Math.abs(v2[i] - v1[i]);
michael@0 237 }
michael@0 238 return total;
michael@0 239 },
michael@0 240
michael@0 241 max_distance: function max_distance(v1, v2) {
michael@0 242 let max = 0;
michael@0 243 for (let i = 0; i < v1.length; i++) {
michael@0 244 max = Math.max(max, Math.abs(v2[i] - v1[i]));
michael@0 245 }
michael@0 246 return max;
michael@0 247 }
michael@0 248 };

mercurial