toolkit/components/places/ClusterLib.js

branch
TOR_BUG_3246
changeset 7
129ffea94266
equal deleted inserted replaced
-1:000000000000 0:c10e5ee33c58
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 /**
6 * Class that can run the hierarchical clustering algorithm with the given
7 * parameters.
8 *
9 * @param distance
10 * Function that should return the distance between two items.
11 * Defaults to clusterlib.euclidean_distance.
12 * @param merge
13 * Function that should take in two items and return a merged one.
14 * Defaults to clusterlib.average_linkage.
15 * @param threshold
16 * The maximum distance between two items for which their clusters
17 * can be merged.
18 */
19 function HierarchicalClustering(distance, merge, threshold) {
20 this.distance = distance || clusterlib.euclidean_distance;
21 this.merge = merge || clusterlib.average_linkage;
22 this.threshold = threshold == undefined ? Infinity : threshold;
23 }
24
25 HierarchicalClustering.prototype = {
26 /**
27 * Run the hierarchical clustering algorithm on the given items to produce
28 * a final set of clusters. Uses the parameters set in the constructor.
29 *
30 * @param items
31 * An array of "things" to cluster - this is the domain-specific
32 * collection you're trying to cluster (colors, points, etc.)
33 * @param snapshotGap
34 * How many iterations of the clustering algorithm to wait between
35 * calling the snapshotCallback
36 * @param snapshotCallback
37 * If provided, will be called as clusters are merged to let you view
38 * the progress of the algorithm. Passed the current array of
39 * clusters, cached distances, and cached closest clusters.
40 *
41 * @return An array of merged clusters. The represented item can be
42 * found in the "item" property of the cluster.
43 */
44 cluster: function HC_cluster(items, snapshotGap, snapshotCallback) {
45 // array of all remaining clusters
46 let clusters = [];
47 // 2D matrix of distances between each pair of clusters, indexed by key
48 let distances = [];
49 // closest cluster key for each cluster, indexed by key
50 let neighbors = [];
51 // an array of all clusters, but indexed by key
52 let clustersByKey = [];
53
54 // set up clusters from the initial items array
55 for (let index = 0; index < items.length; index++) {
56 let cluster = {
57 // the item this cluster represents
58 item: items[index],
59 // a unique key for this cluster, stays constant unless merged itself
60 key: index,
61 // index of cluster in clusters array, can change during any merge
62 index: index,
63 // how many clusters have been merged into this one
64 size: 1
65 };
66 clusters[index] = cluster;
67 clustersByKey[index] = cluster;
68 distances[index] = [];
69 neighbors[index] = 0;
70 }
71
72 // initialize distance matrix and cached neighbors
73 for (let i = 0; i < clusters.length; i++) {
74 for (let j = 0; j <= i; j++) {
75 var dist = (i == j) ? Infinity :
76 this.distance(clusters[i].item, clusters[j].item);
77 distances[i][j] = dist;
78 distances[j][i] = dist;
79
80 if (dist < distances[i][neighbors[i]]) {
81 neighbors[i] = j;
82 }
83 }
84 }
85
86 // merge the next two closest clusters until none of them are close enough
87 let next = null, i = 0;
88 for (; next = this.closestClusters(clusters, distances, neighbors); i++) {
89 if (snapshotCallback && (i % snapshotGap) == 0) {
90 snapshotCallback(clusters);
91 }
92 this.mergeClusters(clusters, distances, neighbors, clustersByKey,
93 clustersByKey[next[0]], clustersByKey[next[1]]);
94 }
95 return clusters;
96 },
97
98 /**
99 * Once we decide to merge two clusters in the cluster method, actually
100 * merge them. Alters the given state of the algorithm.
101 *
102 * @param clusters
103 * The array of all remaining clusters
104 * @param distances
105 * Cached distances between pairs of clusters
106 * @param neighbors
107 * Cached closest clusters
108 * @param clustersByKey
109 * Array of all clusters, indexed by key
110 * @param cluster1
111 * First cluster to merge
112 * @param cluster2
113 * Second cluster to merge
114 */
115 mergeClusters: function HC_mergeClus(clusters, distances, neighbors,
116 clustersByKey, cluster1, cluster2) {
117 let merged = { item: this.merge(cluster1.item, cluster2.item),
118 left: cluster1,
119 right: cluster2,
120 key: cluster1.key,
121 size: cluster1.size + cluster2.size };
122
123 clusters[cluster1.index] = merged;
124 clusters.splice(cluster2.index, 1);
125 clustersByKey[cluster1.key] = merged;
126
127 // update distances with new merged cluster
128 for (let i = 0; i < clusters.length; i++) {
129 var ci = clusters[i];
130 var dist;
131 if (cluster1.key == ci.key) {
132 dist = Infinity;
133 } else if (this.merge == clusterlib.single_linkage) {
134 dist = distances[cluster1.key][ci.key];
135 if (distances[cluster1.key][ci.key] >
136 distances[cluster2.key][ci.key]) {
137 dist = distances[cluster2.key][ci.key];
138 }
139 } else if (this.merge == clusterlib.complete_linkage) {
140 dist = distances[cluster1.key][ci.key];
141 if (distances[cluster1.key][ci.key] <
142 distances[cluster2.key][ci.key]) {
143 dist = distances[cluster2.key][ci.key];
144 }
145 } else if (this.merge == clusterlib.average_linkage) {
146 dist = (distances[cluster1.key][ci.key] * cluster1.size
147 + distances[cluster2.key][ci.key] * cluster2.size)
148 / (cluster1.size + cluster2.size);
149 } else {
150 dist = this.distance(ci.item, cluster1.item);
151 }
152
153 distances[cluster1.key][ci.key] = distances[ci.key][cluster1.key]
154 = dist;
155 }
156
157 // update cached neighbors
158 for (let i = 0; i < clusters.length; i++) {
159 var key1 = clusters[i].key;
160 if (neighbors[key1] == cluster1.key ||
161 neighbors[key1] == cluster2.key) {
162 let minKey = key1;
163 for (let j = 0; j < clusters.length; j++) {
164 var key2 = clusters[j].key;
165 if (distances[key1][key2] < distances[key1][minKey]) {
166 minKey = key2;
167 }
168 }
169 neighbors[key1] = minKey;
170 }
171 clusters[i].index = i;
172 }
173 },
174
175 /**
176 * Given the current state of the algorithm, return the keys of the two
177 * clusters that are closest to each other so we know which ones to merge
178 * next.
179 *
180 * @param clusters
181 * The array of all remaining clusters
182 * @param distances
183 * Cached distances between pairs of clusters
184 * @param neighbors
185 * Cached closest clusters
186 *
187 * @return An array of two keys of clusters to merge, or null if there are
188 * no more clusters close enough to merge
189 */
190 closestClusters: function HC_closestClus(clusters, distances, neighbors) {
191 let minKey = 0, minDist = Infinity;
192 for (let i = 0; i < clusters.length; i++) {
193 var key = clusters[i].key;
194 if (distances[key][neighbors[key]] < minDist) {
195 minKey = key;
196 minDist = distances[key][neighbors[key]];
197 }
198 }
199 if (minDist < this.threshold) {
200 return [minKey, neighbors[minKey]];
201 }
202 return null;
203 }
204 };
205
206 let clusterlib = {
207 hcluster: function hcluster(items, distance, merge, threshold, snapshotGap,
208 snapshotCallback) {
209 return (new HierarchicalClustering(distance, merge, threshold))
210 .cluster(items, snapshotGap, snapshotCallback);
211 },
212
213 single_linkage: function single_linkage(cluster1, cluster2) {
214 return cluster1;
215 },
216
217 complete_linkage: function complete_linkage(cluster1, cluster2) {
218 return cluster1;
219 },
220
221 average_linkage: function average_linkage(cluster1, cluster2) {
222 return cluster1;
223 },
224
225 euclidean_distance: function euclidean_distance(v1, v2) {
226 let total = 0;
227 for (let i = 0; i < v1.length; i++) {
228 total += Math.pow(v2[i] - v1[i], 2);
229 }
230 return Math.sqrt(total);
231 },
232
233 manhattan_distance: function manhattan_distance(v1, v2) {
234 let total = 0;
235 for (let i = 0; i < v1.length; i++) {
236 total += Math.abs(v2[i] - v1[i]);
237 }
238 return total;
239 },
240
241 max_distance: function max_distance(v1, v2) {
242 let max = 0;
243 for (let i = 0; i < v1.length; i++) {
244 max = Math.max(max, Math.abs(v2[i] - v1[i]));
245 }
246 return max;
247 }
248 };

mercurial