1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/components/places/ClusterLib.js Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,248 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +/** 1.9 + * Class that can run the hierarchical clustering algorithm with the given 1.10 + * parameters. 1.11 + * 1.12 + * @param distance 1.13 + * Function that should return the distance between two items. 1.14 + * Defaults to clusterlib.euclidean_distance. 1.15 + * @param merge 1.16 + * Function that should take in two items and return a merged one. 1.17 + * Defaults to clusterlib.average_linkage. 1.18 + * @param threshold 1.19 + * The maximum distance between two items for which their clusters 1.20 + * can be merged. 1.21 + */ 1.22 +function HierarchicalClustering(distance, merge, threshold) { 1.23 + this.distance = distance || clusterlib.euclidean_distance; 1.24 + this.merge = merge || clusterlib.average_linkage; 1.25 + this.threshold = threshold == undefined ? Infinity : threshold; 1.26 +} 1.27 + 1.28 +HierarchicalClustering.prototype = { 1.29 + /** 1.30 + * Run the hierarchical clustering algorithm on the given items to produce 1.31 + * a final set of clusters. Uses the parameters set in the constructor. 1.32 + * 1.33 + * @param items 1.34 + * An array of "things" to cluster - this is the domain-specific 1.35 + * collection you're trying to cluster (colors, points, etc.) 1.36 + * @param snapshotGap 1.37 + * How many iterations of the clustering algorithm to wait between 1.38 + * calling the snapshotCallback 1.39 + * @param snapshotCallback 1.40 + * If provided, will be called as clusters are merged to let you view 1.41 + * the progress of the algorithm. Passed the current array of 1.42 + * clusters, cached distances, and cached closest clusters. 1.43 + * 1.44 + * @return An array of merged clusters. The represented item can be 1.45 + * found in the "item" property of the cluster. 1.46 + */ 1.47 + cluster: function HC_cluster(items, snapshotGap, snapshotCallback) { 1.48 + // array of all remaining clusters 1.49 + let clusters = []; 1.50 + // 2D matrix of distances between each pair of clusters, indexed by key 1.51 + let distances = []; 1.52 + // closest cluster key for each cluster, indexed by key 1.53 + let neighbors = []; 1.54 + // an array of all clusters, but indexed by key 1.55 + let clustersByKey = []; 1.56 + 1.57 + // set up clusters from the initial items array 1.58 + for (let index = 0; index < items.length; index++) { 1.59 + let cluster = { 1.60 + // the item this cluster represents 1.61 + item: items[index], 1.62 + // a unique key for this cluster, stays constant unless merged itself 1.63 + key: index, 1.64 + // index of cluster in clusters array, can change during any merge 1.65 + index: index, 1.66 + // how many clusters have been merged into this one 1.67 + size: 1 1.68 + }; 1.69 + clusters[index] = cluster; 1.70 + clustersByKey[index] = cluster; 1.71 + distances[index] = []; 1.72 + neighbors[index] = 0; 1.73 + } 1.74 + 1.75 + // initialize distance matrix and cached neighbors 1.76 + for (let i = 0; i < clusters.length; i++) { 1.77 + for (let j = 0; j <= i; j++) { 1.78 + var dist = (i == j) ? Infinity : 1.79 + this.distance(clusters[i].item, clusters[j].item); 1.80 + distances[i][j] = dist; 1.81 + distances[j][i] = dist; 1.82 + 1.83 + if (dist < distances[i][neighbors[i]]) { 1.84 + neighbors[i] = j; 1.85 + } 1.86 + } 1.87 + } 1.88 + 1.89 + // merge the next two closest clusters until none of them are close enough 1.90 + let next = null, i = 0; 1.91 + for (; next = this.closestClusters(clusters, distances, neighbors); i++) { 1.92 + if (snapshotCallback && (i % snapshotGap) == 0) { 1.93 + snapshotCallback(clusters); 1.94 + } 1.95 + this.mergeClusters(clusters, distances, neighbors, clustersByKey, 1.96 + clustersByKey[next[0]], clustersByKey[next[1]]); 1.97 + } 1.98 + return clusters; 1.99 + }, 1.100 + 1.101 + /** 1.102 + * Once we decide to merge two clusters in the cluster method, actually 1.103 + * merge them. Alters the given state of the algorithm. 1.104 + * 1.105 + * @param clusters 1.106 + * The array of all remaining clusters 1.107 + * @param distances 1.108 + * Cached distances between pairs of clusters 1.109 + * @param neighbors 1.110 + * Cached closest clusters 1.111 + * @param clustersByKey 1.112 + * Array of all clusters, indexed by key 1.113 + * @param cluster1 1.114 + * First cluster to merge 1.115 + * @param cluster2 1.116 + * Second cluster to merge 1.117 + */ 1.118 + mergeClusters: function HC_mergeClus(clusters, distances, neighbors, 1.119 + clustersByKey, cluster1, cluster2) { 1.120 + let merged = { item: this.merge(cluster1.item, cluster2.item), 1.121 + left: cluster1, 1.122 + right: cluster2, 1.123 + key: cluster1.key, 1.124 + size: cluster1.size + cluster2.size }; 1.125 + 1.126 + clusters[cluster1.index] = merged; 1.127 + clusters.splice(cluster2.index, 1); 1.128 + clustersByKey[cluster1.key] = merged; 1.129 + 1.130 + // update distances with new merged cluster 1.131 + for (let i = 0; i < clusters.length; i++) { 1.132 + var ci = clusters[i]; 1.133 + var dist; 1.134 + if (cluster1.key == ci.key) { 1.135 + dist = Infinity; 1.136 + } else if (this.merge == clusterlib.single_linkage) { 1.137 + dist = distances[cluster1.key][ci.key]; 1.138 + if (distances[cluster1.key][ci.key] > 1.139 + distances[cluster2.key][ci.key]) { 1.140 + dist = distances[cluster2.key][ci.key]; 1.141 + } 1.142 + } else if (this.merge == clusterlib.complete_linkage) { 1.143 + dist = distances[cluster1.key][ci.key]; 1.144 + if (distances[cluster1.key][ci.key] < 1.145 + distances[cluster2.key][ci.key]) { 1.146 + dist = distances[cluster2.key][ci.key]; 1.147 + } 1.148 + } else if (this.merge == clusterlib.average_linkage) { 1.149 + dist = (distances[cluster1.key][ci.key] * cluster1.size 1.150 + + distances[cluster2.key][ci.key] * cluster2.size) 1.151 + / (cluster1.size + cluster2.size); 1.152 + } else { 1.153 + dist = this.distance(ci.item, cluster1.item); 1.154 + } 1.155 + 1.156 + distances[cluster1.key][ci.key] = distances[ci.key][cluster1.key] 1.157 + = dist; 1.158 + } 1.159 + 1.160 + // update cached neighbors 1.161 + for (let i = 0; i < clusters.length; i++) { 1.162 + var key1 = clusters[i].key; 1.163 + if (neighbors[key1] == cluster1.key || 1.164 + neighbors[key1] == cluster2.key) { 1.165 + let minKey = key1; 1.166 + for (let j = 0; j < clusters.length; j++) { 1.167 + var key2 = clusters[j].key; 1.168 + if (distances[key1][key2] < distances[key1][minKey]) { 1.169 + minKey = key2; 1.170 + } 1.171 + } 1.172 + neighbors[key1] = minKey; 1.173 + } 1.174 + clusters[i].index = i; 1.175 + } 1.176 + }, 1.177 + 1.178 + /** 1.179 + * Given the current state of the algorithm, return the keys of the two 1.180 + * clusters that are closest to each other so we know which ones to merge 1.181 + * next. 1.182 + * 1.183 + * @param clusters 1.184 + * The array of all remaining clusters 1.185 + * @param distances 1.186 + * Cached distances between pairs of clusters 1.187 + * @param neighbors 1.188 + * Cached closest clusters 1.189 + * 1.190 + * @return An array of two keys of clusters to merge, or null if there are 1.191 + * no more clusters close enough to merge 1.192 + */ 1.193 + closestClusters: function HC_closestClus(clusters, distances, neighbors) { 1.194 + let minKey = 0, minDist = Infinity; 1.195 + for (let i = 0; i < clusters.length; i++) { 1.196 + var key = clusters[i].key; 1.197 + if (distances[key][neighbors[key]] < minDist) { 1.198 + minKey = key; 1.199 + minDist = distances[key][neighbors[key]]; 1.200 + } 1.201 + } 1.202 + if (minDist < this.threshold) { 1.203 + return [minKey, neighbors[minKey]]; 1.204 + } 1.205 + return null; 1.206 + } 1.207 +}; 1.208 + 1.209 +let clusterlib = { 1.210 + hcluster: function hcluster(items, distance, merge, threshold, snapshotGap, 1.211 + snapshotCallback) { 1.212 + return (new HierarchicalClustering(distance, merge, threshold)) 1.213 + .cluster(items, snapshotGap, snapshotCallback); 1.214 + }, 1.215 + 1.216 + single_linkage: function single_linkage(cluster1, cluster2) { 1.217 + return cluster1; 1.218 + }, 1.219 + 1.220 + complete_linkage: function complete_linkage(cluster1, cluster2) { 1.221 + return cluster1; 1.222 + }, 1.223 + 1.224 + average_linkage: function average_linkage(cluster1, cluster2) { 1.225 + return cluster1; 1.226 + }, 1.227 + 1.228 + euclidean_distance: function euclidean_distance(v1, v2) { 1.229 + let total = 0; 1.230 + for (let i = 0; i < v1.length; i++) { 1.231 + total += Math.pow(v2[i] - v1[i], 2); 1.232 + } 1.233 + return Math.sqrt(total); 1.234 + }, 1.235 + 1.236 + manhattan_distance: function manhattan_distance(v1, v2) { 1.237 + let total = 0; 1.238 + for (let i = 0; i < v1.length; i++) { 1.239 + total += Math.abs(v2[i] - v1[i]); 1.240 + } 1.241 + return total; 1.242 + }, 1.243 + 1.244 + max_distance: function max_distance(v1, v2) { 1.245 + let max = 0; 1.246 + for (let i = 0; i < v1.length; i++) { 1.247 + max = Math.max(max, Math.abs(v2[i] - v1[i])); 1.248 + } 1.249 + return max; 1.250 + } 1.251 +};