Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2003-2013, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: uarrsort.c |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2003aug04 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * Internal function for sorting arrays. |
michael@0 | 17 | */ |
michael@0 | 18 | |
michael@0 | 19 | #include "unicode/utypes.h" |
michael@0 | 20 | #include "cmemory.h" |
michael@0 | 21 | #include "uarrsort.h" |
michael@0 | 22 | |
michael@0 | 23 | enum { |
michael@0 | 24 | /** |
michael@0 | 25 | * "from Knuth" |
michael@0 | 26 | * |
michael@0 | 27 | * A binary search over 8 items performs 4 comparisons: |
michael@0 | 28 | * log2(8)=3 to subdivide, +1 to check for equality. |
michael@0 | 29 | * A linear search over 8 items on average also performs 4 comparisons. |
michael@0 | 30 | */ |
michael@0 | 31 | MIN_QSORT=9, |
michael@0 | 32 | STACK_ITEM_SIZE=200 |
michael@0 | 33 | }; |
michael@0 | 34 | |
michael@0 | 35 | /* UComparator convenience implementations ---------------------------------- */ |
michael@0 | 36 | |
michael@0 | 37 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 38 | uprv_uint16Comparator(const void *context, const void *left, const void *right) { |
michael@0 | 39 | return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; |
michael@0 | 40 | } |
michael@0 | 41 | |
michael@0 | 42 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 43 | uprv_int32Comparator(const void *context, const void *left, const void *right) { |
michael@0 | 44 | return *(const int32_t *)left - *(const int32_t *)right; |
michael@0 | 45 | } |
michael@0 | 46 | |
michael@0 | 47 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 48 | uprv_uint32Comparator(const void *context, const void *left, const void *right) { |
michael@0 | 49 | uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; |
michael@0 | 50 | |
michael@0 | 51 | /* compare directly because (l-r) would overflow the int32_t result */ |
michael@0 | 52 | if(l<r) { |
michael@0 | 53 | return -1; |
michael@0 | 54 | } else if(l==r) { |
michael@0 | 55 | return 0; |
michael@0 | 56 | } else /* l>r */ { |
michael@0 | 57 | return 1; |
michael@0 | 58 | } |
michael@0 | 59 | } |
michael@0 | 60 | |
michael@0 | 61 | /* Insertion sort using binary search --------------------------------------- */ |
michael@0 | 62 | |
michael@0 | 63 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 64 | uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, |
michael@0 | 65 | UComparator *cmp, const void *context) { |
michael@0 | 66 | int32_t start=0; |
michael@0 | 67 | UBool found=FALSE; |
michael@0 | 68 | |
michael@0 | 69 | /* Binary search until we get down to a tiny sub-array. */ |
michael@0 | 70 | while((limit-start)>=MIN_QSORT) { |
michael@0 | 71 | int32_t i=(start+limit)/2; |
michael@0 | 72 | int32_t diff=cmp(context, item, array+i*itemSize); |
michael@0 | 73 | if(diff==0) { |
michael@0 | 74 | /* |
michael@0 | 75 | * Found the item. We look for the *last* occurrence of such |
michael@0 | 76 | * an item, for stable sorting. |
michael@0 | 77 | * If we knew that there will be only few equal items, |
michael@0 | 78 | * we could break now and enter the linear search. |
michael@0 | 79 | * However, if there are many equal items, then it should be |
michael@0 | 80 | * faster to continue with the binary search. |
michael@0 | 81 | * It seems likely that we either have all unique items |
michael@0 | 82 | * (where found will never become TRUE in the insertion sort) |
michael@0 | 83 | * or potentially many duplicates. |
michael@0 | 84 | */ |
michael@0 | 85 | found=TRUE; |
michael@0 | 86 | start=i+1; |
michael@0 | 87 | } else if(diff<0) { |
michael@0 | 88 | limit=i; |
michael@0 | 89 | } else { |
michael@0 | 90 | start=i; |
michael@0 | 91 | } |
michael@0 | 92 | } |
michael@0 | 93 | |
michael@0 | 94 | /* Linear search over the remaining tiny sub-array. */ |
michael@0 | 95 | while(start<limit) { |
michael@0 | 96 | int32_t diff=cmp(context, item, array+start*itemSize); |
michael@0 | 97 | if(diff==0) { |
michael@0 | 98 | found=TRUE; |
michael@0 | 99 | } else if(diff<0) { |
michael@0 | 100 | break; |
michael@0 | 101 | } |
michael@0 | 102 | ++start; |
michael@0 | 103 | } |
michael@0 | 104 | return found ? (start-1) : ~start; |
michael@0 | 105 | } |
michael@0 | 106 | |
michael@0 | 107 | static void |
michael@0 | 108 | doInsertionSort(char *array, int32_t length, int32_t itemSize, |
michael@0 | 109 | UComparator *cmp, const void *context, void *pv) { |
michael@0 | 110 | int32_t j; |
michael@0 | 111 | |
michael@0 | 112 | for(j=1; j<length; ++j) { |
michael@0 | 113 | char *item=array+j*itemSize; |
michael@0 | 114 | int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); |
michael@0 | 115 | if(insertionPoint<0) { |
michael@0 | 116 | insertionPoint=~insertionPoint; |
michael@0 | 117 | } else { |
michael@0 | 118 | ++insertionPoint; /* one past the last equal item */ |
michael@0 | 119 | } |
michael@0 | 120 | if(insertionPoint<j) { |
michael@0 | 121 | char *dest=array+insertionPoint*itemSize; |
michael@0 | 122 | uprv_memcpy(pv, item, itemSize); /* v=array[j] */ |
michael@0 | 123 | uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*itemSize); |
michael@0 | 124 | uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */ |
michael@0 | 125 | } |
michael@0 | 126 | } |
michael@0 | 127 | } |
michael@0 | 128 | |
michael@0 | 129 | static void |
michael@0 | 130 | insertionSort(char *array, int32_t length, int32_t itemSize, |
michael@0 | 131 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
michael@0 | 132 | UAlignedMemory v[STACK_ITEM_SIZE/sizeof(UAlignedMemory)+1]; |
michael@0 | 133 | void *pv; |
michael@0 | 134 | |
michael@0 | 135 | /* allocate an intermediate item variable (v) */ |
michael@0 | 136 | if(itemSize<=STACK_ITEM_SIZE) { |
michael@0 | 137 | pv=v; |
michael@0 | 138 | } else { |
michael@0 | 139 | pv=uprv_malloc(itemSize); |
michael@0 | 140 | if(pv==NULL) { |
michael@0 | 141 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 142 | return; |
michael@0 | 143 | } |
michael@0 | 144 | } |
michael@0 | 145 | |
michael@0 | 146 | doInsertionSort(array, length, itemSize, cmp, context, pv); |
michael@0 | 147 | |
michael@0 | 148 | if(pv!=v) { |
michael@0 | 149 | uprv_free(pv); |
michael@0 | 150 | } |
michael@0 | 151 | } |
michael@0 | 152 | |
michael@0 | 153 | /* QuickSort ---------------------------------------------------------------- */ |
michael@0 | 154 | |
michael@0 | 155 | /* |
michael@0 | 156 | * This implementation is semi-recursive: |
michael@0 | 157 | * It recurses for the smaller sub-array to shorten the recursion depth, |
michael@0 | 158 | * and loops for the larger sub-array. |
michael@0 | 159 | * |
michael@0 | 160 | * Loosely after QuickSort algorithms in |
michael@0 | 161 | * Niklaus Wirth |
michael@0 | 162 | * Algorithmen und Datenstrukturen mit Modula-2 |
michael@0 | 163 | * B.G. Teubner Stuttgart |
michael@0 | 164 | * 4. Auflage 1986 |
michael@0 | 165 | * ISBN 3-519-02260-5 |
michael@0 | 166 | */ |
michael@0 | 167 | static void |
michael@0 | 168 | subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, |
michael@0 | 169 | UComparator *cmp, const void *context, |
michael@0 | 170 | void *px, void *pw) { |
michael@0 | 171 | int32_t left, right; |
michael@0 | 172 | |
michael@0 | 173 | /* start and left are inclusive, limit and right are exclusive */ |
michael@0 | 174 | do { |
michael@0 | 175 | if((start+MIN_QSORT)>=limit) { |
michael@0 | 176 | doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); |
michael@0 | 177 | break; |
michael@0 | 178 | } |
michael@0 | 179 | |
michael@0 | 180 | left=start; |
michael@0 | 181 | right=limit; |
michael@0 | 182 | |
michael@0 | 183 | /* x=array[middle] */ |
michael@0 | 184 | uprv_memcpy(px, array+((start+limit)/2)*itemSize, itemSize); |
michael@0 | 185 | |
michael@0 | 186 | do { |
michael@0 | 187 | while(/* array[left]<x */ |
michael@0 | 188 | cmp(context, array+left*itemSize, px)<0 |
michael@0 | 189 | ) { |
michael@0 | 190 | ++left; |
michael@0 | 191 | } |
michael@0 | 192 | while(/* x<array[right-1] */ |
michael@0 | 193 | cmp(context, px, array+(right-1)*itemSize)<0 |
michael@0 | 194 | ) { |
michael@0 | 195 | --right; |
michael@0 | 196 | } |
michael@0 | 197 | |
michael@0 | 198 | /* swap array[left] and array[right-1] via w; ++left; --right */ |
michael@0 | 199 | if(left<right) { |
michael@0 | 200 | --right; |
michael@0 | 201 | |
michael@0 | 202 | if(left<right) { |
michael@0 | 203 | uprv_memcpy(pw, array+left*itemSize, itemSize); |
michael@0 | 204 | uprv_memcpy(array+left*itemSize, array+right*itemSize, itemSize); |
michael@0 | 205 | uprv_memcpy(array+right*itemSize, pw, itemSize); |
michael@0 | 206 | } |
michael@0 | 207 | |
michael@0 | 208 | ++left; |
michael@0 | 209 | } |
michael@0 | 210 | } while(left<right); |
michael@0 | 211 | |
michael@0 | 212 | /* sort sub-arrays */ |
michael@0 | 213 | if((right-start)<(limit-left)) { |
michael@0 | 214 | /* sort [start..right[ */ |
michael@0 | 215 | if(start<(right-1)) { |
michael@0 | 216 | subQuickSort(array, start, right, itemSize, cmp, context, px, pw); |
michael@0 | 217 | } |
michael@0 | 218 | |
michael@0 | 219 | /* sort [left..limit[ */ |
michael@0 | 220 | start=left; |
michael@0 | 221 | } else { |
michael@0 | 222 | /* sort [left..limit[ */ |
michael@0 | 223 | if(left<(limit-1)) { |
michael@0 | 224 | subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); |
michael@0 | 225 | } |
michael@0 | 226 | |
michael@0 | 227 | /* sort [start..right[ */ |
michael@0 | 228 | limit=right; |
michael@0 | 229 | } |
michael@0 | 230 | } while(start<(limit-1)); |
michael@0 | 231 | } |
michael@0 | 232 | |
michael@0 | 233 | static void |
michael@0 | 234 | quickSort(char *array, int32_t length, int32_t itemSize, |
michael@0 | 235 | UComparator *cmp, const void *context, UErrorCode *pErrorCode) { |
michael@0 | 236 | UAlignedMemory xw[(2*STACK_ITEM_SIZE)/sizeof(UAlignedMemory)+1]; |
michael@0 | 237 | void *p; |
michael@0 | 238 | |
michael@0 | 239 | /* allocate two intermediate item variables (x and w) */ |
michael@0 | 240 | if(itemSize<=STACK_ITEM_SIZE) { |
michael@0 | 241 | p=xw; |
michael@0 | 242 | } else { |
michael@0 | 243 | p=uprv_malloc(2*itemSize); |
michael@0 | 244 | if(p==NULL) { |
michael@0 | 245 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
michael@0 | 246 | return; |
michael@0 | 247 | } |
michael@0 | 248 | } |
michael@0 | 249 | |
michael@0 | 250 | subQuickSort(array, 0, length, itemSize, |
michael@0 | 251 | cmp, context, p, (char *)p+itemSize); |
michael@0 | 252 | |
michael@0 | 253 | if(p!=xw) { |
michael@0 | 254 | uprv_free(p); |
michael@0 | 255 | } |
michael@0 | 256 | } |
michael@0 | 257 | |
michael@0 | 258 | /* uprv_sortArray() API ----------------------------------------------------- */ |
michael@0 | 259 | |
michael@0 | 260 | /* |
michael@0 | 261 | * Check arguments, select an appropriate implementation, |
michael@0 | 262 | * cast the array to char * so that array+i*itemSize works. |
michael@0 | 263 | */ |
michael@0 | 264 | U_CAPI void U_EXPORT2 |
michael@0 | 265 | uprv_sortArray(void *array, int32_t length, int32_t itemSize, |
michael@0 | 266 | UComparator *cmp, const void *context, |
michael@0 | 267 | UBool sortStable, UErrorCode *pErrorCode) { |
michael@0 | 268 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
michael@0 | 269 | return; |
michael@0 | 270 | } |
michael@0 | 271 | if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { |
michael@0 | 272 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
michael@0 | 273 | return; |
michael@0 | 274 | } |
michael@0 | 275 | |
michael@0 | 276 | if(length<=1) { |
michael@0 | 277 | return; |
michael@0 | 278 | } else if(length<MIN_QSORT || sortStable) { |
michael@0 | 279 | insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
michael@0 | 280 | } else { |
michael@0 | 281 | quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); |
michael@0 | 282 | } |
michael@0 | 283 | } |