1 /* 2 * AlphabeticIndex.js - Represent an alphabetic index 3 * 4 * Copyright © 2017-2018, JEDLSoft 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20 // !data nfc nfkd 21 22 var ilib = require("./ilib.js"); 23 var Utils = require("./Utils.js"); 24 var Locale = require("./Locale.js"); 25 var CType = require("./CType.js"); 26 var IString = require("./IString.js"); 27 var isIdeo = require("./isIdeo.js"); 28 var isAscii = require("./isAscii.js"); 29 var isDigit = require("./isDigit.js"); 30 var Collator = require("./Collator.js"); 31 var NormString = require("./NormString.js"); 32 33 34 /** 35 * @class Create a new alphabetic index instance. 36 * 37 * This class handles alphabetic indexes which are collated sequences of 38 * buckets into which elements are placed, sorted appropriate to the given 39 * language. An example would be an index of person names in a contact 40 * list, organized by the first letter of the family name.<p> 41 * 42 * Example in English:<p> 43 * 44 * Buckets: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z #<p> 45 * 46 * <pre> 47 * A 48 * Adams 49 * Albers 50 * Alvarez 51 * B 52 * Baker 53 * Banerjee 54 * Brunshteyn 55 * ... 56 * </pre> 57 * 58 * This class can give you the sorted list of labels to show in the UI. It can 59 * also organize a list of string elements into buckets for each 60 * label so that you can display the correctly sorted elements. This depends 61 * on the {@link Collator} class to perform the sorting/collation.<p> 62 * 63 * The class also supports having buckets for strings before the first (underflow) 64 * and after the last (overflow) bucket. <p> 65 * 66 * If you have a lot of characters that are not commonly used in the current 67 * locale, you can add more labels for those characters as well. Elements will 68 * match those buckets only if they have the same first character as the 69 * bucket label.<p> 70 * 71 * The options object may contain any (or none) of the following properties: 72 * 73 * <ul> 74 * <li><i>locale</i> - locale or localeSpec to use to parse the address. If not 75 * specified, this function will use the current ilib locale 76 * 77 * <i><i>style</i> - the style of collation to use for this index. 78 * For some locales, there are different styles of collating strings depending 79 * on what kind of strings are being collated or what the preference of the user 80 * is. For example, in German, there is a phonebook order and a dictionary ordering 81 * that sort the same array of strings slightly differently. The static method 82 * {@link Collator#getAvailableStyles} will return a list of collation styles that ilib 83 * currently knows about for any given locale. If the value of the style option is 84 * not recognized for a locale, it will be ignored. Default style is "standard". 85 * 86 * <li><i>overflowLabel</i> - the label to use for the overflow bucket. 87 * Default: "#" 88 * 89 * <li><i>underflowLabel</i> - the label to use for the underflow bucket. 90 * Default: "*" 91 * 92 * <li><i>onLoad</i> - a callback function to call when the address info for the 93 * locale is fully loaded and the index is ready to be used. When the onLoad 94 * option is given, the alphabetic index object 95 * will attempt to load any missing locale data using the ilib loader callback. 96 * When the constructor is done (even if the data is already preassembled), the 97 * onLoad function is called with the current instance as a parameter, so this 98 * callback can be used with preassembled or dynamic loading or a mix of the two. 99 * 100 * <li><i>sync</i> - tell whether to load any missing locale data synchronously or 101 * asynchronously. If this option is given as "false", then the "onLoad" 102 * callback must be given, as the instance returned from this constructor will 103 * not be usable for a while. 104 * 105 * <li><i>loadParams</i> - an object containing parameters to pass to the 106 * loader callback function when locale data is missing. The parameters are not 107 * interpretted or modified in any way. They are simply passed along. The object 108 * may contain any property/value pairs as long as the calling code is in 109 * agreement with the loader callback function as to what those parameters mean. 110 * </ul> 111 * 112 * @constructor 113 * @param {Object} options options to the parser 114 */ 115 var AlphabeticIndex = function (options) { 116 this.sync = true; 117 this.loadParams = {}; 118 this.caseSensitive = false; 119 this.accentSensitive = false; 120 this.overflowLabel = "#"; 121 this.underflowLabel = "*"; 122 this.style = "standard"; 123 this.index = []; 124 125 if (options) { 126 if (options.locale) { 127 this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale; 128 } 129 130 if (typeof(options.style) !== 'undefined') { 131 this.style = options.style; 132 } 133 134 if (typeof(options.overflowLabel) !== 'undefined') { 135 this.overflowLabel = options.overflowLabel; 136 } 137 138 if (typeof(options.underflowLabel) !== 'undefined') { 139 this.underflowLabel = options.underflowLabel; 140 } 141 142 if (typeof(options.sync) !== 'undefined') { 143 this.sync = !!options.sync; 144 } 145 if (options.loadParams) { 146 this.loadParams = options.loadParams; 147 } 148 } 149 150 this.locale = this.locale || new Locale(); 151 152 isAscii._init(this.sync, this.loadParams, ilib.bind(this, function() { 153 isIdeo._init(this.sync, this.loadParams, ilib.bind(this, function(){ 154 isDigit._init(this.sync, this.loadParams, ilib.bind(this, function(){ 155 NormString.init({ 156 sync: this.sync, 157 loadParam: this.loadParams, 158 onLoad: ilib.bind(this, function() { 159 new Collator ({ 160 locale: this.locale, 161 useNative: false, 162 sensitivity: "primary", 163 usage: "search", 164 sync: this.sync, 165 loadParam : this.loadParams, 166 onLoad: ilib.bind(this, function(collation) { 167 this.collationObj = collation; 168 this._init(); 169 if (options && typeof(options.onLoad) === 'function') { 170 options.onLoad(this); 171 } 172 }) 173 }); 174 }) 175 }) 176 })); 177 })); 178 })); 179 }; 180 181 /** 182 * @private 183 */ 184 AlphabeticIndex.prototype._updateCollationMap = function() { 185 this.mixedCollationMap = new Array(); 186 187 // we just loaded it, so it should already be in the cache, 188 // so we can always do sync=true 189 Utils.loadData({ 190 object: "Collator", 191 locale: this.locale, 192 name: "collation.json", 193 sync: true, 194 loadParams: this.loadParams, 195 callback: ilib.bind(this, function (collations) { 196 for (var i=0; i < this.inherit.length; i++) { 197 var collationData = {}; 198 199 if (this.inherit[i] === "this") { 200 collationData.style = this.style; 201 collationData.flowBoundaries = this.flowBoundaries; 202 collationData.indexUnits = this.indexUnits 203 collationData.map = this.collationMap; 204 this.mixedCollationMap.push(collationData); 205 } else { 206 collationData.style = this.inherit[i]; 207 collationData.flowBoundaries = collations[this.inherit[i]].flowBoundaries; 208 collationData.indexUnits = collations[this.inherit[i]].indexUnits; 209 collationData.map = collations[this.inherit[i]].map; 210 this.mixedCollationMap.push(collationData); 211 } 212 } 213 }) 214 }); 215 } 216 /** 217 * @private 218 */ 219 AlphabeticIndex.prototype._init = function() { 220 221 this.flowBoundaries = new Array(); 222 223 if (this.style === 'standard') { 224 this.style = this.collationObj.defaultRule; 225 } 226 227 this.collationMap = this.collationObj.collation.map; 228 this.flowBoundaries = this.collationObj.collation.flowBoundaries; 229 this.indexUnits = this.collationObj.collation.indexUnits; 230 231 this.inherit = this.collationObj.collation.inherit; 232 233 if (this.inherit !== undefined) { 234 this._updateCollationMap(); 235 } 236 } 237 238 /** 239 * @private 240 */ 241 AlphabeticIndex.prototype._getKeyByValue = function(value, validMapNum) { 242 var i,label; 243 244 if (!value || (!ilib.isArray(value))) { 245 return ""; 246 } 247 248 if (this.inherit) { 249 if (validMapNum > -1) { 250 this.collationMap = this.mixedCollationMap[validMapNum].map; 251 this.indexUnits = this.mixedCollationMap[validMapNum].indexUnits; 252 } 253 } 254 255 for (i=0; i < this.indexUnits.length; i++) { 256 if (this.collationMap[this.indexUnits[i]][0] 257 === value[0]) { 258 label = this.indexUnits[i]; 259 break; 260 } 261 } 262 263 return label; 264 }; 265 266 /** 267 * @private 268 */ 269 AlphabeticIndex.prototype._getFirstChar = function(element) { 270 if (!element) { 271 return ""; 272 } 273 var firstChar, normString, it; 274 var source = new NormString(element); 275 276 normString = source.normalize("nfc"); 277 it = normString.charIterator(); 278 firstChar = it.next(); 279 280 if (CType.withinRange(firstChar, "hangul")) { 281 normString = source.normalize("nfkd"); 282 it = normString.charIterator(); 283 firstChar = it.next(); 284 } 285 return firstChar; 286 }; 287 288 /** 289 * @private 290 */ 291 AlphabeticIndex.prototype._getLabelIndex = function(label) { 292 if (!label) { 293 return undefined; 294 } 295 296 var i, indexNum; 297 298 for (i=0; i < this.index.length; i++) { 299 if (this.index[i].label === label) { 300 indexNum = i; 301 break; 302 } 303 } 304 return indexNum; 305 }; 306 307 /** 308 * Return the locale used with this instance. 309 * @return {Locale} the Locale instance for this index 310 */ 311 AlphabeticIndex.prototype.getLocale = function() { 312 return this.locale; 313 }; 314 315 /** 316 * Add an element to the index. The element is added to the 317 * appropriate bucket and sorted within that bucket according 318 * to the collation for the locale set up within this index. 319 * 320 * @param {string|undefined} element the element to add 321 * @returns {string|undefined} the label of the bucket into which 322 * this element was added 323 */ 324 AlphabeticIndex.prototype.addElement = function(element) { 325 if (typeof(element) !== 'string') { 326 return undefined; 327 } 328 329 var i; 330 var label = this.getBucket(element); 331 var newItem = true; 332 var itemSet = {}; 333 334 itemSet.label = label; 335 itemSet.elements = []; 336 337 for (i = 0; i < this.index.length; i++) { 338 if (this.index[i].label === label) { 339 if (this.index[i].elements.indexOf(element) === -1) { 340 this.index[i].elements.push(element); 341 } 342 newItem = false; 343 break; 344 } 345 } 346 347 if (newItem) { 348 itemSet.elements.push(element); 349 this.index.push(itemSet); 350 } 351 352 return label; 353 }; 354 355 /** 356 * Add labels to this index for characters that are not 357 * commonly used in the current locale. These are added 358 * into the list of bucket labels at the given start 359 * index. If start is not given, or is not within the 360 * range of 0 (the overflow bucket) to N (the underflow 361 * bucket), then the default position is at the end of 362 * the list right before the underflow bucket. 363 * 364 * @param {Array.<string>} labels array of labels to add 365 * in the order you would like to see them returned 366 * @param {number=} start the position in the bucket 367 * labels list to add these new labels 368 */ 369 AlphabeticIndex.prototype.addLabels = function(labels, start) { 370 var allBucketLabels = []; 371 372 if (!labels && !ilib.isArray(labels)) { 373 return; 374 } 375 376 allBucketLabels = this.getAllBucketLabels(); 377 378 if (!start || 379 start > allBucketLabels.length) { 380 allBucketLabels = allBucketLabels.concat(labels); 381 382 } else { 383 if (typeof labels === 'string') { 384 allBucketLabels.splice(start, 0, labels); 385 } else if (typeof labels === 'object') { 386 for (var j = labels.length-1; j >= 0; j--) { 387 allBucketLabels.splice(start, 0, labels[j]); 388 } 389 390 } 391 } 392 this.allBucketLabels = allBucketLabels; 393 }; 394 395 /** 396 * Clear all elements from the buckets. This index can be 397 * reused for a new batch of elements by clearing it 398 * first. 399 */ 400 AlphabeticIndex.prototype.clear = function() { 401 for (var prop in this.index) { 402 if (this.index.hasOwnProperty(prop)){ 403 this.index[prop] = ""; 404 } 405 } 406 }; 407 408 /** 409 * Return a javascript hash containing all elements in 410 * the index. The hash has a property for each bucket, 411 * and the value of the property is an array of elements. 412 * Example: 413 * 414 * <code> 415 * 416 * [ 417 * { 418 * label: "A", 419 * elements: [ "A", "Aachen", "Adams", ... ] 420 * }, 421 * { 422 * label: "B", 423 * elements: ["B", "Baaa", "Boo"] 424 * }, 425 * ... 426 * { 427 * label: "#", 428 * elements: ["3par.com", "@handle"] 429 * } 430 * ] 431 * </code> 432 * 433 * All elements within a bucket are sorted per the collation 434 * for the locale of this index. 435 * 436 * @returns {Object} a hash of all buckets and elements 437 * as per the description above. 438 */ 439 AlphabeticIndex.prototype.getAllBuckets = function() { 440 var underflowIndex = -1, overflowIndex = -1; 441 var mixedScriptEndIndex = -1; 442 var count = 0; 443 var temp; 444 var i; 445 446 var tempArr = []; 447 var tempIndex = []; 448 var tempBucket = {}; 449 var itemIndex; 450 451 for (i=0; i < this.index.length; i++) { 452 tempArr.push(this.index[i].label); 453 } 454 455 tempArr.sort(ilib.bind(this.collationObj, this.collationObj.compare)); 456 457 for (i=0; i < tempArr.length; i++) { 458 tempBucket={}; 459 tempBucket.label = tempArr[i]; 460 itemIndex = this._getLabelIndex(tempArr[i]); 461 462 this.index[itemIndex].elements.sort(ilib.bind(this.collationObj, this.collationObj.compare)); 463 tempBucket.elements = this.index[itemIndex].elements; 464 tempIndex[i] = tempBucket; 465 tempBucket={}; 466 } 467 468 this.index = tempIndex; 469 470 for (i=0; i < this.index.length; i++) { 471 if (this.inherit && 472 this.mixedCollationMap[0].indexUnits.indexOf(this.index[i].label) === -1) { 473 mixedScriptEndIndex = i; 474 count++; 475 } 476 } 477 478 if (this.inherit && count > 0) { 479 temp = this.index.splice((mixedScriptEndIndex - count) +1 , count); 480 this.index = this.index.concat(temp); 481 } 482 483 for (i=0; i < this.index.length; i++) { 484 if (this.index[i].label === this.underflowLabel) { 485 underflowIndex = i 486 break; 487 } 488 } 489 490 if (underflowIndex > 0) { 491 temp = this.index.splice(underflowIndex,1)[0]; 492 this.index.unshift(temp); 493 } 494 495 for (i=0; i < this.index.length; i++) { 496 if (this.index[i].label === this.overflowLabel) { 497 overflowIndex = i 498 break; 499 } 500 } 501 502 if (overflowIndex > -1) { 503 temp = this.index.splice(overflowIndex,1)[0]; 504 this.index.push(temp); 505 } 506 507 return this.index; 508 }; 509 510 /** 511 * Return the label of the bucket for a given element. This 512 * follows the rules set up when the index was instantiated to 513 * find the bucket into which the element would go if it 514 * were added to this index. The element is not added to 515 * the index, however. (See addElement for that.) 516 * 517 * @param {string|undefined} element the element to check 518 * @returns {string|undefined} the label for the bucket for this element 519 */ 520 AlphabeticIndex.prototype.getBucket = function(element) { 521 var label; 522 var firstChar; 523 var collationValue; 524 var charNum, firstBoundaryChar, endBoundaryChar, firstCharNum, endCharNum; 525 var i, validMapNum = -1; 526 527 if (!element) { 528 return undefined; 529 } 530 531 firstChar = this._getFirstChar(element); 532 533 if (this.inherit) { 534 for (i = 0; i < this.mixedCollationMap.length; i++) { 535 if (this.mixedCollationMap[i].map[firstChar]) { 536 collationValue = this.mixedCollationMap[i].map[firstChar]; 537 validMapNum = i; 538 this.flowBoundaries = this.mixedCollationMap[validMapNum].flowBoundaries; 539 this.indexUnits = this.mixedCollationMap[validMapNum].indexUnits; 540 break; 541 } 542 } 543 } else { 544 collationValue = this.collationMap[firstChar]; 545 } 546 547 if (collationValue) { 548 if (typeof collationValue[0] === 'number') { 549 if (collationValue[0] < this.flowBoundaries[0]) { 550 label = this.underflowLabel; 551 } else if (collationValue[0] > this.flowBoundaries[1]){ 552 label = this.overflowLabel; 553 } else { 554 label = this._getKeyByValue(collationValue, validMapNum); 555 } 556 } else if (typeof collationValue[0] === 'object') { 557 label = this._getKeyByValue(collationValue[0], validMapNum); 558 } 559 } else { 560 charNum = IString.toCodePoint(firstChar, 0); 561 562 if (this.inherit) { 563 for (i = 0; i < this.inherit.length; i++) { 564 firstBoundaryChar = this._getKeyByValue([this.mixedCollationMap[i].flowBoundaries[0]], i); 565 firstCharNum = IString.toCodePoint(firstBoundaryChar, 0); 566 567 if (charNum < firstCharNum) { 568 continue; 569 } else { 570 break; 571 } 572 } 573 label = ((i === this.inherit.length)? this.underflowLabel: this.overflowLabel) 574 } else { 575 firstBoundaryChar = this._getKeyByValue([this.flowBoundaries[0]], 0); 576 endBoundaryChar = this._getKeyByValue([this.flowBoundaries[1]], 0); 577 578 firstCharNum = IString.toCodePoint(firstBoundaryChar, 0); 579 endCharNum = IString.toCodePoint(endBoundaryChar, 0); 580 581 if (charNum < firstCharNum) { 582 label = this.underflowLabel; 583 } else if (charNum > endCharNum) { 584 label = this.overflowLabel; 585 } else { 586 label = this.overflowLabel; 587 } 588 } 589 } 590 return label; 591 }; 592 593 /** 594 * Return default indexing style in the current locale. 595 * @returns {string} the default indexing style for this locale. 596 */ 597 AlphabeticIndex.prototype.getIndexStyle = function() { 598 return this.style; 599 }; 600 601 /** 602 * Return the total number of buckets in this index. 603 * @returns {number} the number of buckets in this index 604 */ 605 AlphabeticIndex.prototype.getBucketCount = function() { 606 var count = Object.keys(this.index).length; 607 return count; 608 }; 609 610 /** 611 * Return the bucket labels for this index in order. This 612 * method only returns the index labels for buckets that 613 * actually contain elements. This 614 * will include the under- and overflow labels if 615 * they are used in this index. 616 * 617 * @returns {Array.<string>} the array of bucket labels 618 * for this index in collation order 619 */ 620 AlphabeticIndex.prototype.getBucketLabels = function() { 621 return this.getAllBuckets().map(function(bucket){ 622 return bucket.label; 623 }) 624 }; 625 626 /** 627 * Return the all the bucket labels typically used in the 628 * locale. This includes all bucket labels, even if those 629 * buckets do not contain any elements. 630 * 631 * @returns {Array.<string>} the array of bucket labels 632 * for this index in collation order 633 */ 634 AlphabeticIndex.prototype.getAllBucketLabels = function() { 635 if (this.allBucketLabels) { 636 return this.allBucketLabels; 637 } 638 639 this.allBucketLabels = new Array(); 640 this.allBucketLabels = [this.underflowLabel].concat(this.indexUnits, this.overflowLabel); 641 return this.allBucketLabels; 642 }; 643 644 /** 645 * Return the collator used to sort elements in this 646 * index. 647 * 648 * @return {Collator} the ilib Collator instance used 649 * in this index 650 */ 651 AlphabeticIndex.prototype.getCollator = function() { 652 return this.collationObj; 653 }; 654 655 /** 656 * Get the default label used in the for overflow bucket. 657 * This is the first item in a list. eg. ... A B C 658 * 659 * @return {string} the overflow bucket label 660 */ 661 AlphabeticIndex.prototype.getOverflowLabel = function() { 662 return this.overflowLabel; 663 }; 664 665 /** 666 * Return the total number of elements in the index. This includes 667 * all elements across all buckets. 668 * 669 * @returns {number} The number of elements in the index 670 */ 671 AlphabeticIndex.prototype.getElementCount = function() { 672 var buckets = this.index; 673 var i, count = 0; 674 675 for (i=0; i < buckets.length; i++) { 676 count += buckets[i].elements.length; 677 } 678 679 return count; 680 }; 681 682 /** 683 * Get the default label used in underflow, 684 * This is the last item in a list. eg. the last 685 * item in: X Y Z # 686 * 687 * @returns {string} the label used for underflow elements 688 */ 689 AlphabeticIndex.prototype.getUnderflowLabel = function() { 690 return this.underflowLabel; 691 }; 692 693 /** 694 * Set the overflow bucket label. 695 * 696 * @param {string} overflowLabel the label to use for the overflow buckets 697 */ 698 AlphabeticIndex.prototype.setOverflowLabel = function(overflowLabel) { 699 this.overflowLabel = overflowLabel; 700 }; 701 702 /** 703 * Set the underflow bucket label. 704 * 705 * @param {string} underflowLabel the label to use for the underflow buckets 706 */ 707 AlphabeticIndex.prototype.setUnderflowLabel = function(underflowLabel) { 708 this.underflowLabel = underflowLabel; 709 }; 710 711 module.exports = AlphabeticIndex; 712