1 /*
  2  * AlphabeticIndex.js - Represent an alphabetic index
  3  *
  4  * Copyright © 2017-2018, JEDLSoft
  5  *
  6  * Licensed under the Apache License, Version 2.0 (the "License");
  7  * you may not use this file except in compliance with the License.
  8  * You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  *
 16  * See the License for the specific language governing permissions and
 17  * limitations under the License.
 18  */
 19 
 20 // !data nfc nfkd
 21 
 22 var ilib = require("./ilib.js");
 23 var Utils = require("./Utils.js");
 24 var Locale = require("./Locale.js");
 25 var CType = require("./CType.js");
 26 var IString = require("./IString.js");
 27 var isIdeo = require("./isIdeo.js");
 28 var isAscii = require("./isAscii.js");
 29 var isDigit = require("./isDigit.js");
 30 var Collator = require("./Collator.js");
 31 var NormString = require("./NormString.js");
 32 
 33 
 34 /**
 35  * @class Create a new alphabetic index instance.
 36  *
 37  * This class handles alphabetic indexes which are collated sequences of
 38  * buckets into which elements are placed, sorted appropriate to the given
 39  * language. An example would be an index of person names in a contact
 40  * list, organized by the first letter of the family name.<p>
 41  *
 42  * Example in English:<p>
 43  *
 44  * Buckets: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z #<p>
 45  *
 46  * <pre>
 47  * A
 48  *    Adams
 49  *    Albers
 50  *    Alvarez
 51  * B
 52  *    Baker
 53  *    Banerjee
 54  *    Brunshteyn
 55  * ...
 56  * </pre>
 57  *
 58  * This class can give you the sorted list of labels to show in the UI. It can
 59  * also organize a list of string elements into buckets for each
 60  * label so that you can display the correctly sorted elements. This depends
 61  * on the {@link Collator} class to perform the sorting/collation.<p>
 62  *
 63  * The class also supports having buckets for strings before the first (underflow)
 64  * and after the last (overflow) bucket. <p>
 65  *
 66  * If you have a lot of characters that are not commonly used in the current
 67  * locale, you can add more labels for those characters as well. Elements will
 68  * match those buckets only if they have the same first character as the
 69  * bucket label.<p>
 70  *
 71  * The options object may contain any (or none) of the following properties:
 72  *
 73  * <ul>
 74  * <li><i>locale</i> - locale or localeSpec to use to parse the address. If not
 75  * specified, this function will use the current ilib locale
 76  *
 77  * <i><i>style</i> - the style of collation to use for this index.
 78  * For some locales, there are different styles of collating strings depending
 79  * on what kind of strings are being collated or what the preference of the user
 80  * is. For example, in German, there is a phonebook order and a dictionary ordering
 81  * that sort the same array of strings slightly differently. The static method
 82  * {@link Collator#getAvailableStyles} will return a list of collation styles that ilib
 83  * currently knows about for any given locale. If the value of the style option is
 84  * not recognized for a locale, it will be ignored. Default style is "standard".
 85  *
 86  * <li><i>overflowLabel</i> - the label to use for the overflow bucket.
 87  * Default: "#"
 88  *
 89  * <li><i>underflowLabel</i> - the label to use for the underflow bucket.
 90  * Default: "*"
 91  *
 92  * <li><i>onLoad</i> - a callback function to call when the address info for the
 93  * locale is fully loaded and the index is ready to be used. When the onLoad
 94  * option is given, the alphabetic index object
 95  * will attempt to load any missing locale data using the ilib loader callback.
 96  * When the constructor is done (even if the data is already preassembled), the
 97  * onLoad function is called with the current instance as a parameter, so this
 98  * callback can be used with preassembled or dynamic loading or a mix of the two.
 99  *
100  * <li><i>sync</i> - tell whether to load any missing locale data synchronously or
101  * asynchronously. If this option is given as "false", then the "onLoad"
102  * callback must be given, as the instance returned from this constructor will
103  * not be usable for a while.
104  *
105  * <li><i>loadParams</i> - an object containing parameters to pass to the
106  * loader callback function when locale data is missing. The parameters are not
107  * interpretted or modified in any way. They are simply passed along. The object
108  * may contain any property/value pairs as long as the calling code is in
109  * agreement with the loader callback function as to what those parameters mean.
110  * </ul>
111  *
112  * @constructor
113  * @param {Object} options options to the parser
114  */
115 var AlphabeticIndex = function (options) {
116     this.sync = true;
117     this.loadParams = {};
118     this.caseSensitive = false;
119     this.accentSensitive = false;
120     this.overflowLabel = "#";
121     this.underflowLabel = "*";
122     this.style = "standard";
123     this.index = [];
124 
125     if (options) {
126         if (options.locale) {
127             this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale;
128         }
129 
130         if (typeof(options.style) !== 'undefined') {
131             this.style = options.style;
132         }
133 
134         if (typeof(options.overflowLabel) !== 'undefined') {
135             this.overflowLabel = options.overflowLabel;
136         }
137 
138         if (typeof(options.underflowLabel) !== 'undefined') {
139             this.underflowLabel = options.underflowLabel;
140         }
141 
142         if (typeof(options.sync) !== 'undefined') {
143             this.sync = !!options.sync;
144         }
145         if (options.loadParams) {
146             this.loadParams = options.loadParams;
147         }
148     }
149 
150     this.locale = this.locale || new Locale();
151 
152     isAscii._init(this.sync, this.loadParams, ilib.bind(this, function() {
153         isIdeo._init(this.sync, this.loadParams, ilib.bind(this, function(){
154             isDigit._init(this.sync, this.loadParams, ilib.bind(this, function(){
155                 NormString.init({
156                     sync: this.sync,
157                     loadParam: this.loadParams,
158                     onLoad: ilib.bind(this, function() {
159                         new Collator ({
160                             locale: this.locale,
161                             useNative: false,
162                             sensitivity: "primary",
163                             usage: "search",
164                             sync: this.sync,
165                             loadParam : this.loadParams,
166                             onLoad: ilib.bind(this, function(collation) {
167                                 this.collationObj = collation;
168                                 this._init();
169                                 if (options && typeof(options.onLoad) === 'function') {
170                                     options.onLoad(this);
171                                 }
172                             })
173                         });
174                     })
175                 })
176             }));
177         }));
178     }));
179 };
180 
181 /**
182  * @private
183  */
184 AlphabeticIndex.prototype._updateCollationMap = function() {
185     this.mixedCollationMap = new Array();
186 
187     // we just loaded it, so it should already be in the cache,
188     // so we can always do sync=true
189     Utils.loadData({
190         object: "Collator",
191         locale: this.locale,
192         name: "collation.json",
193         sync: true,
194         loadParams: this.loadParams,
195         callback: ilib.bind(this, function (collations) {
196             for (var i=0; i < this.inherit.length; i++) {
197                 var collationData = {};
198 
199                 if (this.inherit[i] === "this") {
200                     collationData.style = this.style;
201                     collationData.flowBoundaries = this.flowBoundaries;
202                     collationData.indexUnits = this.indexUnits
203                     collationData.map = this.collationMap;
204                     this.mixedCollationMap.push(collationData);
205                 } else {
206                     collationData.style = this.inherit[i];
207                     collationData.flowBoundaries = collations[this.inherit[i]].flowBoundaries;
208                     collationData.indexUnits = collations[this.inherit[i]].indexUnits;
209                     collationData.map = collations[this.inherit[i]].map;
210                     this.mixedCollationMap.push(collationData);
211                 }
212             }
213         })
214     });
215 }
216 /**
217  * @private
218  */
219 AlphabeticIndex.prototype._init = function() {
220 
221     this.flowBoundaries = new Array();
222 
223     if (this.style === 'standard') {
224         this.style = this.collationObj.defaultRule;
225     }
226 
227     this.collationMap = this.collationObj.collation.map;
228     this.flowBoundaries = this.collationObj.collation.flowBoundaries;
229     this.indexUnits = this.collationObj.collation.indexUnits;
230 
231     this.inherit = this.collationObj.collation.inherit;
232 
233     if (this.inherit !== undefined) {
234         this._updateCollationMap();
235     }
236 }
237 
238 /**
239  * @private
240  */
241 AlphabeticIndex.prototype._getKeyByValue = function(value, validMapNum) {
242     var i,label;
243 
244     if (!value || (!ilib.isArray(value))) {
245         return "";
246     }
247 
248     if (this.inherit) {
249         if (validMapNum > -1) {
250             this.collationMap = this.mixedCollationMap[validMapNum].map;
251             this.indexUnits = this.mixedCollationMap[validMapNum].indexUnits;
252         }
253     }
254 
255     for (i=0; i < this.indexUnits.length; i++) {
256         if (this.collationMap[this.indexUnits[i]][0]
257             === value[0]) {
258             label = this.indexUnits[i];
259             break;
260         }
261     }
262 
263     return label;
264 };
265 
266 /**
267  * @private
268  */
269 AlphabeticIndex.prototype._getFirstChar = function(element) {
270     if (!element) {
271         return "";
272     }
273     var firstChar, normString, it;
274     var source = new NormString(element);
275 
276     normString = source.normalize("nfc");
277     it = normString.charIterator();
278     firstChar = it.next();
279 
280     if (CType.withinRange(firstChar, "hangul")) {
281         normString = source.normalize("nfkd");
282         it = normString.charIterator();
283         firstChar = it.next();
284     }
285     return firstChar;
286 };
287 
288 /**
289  * @private
290  */
291 AlphabeticIndex.prototype._getLabelIndex = function(label) {
292     if (!label) {
293         return undefined;
294     }
295 
296     var i, indexNum;
297 
298     for (i=0; i < this.index.length; i++) {
299         if (this.index[i].label === label) {
300             indexNum = i;
301             break;
302         }
303     }
304     return indexNum;
305 };
306 
307 /**
308  * Return the locale used with this instance.
309  * @return {Locale} the Locale instance for this index
310  */
311 AlphabeticIndex.prototype.getLocale = function() {
312     return this.locale;
313 };
314 
315 /**
316  * Add an element to the index. The element is added to the
317  * appropriate bucket and sorted within that bucket according
318  * to the collation for the locale set up within this index.
319  *
320  * @param {string|undefined} element the element to add
321  * @returns {string|undefined} the label of the bucket into which
322  * this element was added
323  */
324 AlphabeticIndex.prototype.addElement = function(element) {
325     if (typeof(element) !== 'string') {
326         return undefined;
327     }
328 
329     var i;
330     var label = this.getBucket(element);
331     var newItem = true;
332     var itemSet = {};
333 
334     itemSet.label = label;
335     itemSet.elements = [];
336 
337     for (i = 0; i < this.index.length; i++) {
338         if (this.index[i].label === label) {
339             if (this.index[i].elements.indexOf(element) === -1) {
340                 this.index[i].elements.push(element);
341             }
342             newItem = false;
343             break;
344         }
345     }
346 
347     if (newItem) {
348         itemSet.elements.push(element);
349         this.index.push(itemSet);
350     }
351 
352     return label;
353 };
354 
355 /**
356  * Add labels to this index for characters that are not
357  * commonly used in the current locale. These are added
358  * into the list of bucket labels at the given start
359  * index. If start is not given, or is not within the
360  * range of 0 (the overflow bucket) to N (the underflow
361  * bucket), then the default position is at the end of
362  * the list right before the underflow bucket.
363  *
364  * @param {Array.<string>} labels array of labels to add
365  * in the order you would like to see them returned
366  * @param {number=} start the position in the bucket
367  * labels list to add these new labels
368  */
369 AlphabeticIndex.prototype.addLabels = function(labels, start) {
370     var allBucketLabels = [];
371 
372     if (!labels && !ilib.isArray(labels)) {
373         return;
374     }
375 
376     allBucketLabels = this.getAllBucketLabels();
377 
378     if (!start ||
379         start > allBucketLabels.length) {
380         allBucketLabels = allBucketLabels.concat(labels);
381 
382     } else {
383         if (typeof labels === 'string') {
384             allBucketLabels.splice(start, 0, labels);
385         } else if (typeof labels === 'object') {
386             for (var j = labels.length-1; j >= 0; j--) {
387                 allBucketLabels.splice(start, 0, labels[j]);
388             }
389 
390         }
391     }
392     this.allBucketLabels = allBucketLabels;
393 };
394 
395 /**
396  * Clear all elements from the buckets. This index can be
397  * reused for a new batch of elements by clearing it
398  * first.
399  */
400 AlphabeticIndex.prototype.clear = function() {
401     for (var prop in this.index) {
402         if (this.index.hasOwnProperty(prop)){
403             this.index[prop] = "";
404         }
405     }
406 };
407 
408 /**
409  * Return a javascript hash containing all elements in
410  * the index. The hash has a property for each bucket,
411  * and the value of the property is an array of elements.
412  * Example:
413  *
414  * <code>
415  *
416  * [
417  *     {
418  *         label: "A",
419  *         elements:  [ "A", "Aachen", "Adams", ... ]
420  *     },
421  *     {
422  *         label: "B",
423  *         elements: ["B", "Baaa", "Boo"]
424  *     },
425  *     ...
426  *     {
427  *         label: "#",
428  *         elements: ["3par.com", "@handle"]
429  *     }
430  * ]
431  * </code>
432  *
433  * All elements within a bucket are sorted per the collation
434  * for the locale of this index.
435  *
436  * @returns {Object} a hash of all buckets and elements
437  * as per the description above.
438  */
439 AlphabeticIndex.prototype.getAllBuckets = function() {
440     var underflowIndex = -1, overflowIndex = -1;
441     var mixedScriptEndIndex = -1;
442     var count = 0;
443     var temp;
444     var i;
445 
446     var tempArr = [];
447     var tempIndex = [];
448     var tempBucket = {};
449     var itemIndex;
450 
451     for (i=0; i < this.index.length; i++) {
452         tempArr.push(this.index[i].label);
453     }
454 
455     tempArr.sort(ilib.bind(this.collationObj, this.collationObj.compare));
456 
457     for (i=0; i < tempArr.length; i++) {
458         tempBucket={};
459         tempBucket.label = tempArr[i];
460         itemIndex = this._getLabelIndex(tempArr[i]);
461 
462         this.index[itemIndex].elements.sort(ilib.bind(this.collationObj, this.collationObj.compare));
463         tempBucket.elements = this.index[itemIndex].elements;
464         tempIndex[i] = tempBucket;
465         tempBucket={};
466     }
467 
468     this.index = tempIndex;
469 
470     for (i=0; i < this.index.length; i++) {
471         if (this.inherit &&
472             this.mixedCollationMap[0].indexUnits.indexOf(this.index[i].label) === -1) {
473             mixedScriptEndIndex = i;
474             count++;
475         }
476     }
477 
478     if (this.inherit && count > 0) {
479         temp = this.index.splice((mixedScriptEndIndex - count) +1 , count);
480         this.index = this.index.concat(temp);
481     }
482 
483     for (i=0; i < this.index.length; i++) {
484         if (this.index[i].label === this.underflowLabel) {
485             underflowIndex = i
486             break;
487         }
488     }
489 
490     if (underflowIndex > 0) {
491         temp = this.index.splice(underflowIndex,1)[0];
492         this.index.unshift(temp);
493     }
494 
495     for (i=0; i < this.index.length; i++) {
496         if (this.index[i].label === this.overflowLabel) {
497             overflowIndex = i
498             break;
499         }
500     }
501 
502     if (overflowIndex > -1) {
503         temp = this.index.splice(overflowIndex,1)[0];
504         this.index.push(temp);
505     }
506 
507     return this.index;
508 };
509 
510 /**
511  * Return the label of the bucket for a given element. This
512  * follows the rules set up when the index was instantiated to
513  * find the bucket into which the element would go if it
514  * were added to this index. The element is not added to
515  * the index, however. (See addElement for that.)
516  *
517  * @param {string|undefined} element the element to check
518  * @returns {string|undefined} the label for the bucket for this element
519  */
520 AlphabeticIndex.prototype.getBucket = function(element) {
521     var label;
522     var firstChar;
523     var collationValue;
524     var charNum, firstBoundaryChar, endBoundaryChar, firstCharNum, endCharNum;
525     var i, validMapNum = -1;
526 
527     if (!element) {
528         return undefined;
529     }
530 
531     firstChar = this._getFirstChar(element);
532 
533     if (this.inherit) {
534         for (i = 0; i < this.mixedCollationMap.length; i++) {
535             if (this.mixedCollationMap[i].map[firstChar]) {
536                 collationValue = this.mixedCollationMap[i].map[firstChar];
537                 validMapNum = i;
538                 this.flowBoundaries = this.mixedCollationMap[validMapNum].flowBoundaries;
539                 this.indexUnits = this.mixedCollationMap[validMapNum].indexUnits;
540                 break;
541             }
542         }
543     } else {
544         collationValue = this.collationMap[firstChar];
545     }
546 
547     if (collationValue) {
548         if (typeof collationValue[0] === 'number') {
549             if (collationValue[0] < this.flowBoundaries[0]) {
550                 label = this.underflowLabel;
551             } else if (collationValue[0] > this.flowBoundaries[1]){
552                 label = this.overflowLabel;
553             } else {
554                 label = this._getKeyByValue(collationValue, validMapNum);
555             }
556         } else if (typeof collationValue[0] === 'object') {
557             label = this._getKeyByValue(collationValue[0], validMapNum);
558         }
559     } else {
560         charNum = IString.toCodePoint(firstChar, 0);
561 
562         if (this.inherit) {
563             for (i = 0; i < this.inherit.length; i++) {
564                 firstBoundaryChar = this._getKeyByValue([this.mixedCollationMap[i].flowBoundaries[0]], i);
565                 firstCharNum = IString.toCodePoint(firstBoundaryChar, 0);
566 
567                 if (charNum < firstCharNum) {
568                     continue;
569                 } else {
570                     break;
571                 }
572             }
573             label = ((i === this.inherit.length)? this.underflowLabel: this.overflowLabel)
574         } else {
575             firstBoundaryChar = this._getKeyByValue([this.flowBoundaries[0]], 0);
576             endBoundaryChar = this._getKeyByValue([this.flowBoundaries[1]], 0);
577 
578             firstCharNum = IString.toCodePoint(firstBoundaryChar, 0);
579             endCharNum = IString.toCodePoint(endBoundaryChar, 0);
580 
581             if (charNum < firstCharNum) {
582                 label = this.underflowLabel;
583             } else if (charNum > endCharNum) {
584                 label = this.overflowLabel;
585             } else {
586                 label = this.overflowLabel;
587             }
588         }
589     }
590     return label;
591 };
592 
593 /**
594  * Return default indexing style in the current locale.
595  * @returns {string} the default indexing style for this locale.
596  */
597 AlphabeticIndex.prototype.getIndexStyle = function() {
598     return this.style;
599 };
600 
601 /**
602  * Return the total number of buckets in this index.
603  * @returns {number} the number of buckets in this index
604  */
605 AlphabeticIndex.prototype.getBucketCount = function() {
606     var count = Object.keys(this.index).length;
607     return count;
608 };
609 
610 /**
611  * Return the bucket labels for this index in order. This
612  * method only returns the index labels for buckets that
613  * actually contain elements. This
614  * will include the under- and overflow labels if
615  * they are used in this index.
616  *
617  * @returns {Array.<string>} the array of bucket labels
618  * for this index in collation order
619  */
620 AlphabeticIndex.prototype.getBucketLabels = function() {
621     return this.getAllBuckets().map(function(bucket){
622         return bucket.label;
623     })
624 };
625 
626 /**
627  * Return the all the bucket labels typically used in the
628  * locale. This includes all bucket labels, even if those
629  * buckets do not contain any elements.
630  *
631  * @returns {Array.<string>} the array of bucket labels
632  * for this index in collation order
633  */
634 AlphabeticIndex.prototype.getAllBucketLabels = function() {
635     if (this.allBucketLabels) {
636         return this.allBucketLabels;
637     }
638 
639     this.allBucketLabels = new Array();
640     this.allBucketLabels = [this.underflowLabel].concat(this.indexUnits, this.overflowLabel);
641     return this.allBucketLabels;
642 };
643 
644 /**
645  * Return the collator used to sort elements in this
646  * index.
647  *
648  * @return {Collator} the ilib Collator instance used
649  * in this index
650  */
651 AlphabeticIndex.prototype.getCollator = function() {
652     return this.collationObj;
653 };
654 
655 /**
656  * Get the default label used in the for overflow bucket.
657  * This is the first item in a list. eg. ... A B C
658  *
659  * @return {string} the overflow bucket label
660  */
661 AlphabeticIndex.prototype.getOverflowLabel = function() {
662     return this.overflowLabel;
663 };
664 
665 /**
666  * Return the total number of elements in the index. This includes
667  * all elements across all buckets.
668  *
669  * @returns {number} The number of elements in the index
670  */
671 AlphabeticIndex.prototype.getElementCount = function() {
672     var buckets = this.index;
673     var i, count = 0;
674 
675     for (i=0; i < buckets.length; i++) {
676         count += buckets[i].elements.length;
677     }
678 
679     return count;
680 };
681 
682 /**
683  * Get the default label used in underflow,
684  * This is the last item in a list. eg. the last
685  * item in: X Y Z #
686  *
687  * @returns {string} the label used for underflow elements
688  */
689 AlphabeticIndex.prototype.getUnderflowLabel = function() {
690     return this.underflowLabel;
691 };
692 
693 /**
694  * Set the overflow bucket label.
695  *
696  * @param {string} overflowLabel the label to use for the overflow buckets
697  */
698 AlphabeticIndex.prototype.setOverflowLabel = function(overflowLabel) {
699     this.overflowLabel = overflowLabel;
700 };
701 
702 /**
703  * Set the underflow bucket label.
704  *
705  * @param {string} underflowLabel the label to use for the underflow buckets
706  */
707 AlphabeticIndex.prototype.setUnderflowLabel = function(underflowLabel) {
708     this.underflowLabel = underflowLabel;
709 };
710 
711 module.exports = AlphabeticIndex;
712