1 /*
  2  * LocaleMatcher.js - Locale matcher definition
  3  *
  4  * Copyright © 2013-2015, 2018, JEDLSoft
  5  *
  6  * Licensed under the Apache License, Version 2.0 (the "License");
  7  * you may not use this file except in compliance with the License.
  8  * You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  *
 16  * See the License for the specific language governing permissions and
 17  * limitations under the License.
 18  */
 19 
 20 // !data localematch
 21 
 22 var ilib = require("./ilib.js");
 23 var Utils = require("./Utils.js");
 24 var Locale = require("./Locale.js");
 25 
 26 var componentWeights = [
 27     0.5,   // language
 28     0.2,   // script
 29     0.25,  // region
 30     0.05   // variant
 31 ];
 32 
 33 /**
 34  * @class
 35  * Create a new locale matcher instance. This is used
 36  * to see which locales can be matched with each other in
 37  * various ways.<p>
 38  *
 39  * The options object may contain any of the following properties:
 40  *
 41  * <ul>
 42  * <li><i>locale</i> - the locale instance or locale spec to match
 43  *
 44  * <li><i>onLoad</i> - a callback function to call when the locale matcher object is fully
 45  * loaded. When the onLoad option is given, the locale matcher object will attempt to
 46  * load any missing locale data using the ilib loader callback.
 47  * When the constructor is done (even if the data is already preassembled), the
 48  * onLoad function is called with the current instance as a parameter, so this
 49  * callback can be used with preassembled or dynamic loading or a mix of the two.
 50  *
 51  * <li><i>sync</i> - tell whether to load any missing locale data synchronously or
 52  * asynchronously. If this option is given as "false", then the "onLoad"
 53  * callback must be given, as the instance returned from this constructor will
 54  * not be usable for a while.
 55  *
 56  * <li><i>loadParams</i> - an object containing parameters to pass to the
 57  * loader callback function when locale data is missing. The parameters are not
 58  * interpretted or modified in any way. They are simply passed along. The object
 59  * may contain any property/value pairs as long as the calling code is in
 60  * agreement with the loader callback function as to what those parameters mean.
 61  * </ul>
 62  *
 63  *
 64  * @constructor
 65  * @param {Object} options parameters to initialize this matcher
 66  */
 67 var LocaleMatcher = function(options) {
 68     var sync = true,
 69         loadParams = undefined;
 70 
 71     this.locale = new Locale();
 72 
 73     if (options) {
 74         if (typeof(options.locale) !== 'undefined') {
 75             this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale;
 76         }
 77 
 78         if (typeof(options.sync) !== 'undefined') {
 79             sync = !!options.sync;
 80         }
 81 
 82         if (typeof(options.loadParams) !== 'undefined') {
 83             loadParams = options.loadParams;
 84         }
 85     }
 86 
 87     if (typeof(ilib.data.localematch) === 'undefined') {
 88         Utils.loadData({
 89             object: "LocaleMatcher",
 90             locale: "-",
 91             name: "localematch.json",
 92             sync: sync,
 93             loadParams: loadParams,
 94             callback: ilib.bind(this, function (info) {
 95                 if (!info) {
 96                     info = {};
 97                 }
 98                 /** @type {Object.<string,string>} */
 99                 this.info = info;
100                 if (options && typeof(options.onLoad) === 'function') {
101                     options.onLoad(this);
102                 }
103             })
104         });
105     } else {
106         this.info = ilib.data.localematch;
107         if (options && typeof(options.onLoad) === 'function') {
108             options.onLoad(this);
109         }
110     }
111 };
112 
113 
114 LocaleMatcher.prototype = {
115     /**
116      * Return the locale used to construct this instance.
117      * @return {Locale|undefined} the locale for this matcher
118      */
119     getLocale: function() {
120         return this.locale;
121     },
122 
123     /**
124      * @private
125      * Do the work
126      */
127     _getLikelyLocale: function(locale) {
128         // already full specified
129         if (locale.language && locale.script && locale.region) return locale;
130 
131         if (typeof(this.info.likelyLocales[locale.getSpec()]) === 'undefined') {
132             // try various partials before giving up
133             var partial = this.info.likelyLocales[new Locale(locale.language, undefined, locale.region).getSpec()];
134             if (typeof(partial) !== 'undefined') return new Locale(partial);
135 
136             partial = this.info.likelyLocales[new Locale(locale.language, locale.script, undefined).getSpec()];
137             if (typeof(partial) !== 'undefined') return new Locale(partial);
138 
139             partial = this.info.likelyLocales[new Locale(locale.language, undefined, undefined).getSpec()];
140             if (typeof(partial) !== 'undefined') return new Locale(partial);
141 
142             partial = this.info.likelyLocales[new Locale(undefined, locale.script, locale.region).getSpec()];
143             if (typeof(partial) !== 'undefined') return new Locale(partial);
144 
145             partial = this.info.likelyLocales[new Locale(undefined, undefined, locale.region).getSpec()];
146             if (typeof(partial) !== 'undefined') return new Locale(partial);
147 
148             partial = this.info.likelyLocales[new Locale(undefined, locale.script, undefined).getSpec()];
149             if (typeof(partial) !== 'undefined') return new Locale(partial);
150 
151             return locale;
152         }
153 
154         return new Locale(this.info.likelyLocales[locale.getSpec()]);
155     },
156 
157     /**
158      * Return an Locale instance that is fully specified based on partial information
159      * given to the constructor of this locale matcher instance. For example, if the locale
160      * spec given to this locale matcher instance is simply "ru" (for the Russian language),
161      * then it will fill in the missing region and script tags and return a locale with
162      * the specifier "ru-Cyrl-RU". (ie. Russian language, Cyrillic, Russian Federation).
163      * Any one or two of the language, script, or region parts may be left unspecified,
164      * and the other one or two parts will be filled in automatically. If this
165      * class has no information about the given locale, then the locale of this
166      * locale matcher instance is returned unchanged.
167      *
168      * @returns {Locale} the most likely completion of the partial locale given
169      * to the constructor of this locale matcher instance
170      */
171     getLikelyLocale: function () {
172         return this._getLikelyLocale(this.locale);
173     },
174 
175     /**
176      * Return the degree that the given locale matches the current locale of this
177      * matcher. This method returns an integer from 0 to 100. A value of 100 is
178      * a 100% match, meaning that the two locales are exactly equivalent to each
179      * other. (eg. "ja-JP" and "ja-JP") A value of 0 means that there 0% match or
180      * that the two locales have nothing in common. (eg. "en-US" and "ja-JP") <p>
181      *
182      * Locale matching is not the same as equivalence, as the degree of matching
183      * is returned. (See Locale.equals for equivalence.)<p>
184      *
185      * The match score is calculated based on matching the 4 locale components,
186      * weighted by importance:
187      *
188      * <ul>
189      * <li> language - this accounts for 50% of the match score
190      * <li> region - accounts for 25% of the match score
191      * <li> script - accounts for 20% of the match score
192      * <li> variant - accounts for 5% of the match score
193      * </ul>
194      *
195      * The score is affected by the following things:
196      *
197      * <ul>
198      * <li> A large language score is given when the language components of the locales
199      * match exactly.
200      * <li> Higher language scores are given when the languages are linguistically
201      * close to each other, such as dialects.
202      * <li> A small score is given when two languages are in the same
203      * linguistic family, but one is not a dialect of the other, such as German
204      * and Dutch.
205      * <li> A large region score is given when two locales share the same region.
206      * <li> A smaller region score is given when one region is contained within
207      * another. For example, Hong Kong is part of China, so a moderate score is
208      * given instead of a full score.
209      * <li> A small score is given if two regions are geographically close to
210      * each other or are tied by history. For example, Ireland and Great Britain
211      * are both adjacent and tied by history, so they receive a moderate score.
212      * <li> A high script score is given if the two locales share the same script.
213      * The legibility of a common script means that there is some small kinship of the
214      * different languages.
215      * <li> A high variant score is given if the two locales share the same
216      * variant. Full score is given when both locales have no variant at all.
217      * <li> Locale components that are unspecified in both locales are given high
218      * scores.
219      * <li> Locales where a particular locale component is missing in only one
220      * locale can still match when the default for that locale component matches
221      * the component in the other locale. The
222      * default value for the missing component is determined using the likely locales
223      * data. (See getLikelyLocale()) For example, "en-US" and "en-Latn-US" receive
224      * a high script score because the default script for "en" is "Latn".
225      * </ul>
226      *
227      * The intention of this method is that it can be used to determine
228      * compatibility of locales. For example, when a user signs up for an
229      * account on a web site, the locales that the web site supports and
230      * the locale of the user's browser may differ, and the site needs to
231      * pick the best locale to show the user. Let's say the
232      * web site supports a selection of European languages such as "it-IT",
233      * "fr-FR", "de-DE", and "en-GB". The user's
234      * browser may be set to "it-CH". The web site code can then match "it-CH"
235      * against each of the supported locales to find the one with the
236      * highest score. In
237      * this case, the best match would be "it-IT" because it shares a
238      * language and script in common with "it-CH" and differs only in the region
239      * component. It is not a 100% match, but it is pretty good. The web site
240      * may decide if the match scores all fall
241      * below a chosen threshold (perhaps 50%?), it should show the user the
242      * default language "en-GB", because that is probably a better choice
243      * than any other supported locale.<p>
244      *
245      * @param {Locale} locale the other locale to match against the current one
246      * @return {number} an integer from 0 to 100 that indicates the degree to
247      * which these locales match each other
248      */
249     match: function(locale) {
250         var other = new Locale(locale);
251         var scores = [0, 0, 0, 0];
252         var thisfull, otherfull, i;
253 
254         if (this.locale.language === other.language) {
255             scores[0] = 100;
256         } else {
257             if (!this.locale.language || !other.language) {
258                 // check for default language
259                 thisfull = this.getLikelyLocale();
260                 otherfull = new Locale(this.info.likelyLocales[other.getSpec()] || other.getSpec());
261                 if (thisfull.language === otherfull.language) {
262                     scores[0] = 100;
263                 }
264             } else {
265                 // check for macro languages
266                 var mlthis = this.info.macroLanguagesReverse[this.locale.language] || this.locale.language;
267                 var mlother = this.info.macroLanguagesReverse[other.language] || other.language;
268                 if (mlthis === mlother) {
269                     scores[0] = 90;
270                 } else {
271                     // check for mutual intelligibility
272                     var pair = this.locale.language + "-" + other.language;
273                     scores[0] = this.info.mutualIntelligibility[pair] || 0;
274                 }
275             }
276         }
277 
278         if (this.locale.script === other.script) {
279             scores[1] = 100;
280         } else {
281             if (!this.locale.script || !other.script) {
282                 // check for default script
283                 thisfull = this.locale.script ? this.locale : new Locale(this.info.likelyLocales[this.locale.language]);
284                 otherfull = other.script ? other : new Locale(this.info.likelyLocales[other.language]);
285                 if (thisfull.script === otherfull.script) {
286                     scores[1] = 100;
287                 }
288             }
289         }
290 
291         if (this.locale.region === other.region) {
292             scores[2] = 100;
293         } else {
294             if (!this.locale.region || !other.region) {
295                 // check for default region
296                 thisfull = this.getLikelyLocale();
297                 otherfull = new Locale(this.info.likelyLocales[other.getSpec()] || other.getSpec());
298                 if (thisfull.region === otherfull.region) {
299                     scores[2] = 100;
300                 }
301             } else {
302                 // check for containment
303                 var containers = this.info.territoryContainmentReverse[this.locale.region] || [];
304                 // end at 1 because 0 is "001" which is "the whole world" -- which is not useful
305                 for (i = containers.length-1; i > 0; i--) {
306                     var container = this.info.territoryContainment[containers[i]];
307                     if (container && container.indexOf(other.region) > -1) {
308                         // same area only accounts for 20% of the region score
309                         scores[2] = ((i+1) * 100 / containers.length) * 0.2;
310                         break;
311                     }
312                 }
313             }
314         }
315 
316         if (this.locale.variant === other.variant) {
317             scores[3] = 100;
318         }
319 
320         var total = 0;
321 
322         for (i = 0; i < 4; i++) {
323             total += scores[i] * componentWeights[i];
324         }
325 
326         return Math.round(total);
327     },
328 
329     /**
330      * Return the macrolanguage associated with this locale. If the
331      * locale's language is not part of a macro-language, then the
332      * locale's language is returned as-is.
333      *
334      * @returns {string} the ISO code for the macrolanguage associated
335      * with this locale, or language of the locale
336      */
337     getMacroLanguage: function() {
338         return this.info.macroLanguagesReverse[this.locale.language] || this.locale.language;
339     },
340 
341     /**
342      * @private
343      * Return the containment array for the given region code.
344      */
345     _getRegionContainment: function(region) {
346         return this.info.territoryContainmentReverse[region] || []
347     },
348 
349     /**
350      * Return the list of regions that this locale is contained within. Regions are
351      * nested, so locales can be in multiple regions. (eg. US is in Northern North
352      * America, North America, the Americas, the World.) Most regions are specified
353      * using UN.49 region numbers, though some, like "EU", are letters. If the
354      * locale is underspecified, this method will use the most likely locale method
355      * to get the region first. For example, the locale "ja" (Japanese) is most
356      * likely "ja-JP" (Japanese for Japan), and the region containment info for Japan
357      * is returned.
358      *
359      * @returns {Array.<string>} an array of region specifiers that this locale is within
360      */
361     getRegionContainment: function() {
362         var region = this.locale.region || this.getLikelyLocale().region;
363         return this._getRegionContainment(region);
364     },
365 
366     /**
367      * Find the smallest region that contains both the current locale and the other locale.
368      * If the current or other locales are underspecified, this method will use the most
369      * likely locale method
370      * to get their regions first. For example, the locale "ja" (Japanese) is most
371      * likely "ja-JP" (Japanese for Japan), and the region containment info for Japan
372      * is checked against the other locale's region containment info.
373      *
374      * @param {string|Locale} otherLocale a locale specifier or a Locale instance to
375      * compare against
376      * @returns {string} the region specifier of the smallest region containing both the
377      * current locale and other locale
378      */
379     smallestCommonRegion: function(otherLocale) {
380         if (typeof(otherLocale) === "undefined") return "001";
381 
382         var thisRegion = this.locale.region || this.getLikelyLocale().region;
383         var otherLoc = typeof(otherLocale) === "string" ? new Locale(otherLocale) : otherLocale;
384         var otherRegion = this._getLikelyLocale(otherLoc).region;
385 
386         var thisRegions = this._getRegionContainment(thisRegion);
387         var otherRegions = this._getRegionContainment(otherRegion);
388 
389         // region containment arrays are arranged from largest to smallest, so start
390         // at the end of the array
391         for (var i = thisRegions.length-1; i > 0; i--) {
392             if (otherRegions.indexOf(thisRegions[i]) > -1) {
393                 return thisRegions[i];
394             }
395         }
396 
397         // this default should never be reached because the world should be common to all regions
398         return "001";
399     }
400 };
401 
402 module.exports = LocaleMatcher;
403