/*
 * LocaleMatcher.js - Locale matcher definition
 *
 * Copyright © 2013-2015, 2018, JEDLSoft
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// !data localematch

var ilib = require("./ilib.js");
var Utils = require("./Utils.js");
var Locale = require("./Locale.js");

/*
 * Relative weight of each locale component in the total match score.
 * The order is the same as the per-component score array built in match().
 */
var componentWeights = [
    0.5,   // language
    0.2,   // script
    0.25,  // region
    0.05   // variant
];

/**
 * @private
 * Return the named lookup table from the locale match data, or an empty
 * object when the data or that table is not available. The constructor
 * falls back to an empty data object when nothing could be loaded, so the
 * methods below must tolerate missing tables instead of throwing.
 *
 * @param {Object|undefined} info the locale match data of a matcher
 * @param {string} name the name of the table to retrieve
 * @returns {Object} the table, or an empty object
 */
function getTable(info, name) {
    return (info && info[name]) || {};
}

/**
 * @private
 * Join the given locale components into a specifier in the order
 * language-script-region, leaving out any component that is not given.
 *
 * @param {string|undefined} language the language component
 * @param {string|undefined} script the script component
 * @param {string|undefined} region the region component
 * @returns {string} the specifier, which is empty if no component is given
 */
function joinSpec(language, script, region) {
    var parts = [];
    if (language) parts.push(language);
    if (script) parts.push(script);
    if (region) parts.push(region);
    return parts.join("-");
}

/**
 * @class
 * Create a new locale matcher instance. This is used
 * to see which locales can be matched with each other in
 * various ways.<p>
 *
 * The options object may contain any of the following properties:
 *
 * <ul>
 * <li><i>locale</i> - the locale instance or locale spec to match
 *
 * <li><i>onLoad</i> - a callback function to call when the locale matcher object is fully
 * loaded. When the onLoad option is given, the locale matcher object will attempt to
 * load any missing locale data using the ilib loader callback.
 * When the constructor is done (even if the data is already preassembled), the
 * onLoad function is called with the current instance as a parameter, so this
 * callback can be used with preassembled or dynamic loading or a mix of the two.
 *
 * <li><i>sync</i> - tell whether to load any missing locale data synchronously or
 * asynchronously. If this option is given as "false", then the "onLoad"
 * callback must be given, as the instance returned from this constructor will
 * not be usable for a while.
 *
 * <li><i>loadParams</i> - an object containing parameters to pass to the
 * loader callback function when locale data is missing. The parameters are not
 * interpreted or modified in any way. They are simply passed along. The object
 * may contain any property/value pairs as long as the calling code is in
 * agreement with the loader callback function as to what those parameters mean.
 * </ul>
 *
 *
 * @constructor
 * @param {Object} options parameters to initialize this matcher
 */
var LocaleMatcher = function(options) {
    var sync = true,
        loadParams;

    // without an explicit locale option, fall back to whatever a Locale
    // constructed with no arguments represents
    this.locale = new Locale();

    if (options) {
        if (typeof(options.locale) !== 'undefined') {
            this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale;
        }

        if (typeof(options.sync) !== 'undefined') {
            sync = !!options.sync;
        }

        if (typeof(options.loadParams) !== 'undefined') {
            loadParams = options.loadParams;
        }
    }

    if (typeof(ilib.data.localematch) === 'undefined') {
        Utils.loadData({
            object: "LocaleMatcher",
            locale: "-",
            name: "localematch.json",
            sync: sync,
            loadParams: loadParams,
            callback: ilib.bind(this, function (info) {
                if (!info) {
                    // nothing could be loaded: continue with empty data
                    info = {};
                }
                /** @type {Object.<string,string>} */
                this.info = info;
                if (options && typeof(options.onLoad) === 'function') {
                    options.onLoad(this);
                }
            })
        });
    } else {
        // the data is already assembled, so the instance is usable right away
        this.info = ilib.data.localematch;
        if (options && typeof(options.onLoad) === 'function') {
            options.onLoad(this);
        }
    }
};


LocaleMatcher.prototype = {
    /**
     * Return the locale used to construct this instance.
     * @return {Locale|undefined} the locale for this matcher
     */
    getLocale: function() {
        return this.locale;
    },

    /**
     * @private
     * Look up the most likely full locale for the given locale in the
     * likely locales table. The full specifier is tried first, then
     * progressively less specific combinations of the components that the
     * locale actually has. The lookup keys are assembled as plain strings
     * from those components, so a missing component can never turn into a
     * lookup of some unrelated default locale. If nothing is found, or if
     * the locale already has a language, script and region, the locale is
     * returned unchanged.
     *
     * @param {Locale} locale the locale to complete
     * @returns {Locale} the likely locale, or the given locale itself
     */
    _getLikelyLocale: function(locale) {
        // already fully specified
        if (locale.language && locale.script && locale.region) return locale;

        var likely = getTable(this.info, "likelyLocales");
        var language = locale.language,
            script = locale.script,
            region = locale.region;

        // Most specific first. A partial is only tried when all of its
        // components exist, so that a pair never degenerates into a less
        // specific key that would shadow a better candidate further down.
        var candidates = [locale.getSpec()];
        if (language && region) candidates.push(joinSpec(language, undefined, region));
        if (language && script) candidates.push(joinSpec(language, script, undefined));
        if (language) candidates.push(language);
        if (script && region) candidates.push(joinSpec(undefined, script, region));
        if (region) candidates.push(region);
        if (script) candidates.push(script);

        for (var i = 0; i < candidates.length; i++) {
            var found = likely[candidates[i]];
            if (typeof(found) !== 'undefined') return new Locale(found);
        }

        return locale;
    },

    /**
     * Return a Locale instance that is fully specified based on partial information
     * given to the constructor of this locale matcher instance. For example, if the locale
     * spec given to this locale matcher instance is simply "ru" (for the Russian language),
     * then it will fill in the missing region and script tags and return a locale with
     * the specifier "ru-Cyrl-RU". (ie. Russian language, Cyrillic, Russian Federation).
     * Any one or two of the language, script, or region parts may be left unspecified,
     * and the other one or two parts will be filled in automatically. If this
     * class has no information about the given locale, then the locale of this
     * locale matcher instance is returned unchanged.
     *
     * @returns {Locale} the most likely completion of the partial locale given
     * to the constructor of this locale matcher instance
     */
    getLikelyLocale: function () {
        return this._getLikelyLocale(this.locale);
    },

    /**
     * Return the degree that the given locale matches the current locale of this
     * matcher. This method returns an integer from 0 to 100. A value of 100 is
     * a 100% match, meaning that the two locales are exactly equivalent to each
     * other. (eg. "ja-JP" and "ja-JP") A low value means that the two locales
     * have little in common. Note that a component which is unspecified in both
     * locales counts as a match for that component, so two locales that differ
     * in language and region can still receive a score above 0.<p>
     *
     * Locale matching is not the same as equivalence, as the degree of matching
     * is returned. (See Locale.equals for equivalence.)<p>
     *
     * The match score is calculated based on matching the 4 locale components,
     * weighted by importance:
     *
     * <ul>
     * <li> language - this accounts for 50% of the match score
     * <li> region - accounts for 25% of the match score
     * <li> script - accounts for 20% of the match score
     * <li> variant - accounts for 5% of the match score
     * </ul>
     *
     * The score is affected by the following things:
     *
     * <ul>
     * <li> A large language score is given when the language components of the locales
     * match exactly.
     * <li> Higher language scores are given when the languages are linguistically
     * close to each other, such as dialects.
     * <li> A small score is given when two languages are in the same
     * linguistic family, but one is not a dialect of the other, such as German
     * and Dutch.
     * <li> A large region score is given when two locales share the same region.
     * <li> A smaller region score is given when one region is contained within
     * another. For example, Hong Kong is part of China, so a moderate score is
     * given instead of a full score.
     * <li> A small score is given if two regions are geographically close to
     * each other or are tied by history. For example, Ireland and Great Britain
     * are both adjacent and tied by history, so they receive a moderate score.
     * <li> A high script score is given if the two locales share the same script.
     * The legibility of a common script means that there is some small kinship of the
     * different languages.
     * <li> A high variant score is given if the two locales share the same
     * variant. Full score is given when both locales have no variant at all.
     * <li> Locale components that are unspecified in both locales are given high
     * scores.
     * <li> Locales where a particular locale component is missing in only one
     * locale can still match when the default for that locale component matches
     * the component in the other locale. The
     * default value for the missing component is determined using the likely locales
     * data. (See getLikelyLocale()) For example, "en-US" and "en-Latn-US" receive
     * a high script score because the default script for "en" is "Latn".
     * </ul>
     *
     * The intention of this method is that it can be used to determine
     * compatibility of locales. For example, when a user signs up for an
     * account on a web site, the locales that the web site supports and
     * the locale of the user's browser may differ, and the site needs to
     * pick the best locale to show the user. Let's say the
     * web site supports a selection of European languages such as "it-IT",
     * "fr-FR", "de-DE", and "en-GB". The user's
     * browser may be set to "it-CH". The web site code can then match "it-CH"
     * against each of the supported locales to find the one with the
     * highest score. In
     * this case, the best match would be "it-IT" because it shares a
     * language and script in common with "it-CH" and differs only in the region
     * component. It is not a 100% match, but it is pretty good. The web site
     * may decide if the match scores all fall
     * below a chosen threshold (perhaps 50%?), it should show the user the
     * default language "en-GB", because that is probably a better choice
     * than any other supported locale.<p>
     *
     * @param {Locale|string} locale the other locale to match against the current one
     * @return {number} an integer from 0 to 100 that indicates the degree to
     * which these locales match each other
     */
    match: function(locale) {
        var matcher = this;
        var mine = this.locale;
        var other = new Locale(locale);
        var scores = [0, 0, 0, 0];
        var i;

        // An explicitly given component always wins. The likely locale is
        // consulted only for a component that the locale does not have, so
        // it cannot override what the caller specified, and both sides of
        // the comparison are resolved in exactly the same way.
        function resolve(loc, name) {
            return loc[name] || matcher._getLikelyLocale(loc)[name];
        }

        if (mine.language === other.language) {
            scores[0] = 100;
        } else if (!mine.language || !other.language) {
            // check for default language
            if (resolve(mine, "language") === resolve(other, "language")) {
                scores[0] = 100;
            }
        } else {
            // check for macro languages
            var macroLanguages = getTable(this.info, "macroLanguagesReverse");
            var mlthis = macroLanguages[mine.language] || mine.language;
            var mlother = macroLanguages[other.language] || other.language;
            if (mlthis === mlother) {
                scores[0] = 90;
            } else {
                // check for mutual intelligibility
                var pair = mine.language + "-" + other.language;
                scores[0] = getTable(this.info, "mutualIntelligibility")[pair] || 0;
            }
        }

        if (mine.script === other.script) {
            scores[1] = 100;
        } else if (!mine.script || !other.script) {
            // check for default script
            if (resolve(mine, "script") === resolve(other, "script")) {
                scores[1] = 100;
            }
        }

        if (mine.region === other.region) {
            scores[2] = 100;
        } else if (!mine.region || !other.region) {
            // check for default region
            if (resolve(mine, "region") === resolve(other, "region")) {
                scores[2] = 100;
            }
        } else {
            // check for containment
            var containment = getTable(this.info, "territoryContainment");
            var containers = getTable(this.info, "territoryContainmentReverse")[mine.region] || [];
            // end at 1 because 0 is "001" which is "the whole world" -- which is not useful
            for (i = containers.length - 1; i > 0; i--) {
                var container = containment[containers[i]];
                if (container && container.indexOf(other.region) > -1) {
                    // same area only accounts for 20% of the region score
                    scores[2] = ((i + 1) * 100 / containers.length) * 0.2;
                    break;
                }
            }
        }

        if (mine.variant === other.variant) {
            scores[3] = 100;
        }

        var total = 0;

        for (i = 0; i < scores.length; i++) {
            total += scores[i] * componentWeights[i];
        }

        return Math.round(total);
    },

    /**
     * Return the macrolanguage associated with this locale. If the
     * locale's language is not part of a macro-language, then the
     * locale's language is returned as-is.
     *
     * @returns {string} the ISO code for the macrolanguage associated
     * with this locale, or language of the locale
     */
    getMacroLanguage: function() {
        return getTable(this.info, "macroLanguagesReverse")[this.locale.language] || this.locale.language;
    },

    /**
     * @private
     * Return the containment array for the given region code, or an empty
     * array if there is no containment information for that region.
     *
     * @param {string|undefined} region the region code to look up
     * @returns {Array.<string>} the regions containing the given region
     */
    _getRegionContainment: function(region) {
        return getTable(this.info, "territoryContainmentReverse")[region] || [];
    },

    /**
     * Return the list of regions that this locale is contained within. Regions are
     * nested, so locales can be in multiple regions. (eg. US is in Northern North
     * America, North America, the Americas, the World.) Most regions are specified
     * using UN.49 region numbers, though some, like "EU", are letters. If the
     * locale is underspecified, this method will use the most likely locale method
     * to get the region first. For example, the locale "ja" (Japanese) is most
     * likely "ja-JP" (Japanese for Japan), and the region containment info for Japan
     * is returned.
     *
     * @returns {Array.<string>} an array of region specifiers that this locale is within
     */
    getRegionContainment: function() {
        var region = this.locale.region || this.getLikelyLocale().region;
        return this._getRegionContainment(region);
    },

    /**
     * Find the smallest region that contains both the current locale and the other locale.
     * If the current or other locales are underspecified, this method will use the most
     * likely locale method
     * to get their regions first. For example, the locale "ja" (Japanese) is most
     * likely "ja-JP" (Japanese for Japan), and the region containment info for Japan
     * is checked against the other locale's region containment info. A region that
     * is explicitly given in either locale is always used as-is.
     *
     * @param {string|Locale} otherLocale a locale specifier or a Locale instance to
     * compare against
     * @returns {string} the region specifier of the smallest region containing both the
     * current locale and other locale
     */
    smallestCommonRegion: function(otherLocale) {
        if (typeof(otherLocale) === "undefined" || otherLocale === null) return "001";

        var thisRegion = this.locale.region || this.getLikelyLocale().region;
        var otherLoc = typeof(otherLocale) === "string" ? new Locale(otherLocale) : otherLocale;
        // only fall back to the likely locale when the other locale has no
        // region of its own, exactly as is done for this matcher's locale
        var otherRegion = otherLoc.region || this._getLikelyLocale(otherLoc).region;

        var thisRegions = this._getRegionContainment(thisRegion);
        var otherRegions = this._getRegionContainment(otherRegion);

        // region containment arrays are arranged from largest to smallest, so start
        // at the end of the array. Index 0 does not need to be checked because the
        // whole world is the answer when nothing smaller is shared.
        for (var i = thisRegions.length - 1; i > 0; i--) {
            if (otherRegions.indexOf(thisRegions[i]) > -1) {
                return thisRegions[i];
            }
        }

        // the world is common to all regions
        return "001";
    }
};

module.exports = LocaleMatcher;