/*
 * Name.js - Person name parser
 *
 * Copyright © 2013-2015, 2018, JEDLSoft
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// !data name

// notes:
// icelandic given names: http://en.wiktionary.org/wiki/Appendix:Icelandic_given_names
// danish approved given names: http://www.familiestyrelsen.dk/samliv/navne/
// http://www.mentalfloss.com/blogs/archives/59277
// other countries with first name restrictions: Norway, China, New Zealand, Japan, Sweden, Germany, Hungary

var ilib = require("./ilib.js");
var Utils = require("./Utils.js");
var JSUtils = require("./JSUtils.js");

var Locale = require("./Locale.js");
var IString = require("./IString.js");
var CType = require("./CType.js");
var isAlpha = require("./isAlpha.js");
var isIdeo = require("./isIdeo.js");
var isPunct = require("./isPunct.js");
var isSpace = require("./isSpace.js");

/**
 * @class
 * A class to parse names of people. Different locales have different conventions when it
 * comes to naming people.<p>
 *
 * The options can contain any of the following properties:
 *
 * <ul>
 * <li><i>locale</i> - use the rules and conventions of the given locale in order to parse
 * the name
 * <li><i>style</i> - explicitly use the named style to parse the name. Valid values so
 * far are "western" and "asian". If this property is not specified, then the style will
 * be gleaned from the name itself. This class will count the total number of Latin or Asian
 * characters. If the majority of the characters are in one style, that style will be
 * used to parse the whole name.
 * <li><i>order</i> - explicitly use the given order for names. In some locales, such
 * as Russian, names may be written equally validly as "givenName familyName" or "familyName
 * givenName". This option tells the parser which order to prefer, and overrides the
 * default order for the locale. The values accepted are "gmf" (given-middle-family),
 * "fmg" (family-middle-given) and "fgm" (family-given-middle). Any other value is ignored.
 * <li><i>useSpaces</i> - explicitly specifies whether to use spaces or not between the given name,
 * middle name and family name. When this is the boolean false, all whitespace is removed from
 * an Asian name before it is parsed.
 * <li><i>compoundFamilyName</i> - for Asian and some other types of names, search for compound
 * family names. If this parameter is not specified, the default is to use the setting that is
 * most common for the locale. If it is specified, the locale default is
 * overridden with this flag.
 * <li><i>onLoad</i> - a callback function to call when the name info is fully
 * loaded and the name has been parsed. When the onLoad option is given, the name object
 * will attempt to load any missing locale data using the ilib loader callback.
 * When the constructor is done (even if the data is already preassembled), the
 * onLoad function is called with the current instance as a parameter, so this
 * callback can be used with preassembled or dynamic loading or a mix of the two.
 * If no name is given at all, onLoad is called with undefined.
 *
 * <li><i>sync</i> - tell whether to load any missing locale data synchronously or
 * asynchronously. If this option is given as "false", then the "onLoad"
 * callback must be given, as the instance returned from this constructor will
 * not be usable for a while.
 *
 * <li><i>loadParams</i> - an object containing parameters to pass to the
 * loader callback function when locale data is missing. The parameters are not
 * interpreted or modified in any way. They are simply passed along. The object
 * may contain any property/value pairs as long as the calling code is in
 * agreement with the loader callback function as to what those parameters mean.
 * </ul>
 *
 * Additionally, a name instance can be constructed by giving the explicit
 * already-parsed fields or by using another name instance as the parameter. (That is,
 * it becomes a copy constructor.) The name fields can be any of the following:
 *
 * <ul>
 * <li><i>prefix</i> - the prefix prepended to the name
 * <li><i>givenName</i> - the person's given or "first" name
 * <li><i>middleName</i> - one or more middle names, separated by spaces even if the
 * language doesn't usually use spaces. The spaces are merely separators.
 * <li><i>familyName</i> - one or more family or "last" names, separated by spaces
 * even if the language doesn't usually use spaces. The spaces are merely separators.
 * <li><i>suffix</i> - the suffix appended to the name
 * <li><i>honorific</i> - the honorific title of the name. This could be formatted
 * as a prefix or a suffix, depending on the customs of the particular locale. You
 * should only give either an honorific or a prefix/suffix, but not both.
 * </ul>
 *
 * When the parser has completed its parsing, it fills in the same fields as listed
 * above.<p>
 *
 * For names that include auxiliary words, such as the family name "van der Heijden", all
 * of the auxiliary words ("van der") will be included in the field.<p>
 *
 * For names in some Spanish locales, it is assumed that the family name is doubled. That is,
 * a person may have a paternal family name followed by a maternal family name. All
 * family names will be listed in the familyName field as normal, separated by spaces.
 * When formatting the short version of such names, only the paternal family name is used.
 *
 * @constructor
 * @param {string|Name|Object=} name the name to parse
 * @param {Object=} options Options governing the construction of this name instance
 */
var Name = function (name, options) {
    var sync = true;

    // nothing to parse: report that through onLoad and leave the instance uninitialized
    if (!name || name.length === 0) {
        if (options && typeof(options.onLoad) === 'function') {
            options.onLoad(undefined);
        }
        return;
    }

    this.loadParams = {};

    if (options) {
        if (options.locale) {
            this.locale = (typeof (options.locale) === 'string') ? new Locale(options.locale) : options.locale;
        }

        if (options.style && (options.style === "asian" || options.style === "western")) {
            this.style = options.style;
        }

        if (options.order && (options.order === "gmf" || options.order === "fmg" || options.order === "fgm")) {
            this.order = options.order;
        }

        // the documented useSpaces option is consulted by _init, so it has to be
        // recorded before parsing starts
        if (typeof(options.useSpaces) === 'boolean') {
            this.useSpaces = options.useSpaces;
        }

        if (typeof(options.sync) === 'boolean') {
            sync = options.sync;
        }

        if (typeof(options.loadParams) !== 'undefined') {
            this.loadParams = options.loadParams;
        }

        if (typeof(options.compoundFamilyName) !== 'undefined') {
            this.singleFamilyName = !options.compoundFamilyName;
        }
    }

    this.locale = this.locale || new Locale();

    // make sure the character-type data is available before the name data is loaded and parsed
    isAlpha._init(sync, this.loadParams, ilib.bind(this, function() {
        isIdeo._init(sync, this.loadParams, ilib.bind(this, function() {
            isPunct._init(sync, this.loadParams, ilib.bind(this, function() {
                isSpace._init(sync, this.loadParams, ilib.bind(this, function() {
                    Utils.loadData({
                        object: "Name",
                        locale: this.locale,
                        name: "name.json",
                        sync: sync,
                        loadParams: this.loadParams,
                        callback: ilib.bind(this, function (info) {
                            if (!info) {
                                // no locale data: fall back to the built-in rules
                                info = Name.defaultInfo[this.style || "western"];
                            }
                            if (typeof (name) === 'object') {
                                // copy constructor
                                /**
                                 * The prefixes for this name
                                 * @type {string|Array.<string>}
                                 */
                                this.prefix = name.prefix;
                                /**
                                 * The given (personal) name in this name.
                                 * @type {string|Array.<string>}
                                 */
                                this.givenName = name.givenName;
                                /**
                                 * The middle names used in this name. If there are multiple middle names, they all
                                 * appear in this field separated by spaces.
                                 * @type {string|Array.<string>}
                                 */
                                this.middleName = name.middleName;
                                /**
                                 * The family names in this name. If there are multiple family names, they all
                                 * appear in this field separated by spaces.
                                 * @type {string|Array.<string>}
                                 */
                                this.familyName = name.familyName;
                                /**
                                 * The suffixes for this name. If there are multiple suffixes, they all
                                 * appear in this field separated by spaces.
                                 * @type {string|Array.<string>}
                                 */
                                this.suffix = name.suffix;
                                /**
                                 * The honorific title for this name. This honorific will be used as the prefix
                                 * or suffix as dictated by the locale
                                 * @type {string|Array.<string>}
                                 */
                                this.honorific = name.honorific;

                                // private properties. A plain object that only carries name
                                // fields has none of these, so keep what the options (or the
                                // defaults) established instead of overwriting it with undefined.
                                this.locale = name.locale || this.locale;
                                this.style = name.style || this.style;
                                this.order = name.order || this.order;
                                this.useSpaces = (typeof(name.useSpaces) !== 'undefined') ? name.useSpaces : this.useSpaces;
                                this.isAsianName = name.isAsianName;

                                if (options && typeof(options.onLoad) === 'function') {
                                    options.onLoad(this);
                                }

                                return;
                            }
                            /**
                             * @type {{
                             *   nameStyle:string,
                             *   order:string,
                             *   prefixes:Array.<string>,
                             *   suffixes:Array.<string>,
                             *   auxillaries:Array.<string>,
                             *   honorifics:Array.<string>,
                             *   knownFamilyNames:Array.<string>,
                             *   noCompoundFamilyNames:boolean,
                             *   sortByHeadWord:boolean
                             * }} */
                            this.info = info;
                            this._init(name);
                            if (options && typeof(options.onLoad) === 'function') {
                                options.onLoad(this);
                            }
                        })
                    });
                }));
            }));
        }));
    }));
};
/**
 * Built-in parsing rules, keyed by name style. The constructor falls back to
 * one of these when no name.json data could be loaded for the locale, and
 * _init selects between them depending on whether the name looks Asian.
 * The "western" entry is the preassembled ilib.data.name when that exists.
 */
Name.defaultInfo = {
    "western": ilib.data.name || {
        "components": {
            "short": "gf",
            "medium": "gmf",
            "long": "pgmf",
            "full": "pgmfs",
            "formal_short": "hf",
            "formal_long": "hgf"
        },
        "format": "{prefix} {givenName} {middleName} {familyName}{suffix}",
        "sortByHeadWord": false,
        "nameStyle": "western",
        "conjunctions": {
            "and1": "and",
            "and2": "and",
            "or1": "or",
            "or2": "or"
        },
        "auxillaries": {
            "von": 1,
            "von der": 1,
            "von den": 1,
            "van": 1,
            "van der": 1,
            "van de": 1,
            "van den": 1,
            "de": 1,
            "di": 1,
            "la": 1,
            "lo": 1,
            "des": 1,
            "le": 1,
            "les": 1,
            "du": 1,
            "de la": 1,
            "del": 1,
            "de los": 1,
            "de las": 1
        },
        // prefixes are matched in lower case with commas and periods removed
        "prefixes": [
            "doctor",
            "dr",
            "mr",
            "mrs",
            "ms",
            "mister",
            "madame",
            // correct spelling; the misspelled entry below is kept so that
            // input which matched before still matches
            "mademoiselle",
            "madamoiselle",
            "miss",
            "monsieur",
            "señor",
            "señora",
            "señorita"
        ],
        // suffixes are matched in lower case with periods removed
        "suffixes": [
            ",",
            "junior",
            "jr",
            "senior",
            "sr",
            "i",
            "ii",
            "iii",
            "esq",
            "phd",
            "md"
        ],
        "patronymicName":[ ],
        "familyNames":[ ]
    },
    "asian": {
        "components": {
            "short": "gf",
            "medium": "gmf",
            "long": "hgmf",
            "full": "hgmf",
            "formal_short": "hf",
            "formal_long": "hgf"
        },
        "format": "{prefix}{familyName}{middleName}{givenName}{suffix}",
        "nameStyle": "asian",
        "sortByHeadWord": false,
        "conjunctions": {},
        "auxillaries": {},
        "prefixes": [],
        "suffixes": [],
        "patronymicName":[],
        "familyNames":[]
    }
};

/**
 * Return true if the given character is in the range of the Han, Hangul, or kana
 * scripts.
 * @static
 * @protected
 * @param {string} c the character to test
 * @return {boolean} true if the character is ideographic or within the
 * "hangul", "hiragana" or "katakana" ranges
 */
Name._isAsianChar = function(c) {
    return isIdeo(c) ||
        CType.withinRange(c, "hangul") ||
        CType.withinRange(c, "hiragana") ||
        CType.withinRange(c, "katakana");
};
/**
 * Decide whether a name should be parsed with the Asian rules.
 *
 * For the languages "ko", "ja" and "zh", the first Asian character found
 * settles the question. For every other language the Asian characters and
 * the remaining alphabetic characters are counted, and the name is Asian
 * only when the Asian characters are in the strict majority.
 *
 * The previous code also contained a test written as
 * <code>!language == "ko" || ...</code>. Because the negation binds before the
 * comparison, that expression compares a boolean with a string and can never
 * be true, so the branch it guarded never ran. It has been removed; the
 * results are unchanged.
 *
 * @static
 * @protected
 * @param {string} name the name to examine
 * @param {string=} language the language code of the current locale
 * @return {boolean} true if the name should be treated as an Asian name
 */
Name._isAsianName = function (name, language) {
    var asian = 0,
        latin = 0,
        isCJKLanguage = (language === "ko" || language === "ja" || language === "zh"),
        i, c;

    if (name && name.length > 0) {
        for (i = 0; i < name.length; i++) {
            c = name.charAt(i);

            if (Name._isAsianChar(c)) {
                if (isCJKLanguage) {
                    return true;
                }
                asian++;
            } else if (isAlpha(c)) {
                latin++;
            }
        }

        return latin < asian;
    }

    return false;
};

/**
 * Return true as soon as a character is found that is not an Asian character,
 * not punctuation and not white space. Note that this accepts any such
 * character, not only Latin letters. For the languages "ko", "ja" and "zh",
 * return false as soon as an Asian character is found. Return false when the
 * end of the string is reached without either happening.
 *
 * @static
 * @protected
 * @param {string} name the name to examine
 * @param {string=} language the language code of the current locale
 * @return {boolean} true if the name contains a non-Asian character as described above
 */
Name._isEuroName = function (name, language) {
    var c,
        n = new IString(name),
        it = n.charIterator(),
        isCJKLanguage = (language === "ko" || language === "ja" || language === "zh");

    while (it.hasNext()) {
        c = it.next();

        if (Name._isAsianChar(c)) {
            if (isCJKLanguage) {
                return false;
            }
        } else if (!isPunct(c) && !isSpace(c)) {
            return true;
        }
    }
    return false;
};
Name.prototype = {
    /**
     * Parse the given name string and store the results in the fields of this
     * instance. The steps are: cut off a trailing part that starts with a comma,
     * slash or opening bracket, decide whether the name is Asian, split it into
     * parts, strip known prefixes and suffixes, join auxiliary words onto their
     * head words, hand the remaining parts to the Asian or western parser, and
     * finally flatten any array-valued fields into strings.
     *
     * Expects this.info and this.locale to be set already. Does nothing when
     * name is empty.
     *
     * @protected
     * @param {string} name the name to parse
     */
    _init: function (name) {
        var parts, prefixArray, prefix, prefixLower,
            suffixArray, suffix, suffixLower,
            i, info, hpSuffix;
        var currentLanguage = this.locale.getLanguage();

        if (name) {
            // for DFISH-12905, pick off the part that the LDAP server automatically adds to our names in HP emails
            // (everything from the first comma, slash or opening bracket onwards)
            i = name.search(/\s*[,\/\(\[\{<]/);
            if (i !== -1) {
                hpSuffix = name.substring(i);
                hpSuffix = hpSuffix.replace(/\s+/g, ' '); // compress multiple whitespaces
                suffixArray = hpSuffix.split(" ");
                var conjunctionIndex = this._findLastConjunction(suffixArray);
                if (conjunctionIndex > -1) {
                    // it's got conjunctions in it, so this is not really a suffix
                    hpSuffix = undefined;
                } else {
                    name = name.substring(0, i);
                }
            }

            this.isAsianName = Name._isAsianName(name, currentLanguage);
            // NOTE(review): "info" is assigned here but never read in this method;
            // the prefix and suffix checks below all use this.info. Confirm whether
            // they were meant to use the style-specific "info" instead.
            if (this.info.nameStyle === "asian") {
                info = this.isAsianName ? this.info : Name.defaultInfo.western;
            } else {
                info = this.isAsianName ? Name.defaultInfo.asian : this.info;
            }

            if (this.isAsianName) {
                // all-asian names
                if (this.useSpaces === false) {
                    name = name.replace(/\s+/g, ''); // eliminate all whitespaces
                }
                // one part per UTF-16 code unit
                parts = name.trim().split('');
            }
            else {
                // make each comma a part of its own, then split on single spaces
                name = name.replace(/, /g, ' , ');
                name = name.replace(/\s+/g, ' '); // compress multiple whitespaces
                parts = name.trim().split(' ');
            }

            // check for prefixes
            if (parts.length > 1) {
                // try the longest run of leading parts first and shrink it one part at a time
                for (i = parts.length; i > 0; i--) {
                    prefixArray = parts.slice(0, i);
                    prefix = prefixArray.join(this.isAsianName ? '' : ' ');
                    prefixLower = prefix.toLowerCase();
                    prefixLower = prefixLower.replace(/[,\.]/g, ''); // ignore commas and periods
                    if (ilib.isArray(this.info.prefixes) &&
                        (JSUtils.indexOf(this.info.prefixes, prefixLower) > -1 || this._isConjunction(prefixLower))) {
                        if (this.prefix) {
                            if (!this.isAsianName) {
                                this.prefix += ' ';
                            }
                            this.prefix += prefix;
                        } else {
                            this.prefix = prefix;
                        }
                        // drop the matched parts and scan what is left for further prefixes.
                        // The loop decrement runs next, so the rescan starts one part short
                        // of the full remainder and never consumes every remaining part.
                        parts = parts.slice(i);
                        i = parts.length;
                    }
                }
            }
            // check for suffixes
            if (parts.length > 1) {
                // same scheme as for the prefixes, working from the end of the name
                for (i = parts.length; i > 0; i--) {
                    suffixArray = parts.slice(-i);
                    suffix = suffixArray.join(this.isAsianName ? '' : ' ');
                    suffixLower = suffix.toLowerCase();
                    suffixLower = suffixLower.replace(/[\.]/g, ''); // ignore periods
                    if (ilib.isArray(this.info.suffixes) && JSUtils.indexOf(this.info.suffixes, suffixLower) > -1) {
                        if (this.suffix) {
                            // a suffix found later sits in front of the ones found so far
                            if (!this.isAsianName && !isPunct(this.suffix.charAt(0))) {
                                this.suffix = ' ' + this.suffix;
                            }
                            this.suffix = suffix + this.suffix;
                        } else {
                            this.suffix = suffix;
                        }
                        parts = parts.slice(0, parts.length - i);
                        i = parts.length;
                    }
                }
            }

            if (hpSuffix) {
                // put the part that was cut off at the start back at the end of the suffix
                this.suffix = (this.suffix && this.suffix + hpSuffix) || hpSuffix;
            }

            // adjoin auxillary words to their headwords
            if (parts.length > 1 && !this.isAsianName) {
                parts = this._joinAuxillaries(parts, this.isAsianName);
            }

            if (this.isAsianName) {
                this._parseAsianName(parts, currentLanguage);
            } else {
                this._parseWesternName(parts);
            }

            // the parsers may leave arrays in the name fields; turn them into strings
            this._joinNameArrays();
        }
    },
'' : ' '); 531 sequenceLower = sequence.toLowerCase(); 532 sequenceLower = sequenceLower.replace(/[,\.]/g, ''); // ignore commas and periods 533 534 //console.info("_findSequence: checking sequence: '" + sequenceLower + "'"); 535 536 if ( sequenceLower in hash ) { 537 ret.match = sequenceArray; 538 ret.start = start; 539 ret.end = i; 540 return ret; 541 //console.info("_findSequence: Found sequence '" + sequence + "' New parts list is " + JSON.stringify(parts)); 542 } 543 } 544 } 545 } 546 547 return undefined; 548 }, 549 */ 550 551 /** 552 * @protected 553 * @param {Array} parts 554 * @param {Array} names 555 * @param {boolean} isAsian 556 * @param {boolean=} noCompoundPrefix 557 */ 558 _findPrefix: function (parts, names, isAsian, noCompoundPrefix) { 559 var i, prefix, prefixLower, prefixArray, aux = []; 560 561 if (parts.length > 0 && names) { 562 for (i = parts.length; i > 0; i--) { 563 prefixArray = parts.slice(0, i); 564 prefix = prefixArray.join(isAsian ? '' : ' '); 565 prefixLower = prefix.toLowerCase(); 566 prefixLower = prefixLower.replace(/[,\.]/g, ''); // ignore commas and periods 567 568 if (prefixLower in names) { 569 aux = aux.concat(isAsian ? prefix : prefixArray); 570 if (noCompoundPrefix) { 571 // don't need to parse further. Just return it as is. 572 return aux; 573 } 574 parts = parts.slice(i); 575 i = parts.length + 1; 576 } 577 } 578 } 579 580 return aux; 581 }, 582 583 /** 584 * @protected 585 */ 586 _findSuffix: function (parts, names, isAsian) { 587 var i, j, seq = ""; 588 589 for (i = 0; i < names.length; i++) { 590 if (parts.length >= names[i].length) { 591 j = 0; 592 while (j < names[i].length && parts[parts.length - j] === names[i][names[i].length - j]) { 593 j++; 594 } 595 if (j >= names[i].length) { 596 seq = parts.slice(parts.length - j).join(isAsian ? "" : " ") + (isAsian ? 
"" : " ") + seq; 597 parts = parts.slice(0, parts.length - j); 598 i = -1; // restart the search 599 } 600 } 601 } 602 603 this.suffix = seq; 604 return parts; 605 }, 606 607 /** 608 * @protected 609 * Tell whether or not the given word is a conjunction in this language. 610 * @param {string} word the word to test 611 * @return {boolean} true if the word is a conjunction 612 */ 613 _isConjunction: function _isConjunction(word) { 614 return (this.info.conjunctions.and1 === word || 615 this.info.conjunctions.and2 === word || 616 this.info.conjunctions.or1 === word || 617 this.info.conjunctions.or2 === word || 618 ("&" === word) || 619 ("+" === word)); 620 }, 621 622 /** 623 * Find the last instance of 'and' in the name 624 * @protected 625 * @param {Array.<string>} parts 626 * @return {number} 627 */ 628 _findLastConjunction: function _findLastConjunction(parts) { 629 var conjunctionIndex = -1, 630 index, part; 631 632 for (index = 0; index < parts.length; index++) { 633 part = parts[index]; 634 if (typeof (part) === 'string') { 635 part = part.toLowerCase(); 636 // also recognize English 637 if ("and" === part || "or" === part || "&" === part || "+" === part) { 638 conjunctionIndex = index; 639 } 640 if (this._isConjunction(part)) { 641 conjunctionIndex = index; 642 } 643 } 644 } 645 return conjunctionIndex; 646 }, 647 648 /** 649 * @protected 650 * @param {Array.<string>} parts the current array of name parts 651 * @param {boolean} isAsian true if the name is being parsed as an Asian name 652 * @return {Array.<string>} the remaining parts after the prefixes have been removed 653 */ 654 _extractPrefixes: function (parts, isAsian) { 655 var i = this._findPrefix(parts, this.info.prefixes, isAsian); 656 if (i > 0) { 657 this.prefix = parts.slice(0, i).join(isAsian ? 
"" : " "); 658 return parts.slice(i); 659 } 660 // prefixes not found, so just return the array unmodified 661 return parts; 662 }, 663 664 /** 665 * @protected 666 * @param {Array.<string>} parts the current array of name parts 667 * @param {boolean} isAsian true if the name is being parsed as an Asian name 668 * @return {Array.<string>} the remaining parts after the suffices have been removed 669 */ 670 _extractSuffixes: function (parts, isAsian) { 671 var i = this._findSuffix(parts, this.info.suffixes, isAsian); 672 if (i > 0) { 673 this.suffix = parts.slice(i).join(isAsian ? "" : " "); 674 return parts.slice(0, i); 675 } 676 // suffices not found, so just return the array unmodified 677 return parts; 678 }, 679 680 /** 681 * Adjoin auxillary words to their head words. 682 * @protected 683 * @param {Array.<string>} parts the current array of name parts 684 * @param {boolean} isAsian true if the name is being parsed as an Asian name 685 * @return {Array.<string>} the parts after the auxillary words have been plucked onto their head word 686 */ 687 _joinAuxillaries: function (parts, isAsian) { 688 var start, i, prefixArray, prefix, prefixLower; 689 690 if (this.info.auxillaries && (parts.length > 2 || this.prefix)) { 691 for (start = 0; start < parts.length - 1; start++) { 692 for (i = parts.length; i > start; i--) { 693 prefixArray = parts.slice(start, i); 694 prefix = prefixArray.join(' '); 695 prefixLower = prefix.toLowerCase(); 696 prefixLower = prefixLower.replace(/[,\.]/g, ''); // ignore commas and periods 697 698 if (prefixLower in this.info.auxillaries) { 699 parts.splice(start, i + 1 - start, prefixArray.concat(parts[i])); 700 i = start; 701 } 702 } 703 } 704 } 705 706 return parts; 707 }, 708 709 /** 710 * Recursively join an array or string into a long string. 
711 * @protected 712 */ 713 _joinArrayOrString: function _joinArrayOrString(part) { 714 var i; 715 if (typeof (part) === 'object') { 716 for (i = 0; i < part.length; i++) { 717 part[i] = this._joinArrayOrString(part[i]); 718 } 719 var ret = ""; 720 part.forEach(function (segment) { 721 if (ret.length > 0 && !isPunct(segment.charAt(0))) { 722 ret += ' '; 723 } 724 ret += segment; 725 }); 726 727 return ret; 728 } 729 730 return part; 731 }, 732 733 /** 734 * @protected 735 */ 736 _joinNameArrays: function _joinNameArrays() { 737 var prop; 738 for (prop in this) { 739 740 if (this[prop] !== undefined && typeof (this[prop]) === 'object' && ilib.isArray(this[prop])) { 741 742 this[prop] = this._joinArrayOrString(this[prop]); 743 } 744 } 745 }, 746 747 /** 748 * @protected 749 */ 750 _parseAsianName: function (parts, language) { 751 var familyNameArray = this._findPrefix(parts, this.info.knownFamilyNames, true, typeof(this.singleFamilyName) !== 'undefined' ? this.singleFamilyName : this.info.noCompoundFamilyNames); 752 var tempFullName = parts.join(''); 753 754 if (familyNameArray && familyNameArray.length > 0) { 755 this.familyName = familyNameArray.join(''); 756 this.givenName = parts.slice(this.familyName.length).join(''); 757 758 //Overide parsing rules if spaces are found in korean 759 if (language === "ko" && tempFullName.search(/\s*[/\s]/) > -1 && !this.suffix) { 760 this._parseKoreanName(tempFullName); 761 } 762 } else if (this.locale.getLanguage() === "ja") { 763 this._parseJapaneseName(parts); 764 } else if (this.suffix || this.prefix) { 765 this.familyName = parts.join(''); 766 } else { 767 this.givenName = parts.join(''); 768 } 769 }, 770 771 /** 772 * @protected 773 */ 774 _parseKoreanName: function (name) { 775 var tempName = name; 776 777 var spaceSplit = tempName.split(" "); 778 var spceCount = spaceSplit.length; 779 var fistSpaceIndex = tempName.indexOf(" "); 780 var lastSpaceIndex = tempName.lastIndexOf(" "); 781 782 if (spceCount === 2) { 783 
this.familyName = spaceSplit[0]; 784 this.givenName = tempName.slice(fistSpaceIndex, tempName.length); 785 } else { 786 this.familyName = spaceSplit[0]; 787 this.middleName = tempName.slice(fistSpaceIndex, lastSpaceIndex); 788 this.givenName = tempName.slice(lastSpaceIndex, tempName.length); 789 } 790 791 }, 792 793 /** 794 * @protected 795 */ 796 _parseJapaneseName: function (parts) { 797 if (this.suffix && this.suffix.length > 1 && this.info.honorifics.indexOf(this.suffix)>-1) { 798 if (parts.length === 1) { 799 if (CType.withinRange(parts[0], "cjk")) { 800 this.familyName = parts[0]; 801 } else { 802 this.givenName = parts[0]; 803 } 804 return; 805 } else if (parts.length === 2) { 806 this.familyName = parts.slice(0,parts.length).join("") 807 return; 808 } 809 } 810 if (parts.length > 1) { 811 var fn = ""; 812 for (var i = 0; i < parts.length; i++) { 813 if (CType.withinRange(parts[i], "cjk")) { 814 fn += parts[i]; 815 } else if (fn.length > 1 && CType.withinRange(parts[i], "hiragana")) { 816 this.familyName = fn; 817 this.givenName = parts.slice(i,parts.length).join(""); 818 return; 819 } else { 820 break; 821 } 822 } 823 } 824 if (parts.length === 1) { 825 this.familyName = parts[0]; 826 } else if (parts.length === 2) { 827 this.familyName = parts[0]; 828 this.givenName = parts[1]; 829 } else if (parts.length === 3) { 830 this.familyName = parts[0]; 831 this.givenName = parts.slice(1,parts.length).join(""); 832 } else if (parts.length > 3) { 833 this.familyName = parts.slice(0,2).join("") 834 this.givenName = parts.slice(2,parts.length).join(""); 835 } 836 }, 837 838 /** 839 * @protected 840 */ 841 _parseSpanishName: function (parts) { 842 var conjunctionIndex; 843 844 if (parts.length === 1) { 845 if (this.prefix || typeof (parts[0]) === 'object') { 846 this.familyName = parts[0]; 847 } else { 848 this.givenName = parts[0]; 849 } 850 } else if (parts.length === 2) { 851 // we do G F 852 this.givenName = parts[0]; 853 this.familyName = parts[1]; 854 } else if 
(parts.length === 3) { 855 conjunctionIndex = this._findLastConjunction(parts); 856 // if there's an 'and' in the middle spot, put everything in the first name 857 if (conjunctionIndex === 1) { 858 this.givenName = parts; 859 } else { 860 // else, do G F F 861 this.givenName = parts[0]; 862 this.familyName = parts.slice(1); 863 } 864 } else if (parts.length > 3) { 865 //there are at least 4 parts to this name 866 867 conjunctionIndex = this._findLastConjunction(parts); 868 ////console.log("@@@@@@@@@@@@@@@@"+conjunctionIndex) 869 if (conjunctionIndex > 0) { 870 // if there's a conjunction that's not the first token, put everything up to and 871 // including the token after it into the first name, the last 2 tokens into 872 // the family name (if they exist) and everything else in to the middle name 873 // 0 1 2 3 4 5 874 // G A G 875 // G A G F 876 // G G A G 877 // G A G F F 878 // G G A G F 879 // G G G A G 880 // G A G M F F 881 // G G A G F F 882 // G G G A G F 883 // G G G G A G 884 this.givenName = parts.splice(0, conjunctionIndex + 2); 885 if (parts.length > 1) { 886 this.familyName = parts.splice(parts.length - 2, 2); 887 if (parts.length > 0) { 888 this.middleName = parts; 889 } 890 } else if (parts.length === 1) { 891 this.familyName = parts[0]; 892 } 893 } else { 894 this.givenName = parts.splice(0, 1); 895 this.familyName = parts.splice(parts.length - 2, 2); 896 this.middleName = parts; 897 } 898 } 899 }, 900 901 /** 902 * @protected 903 */ 904 _parseIndonesianName: function (parts) { 905 var conjunctionIndex; 906 907 if (parts.length === 1) { 908 //if (this.prefix || typeof(parts[0]) === 'object') { 909 //this.familyName = parts[0]; 910 //} else { 911 this.givenName = parts[0]; 912 //} 913 //} else if (parts.length === 2) { 914 // we do G F 915 //this.givenName = parts[0]; 916 //this.familyName = parts[1]; 917 } else if (parts.length >= 2) { 918 //there are at least 3 parts to this name 919 920 conjunctionIndex = this._findLastConjunction(parts); 921 if 
(conjunctionIndex > 0) { 922 // if there's a conjunction that's not the first token, put everything up to and 923 // including the token after it into the first name, the last 2 tokens into 924 // the family name (if they exist) and everything else in to the middle name 925 // 0 1 2 3 4 5 926 // G A G 927 // G A G F 928 // G G A G 929 // G A G F F 930 // G G A G F 931 // G G G A G 932 // G A G M F F 933 // G G A G F F 934 // G G G A G F 935 // G G G G A G 936 this.givenName = parts.splice(0, conjunctionIndex + 2); 937 if (parts.length > 1) { 938 //this.familyName = parts.splice(parts.length-2, 2); 939 //if ( parts.length > 0 ) { 940 this.middleName = parts; 941 } 942 //} else if (parts.length === 1) { 943 // this.familyName = parts[0]; 944 //} 945 } else { 946 this.givenName = parts.splice(0, 1); 947 //this.familyName = parts.splice(parts.length-2, 2); 948 this.middleName = parts; 949 } 950 } 951 }, 952 953 /** 954 * @protected 955 */ 956 _parseGenericWesternName: function (parts) { 957 /* Western names are parsed as follows, and rules are applied in this 958 * order: 959 * 960 * G 961 * G F 962 * G M F 963 * G M M F 964 * P F 965 * P G F 966 */ 967 var conjunctionIndex; 968 969 if (parts.length === 1) { 970 if (this.prefix || typeof (parts[0]) === 'object') { 971 // already has a prefix, so assume it goes with the family name like "Dr. 
Roberts" or 972 // it is a name with auxillaries, which is almost always a family name 973 this.familyName = parts[0]; 974 } else { 975 this.givenName = parts[0]; 976 } 977 } else if (parts.length === 2) { 978 // we do G F 979 if (this.info.order == 'fgm') { 980 this.givenName = parts[1]; 981 this.familyName = parts[0]; 982 } else if (this.info.order == "gmf" || typeof (this.info.order) == 'undefined') { 983 this.givenName = parts[0]; 984 this.familyName = parts[1]; 985 } 986 } else if (parts.length >= 3) { 987 //find the first instance of 'and' in the name 988 conjunctionIndex = this._findLastConjunction(parts); 989 990 if (conjunctionIndex > 0) { 991 // if there's a conjunction that's not the first token, put everything up to and 992 // including the token after it into the first name, the last token into 993 // the family name (if it exists) and everything else in to the middle name 994 // 0 1 2 3 4 5 995 // G A G M M F 996 // G G A G M F 997 // G G G A G F 998 // G G G G A G 999 //if(this.order == "gmf") { 1000 this.givenName = parts.slice(0, conjunctionIndex + 2); 1001 1002 if (conjunctionIndex + 1 < parts.length - 1) { 1003 this.familyName = parts.splice(parts.length - 1, 1); 1004 ////console.log(this.familyName); 1005 if (conjunctionIndex + 2 < parts.length - 1) { 1006 this.middleName = parts.slice(conjunctionIndex + 2, parts.length - conjunctionIndex - 3); 1007 } 1008 } else if (this.info.order == "fgm") { 1009 this.familyName = parts.slice(0, conjunctionIndex + 2); 1010 if (conjunctionIndex + 1 < parts.length - 1) { 1011 this.middleName = parts.splice(parts.length - 1, 1); 1012 if (conjunctionIndex + 2 < parts.length - 1) { 1013 this.givenName = parts.slice(conjunctionIndex + 2, parts.length - conjunctionIndex - 3); 1014 } 1015 } 1016 } 1017 } else if (this.info.order === "fgm") { 1018 this.givenName = parts[1]; 1019 this.middleName = parts.slice(2); 1020 this.familyName = parts[0]; 1021 } else { 1022 this.givenName = parts[0]; 1023 this.middleName = 
parts.slice(1, parts.length - 1); 1024 this.familyName = parts[parts.length - 1]; 1025 } 1026 } 1027 }, 1028 1029 /** 1030 * parse patrinomic name from the russian names 1031 * @protected 1032 * @param {Array.<string>} parts the current array of name parts 1033 * @return number index of the part which contains patronymic name 1034 */ 1035 _findPatronymicName: function(parts) { 1036 var index, part; 1037 for (index = 0; index < parts.length; index++) { 1038 part = parts[index]; 1039 if (typeof (part) === 'string') { 1040 part = part.toLowerCase(); 1041 1042 var subLength = this.info.patronymicName.length; 1043 while(subLength--) { 1044 if(part.indexOf(this.info.patronymicName[subLength])!== -1 ) 1045 return index; 1046 } 1047 } 1048 } 1049 return -1; 1050 }, 1051 1052 /** 1053 * find if the given part is patronymic name 1054 * 1055 * @protected 1056 * @param {string} part string from name parts @ 1057 * @return number index of the part which contains familyName 1058 */ 1059 _isPatronymicName: function(part) { 1060 var pName; 1061 if ( typeof (part) === 'string') { 1062 pName = part.toLowerCase(); 1063 1064 var subLength = this.info.patronymicName.length; 1065 while (subLength--) { 1066 if (pName.indexOf(this.info.patronymicName[subLength]) !== -1) 1067 return true; 1068 } 1069 } 1070 return false; 1071 }, 1072 1073 /** 1074 * find family name from the russian name 1075 * 1076 * @protected 1077 * @param {Array.<string>} parts the current array of name parts 1078 * @return boolean true if patronymic, false otherwise 1079 */ 1080 _findFamilyName: function(parts) { 1081 var index, part, substring; 1082 for (index = 0; index < parts.length; index++) { 1083 part = parts[index]; 1084 1085 if ( typeof (part) === 'string') { 1086 part = part.toLowerCase(); 1087 var length = part.length - 1; 1088 1089 if (this.info.familyName.indexOf(part) !== -1) { 1090 return index; 1091 } else if (part[length] === 'в' || part[length] === 'н' || 1092 part[length] === 'й') { 1093 substring = 
part.slice(0, -1); 1094 if (this.info.familyName.indexOf(substring) !== -1) { 1095 return index; 1096 } 1097 } else if ((part[length - 1] === 'в' && part[length] === 'а') || 1098 (part[length - 1] === 'н' && part[length] === 'а') || 1099 (part[length - 1] === 'а' && part[length] === 'я')) { 1100 substring = part.slice(0, -2); 1101 if (this.info.familyName.indexOf(substring) !== -1) { 1102 return index; 1103 } 1104 } 1105 } 1106 } 1107 return -1; 1108 }, 1109 1110 /** 1111 * parse russian name 1112 * 1113 * @protected 1114 * @param {Array.<string>} parts the current array of name parts 1115 * @return 1116 */ 1117 _parseRussianName: function(parts) { 1118 var conjunctionIndex, familyIndex = -1; 1119 1120 if (parts.length === 1) { 1121 if (this.prefix || typeof (parts[0]) === 'object') { 1122 // already has a prefix, so assume it goes with the family name 1123 // like "Dr. Roberts" or 1124 // it is a name with auxillaries, which is almost always a 1125 // family name 1126 this.familyName = parts[0]; 1127 } else { 1128 this.givenName = parts[0]; 1129 } 1130 } else if (parts.length === 2) { 1131 // we do G F 1132 if (this.info.order === 'fgm') { 1133 this.givenName = parts[1]; 1134 this.familyName = parts[0]; 1135 } else if (this.info.order === "gmf") { 1136 this.givenName = parts[0]; 1137 this.familyName = parts[1]; 1138 } else if ( typeof (this.info.order) === 'undefined') { 1139 if (this._isPatronymicName(parts[1]) === true) { 1140 this.middleName = parts[1]; 1141 this.givenName = parts[0]; 1142 } else if ((familyIndex = this._findFamilyName(parts)) !== -1) { 1143 if (familyIndex === 1) { 1144 this.givenName = parts[0]; 1145 this.familyName = parts[1]; 1146 } else { 1147 this.familyName = parts[0]; 1148 this.givenName = parts[1]; 1149 } 1150 1151 } else { 1152 this.givenName = parts[0]; 1153 this.familyName = parts[1]; 1154 } 1155 1156 } 1157 } else if (parts.length >= 3) { 1158 // find the first instance of 'and' in the name 1159 conjunctionIndex = 
this._findLastConjunction(parts); 1160 var patronymicNameIndex = this._findPatronymicName(parts); 1161 if (conjunctionIndex > 0) { 1162 // if there's a conjunction that's not the first token, put 1163 // everything up to and 1164 // including the token after it into the first name, the last 1165 // token into 1166 // the family name (if it exists) and everything else in to the 1167 // middle name 1168 // 0 1 2 3 4 5 1169 // G A G M M F 1170 // G G A G M F 1171 // G G G A G F 1172 // G G G G A G 1173 // if(this.order == "gmf") { 1174 this.givenName = parts.slice(0, conjunctionIndex + 2); 1175 1176 if (conjunctionIndex + 1 < parts.length - 1) { 1177 this.familyName = parts.splice(parts.length - 1, 1); 1178 // //console.log(this.familyName); 1179 if (conjunctionIndex + 2 < parts.length - 1) { 1180 this.middleName = parts.slice(conjunctionIndex + 2, 1181 parts.length - conjunctionIndex - 3); 1182 } 1183 } else if (this.order == "fgm") { 1184 this.familyName = parts.slice(0, conjunctionIndex + 2); 1185 if (conjunctionIndex + 1 < parts.length - 1) { 1186 this.middleName = parts.splice(parts.length - 1, 1); 1187 if (conjunctionIndex + 2 < parts.length - 1) { 1188 this.givenName = parts.slice(conjunctionIndex + 2, 1189 parts.length - conjunctionIndex - 3); 1190 } 1191 } 1192 } 1193 } else if (patronymicNameIndex !== -1) { 1194 this.middleName = parts[patronymicNameIndex]; 1195 1196 if (patronymicNameIndex === (parts.length - 1)) { 1197 this.familyName = parts[0]; 1198 this.givenName = parts.slice(1, patronymicNameIndex); 1199 } else { 1200 this.givenName = parts.slice(0, patronymicNameIndex); 1201 1202 this.familyName = parts[parts.length - 1]; 1203 } 1204 } else { 1205 this.givenName = parts[0]; 1206 1207 this.middleName = parts.slice(1, parts.length - 1); 1208 1209 this.familyName = parts[parts.length - 1]; 1210 } 1211 } 1212 }, 1213 1214 1215 /** 1216 * @protected 1217 */ 1218 _parseWesternName: function (parts) { 1219 1220 if (this.locale.getLanguage() === "es" || 
this.locale.getLanguage() === "pt") { 1221 // in spain and mexico and portugal, we parse names differently than in the rest of the world 1222 // because of the double family names 1223 this._parseSpanishName(parts); 1224 } else if (this.locale.getLanguage() === "ru") { 1225 /* 1226 * In Russian, names can be given equally validly as given-family 1227 * or family-given. Use the value of the "order" property of the 1228 * constructor options to give the default when the order is ambiguous. 1229 */ 1230 this._parseRussianName(parts); 1231 } else if (this.locale.getLanguage() === "id") { 1232 // in indonesia, we parse names differently than in the rest of the world 1233 // because names don't have family names usually. 1234 this._parseIndonesianName(parts); 1235 } else { 1236 this._parseGenericWesternName(parts); 1237 } 1238 }, 1239 1240 /** 1241 * When sorting names with auxiliary words (like "van der" or "de los"), determine 1242 * which is the "head word" and return a string that can be easily sorted by head 1243 * word. In English, names are always sorted by initial characters. In places like 1244 * the Netherlands or Germany, family names are sorted by the head word of a list 1245 * of names rather than the first element of that name. 
1246 * @return {string|undefined} a string containing the family name[s] to be used for sorting 1247 * in the current locale, or undefined if there is no family name in this object 1248 */ 1249 getSortFamilyName: function () { 1250 var name, 1251 auxillaries, 1252 auxString, 1253 parts, 1254 i; 1255 1256 // no name to sort by 1257 if (!this.familyName) { 1258 return undefined; 1259 } 1260 1261 // first break the name into parts 1262 if (this.info) { 1263 if (this.info.sortByHeadWord) { 1264 if (typeof (this.familyName) === 'string') { 1265 name = this.familyName.replace(/\s+/g, ' '); // compress multiple whitespaces 1266 parts = name.trim().split(' '); 1267 } else { 1268 // already split 1269 parts = this.familyName; 1270 } 1271 1272 auxillaries = this._findPrefix(parts, this.info.auxillaries, false); 1273 if (auxillaries && auxillaries.length > 0) { 1274 if (typeof (this.familyName) === 'string') { 1275 auxString = auxillaries.join(' '); 1276 name = this.familyName.substring(auxString.length + 1) + ', ' + auxString; 1277 } else { 1278 name = parts.slice(auxillaries.length).join(' ') + 1279 ', ' + 1280 parts.slice(0, auxillaries.length).join(' '); 1281 } 1282 } 1283 } else if (this.info.knownFamilyNames && this.familyName) { 1284 parts = this.familyName.split(''); 1285 var familyNameArray = this._findPrefix(parts, this.info.knownFamilyNames, true, this.info.noCompoundFamilyNames); 1286 name = ""; 1287 for (i = 0; i < familyNameArray.length; i++) { 1288 name += (this.info.knownFamilyNames[familyNameArray[i]] || ""); 1289 } 1290 } 1291 } 1292 1293 return name || this.familyName; 1294 }, 1295 1296 getHeadFamilyName: function () {}, 1297 1298 /** 1299 * @protected 1300 * Return a shallow copy of the current instance. 1301 */ 1302 clone: function () { 1303 return new Name(this); 1304 } 1305 }; 1306 1307 module.exports = Name;