1 /*
  2  * Name.js - Person name parser
  3  *
  4  * Copyright © 2013-2015, 2018, JEDLSoft
  5  *
  6  * Licensed under the Apache License, Version 2.0 (the "License");
  7  * you may not use this file except in compliance with the License.
  8  * You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  *
 16  * See the License for the specific language governing permissions and
 17  * limitations under the License.
 18  */
 19 
 20 // !data name
 21 
 22 // notes:
 23 // icelandic given names: http://en.wiktionary.org/wiki/Appendix:Icelandic_given_names
 24 // danish approved given names: http://www.familiestyrelsen.dk/samliv/navne/
 25 // http://www.mentalfloss.com/blogs/archives/59277
 26 // other countries with first name restrictions: Norway, China, New Zealand, Japan, Sweden, Germany, Hungary
 27 
 28 var ilib = require("./ilib.js");
 29 var Utils = require("./Utils.js");
 30 var JSUtils = require("./JSUtils.js");
 31 
 32 var Locale = require("./Locale.js");
 33 var IString = require("./IString.js");
 34 var CType = require("./CType.js");
 35 var isAlpha = require("./isAlpha.js");
 36 var isIdeo = require("./isIdeo.js");
 37 var isPunct = require("./isPunct.js");
 38 var isSpace = require("./isSpace.js");
 39 
 40 /**
 41  * @class
 42  * A class to parse names of people. Different locales have different conventions when it
 43  * comes to naming people.<p>
 44  *
 45  * The options can contain any of the following properties:
 46  *
 47  * <ul>
 48  * <li><i>locale</i> - use the rules and conventions of the given locale in order to parse
 49  * the name
 50  * <li><i>style</i> - explicitly use the named style to parse the name. Valid values so
 51  * far are "western" and "asian". If this property is not specified, then the style will
 52  * be gleaned from the name itself. This class will count the total number of Latin or Asian
 53  * characters. If the majority of the characters are in one style, that style will be
 54  * used to parse the whole name.
 * <li><i>order</i> - explicitly use the given order for names. In some locales, such
 * as Russian, names may be written equally validly as "givenName familyName" or "familyName
 * givenName". This option tells the parser which order to prefer, and overrides the
 * default order for the locale. Valid values are "gmf" (given-middle-family), "fmg"
 * (family-middle-given) or "fgm" (family-given-middle). Any other value is ignored.
 * <li><i>useSpaces</i> - explicitly specifies whether or not to use spaces between the
 * given name, middle name and family name.
 61  * <li><i>compoundFamilyName</i> - for Asian and some other types of names, search for compound
 62  * family names. If this parameter is not specified, the default is to use the setting that is
 63  * most common for the locale. If it is specified, the locale default is
 64  * overridden with this flag.
 65  * <li>onLoad - a callback function to call when the name info is fully
 66  * loaded and the name has been parsed. When the onLoad option is given, the name object
 67  * will attempt to load any missing locale data using the ilib loader callback.
 68  * When the constructor is done (even if the data is already preassembled), the
 69  * onLoad function is called with the current instance as a parameter, so this
 70  * callback can be used with preassembled or dynamic loading or a mix of the two.
 71  *
 72  * <li>sync - tell whether to load any missing locale data synchronously or
 73  * asynchronously. If this option is given as "false", then the "onLoad"
 74  * callback must be given, as the instance returned from this constructor will
 75  * not be usable for a while.
 76  *
 77  * <li><i>loadParams</i> - an object containing parameters to pass to the
 78  * loader callback function when locale data is missing. The parameters are not
 * interpreted or modified in any way. They are simply passed along. The object
 80  * may contain any property/value pairs as long as the calling code is in
 81  * agreement with the loader callback function as to what those parameters mean.
 82  * </ul>
 83  *
 84  * Additionally, a name instance can be constructed by giving the explicit
 85  * already-parsed fields or by using another name instance as the parameter. (That is,
 86  * it becomes a copy constructor.) The name fields can be any of the following:
 87  *
 88  * <ul>
 89  * <li><i>prefix</i> - the prefix prepended to the name
 90  * <li><i>givenName</i> - the person's given or "first" name
 * <li><i>middleName</i> - one or more middle names, separated by spaces even if the
 * language doesn't usually use spaces. The spaces are merely separators.
 * <li><i>familyName</i> - one or more family or "last" names, separated by spaces
 * even if the language doesn't usually use spaces. The spaces are merely separators.
 95  * <li><i>suffix</i> - the suffix appended to the name
 96  * <li><i>honorific</i> - the honorific title of the name. This could be formatted
 97  * as a prefix or a suffix, depending on the customs of the particular locale. You
 98  * should only give either an honorific or a prefix/suffix, but not both.
 99  * </ul>
100  *
101  * When the parser has completed its parsing, it fills in the same fields as listed
102  * above.<p>
103  *
 * For names that include auxiliary words, such as the family name "van der Heijden", all
 * of the auxiliary words ("van der") will be included in the field.<p>
106  *
107  * For names in some Spanish locales, it is assumed that the family name is doubled. That is,
108  * a person may have a paternal family name followed by a maternal family name. All
109  * family names will be listed in the familyName field as normal, separated by spaces.
110  * When formatting the short version of such names, only the paternal family name is used.
111  *
112  * @constructor
113  * @param {string|Name|Object=} name the name to parse
114  * @param {Object=} options Options governing the construction of this name instance
115  */
var Name = function (name, options) {
    var sync = true;

    // Report completion to the optional onLoad callback. The option is looked up
    // when the notification happens, not when the constructor starts.
    function notify(result) {
        if (options && typeof(options.onLoad) === 'function') {
            options.onLoad(result);
        }
    }

    if (!name || name.length === 0) {
        // nothing to parse: no fields are set and the callback receives undefined
        notify(undefined);
        return;
    }

    this.loadParams = {};

    // NOTE(review): options.useSpaces is documented on this class but is never read
    // here; this.useSpaces is only set when copying from another name object below.
    if (options) {
        if (options.locale) {
            this.locale = (typeof (options.locale) === 'string') ? new Locale(options.locale) : options.locale;
        }

        if (options.style && (options.style === "asian" || options.style === "western")) {
            this.style = options.style;
        }

        if (options.order && (options.order === "gmf" || options.order === "fmg" || options.order === "fgm")) {
            this.order = options.order;
        }

        if (typeof(options.sync) === 'boolean') {
            sync = options.sync;
        }

        if (typeof(options.loadParams) !== 'undefined') {
            this.loadParams = options.loadParams;
        }

        if (typeof(options.compoundFamilyName) !== 'undefined') {
            // stored inverted: singleFamilyName === true means "do not look for compounds"
            this.singleFamilyName = !options.compoundFamilyName;
        }
    }

    this.locale = this.locale || new Locale();

    // The character-class helpers are initialised one after another before the
    // locale's name data is requested; each step continues in the callback of the
    // previous one so that the same code works for sync and async loading.
    isAlpha._init(sync, this.loadParams, ilib.bind(this, function() {
        isIdeo._init(sync, this.loadParams, ilib.bind(this, function() {
            isPunct._init(sync, this.loadParams, ilib.bind(this, function() {
                isSpace._init(sync, this.loadParams, ilib.bind(this, function() {
                    Utils.loadData({
                        object: "Name",
                        locale: this.locale,
                        name: "name.json",
                        sync: sync,
                        loadParams: this.loadParams,
                        callback: ilib.bind(this, function (info) {
                            if (!info) {
                                // no locale data available: fall back to the built-in defaults
                                info = Name.defaultInfo[this.style || "western"];
                            }
                            if (typeof (name) === 'object') {
                                // copy constructor
                                /**
                                 * The prefixes for this name
                                 * @type {string|Array.<string>}
                                 */
                                this.prefix = name.prefix;
                                /**
                                 * The given (personal) name in this name.
                                 * @type {string|Array.<string>}
                                 */
                                this.givenName = name.givenName;
                                /**
                                 * The middle names used in this name. If there are multiple middle names, they all
                                 * appear in this field separated by spaces.
                                 * @type {string|Array.<string>}
                                 */
                                this.middleName = name.middleName;
                                /**
                                 * The family names in this name. If there are multiple family names, they all
                                 * appear in this field separated by spaces.
                                 * @type {string|Array.<string>}
                                 */
                                this.familyName = name.familyName;
                                /**
                                 * The suffixes for this name. If there are multiple suffixes, they all
                                 * appear in this field separated by spaces.
                                 * @type {string|Array.<string>}
                                 */
                                this.suffix = name.suffix;
                                /**
                                 * The honorific title for this name. This honorific will be used as the prefix
                                 * or suffix as dictated by the locale
                                 * @type {string|Array.<string>}
                                 */
                                this.honorific = name.honorific;

                                // private properties. The source may be a plain object holding only
                                // name fields; in that case keep the locale/style/order established
                                // from the options above instead of overwriting them with undefined.
                                this.locale = name.locale || this.locale;
                                this.style = name.style || this.style;
                                this.order = name.order || this.order;
                                this.useSpaces = name.useSpaces;
                                this.isAsianName = name.isAsianName;

                                notify(this);
                                return;
                            }
                            /**
                             * @type {{
                             *   nameStyle:string,
                             *   order:string,
                             *   prefixes:Array.<string>,
                             *   suffixes:Array.<string>,
                             *   auxillaries:Array.<string>,
                             *   honorifics:Array.<string>,
                             *   knownFamilyNames:Array.<string>,
                             *   noCompoundFamilyNames:boolean,
                             *   sortByHeadWord:boolean
                             * }} */
                            this.info = info;
                            this._init(name);
                            notify(this);
                        })
                    });
                }));
            }));
        }));
    }));
};
244 
/*
 * Built-in name info, keyed by name style. The constructor falls back to one of
 * these entries when no name data could be loaded for the locale. The "western"
 * entry prefers ilib.data.name when that is already present.
 */
Name.defaultInfo = {
    "western": ilib.data.name || {
        "components": {
            "short": "gf",
            "medium": "gmf",
            "long": "pgmf",
            "full": "pgmfs",
            "formal_short": "hf",
            "formal_long": "hgf"
        },
        "format": "{prefix} {givenName} {middleName} {familyName}{suffix}",
        "sortByHeadWord": false,
        "nameStyle": "western",
        "conjunctions": {
            "and1": "and",
            "and2": "and",
            "or1": "or",
            "or2": "or"
        },
        "auxillaries": {
            "von": 1,
            "von der": 1,
            "von den": 1,
            "van": 1,
            "van der": 1,
            "van de": 1,
            "van den": 1,
            "de": 1,
            "di": 1,
            "la": 1,
            "lo": 1,
            "des": 1,
            "le": 1,
            "les": 1,
            "du": 1,
            "de la": 1,
            "del": 1,
            "de los": 1,
            "de las": 1
        },
        "prefixes": [
            "doctor",
            "dr",
            "mr",
            "mrs",
            "ms",
            "mister",
            "madame",
            // correct spelling; the historical misspelling below is kept so that
            // input which matched before still matches
            "mademoiselle",
            "madamoiselle",
            "miss",
            "monsieur",
            "señor",
            "señora",
            "señorita"
        ],
        "suffixes": [
            ",",
            "junior",
            "jr",
            "senior",
            "sr",
            "i",
            "ii",
            "iii",
            "esq",
            "phd",
            "md"
        ],
        "patronymicName":[ ],
        "familyNames":[ ]
    },
    "asian": {
        "components": {
            "short": "gf",
            "medium": "gmf",
            "long": "hgmf",
            "full": "hgmf",
            "formal_short": "hf",
            "formal_long": "hgf"
        },
        "format": "{prefix}{familyName}{middleName}{givenName}{suffix}",
        "nameStyle": "asian",
        "sortByHeadWord": false,
        "conjunctions": {},
        "auxillaries": {},
        "prefixes": [],
        "suffixes": [],
        "patronymicName":[],
        "familyNames":[]
    }
};
336 
/**
 * Tell whether a character belongs to one of the scripts treated as "Asian" by
 * the name parser: ideographs (as reported by isIdeo), Hangul, Hiragana or
 * Katakana.
 * @static
 * @protected
 * @param {string} c the character to test
 * @return {boolean} the first truthy result among the script tests, or the
 * result of the last test if none matched
 */
Name._isAsianChar = function(c) {
    var ranges = ["hangul", "hiragana", "katakana"],
        verdict = isIdeo(c),
        k;

    // stop at the first script that claims the character
    for (k = 0; !verdict && k < ranges.length; k++) {
        verdict = CType.withinRange(c, ranges[k]);
    }

    return verdict;
};
349 
350 
/**
 * Decide whether a name should be parsed with the Asian rules.
 *
 * For the languages "ko", "ja" and "zh" the first Asian character found settles
 * the question. For every other language the Asian and alphabetic characters are
 * counted and the name is Asian only when the Asian characters strictly outnumber
 * the alphabetic ones.
 *
 * @static
 * @protected
 * @param {string} name the name to examine
 * @param {string} language the language code of the current locale
 * @return {boolean} true if the name should be treated as an Asian name
 */
Name._isAsianName = function (name, language) {
    var asian = 0,
        latin = 0,
        isCJKLanguage = (language === "ko" || language === "ja" || language === "zh"),
        i, c;

    if (!(name && name.length > 0)) {
        return false;
    }

    for (i = 0; i < name.length; i++) {
        c = name.charAt(i);

        if (Name._isAsianChar(c)) {
            if (isCJKLanguage) {
                return true;
            }
            asian++;
        } else if (isAlpha(c)) {
            // An earlier version tested `!language == "ko" || ...` here in order to
            // return false early. Because `!` binds tighter than `==`, that test
            // compared a boolean with a string and could never be true, so it has
            // been removed; alphabetic characters are simply counted.
            latin++;
        }
    }

    return latin < asian;
};
385 
/**
 * Scan the name character by character and report whether it should be treated
 * as a European-style name. The scan returns true at the first character that is
 * neither Asian, punctuation nor white space. When the language is "ko", "ja" or
 * "zh", the scan returns false at the first Asian character instead. If the
 * characters run out without either happening, the result is false.
 * @static
 * @protected
 * @param {string} name the name to examine
 * @param {string} language the language code of the current locale
 * @return {boolean} true if the name is considered European
 */
Name._isEuroName = function (name, language) {
    var asianLanguage = (language =="ko" || language =="ja" || language =="zh"),
        chars = new IString(name).charIterator(),
        ch;

    while (chars.hasNext()) {
        ch = chars.next();

        if (Name._isAsianChar(ch)) {
            if (asianLanguage) {
                return false;
            }
        } else if (!isPunct(ch) && !isSpace(ch)) {
            return true;
        }
    }

    return false;
};
408 
409 Name.prototype = {
410     /**
411      * @protected
412      */
413     _init: function (name) {
414         var parts, prefixArray, prefix, prefixLower,
415             suffixArray, suffix, suffixLower,
416             i, info, hpSuffix;
417         var currentLanguage = this.locale.getLanguage();
418 
419         if (name) {
420             // for DFISH-12905, pick off the part that the LDAP server automatically adds to our names in HP emails
421             i = name.search(/\s*[,\/\(\[\{<]/);
422             if (i !== -1) {
423                 hpSuffix = name.substring(i);
424                 hpSuffix = hpSuffix.replace(/\s+/g, ' '); // compress multiple whitespaces
425                 suffixArray = hpSuffix.split(" ");
426                 var conjunctionIndex = this._findLastConjunction(suffixArray);
427                 if (conjunctionIndex > -1) {
428                     // it's got conjunctions in it, so this is not really a suffix
429                     hpSuffix = undefined;
430                 } else {
431                     name = name.substring(0, i);
432                 }
433             }
434 
435             this.isAsianName = Name._isAsianName(name, currentLanguage);
436             if (this.info.nameStyle === "asian") {
437                 info = this.isAsianName ? this.info : Name.defaultInfo.western;
438             } else {
439                 info = this.isAsianName ? Name.defaultInfo.asian : this.info;
440             }
441 
442             if (this.isAsianName) {
443                 // all-asian names
444                 if (this.useSpaces === false) {
445                     name = name.replace(/\s+/g, ''); // eliminate all whitespaces
446                 }
447                 parts = name.trim().split('');
448             }
449             //}
450             else {
451                 name = name.replace(/, /g, ' , ');
452                 name = name.replace(/\s+/g, ' '); // compress multiple whitespaces
453                 parts = name.trim().split(' ');
454             }
455 
456             // check for prefixes
457             if (parts.length > 1) {
458                 for (i = parts.length; i > 0; i--) {
459                     prefixArray = parts.slice(0, i);
460                     prefix = prefixArray.join(this.isAsianName ? '' : ' ');
461                     prefixLower = prefix.toLowerCase();
462                     prefixLower = prefixLower.replace(/[,\.]/g, ''); // ignore commas and periods
463                     if (ilib.isArray(this.info.prefixes) &&
464                         (JSUtils.indexOf(this.info.prefixes, prefixLower) > -1 || this._isConjunction(prefixLower))) {
465                         if (this.prefix) {
466                             if (!this.isAsianName) {
467                                 this.prefix += ' ';
468                             }
469                             this.prefix += prefix;
470                         } else {
471                             this.prefix = prefix;
472                         }
473                         parts = parts.slice(i);
474                         i = parts.length;
475                     }
476                 }
477             }
478             // check for suffixes
479             if (parts.length > 1) {
480                 for (i = parts.length; i > 0; i--) {
481                     suffixArray = parts.slice(-i);
482                     suffix = suffixArray.join(this.isAsianName ? '' : ' ');
483                     suffixLower = suffix.toLowerCase();
484                     suffixLower = suffixLower.replace(/[\.]/g, ''); // ignore periods
485                     if (ilib.isArray(this.info.suffixes) && JSUtils.indexOf(this.info.suffixes, suffixLower) > -1) {
486                         if (this.suffix) {
487                             if (!this.isAsianName && !isPunct(this.suffix.charAt(0))) {
488                                 this.suffix = ' ' + this.suffix;
489                             }
490                             this.suffix = suffix + this.suffix;
491                         } else {
492                             this.suffix = suffix;
493                         }
494                         parts = parts.slice(0, parts.length - i);
495                         i = parts.length;
496                     }
497                 }
498             }
499 
500             if (hpSuffix) {
501                 this.suffix = (this.suffix && this.suffix + hpSuffix) || hpSuffix;
502             }
503 
504             // adjoin auxillary words to their headwords
505             if (parts.length > 1 && !this.isAsianName) {
506                 parts = this._joinAuxillaries(parts, this.isAsianName);
507             }
508 
509             if (this.isAsianName) {
510                 this._parseAsianName(parts, currentLanguage);
511             } else {
512                 this._parseWesternName(parts);
513             }
514 
515             this._joinNameArrays();
516         }
517     },
518 
519     /**
520      * @return {number}
521      *
522     _findSequence: function(parts, hash, isAsian) {
523         var sequence, sequenceLower, sequenceArray, aux = [], i, ret = {};
524 
525         if (parts.length > 0 && hash) {
526             //console.info("_findSequence: finding sequences");
527             for (var start = 0; start < parts.length-1; start++) {
528                 for ( i = parts.length; i > start; i-- ) {
529                     sequenceArray = parts.slice(start, i);
530                     sequence = sequenceArray.join(isAsian ? '' : ' ');
531                     sequenceLower = sequence.toLowerCase();
532                     sequenceLower = sequenceLower.replace(/[,\.]/g, '');  // ignore commas and periods
533 
534                     //console.info("_findSequence: checking sequence: '" + sequenceLower + "'");
535 
536                     if ( sequenceLower in hash ) {
537                         ret.match = sequenceArray;
538                         ret.start = start;
539                         ret.end = i;
540                         return ret;
541                         //console.info("_findSequence: Found sequence '" + sequence + "' New parts list is " + JSON.stringify(parts));
542                     }
543                 }
544             }
545         }
546 
547         return undefined;
548     },
549     */
550 
551     /**
552      * @protected
553      * @param {Array} parts
554      * @param {Array} names
555      * @param {boolean} isAsian
556      * @param {boolean=} noCompoundPrefix
557      */
558     _findPrefix: function (parts, names, isAsian, noCompoundPrefix) {
559         var i, prefix, prefixLower, prefixArray, aux = [];
560 
561         if (parts.length > 0 && names) {
562             for (i = parts.length; i > 0; i--) {
563                 prefixArray = parts.slice(0, i);
564                 prefix = prefixArray.join(isAsian ? '' : ' ');
565                 prefixLower = prefix.toLowerCase();
566                 prefixLower = prefixLower.replace(/[,\.]/g, ''); // ignore commas and periods
567 
568                 if (prefixLower in names) {
569                     aux = aux.concat(isAsian ? prefix : prefixArray);
570                     if (noCompoundPrefix) {
571                         // don't need to parse further. Just return it as is.
572                         return aux;
573                     }
574                     parts = parts.slice(i);
575                     i = parts.length + 1;
576                 }
577             }
578         }
579 
580         return aux;
581     },
582 
583     /**
584      * @protected
585      */
586     _findSuffix: function (parts, names, isAsian) {
587         var i, j, seq = "";
588 
589         for (i = 0; i < names.length; i++) {
590             if (parts.length >= names[i].length) {
591                 j = 0;
592                 while (j < names[i].length && parts[parts.length - j] === names[i][names[i].length - j]) {
593                     j++;
594                 }
595                 if (j >= names[i].length) {
596                     seq = parts.slice(parts.length - j).join(isAsian ? "" : " ") + (isAsian ? "" : " ") + seq;
597                     parts = parts.slice(0, parts.length - j);
598                     i = -1; // restart the search
599                 }
600             }
601         }
602 
603         this.suffix = seq;
604         return parts;
605     },
606 
607     /**
608      * @protected
609      * Tell whether or not the given word is a conjunction in this language.
610      * @param {string} word the word to test
611      * @return {boolean} true if the word is a conjunction
612      */
613     _isConjunction: function _isConjunction(word) {
614         return (this.info.conjunctions.and1 === word ||
615             this.info.conjunctions.and2 === word ||
616             this.info.conjunctions.or1 === word ||
617             this.info.conjunctions.or2 === word ||
618             ("&" === word) ||
619             ("+" === word));
620     },
621 
622     /**
623      * Find the last instance of 'and' in the name
624      * @protected
625      * @param {Array.<string>} parts
626      * @return {number}
627      */
628     _findLastConjunction: function _findLastConjunction(parts) {
629         var conjunctionIndex = -1,
630             index, part;
631 
632         for (index = 0; index < parts.length; index++) {
633             part = parts[index];
634             if (typeof (part) === 'string') {
635                 part = part.toLowerCase();
636                 // also recognize English
637                 if ("and" === part || "or" === part || "&" === part || "+" === part) {
638                     conjunctionIndex = index;
639                 }
640                 if (this._isConjunction(part)) {
641                     conjunctionIndex = index;
642                 }
643             }
644         }
645         return conjunctionIndex;
646     },
647 
648     /**
649      * @protected
650      * @param {Array.<string>} parts the current array of name parts
651      * @param {boolean} isAsian true if the name is being parsed as an Asian name
652      * @return {Array.<string>} the remaining parts after the prefixes have been removed
653      */
654     _extractPrefixes: function (parts, isAsian) {
655         var i = this._findPrefix(parts, this.info.prefixes, isAsian);
656         if (i > 0) {
657             this.prefix = parts.slice(0, i).join(isAsian ? "" : " ");
658             return parts.slice(i);
659         }
660         // prefixes not found, so just return the array unmodified
661         return parts;
662     },
663 
664     /**
665      * @protected
666      * @param {Array.<string>} parts the current array of name parts
667      * @param {boolean} isAsian true if the name is being parsed as an Asian name
668      * @return {Array.<string>} the remaining parts after the suffices have been removed
669      */
670     _extractSuffixes: function (parts, isAsian) {
671         var i = this._findSuffix(parts, this.info.suffixes, isAsian);
672         if (i > 0) {
673             this.suffix = parts.slice(i).join(isAsian ? "" : " ");
674             return parts.slice(0, i);
675         }
676         // suffices not found, so just return the array unmodified
677         return parts;
678     },
679 
680     /**
681      * Adjoin auxillary words to their head words.
682      * @protected
683      * @param {Array.<string>} parts the current array of name parts
684      * @param {boolean} isAsian true if the name is being parsed as an Asian name
685      * @return {Array.<string>} the parts after the auxillary words have been plucked onto their head word
686      */
687     _joinAuxillaries: function (parts, isAsian) {
688         var start, i, prefixArray, prefix, prefixLower;
689 
690         if (this.info.auxillaries && (parts.length > 2 || this.prefix)) {
691             for (start = 0; start < parts.length - 1; start++) {
692                 for (i = parts.length; i > start; i--) {
693                     prefixArray = parts.slice(start, i);
694                     prefix = prefixArray.join(' ');
695                     prefixLower = prefix.toLowerCase();
696                     prefixLower = prefixLower.replace(/[,\.]/g, ''); // ignore commas and periods
697 
698                     if (prefixLower in this.info.auxillaries) {
699                         parts.splice(start, i + 1 - start, prefixArray.concat(parts[i]));
700                         i = start;
701                     }
702                 }
703             }
704         }
705 
706         return parts;
707     },
708 
709     /**
710      * Recursively join an array or string into a long string.
711      * @protected
712      */
713     _joinArrayOrString: function _joinArrayOrString(part) {
714         var i;
715         if (typeof (part) === 'object') {
716             for (i = 0; i < part.length; i++) {
717                 part[i] = this._joinArrayOrString(part[i]);
718             }
719             var ret = "";
720             part.forEach(function (segment) {
721                 if (ret.length > 0 && !isPunct(segment.charAt(0))) {
722                     ret += ' ';
723                 }
724                 ret += segment;
725             });
726 
727             return ret;
728         }
729 
730         return part;
731     },
732 
733     /**
734      * @protected
735      */
736     _joinNameArrays: function _joinNameArrays() {
737         var prop;
738         for (prop in this) {
739 
740             if (this[prop] !== undefined && typeof (this[prop]) === 'object' && ilib.isArray(this[prop])) {
741 
742                 this[prop] = this._joinArrayOrString(this[prop]);
743             }
744         }
745     },
746 
747     /**
748      * @protected
749      */
750     _parseAsianName: function (parts, language) {
751         var familyNameArray = this._findPrefix(parts, this.info.knownFamilyNames, true, typeof(this.singleFamilyName) !== 'undefined' ? this.singleFamilyName : this.info.noCompoundFamilyNames);
752         var tempFullName = parts.join('');
753 
754         if (familyNameArray && familyNameArray.length > 0) {
755             this.familyName = familyNameArray.join('');
756             this.givenName = parts.slice(this.familyName.length).join('');
757 
758             //Overide parsing rules if spaces are found in korean
759             if (language === "ko" && tempFullName.search(/\s*[/\s]/) > -1 && !this.suffix) {
760                 this._parseKoreanName(tempFullName);
761             }
762         } else if (this.locale.getLanguage() === "ja") {
763             this._parseJapaneseName(parts);
764         } else if (this.suffix || this.prefix) {
765             this.familyName = parts.join('');
766         } else {
767             this.givenName = parts.join('');
768         }
769     },
770 
771     /**
772      * @protected
773      */
774     _parseKoreanName: function (name) {
775         var tempName = name;
776 
777         var spaceSplit = tempName.split(" ");
778         var spceCount = spaceSplit.length;
779         var fistSpaceIndex = tempName.indexOf(" ");
780         var lastSpaceIndex = tempName.lastIndexOf(" ");
781 
782         if (spceCount === 2) {
783             this.familyName = spaceSplit[0];
784             this.givenName = tempName.slice(fistSpaceIndex, tempName.length);
785         } else {
786             this.familyName = spaceSplit[0];
787             this.middleName = tempName.slice(fistSpaceIndex, lastSpaceIndex);
788             this.givenName = tempName.slice(lastSpaceIndex, tempName.length);
789         }
790 
791     },
792 
793     /**
794      * @protected
795      */
796     _parseJapaneseName: function (parts) {
797         if (this.suffix && this.suffix.length > 1 && this.info.honorifics.indexOf(this.suffix)>-1) {
798             if (parts.length === 1) {
799                 if (CType.withinRange(parts[0], "cjk")) {
800                     this.familyName = parts[0];
801                 } else {
802                     this.givenName = parts[0];
803                 }
804                 return;
805             } else if (parts.length === 2) {
806                 this.familyName = parts.slice(0,parts.length).join("")
807                 return;
808             }
809         }
810         if (parts.length > 1) {
811             var fn = "";
812             for (var i = 0; i < parts.length; i++) {
813                 if (CType.withinRange(parts[i], "cjk")) {
814                     fn += parts[i];
815                 } else if (fn.length > 1 && CType.withinRange(parts[i], "hiragana")) {
816                     this.familyName = fn;
817                     this.givenName = parts.slice(i,parts.length).join("");
818                     return;
819                 } else {
820                     break;
821                 }
822             }
823         }
824         if (parts.length === 1) {
825             this.familyName = parts[0];
826         } else if (parts.length === 2) {
827             this.familyName = parts[0];
828             this.givenName = parts[1];
829         } else if (parts.length === 3) {
830             this.familyName = parts[0];
831             this.givenName = parts.slice(1,parts.length).join("");
832         } else if (parts.length > 3) {
833             this.familyName = parts.slice(0,2).join("")
834             this.givenName = parts.slice(2,parts.length).join("");
835         }
836     },
837 
838     /**
839      * @protected
840      */
841     _parseSpanishName: function (parts) {
842         var conjunctionIndex;
843 
844         if (parts.length === 1) {
845             if (this.prefix || typeof (parts[0]) === 'object') {
846                 this.familyName = parts[0];
847             } else {
848                 this.givenName = parts[0];
849             }
850         } else if (parts.length === 2) {
851             // we do G F
852             this.givenName = parts[0];
853             this.familyName = parts[1];
854         } else if (parts.length === 3) {
855             conjunctionIndex = this._findLastConjunction(parts);
856             // if there's an 'and' in the middle spot, put everything in the first name
857             if (conjunctionIndex === 1) {
858                 this.givenName = parts;
859             } else {
860                 // else, do G F F
861                 this.givenName = parts[0];
862                 this.familyName = parts.slice(1);
863             }
864         } else if (parts.length > 3) {
865             //there are at least 4 parts to this name
866 
867             conjunctionIndex = this._findLastConjunction(parts);
868             ////console.log("@@@@@@@@@@@@@@@@"+conjunctionIndex)
869             if (conjunctionIndex > 0) {
870                 // if there's a conjunction that's not the first token, put everything up to and
871                 // including the token after it into the first name, the last 2 tokens into
872                 // the family name (if they exist) and everything else in to the middle name
873                 // 0 1 2 3 4 5
874                 // G A G
875                 // G A G F
876                 // G G A G
877                 // G A G F F
878                 // G G A G F
879                 // G G G A G
880                 // G A G M F F
881                 // G G A G F F
882                 // G G G A G F
883                 // G G G G A G
884                 this.givenName = parts.splice(0, conjunctionIndex + 2);
885                 if (parts.length > 1) {
886                     this.familyName = parts.splice(parts.length - 2, 2);
887                     if (parts.length > 0) {
888                         this.middleName = parts;
889                     }
890                 } else if (parts.length === 1) {
891                     this.familyName = parts[0];
892                 }
893             } else {
894                 this.givenName = parts.splice(0, 1);
895                 this.familyName = parts.splice(parts.length - 2, 2);
896                 this.middleName = parts;
897             }
898         }
899     },
900 
901     /**
902      * @protected
903      */
904     _parseIndonesianName: function (parts) {
905         var conjunctionIndex;
906 
907         if (parts.length === 1) {
908             //if (this.prefix || typeof(parts[0]) === 'object') {
909             //this.familyName = parts[0];
910             //} else {
911             this.givenName = parts[0];
912             //}
913             //} else if (parts.length === 2) {
914             // we do G F
915             //this.givenName = parts[0];
916             //this.familyName = parts[1];
917         } else if (parts.length >= 2) {
918             //there are at least 3 parts to this name
919 
920             conjunctionIndex = this._findLastConjunction(parts);
921             if (conjunctionIndex > 0) {
922                 // if there's a conjunction that's not the first token, put everything up to and
923                 // including the token after it into the first name, the last 2 tokens into
924                 // the family name (if they exist) and everything else in to the middle name
925                 // 0 1 2 3 4 5
926                 // G A G
927                 // G A G F
928                 // G G A G
929                 // G A G F F
930                 // G G A G F
931                 // G G G A G
932                 // G A G M F F
933                 // G G A G F F
934                 // G G G A G F
935                 // G G G G A G
936                 this.givenName = parts.splice(0, conjunctionIndex + 2);
937                 if (parts.length > 1) {
938                     //this.familyName = parts.splice(parts.length-2, 2);
939                     //if ( parts.length > 0 ) {
940                     this.middleName = parts;
941                 }
942                 //} else if (parts.length === 1) {
943                 //    this.familyName = parts[0];
944                 //}
945             } else {
946                 this.givenName = parts.splice(0, 1);
947                 //this.familyName = parts.splice(parts.length-2, 2);
948                 this.middleName = parts;
949             }
950         }
951     },
952 
953     /**
954      * @protected
955      */
956     _parseGenericWesternName: function (parts) {
957         /* Western names are parsed as follows, and rules are applied in this
958          * order:
959          *
960          * G
961          * G F
962          * G M F
963          * G M M F
964          * P F
965          * P G F
966          */
967         var conjunctionIndex;
968 
969         if (parts.length === 1) {
970             if (this.prefix || typeof (parts[0]) === 'object') {
971                 // already has a prefix, so assume it goes with the family name like "Dr. Roberts" or
972                 // it is a name with auxillaries, which is almost always a family name
973                 this.familyName = parts[0];
974             } else {
975                 this.givenName = parts[0];
976             }
977         } else if (parts.length === 2) {
978             // we do G F
979             if (this.info.order == 'fgm') {
980                 this.givenName = parts[1];
981                 this.familyName = parts[0];
982             } else if (this.info.order == "gmf" || typeof (this.info.order) == 'undefined') {
983                 this.givenName = parts[0];
984                 this.familyName = parts[1];
985             }
986         } else if (parts.length >= 3) {
987             //find the first instance of 'and' in the name
988             conjunctionIndex = this._findLastConjunction(parts);
989 
990             if (conjunctionIndex > 0) {
991                 // if there's a conjunction that's not the first token, put everything up to and
992                 // including the token after it into the first name, the last token into
993                 // the family name (if it exists) and everything else in to the middle name
994                 // 0 1 2 3 4 5
995                 // G A G M M F
996                 // G G A G M F
997                 // G G G A G F
998                 // G G G G A G
999                 //if(this.order == "gmf") {
1000                 this.givenName = parts.slice(0, conjunctionIndex + 2);
1001 
1002                 if (conjunctionIndex + 1 < parts.length - 1) {
1003                     this.familyName = parts.splice(parts.length - 1, 1);
1004                     ////console.log(this.familyName);
1005                     if (conjunctionIndex + 2 < parts.length - 1) {
1006                         this.middleName = parts.slice(conjunctionIndex + 2, parts.length - conjunctionIndex - 3);
1007                     }
1008                 } else if (this.info.order == "fgm") {
1009                     this.familyName = parts.slice(0, conjunctionIndex + 2);
1010                     if (conjunctionIndex + 1 < parts.length - 1) {
1011                         this.middleName = parts.splice(parts.length - 1, 1);
1012                         if (conjunctionIndex + 2 < parts.length - 1) {
1013                             this.givenName = parts.slice(conjunctionIndex + 2, parts.length - conjunctionIndex - 3);
1014                         }
1015                     }
1016                 }
1017             } else if (this.info.order === "fgm") {
1018                 this.givenName = parts[1];
1019                 this.middleName = parts.slice(2);
1020                 this.familyName = parts[0];
1021             } else {
1022                 this.givenName = parts[0];
1023                 this.middleName = parts.slice(1, parts.length - 1);
1024                 this.familyName = parts[parts.length - 1];
1025             }
1026         }
1027     },
1028 
1029      /**
1030      * parse patrinomic name from the russian names
1031      * @protected
1032      * @param {Array.<string>} parts the current array of name parts
1033      * @return number  index of the part which contains patronymic name
1034      */
1035     _findPatronymicName: function(parts) {
1036         var index, part;
1037         for (index = 0; index < parts.length; index++) {
1038             part = parts[index];
1039             if (typeof (part) === 'string') {
1040                 part = part.toLowerCase();
1041 
1042                 var subLength = this.info.patronymicName.length;
1043                 while(subLength--) {
1044                     if(part.indexOf(this.info.patronymicName[subLength])!== -1 )
1045                         return index;
1046                 }
1047             }
1048         }
1049         return -1;
1050     },
1051 
1052     /**
1053      * find if the given part is patronymic name
1054      *
1055      * @protected
1056      * @param {string} part string from name parts @
1057      * @return number index of the part which contains familyName
1058      */
1059     _isPatronymicName: function(part) {
1060         var pName;
1061         if ( typeof (part) === 'string') {
1062             pName = part.toLowerCase();
1063 
1064             var subLength = this.info.patronymicName.length;
1065             while (subLength--) {
1066                 if (pName.indexOf(this.info.patronymicName[subLength]) !== -1)
1067                     return true;
1068             }
1069         }
1070         return false;
1071     },
1072 
1073     /**
1074      * find family name from the russian name
1075      *
1076      * @protected
1077      * @param {Array.<string>} parts the current array of name parts
1078      * @return boolean true if patronymic, false otherwise
1079      */
1080     _findFamilyName: function(parts) {
1081         var index, part, substring;
1082         for (index = 0; index < parts.length; index++) {
1083             part = parts[index];
1084 
1085             if ( typeof (part) === 'string') {
1086                 part = part.toLowerCase();
1087                 var length = part.length - 1;
1088 
1089                 if (this.info.familyName.indexOf(part) !== -1) {
1090                     return index;
1091                 } else if (part[length] === 'в' || part[length] === 'н' ||
1092                     part[length] === 'й') {
1093                     substring = part.slice(0, -1);
1094                     if (this.info.familyName.indexOf(substring) !== -1) {
1095                         return index;
1096                     }
1097                 } else if ((part[length - 1] === 'в' && part[length] === 'а') ||
1098                     (part[length - 1] === 'н' && part[length] === 'а') ||
1099                     (part[length - 1] === 'а' && part[length] === 'я')) {
1100                     substring = part.slice(0, -2);
1101                     if (this.info.familyName.indexOf(substring) !== -1) {
1102                         return index;
1103                     }
1104                 }
1105             }
1106         }
1107         return -1;
1108     },
1109 
1110     /**
1111      * parse russian name
1112      *
1113      * @protected
1114      * @param {Array.<string>} parts the current array of name parts
1115      * @return
1116      */
1117     _parseRussianName: function(parts) {
1118         var conjunctionIndex, familyIndex = -1;
1119 
1120         if (parts.length === 1) {
1121             if (this.prefix || typeof (parts[0]) === 'object') {
1122                 // already has a prefix, so assume it goes with the family name
1123                 // like "Dr. Roberts" or
1124                 // it is a name with auxillaries, which is almost always a
1125                 // family name
1126                 this.familyName = parts[0];
1127             } else {
1128                 this.givenName = parts[0];
1129             }
1130         } else if (parts.length === 2) {
1131             // we do G F
1132             if (this.info.order === 'fgm') {
1133                 this.givenName = parts[1];
1134                 this.familyName = parts[0];
1135             } else if (this.info.order === "gmf") {
1136                 this.givenName = parts[0];
1137                 this.familyName = parts[1];
1138             } else if ( typeof (this.info.order) === 'undefined') {
1139                 if (this._isPatronymicName(parts[1]) === true) {
1140                     this.middleName = parts[1];
1141                     this.givenName = parts[0];
1142                 } else if ((familyIndex = this._findFamilyName(parts)) !== -1) {
1143                     if (familyIndex === 1) {
1144                         this.givenName = parts[0];
1145                         this.familyName = parts[1];
1146                     } else {
1147                         this.familyName = parts[0];
1148                         this.givenName = parts[1];
1149                     }
1150 
1151                 } else {
1152                     this.givenName = parts[0];
1153                     this.familyName = parts[1];
1154                 }
1155 
1156             }
1157         } else if (parts.length >= 3) {
1158             // find the first instance of 'and' in the name
1159             conjunctionIndex = this._findLastConjunction(parts);
1160             var patronymicNameIndex = this._findPatronymicName(parts);
1161             if (conjunctionIndex > 0) {
1162                 // if there's a conjunction that's not the first token, put
1163                 // everything up to and
1164                 // including the token after it into the first name, the last
1165                 // token into
1166                 // the family name (if it exists) and everything else in to the
1167                 // middle name
1168                 // 0 1 2 3 4 5
1169                 // G A G M M F
1170                 // G G A G M F
1171                 // G G G A G F
1172                 // G G G G A G
1173                 // if(this.order == "gmf") {
1174                 this.givenName = parts.slice(0, conjunctionIndex + 2);
1175 
1176                 if (conjunctionIndex + 1 < parts.length - 1) {
1177                     this.familyName = parts.splice(parts.length - 1, 1);
1178                     // //console.log(this.familyName);
1179                     if (conjunctionIndex + 2 < parts.length - 1) {
1180                         this.middleName = parts.slice(conjunctionIndex + 2,
1181                             parts.length - conjunctionIndex - 3);
1182                     }
1183                 } else if (this.order == "fgm") {
1184                     this.familyName = parts.slice(0, conjunctionIndex + 2);
1185                     if (conjunctionIndex + 1 < parts.length - 1) {
1186                         this.middleName = parts.splice(parts.length - 1, 1);
1187                         if (conjunctionIndex + 2 < parts.length - 1) {
1188                             this.givenName = parts.slice(conjunctionIndex + 2,
1189                                 parts.length - conjunctionIndex - 3);
1190                         }
1191                     }
1192                 }
1193             } else if (patronymicNameIndex !== -1) {
1194                 this.middleName = parts[patronymicNameIndex];
1195 
1196                 if (patronymicNameIndex === (parts.length - 1)) {
1197                     this.familyName = parts[0];
1198                     this.givenName = parts.slice(1, patronymicNameIndex);
1199                 } else {
1200                     this.givenName = parts.slice(0, patronymicNameIndex);
1201 
1202                     this.familyName = parts[parts.length - 1];
1203                 }
1204             } else {
1205                 this.givenName = parts[0];
1206 
1207                 this.middleName = parts.slice(1, parts.length - 1);
1208 
1209                 this.familyName = parts[parts.length - 1];
1210             }
1211         }
1212     },
1213 
1214 
1215     /**
1216      * @protected
1217      */
1218     _parseWesternName: function (parts) {
1219 
1220         if (this.locale.getLanguage() === "es" || this.locale.getLanguage() === "pt") {
1221             // in spain and mexico and portugal, we parse names differently than in the rest of the world
1222             // because of the double family names
1223             this._parseSpanishName(parts);
1224         } else if (this.locale.getLanguage() === "ru") {
1225             /*
1226              * In Russian, names can be given equally validly as given-family
1227              * or family-given. Use the value of the "order" property of the
1228              * constructor options to give the default when the order is ambiguous.
1229              */
1230             this._parseRussianName(parts);
1231         } else if (this.locale.getLanguage() === "id") {
1232             // in indonesia, we parse names differently than in the rest of the world
1233             // because names don't have family names usually.
1234             this._parseIndonesianName(parts);
1235         } else {
1236             this._parseGenericWesternName(parts);
1237         }
1238     },
1239 
1240     /**
1241      * When sorting names with auxiliary words (like "van der" or "de los"), determine
1242      * which is the "head word" and return a string that can be easily sorted by head
1243      * word. In English, names are always sorted by initial characters. In places like
1244      * the Netherlands or Germany, family names are sorted by the head word of a list
1245      * of names rather than the first element of that name.
1246      * @return {string|undefined} a string containing the family name[s] to be used for sorting
1247      * in the current locale, or undefined if there is no family name in this object
1248      */
1249     getSortFamilyName: function () {
1250         var name,
1251             auxillaries,
1252             auxString,
1253             parts,
1254             i;
1255 
1256         // no name to sort by
1257         if (!this.familyName) {
1258             return undefined;
1259         }
1260 
1261         // first break the name into parts
1262         if (this.info) {
1263             if (this.info.sortByHeadWord) {
1264                 if (typeof (this.familyName) === 'string') {
1265                     name = this.familyName.replace(/\s+/g, ' '); // compress multiple whitespaces
1266                     parts = name.trim().split(' ');
1267                 } else {
1268                     // already split
1269                     parts = this.familyName;
1270                 }
1271 
1272                 auxillaries = this._findPrefix(parts, this.info.auxillaries, false);
1273                 if (auxillaries && auxillaries.length > 0) {
1274                     if (typeof (this.familyName) === 'string') {
1275                         auxString = auxillaries.join(' ');
1276                         name = this.familyName.substring(auxString.length + 1) + ', ' + auxString;
1277                     } else {
1278                         name = parts.slice(auxillaries.length).join(' ') +
1279                             ', ' +
1280                             parts.slice(0, auxillaries.length).join(' ');
1281                     }
1282                 }
1283             } else if (this.info.knownFamilyNames && this.familyName) {
1284                 parts = this.familyName.split('');
1285                 var familyNameArray = this._findPrefix(parts, this.info.knownFamilyNames, true, this.info.noCompoundFamilyNames);
1286                 name = "";
1287                 for (i = 0; i < familyNameArray.length; i++) {
1288                     name += (this.info.knownFamilyNames[familyNameArray[i]] || "");
1289                 }
1290             }
1291         }
1292 
1293         return name || this.familyName;
1294     },
1295 
1296     getHeadFamilyName: function () {},
1297 
1298     /**
1299      * @protected
1300      * Return a shallow copy of the current instance.
1301      */
1302     clone: function () {
1303         return new Name(this);
1304     }
1305 };
1306 
1307 module.exports = Name;