1 /*
  2  * Address.js - Represent a mailing address
  3  *
  4  * Copyright © 2013-2015, 2018, JEDLSoft
  5  *
  6  * Licensed under the Apache License, Version 2.0 (the "License");
  7  * you may not use this file except in compliance with the License.
  8  * You may obtain a copy of the License at
  9  *
 10  *     http://www.apache.org/licenses/LICENSE-2.0
 11  *
 12  * Unless required by applicable law or agreed to in writing, software
 13  * distributed under the License is distributed on an "AS IS" BASIS,
 14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  *
 16  * See the License for the specific language governing permissions and
 17  * limitations under the License.
 18  */
 19 
 20 /*globals console RegExp */
 21 
 22 // !data address countries nativecountries ctrynames
 23 
 24 var ilib = require("../index.js");
 25 var Utils = require("./Utils.js");
 26 var JSUtils = require("./JSUtils.js");
 27 var Locale = require("./Locale.js");
 28 var CType = require("./CType.js");
 29 var isIdeo = require("./isIdeo.js");
 30 var isAscii = require("./isAscii.js");
 31 var isDigit = require("./isDigit.js");
 32 var IString = require("./IString.js");
 33 
 34 /**
 35  * @class
 36  * Create a new Address instance and parse a physical address.<p>
 37  *
 38  * This function parses a physical address written in a free-form string.
 39  * It returns an object with a number of properties from the list below
 40  * that it may have extracted from that address.<p>
 41  *
 42  * The following is a list of properties that the algorithm will return:<p>
 43  *
 44  * <ul>
 45  * <li><i>streetAddress</i>: The street address, including house numbers and all.
 46  * <li><i>locality</i>: The locality of this address (usually a city or town).
 47  * <li><i>region</i>: The region where the locality is located. In the US, this
 48  * corresponds to states. In other countries, this may be provinces,
 49  * cantons, prefectures, etc. In some smaller countries, there are no
 50  * such divisions.
 51  * <li><i>postalCode</i>: Country-specific code for expediting mail. In the US,
 52  * this is the zip code.
 53  * <li><i>country</i>: The country of the address.
 54  * <li><i>countryCode</i>: The ISO 3166 2-letter region code for the destination
 55  * country in this address.
 56  * </ul>
 57  *
 58  * The above properties will not necessarily appear in the instance. For
 59  * any individual property, if the free-form address does not contain
 60  * that property or it cannot be parsed out, the it is left out.<p>
 61  *
 62  * The options parameter may contain any of the following properties:
 63  *
 64  * <ul>
 65  * <li><i>locale</i> - locale or localeSpec to use to parse the address. If not
 66  * specified, this function will use the current ilib locale
 67  *
 68  * <li><i>onLoad</i> - a callback function to call when the address info for the
 69  * locale is fully loaded and the address has been parsed. When the onLoad
 70  * option is given, the address object
 71  * will attempt to load any missing locale data using the ilib loader callback.
 72  * When the constructor is done (even if the data is already preassembled), the
 73  * onLoad function is called with the current instance as a parameter, so this
 74  * callback can be used with preassembled or dynamic loading or a mix of the two.
 75  *
 76  * <li><i>sync</i> - tell whether to load any missing locale data synchronously or
 77  * asynchronously. If this option is given as "false", then the "onLoad"
 78  * callback must be given, as the instance returned from this constructor will
 79  * not be usable for a while.
 80  *
 81  * <li><i>loadParams</i> - an object containing parameters to pass to the
 82  * loader callback function when locale data is missing. The parameters are not
 83  * interpretted or modified in any way. They are simply passed along. The object
 84  * may contain any property/value pairs as long as the calling code is in
 85  * agreement with the loader callback function as to what those parameters mean.
 86  * </ul>
 87  *
 88  * When an address cannot be parsed properly, the entire address will be placed
 89  * into the streetAddress property.<p>
 90  *
 91  * When the freeformAddress is another Address, this will act like a copy
 92  * constructor.<p>
 93  *
 94  *
 95  * @constructor
 96  * @param {string|Address} freeformAddress free-form address to parse, or a
 97  * javascript object containing the fields
 98  * @param {Object} options options to the parser
 99  */
100 var Address = function (freeformAddress, options) {
101     var address;
102 
103     if (!freeformAddress) {
104         return undefined;
105     }
106 
107     this.sync = true;
108     this.loadParams = {};
109 
110     if (options) {
111         if (options.locale) {
112             this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale;
113         }
114 
115         if (typeof(options.sync) !== 'undefined') {
116             this.sync = !!options.sync;
117         }
118 
119         if (options.loadParams) {
120             this.loadParams = options.loadParams;
121         }
122     }
123 
124     this.locale = this.locale || new Locale();
125     // initialize from an already parsed object
126     if (typeof(freeformAddress) === 'object') {
127         /**
128          * The street address, including house numbers and all.
129          * @type {string|undefined}
130          */
131         this.streetAddress = freeformAddress.streetAddress;
132         /**
133          * The locality of this address (usually a city or town).
134          * @type {string|undefined}
135          */
136         this.locality = freeformAddress.locality;
137         /**
138          * The region (province, canton, prefecture, state, etc.) where the address is located.
139          * @type {string|undefined}
140          */
141         this.region = freeformAddress.region;
142         /**
143          * Country-specific code for expediting mail. In the US, this is the zip code.
144          * @type {string|undefined}
145          */
146         this.postalCode = freeformAddress.postalCode;
147         /**
148          * Optional city-specific code for a particular post office, used to expidite
149          * delivery.
150          * @type {string|undefined}
151          */
152         this.postOffice = freeformAddress.postOffice;
153         /**
154          * The country of the address.
155          * @type {string|undefined}
156          */
157         this.country = freeformAddress.country;
158         if (freeformAddress.countryCode) {
159             /**
160              * The 2 or 3 letter ISO 3166 region code for the destination country in this address.
161              * @type {string}
162              */
163             this.countryCode = freeformAddress.countryCode;
164         }
165         if (freeformAddress.format) {
166             /**
167              * private
168              * @type {string}
169              */
170             this.format = freeformAddress.format;
171         }
172         return this;
173     }
174 
175     address = freeformAddress.replace(/[ \t\r]+/g, " ").trim();
176     address = address.replace(/[\s\n]+$/, "");
177     address = address.replace(/^[\s\n]+/, "");
178     //console.log("\n\n-------------\nAddress is '" + address + "'");
179 
180     this.lines = address.split(/[,,\n]/g);
181     this.removeEmptyLines(this.lines);
182 
183     isAscii._init(this.sync, this.loadParams, ilib.bind(this, function() {
184         isIdeo._init(this.sync, this.loadParams, ilib.bind(this, function() {
185             isDigit._init(this.sync, this.loadParams, ilib.bind(this, function() {
186                 if (typeof(ilib.data.nativecountries) === 'undefined') {
187                     Utils.loadData({
188                         object: "Address",
189                         name: "nativecountries.json", // countries in their own language
190                         locale: "-", // only need to load the root file
191                         nonlocale: true,
192                         sync: this.sync,
193                         loadParams: this.loadParams,
194                         callback: ilib.bind(this, function(nativecountries) {
195                             ilib.data.nativecountries = nativecountries;
196                             this._loadCountries(options && options.onLoad);
197                         })
198                     });
199                 } else {
200                     this._loadCountries(options && options.onLoad);
201                 }
202             }));
203         }));
204     }));
205 };
206 
207 /** @protected */
208 Address.prototype = {
209     /**
210      * @private
211      */
212     _loadCountries: function(onLoad) {
213         if (typeof(ilib.data.countries) === 'undefined') {
214             Utils.loadData({
215                 object: "Address",
216                 name: "countries.json", // countries in English
217                 locale: "-", // only need to load the root file
218                 nonlocale: true,
219                 sync: this.sync,
220                 loadParams: this.loadParams,
221                 callback: ilib.bind(this, function(countries) {
222                     ilib.data.countries = countries;
223                     this._loadCtrynames(onLoad);
224                 })
225             });
226         } else {
227             this._loadCtrynames(onLoad);
228         }
229     },
230 
231     /**
232      * @private
233      */
234     _loadCtrynames: function(onLoad) {
235         Utils.loadData({
236             name: "ctrynames.json",
237             object: "Address",
238             locale: this.locale,
239             sync: this.sync,
240             loadParams: JSUtils.merge(this.loadParams, {returnOne: true}),
241             callback: ilib.bind(this, function(ctrynames) {
242                 this.ctrynames = ctrynames;
243                 this._determineDest(ctrynames, onLoad);
244             })
245         });
246     },
247 
248     /**
249      * @private
250      * @param {Object?} ctrynames
251      */
252     _findDest: function (ctrynames) {
253         var match;
254 
255         for (var countryName in ctrynames) {
256             if (countryName && countryName !== "generated") {
257                 // find the longest match in the current table
258                 // ctrynames contains the country names mapped to region code
259                 // for efficiency, only test for things longer than the current match
260                 if (!match || match.text.length < countryName.length) {
261                     var temp = this._findCountry(countryName);
262                     if (temp) {
263                         match = temp;
264                         this.country = match.text;
265                         this.countryCode = ctrynames[countryName];
266                     }
267                 }
268             }
269         }
270         return match;
271     },
272 
273     /**
274      * @private
275      * @param {Object?} localizedCountries
276      * @param {function(Address):undefined} callback
277      */
278     _determineDest: function (localizedCountries, callback) {
279         var match;
280 
281         /*
282          * First, find the name of the destination country, as that determines how to parse
283          * the rest of the address. For any address, there are three possible ways
284          * that the name of the country could be written:
285          * 1. In the current language
286          * 2. In its own native language
287          * 3. In English
288          * We'll try all three.
289          */
290         var tables = [];
291         if (localizedCountries) {
292             tables.push(localizedCountries);
293         }
294         tables.push(ilib.data.nativecountries);
295         tables.push(ilib.data.countries);
296 
297         for (var i = 0; i < tables.length; i++) {
298             match = this._findDest(tables[i]);
299 
300             if (match) {
301                 this.lines[match.line] = this.lines[match.line].substring(0, match.start) + this.lines[match.line].substring(match.start + match.text.length);
302 
303                 this._init(callback);
304                 return;
305             }
306         }
307 
308         // no country, so try parsing it as if we were in the same country
309         this.country = undefined;
310         this.countryCode = this.locale.getRegion();
311         this._init(callback);
312     },
313 
314     /**
315      * @private
316      * @param {function(Address):undefined} callback
317      */
318     _init: function(callback) {
319         Utils.loadData({
320             object: "Address",
321             locale: new Locale(this.countryCode),
322             name: "address.json",
323             sync: this.sync,
324             loadParams: this.loadParams,
325             callback: ilib.bind(this, function(info) {
326                 if (!info || JSUtils.isEmpty(info) || !info.fields) {
327                     // load the "unknown" locale instead
328                     Utils.loadData({
329                         object: "Address",
330                         locale: new Locale("XX"),
331                         name: "address.json",
332                         sync: this.sync,
333                         loadParams: this.loadParams,
334                         callback: ilib.bind(this, function(info) {
335                             this.info = info;
336                             this._parseAddress();
337                             if (typeof(callback) === 'function') {
338                                 callback(this);
339                             }
340                         })
341                     });
342                 } else {
343                     this.info = info;
344                     this._parseAddress();
345                     if (typeof(callback) === 'function') {
346                         callback(this);
347                     }
348                 }
349             })
350         });
351     },
352 
353     /**
354      * @private
355      */
356     _parseAddress: function() {
357         // clean it up first
358         var i,
359             asianChars = 0,
360             latinChars = 0,
361             startAt,
362             infoFields,
363             field,
364             pattern,
365             matchFunction,
366             match,
367             fieldNumber;
368 
369         // for locales that support both latin and asian character addresses,
370         // decide if we are parsing an asian or latin script address
371         if (this.info && this.info.multiformat) {
372             for (var j = 0; j < this.lines.length; j++) {
373                 var line = new IString(this.lines[j]);
374                 var it = line.charIterator();
375                 while (it.hasNext()) {
376                     var c = it.next();
377                     if (isIdeo(c) ||
378                             CType.withinRange(c, "hangul") ||
379                             CType.withinRange(c, 'katakana') ||
380                             CType.withinRange(c, 'hiragana') ||
381                             CType.withinRange(c, 'bopomofo')) {
382                         asianChars++;
383                     } else if (isAscii(c) && !isDigit(c)) {
384                         latinChars++;
385                     }
386                 }
387             }
388 
389             this.format = (asianChars >= latinChars) ? "asian" : "latin";
390             startAt = this.info.startAt[this.format];
391             infoFields = this.info.fields[this.format];
392             // //console.log("multiformat locale: format is now " + this.format);
393         } else {
394             startAt = (this.info && this.info.startAt) || "end";
395             infoFields = (this.info && this.info.fields) || [];
396         }
397         this.compare = (startAt === "end") ? this.endsWith : this.startsWith;
398 
399         //console.log("this.lines is: " + JSON.stringify(this.lines));
400 
401         for (i = 0; i < infoFields.length && this.lines.length > 0; i++) {
402             field = infoFields[i];
403             this.removeEmptyLines(this.lines);
404             //console.log("Searching for field " + field.name);
405             if (field.pattern) {
406                 if (typeof(field.pattern) === 'string') {
407                     pattern = new RegExp(field.pattern, "img");
408                     matchFunction = this.matchRegExp;
409                 } else {
410                     pattern = field.pattern;
411                     matchFunction = this.matchPattern;
412                 }
413 
414                 switch (field.line) {
415                 case 'startAtFirst':
416                     for (fieldNumber = 0; fieldNumber < this.lines.length; fieldNumber++) {
417                         match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
418                         if (match) {
419                             break;
420                         }
421                     }
422                     break;
423                 case 'startAtLast':
424                     for (fieldNumber = this.lines.length-1; fieldNumber >= 0; fieldNumber--) {
425                         match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
426                         if (match) {
427                             break;
428                         }
429                     }
430                     break;
431                 case 'first':
432                     fieldNumber = 0;
433                     match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
434                     break;
435                 case 'last':
436                 default:
437                     fieldNumber = this.lines.length - 1;
438                     match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
439                     break;
440                 }
441                 if (match) {
442                     // //console.log("found match for " + field.name + ": " + JSON.stringify(match));
443                     // //console.log("remaining line is " + match.line);
444                     this.lines[fieldNumber] = match.line;
445                     this[field.name] = match.match;
446                 }
447             } else {
448                 // if nothing is given, default to taking the whole field
449                 this[field.name] = this.lines.splice(fieldNumber,1)[0].trim();
450                 //console.log("typeof(this[field.name]) is " + typeof(this[field.name]) + " and value is " + JSON.stringify(this[field.name]));
451             }
452         }
453 
454         // all the left overs go in the street address field
455         this.removeEmptyLines(this.lines);
456         if (this.lines.length > 0) {
457             //console.log("this.lines is " + JSON.stringify(this.lines) + " and splicing to get streetAddress");
458             // Korea uses spaces between words, despite being an "asian" locale
459             var joinString = (this.info.joinString && this.info.joinString[this.format]) || ((this.format && this.format === "asian") ? "" : ", ");
460             this.streetAddress = this.lines.join(joinString).trim();
461         }
462 
463         this.lines = undefined;
464         //console.log("final result is " + JSON.stringify(this));
465     },
466 
467     /**
468      * @protected
469      * Find the named country either at the end or the beginning of the address.
470      */
471     _findCountry: function(name) {
472         var start = -1, match, line = 0;
473 
474         if (this.lines.length > 0) {
475             start = this.startsWith(this.lines[line], name);
476             if (start === -1) {
477                 line = this.lines.length-1;
478                 start = this.endsWith(this.lines[line], name);
479             }
480             if (start !== -1) {
481                 match = {
482                     text: this.lines[line].substring(start, start + name.length),
483                     line: line,
484                     start: start
485                 };
486             }
487         }
488 
489         return match;
490     },
491 
492     endsWith: function (subject, query) {
493         var start = subject.length-query.length,
494             i,
495             pat;
496         //console.log("endsWith: checking " + query + " against " + subject);
497         for (i = 0; i < query.length; i++) {
498             // TODO: use case mapper instead of toLowerCase()
499             if (subject.charAt(start+i).toLowerCase() !== query.charAt(i).toLowerCase()) {
500                 return -1;
501             }
502         }
503         if (start > 0) {
504             pat = /\s/;
505             if (!pat.test(subject.charAt(start-1))) {
506                 // make sure if we are not at the beginning of the string, that the match is
507                 // not the end of some other word
508                 return -1;
509             }
510         }
511         return start;
512     },
513 
514     startsWith: function (subject, query) {
515         var i;
516         // //console.log("startsWith: checking " + query + " against " + subject);
517         for (i = 0; i < query.length; i++) {
518             // TODO: use case mapper instead of toLowerCase()
519             if (subject.charAt(i).toLowerCase() !== query.charAt(i).toLowerCase()) {
520                 return -1;
521             }
522         }
523         return 0;
524     },
525 
526     removeEmptyLines: function (arr) {
527         var i = 0;
528 
529         while (i < arr.length) {
530             if (arr[i]) {
531                 arr[i] = arr[i].trim();
532                 if (arr[i].length === 0) {
533                     arr.splice(i,1);
534                 } else {
535                     i++;
536                 }
537             } else {
538                 arr.splice(i,1);
539             }
540         }
541     },
542 
543     matchRegExp: function(address, line, expression, matchGroup, startAt) {
544         var lastMatch,
545             match,
546             ret = {},
547             last;
548 
549         //console.log("searching for regexp " + expression.source + " in line " + line);
550 
551         match = expression.exec(line);
552         if (startAt === 'end') {
553             while (match !== null && match.length > 0) {
554                 //console.log("found matches " + JSON.stringify(match));
555                 lastMatch = match;
556                 match = expression.exec(line);
557             }
558             match = lastMatch;
559         }
560 
561         if (match && match !== null) {
562             //console.log("found matches " + JSON.stringify(match));
563             matchGroup = matchGroup || 0;
564             if (match[matchGroup] !== undefined) {
565                 ret.match = match[matchGroup].trim();
566                 ret.match = ret.match.replace(/^\-|\-+$/, '');
567                 ret.match = ret.match.replace(/\s+$/, '');
568                 last = (startAt === 'end') ? line.lastIndexOf(match[matchGroup]) : line.indexOf(match[matchGroup]);
569                 //console.log("last is " + last);
570                 ret.line = line.slice(0,last);
571                 if (address.format !== "asian") {
572                     ret.line += " ";
573                 }
574                 ret.line += line.slice(last+match[matchGroup].length);
575                 ret.line = ret.line.trim();
576                 //console.log("found match " + ret.match + " from matchgroup " + matchGroup + " and rest of line is " + ret.line);
577                 return ret;
578             }
579         //} else {
580             //console.log("no match");
581         }
582 
583         return undefined;
584     },
585 
586     matchPattern: function(address, line, pattern, matchGroup) {
587         var start,
588             j,
589             ret = {};
590 
591         //console.log("searching in line " + line);
592 
593         // search an array of possible fixed strings
594         //console.log("Using fixed set of strings.");
595         for (j = 0; j < pattern.length; j++) {
596             start = address.compare(line, pattern[j]);
597             if (start !== -1) {
598                 ret.match = line.substring(start, start+pattern[j].length);
599                 if (start !== 0) {
600                     ret.line = line.substring(0,start).trim();
601                 } else {
602                     ret.line = line.substring(pattern[j].length).trim();
603                 }
604                 //console.log("found match " + ret.match + " and rest of line is " + ret.line);
605                 return ret;
606             }
607         }
608 
609         return undefined;
610     }
611 };
612 
613 module.exports = Address;
614