/*
 * Address.js - Represent a mailing address
 *
 * Copyright © 2013-2015, 2018, JEDLSoft
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*globals console RegExp */

// !data address countries nativecountries ctrynames

var ilib = require("./ilib.js");
var Utils = require("./Utils.js");
var JSUtils = require("./JSUtils.js");
var Locale = require("./Locale.js");
var CType = require("./CType.js");
var isIdeo = require("./isIdeo.js");
var isAscii = require("./isAscii.js");
var isDigit = require("./isDigit.js");
var IString = require("./IString.js");

/**
 * @class
 * Create a new Address instance and parse a physical address.<p>
 *
 * This function parses a physical address written in a free-form string.
 * It returns an object with a number of properties from the list below
 * that it may have extracted from that address.<p>
 *
 * The following is a list of properties that the algorithm will return:<p>
 *
 * <ul>
 * <li><i>streetAddress</i>: The street address, including house numbers and all.
 * <li><i>locality</i>: The locality of this address (usually a city or town).
 * <li><i>region</i>: The region where the locality is located. In the US, this
 * corresponds to states. In other countries, this may be provinces,
 * cantons, prefectures, etc. In some smaller countries, there are no
 * such divisions.
 * <li><i>postalCode</i>: Country-specific code for expediting mail. In the US,
 * this is the zip code.
 * <li><i>country</i>: The country of the address.
 * <li><i>countryCode</i>: The ISO 3166 2-letter region code for the destination
 * country in this address.
 * </ul>
 *
 * The above properties will not necessarily appear in the instance. For
 * any individual property, if the free-form address does not contain
 * that property or it cannot be parsed out, then it is left out.<p>
 *
 * The options parameter may contain any of the following properties:
 *
 * <ul>
 * <li><i>locale</i> - locale or localeSpec to use to parse the address. If not
 * specified, this function will use the current ilib locale
 *
 * <li><i>onLoad</i> - a callback function to call when the address info for the
 * locale is fully loaded and the address has been parsed. When the onLoad
 * option is given, the address object
 * will attempt to load any missing locale data using the ilib loader callback.
 * When the constructor is done (even if the data is already preassembled), the
 * onLoad function is called with the current instance as a parameter, so this
 * callback can be used with preassembled or dynamic loading or a mix of the two.
 * Note that onLoad is only invoked when a free-form string is parsed; it is not
 * called when freeformAddress is empty or is an already-parsed object, because
 * the constructor returns before any data is loaded in those cases.
 *
 * <li><i>sync</i> - tell whether to load any missing locale data synchronously or
 * asynchronously. If this option is given as "false", then the "onLoad"
 * callback must be given, as the instance returned from this constructor will
 * not be usable for a while.
 *
 * <li><i>loadParams</i> - an object containing parameters to pass to the
 * loader callback function when locale data is missing. The parameters are not
 * interpreted or modified in any way. They are simply passed along. The object
 * may contain any property/value pairs as long as the calling code is in
 * agreement with the loader callback function as to what those parameters mean.
 * </ul>
 *
 * When an address cannot be parsed properly, the entire address will be placed
 * into the streetAddress property.<p>
 *
 * When the freeformAddress is another Address, this will act like a copy
 * constructor.<p>
 *
 *
 * @constructor
 * @param {string|Address} freeformAddress free-form address to parse, or a
 * javascript object containing the fields
 * @param {Object} options options to the parser
 */
var Address = function (freeformAddress, options) {
    var address;

    if (!freeformAddress) {
        return undefined;
    }

    this.sync = true;
    this.loadParams = {};

    if (options) {
        if (options.locale) {
            this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale;
        }

        if (typeof(options.sync) !== 'undefined') {
            this.sync = !!options.sync;
        }

        if (options.loadParams) {
            this.loadParams = options.loadParams;
        }
    }

    this.locale = this.locale || new Locale();

    // initialize from an already parsed object
    if (typeof(freeformAddress) === 'object') {
        /**
         * The street address, including house numbers and all.
         * @type {string|undefined}
         */
        this.streetAddress = freeformAddress.streetAddress;
        /**
         * The locality of this address (usually a city or town).
         * @type {string|undefined}
         */
        this.locality = freeformAddress.locality;
        /**
         * The region (province, canton, prefecture, state, etc.) where the address is located.
         * @type {string|undefined}
         */
        this.region = freeformAddress.region;
        /**
         * Country-specific code for expediting mail. In the US, this is the zip code.
         * @type {string|undefined}
         */
        this.postalCode = freeformAddress.postalCode;
        /**
         * Optional city-specific code for a particular post office, used to expedite
         * delivery.
         * @type {string|undefined}
         */
        this.postOffice = freeformAddress.postOffice;
        /**
         * The country of the address.
         * @type {string|undefined}
         */
        this.country = freeformAddress.country;
        if (freeformAddress.countryCode) {
            /**
             * The 2 or 3 letter ISO 3166 region code for the destination country in this address.
             * @type {string}
             */
            this.countryCode = freeformAddress.countryCode;
        }
        if (freeformAddress.format) {
            /**
             * private
             * @type {string}
             */
            this.format = freeformAddress.format;
        }
        return this;
    }

    // Collapse runs of spaces, tabs and carriage returns into a single space and
    // strip all leading/trailing white space (trim() also removes newlines).
    address = freeformAddress.replace(/[ \t\r]+/g, " ").trim();

    // Split into lines on ASCII commas, fullwidth commas (U+FF0C, common in
    // Asian-script addresses) and newlines.
    this.lines = address.split(/[,\uFF0C\n]/g);
    this.removeEmptyLines(this.lines);

    // Initialize the character-type helpers one after another, then make sure
    // the country tables are available before looking for the destination country.
    isAscii._init(this.sync, this.loadParams, ilib.bind(this, function() {
        isIdeo._init(this.sync, this.loadParams, ilib.bind(this, function() {
            isDigit._init(this.sync, this.loadParams, ilib.bind(this, function() {
                if (typeof(ilib.data.nativecountries) === 'undefined') {
                    Utils.loadData({
                        object: "Address",
                        name: "nativecountries.json", // countries in their own language
                        locale: "-", // only need to load the root file
                        nonlocale: true,
                        sync: this.sync,
                        loadParams: this.loadParams,
                        callback: ilib.bind(this, function(nativecountries) {
                            ilib.data.nativecountries = nativecountries;
                            this._loadCountries(options && options.onLoad);
                        })
                    });
                } else {
                    this._loadCountries(options && options.onLoad);
                }
            }));
        }));
    }));
};

/** @protected */
Address.prototype = {
    /**
     * Make sure the table of English country names is cached in
     * ilib.data.countries, loading it if necessary, then continue with
     * loading the localized country names.
     *
     * @private
     * @param {function(Address):undefined=} onLoad callback to pass down the chain
     */
    _loadCountries: function(onLoad) {
        if (typeof(ilib.data.countries) === 'undefined') {
            Utils.loadData({
                object: "Address",
                name: "countries.json", // countries in English
                locale: "-", // only need to load the root file
                nonlocale: true,
                sync: this.sync,
                loadParams: this.loadParams,
                callback: ilib.bind(this, function(countries) {
                    ilib.data.countries = countries;
                    this._loadCtrynames(onLoad);
                })
            });
        } else {
            this._loadCtrynames(onLoad);
        }
    },

    /**
     * Load the country names for the current locale, remember them in
     * this.ctrynames, and then determine the destination country.
     *
     * @private
     * @param {function(Address):undefined=} onLoad callback to pass down the chain
     */
    _loadCtrynames: function(onLoad) {
        Utils.loadData({
            name: "ctrynames.json",
            object: "Address",
            locale: this.locale,
            sync: this.sync,
            loadParams: JSUtils.merge(this.loadParams, {returnOne: true}),
            callback: ilib.bind(this, function(ctrynames) {
                this.ctrynames = ctrynames;
                this._determineDest(ctrynames, onLoad);
            })
        });
    },

    /**
     * Search the address for the longest country name from the given table.
     * As a side effect, this.country and this.countryCode are set each time
     * a longer match is found.
     *
     * @private
     * @param {Object?} ctrynames table mapping country names to region codes
     * @return {{text:string,line:number,start:number}|undefined} description of
     * the match, or undefined if no name in the table was found
     */
    _findDest: function (ctrynames) {
        var match;

        for (var countryName in ctrynames) {
            if (countryName && countryName !== "generated") {
                // find the longest match in the current table
                // ctrynames contains the country names mapped to region code
                // for efficiency, only test for things longer than the current match
                if (!match || match.text.length < countryName.length) {
                    var temp = this._findCountry(countryName);
                    if (temp) {
                        match = temp;
                        this.country = match.text;
                        this.countryCode = ctrynames[countryName];
                    }
                }
            }
        }
        return match;
    },

    /**
     * Work out the destination country of the address, remove its name from
     * the address lines, and then go on to load the address format info.
     *
     * @private
     * @param {Object?} localizedCountries country names in the current language
     * @param {function(Address):undefined} callback
     */
    _determineDest: function (localizedCountries, callback) {
        var match;

        /*
         * First, find the name of the destination country, as that determines how to parse
         * the rest of the address. For any address, there are three possible ways
         * that the name of the country could be written:
         * 1. In the current language
         * 2. In its own native language
         * 3. In English
         * We'll try all three.
         */
        var tables = [];
        if (localizedCountries) {
            tables.push(localizedCountries);
        }
        tables.push(ilib.data.nativecountries);
        tables.push(ilib.data.countries);

        for (var i = 0; i < tables.length; i++) {
            match = this._findDest(tables[i]);

            if (match) {
                // cut the country name out of the line where it was found
                this.lines[match.line] = this.lines[match.line].substring(0, match.start) + this.lines[match.line].substring(match.start + match.text.length);

                this._init(callback);
                return;
            }
        }

        // no country, so try parsing it as if we were in the same country
        this.country = undefined;
        this.countryCode = this.locale.getRegion();
        this._init(callback);
    },

    /**
     * Load the address format info for the destination country, falling back
     * to the info for the "XX" locale when none is available, then parse the
     * address and notify the caller.
     *
     * @private
     * @param {function(Address):undefined} callback
     */
    _init: function(callback) {
        Utils.loadData({
            object: "Address",
            locale: new Locale(this.countryCode),
            name: "address.json",
            sync: this.sync,
            loadParams: this.loadParams,
            callback: ilib.bind(this, function(info) {
                if (!info || JSUtils.isEmpty(info) || !info.fields) {
                    // load the "unknown" locale instead
                    Utils.loadData({
                        object: "Address",
                        locale: new Locale("XX"),
                        name: "address.json",
                        sync: this.sync,
                        loadParams: this.loadParams,
                        callback: ilib.bind(this, function(info) {
                            this.info = info;
                            this._parseAddress();
                            if (typeof(callback) === 'function') {
                                callback(this);
                            }
                        })
                    });
                } else {
                    this.info = info;
                    this._parseAddress();
                    if (typeof(callback) === 'function') {
                        callback(this);
                    }
                }
            })
        });
    },

    /**
     * Extract the fields described in this.info.fields from this.lines and
     * store them as properties of this instance. Whatever is left over
     * afterwards becomes the streetAddress. this.lines is set to undefined
     * when parsing is finished.
     *
     * @private
     */
    _parseAddress: function() {
        var i,
            j,
            asianChars = 0,
            latinChars = 0,
            startAt,
            infoFields,
            field,
            pattern,
            matchFunction,
            match,
            fieldNumber,
            taken,
            joinString;

        // for locales that support both latin and asian character addresses,
        // decide if we are parsing an asian or latin script address
        if (this.info && this.info.multiformat) {
            for (j = 0; j < this.lines.length; j++) {
                var line = new IString(this.lines[j]);
                var it = line.charIterator();
                while (it.hasNext()) {
                    var c = it.next();
                    if (isIdeo(c) ||
                            CType.withinRange(c, "hangul") ||
                            CType.withinRange(c, 'katakana') ||
                            CType.withinRange(c, 'hiragana') ||
                            CType.withinRange(c, 'bopomofo')) {
                        asianChars++;
                    } else if (isAscii(c) && !isDigit(c)) {
                        latinChars++;
                    }
                }
            }

            this.format = (asianChars >= latinChars) ? "asian" : "latin";
            startAt = this.info.startAt[this.format];
            infoFields = this.info.fields[this.format];
        } else {
            startAt = (this.info && this.info.startAt) || "end";
            infoFields = (this.info && this.info.fields) || [];
        }
        this.compare = (startAt === "end") ? this.endsWith : this.startsWith;

        for (i = 0; i < infoFields.length && this.lines.length > 0; i++) {
            field = infoFields[i];
            this.removeEmptyLines(this.lines);
            if (this.lines.length === 0) {
                // the previous field consumed the last of the text, so there is
                // nothing left to search and no valid line index to use
                break;
            }

            // never carry a match over from the previous field
            match = undefined;

            if (field.pattern) {
                if (typeof(field.pattern) === 'string') {
                    pattern = new RegExp(field.pattern, "img");
                    matchFunction = this.matchRegExp;
                } else {
                    pattern = field.pattern;
                    matchFunction = this.matchPattern;
                }

                switch (field.line) {
                case 'startAtFirst':
                    for (fieldNumber = 0; fieldNumber < this.lines.length; fieldNumber++) {
                        match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                        if (match) {
                            break;
                        }
                    }
                    break;
                case 'startAtLast':
                    for (fieldNumber = this.lines.length-1; fieldNumber >= 0; fieldNumber--) {
                        match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                        if (match) {
                            break;
                        }
                    }
                    break;
                case 'first':
                    fieldNumber = 0;
                    match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                    break;
                case 'last':
                default:
                    fieldNumber = this.lines.length - 1;
                    match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                    break;
                }
                if (match) {
                    this.lines[fieldNumber] = match.line;
                    this[field.name] = match.match;
                }
            } else {
                // if nothing is given, default to taking the whole field.
                // fieldNumber is whatever the most recent pattern field left behind
                // (splice treats undefined as 0), so it may point past the end of
                // the array, in which case there is nothing to take.
                taken = this.lines.splice(fieldNumber, 1)[0];
                if (typeof(taken) === 'string') {
                    this[field.name] = taken.trim();
                }
            }
        }

        // all the left overs go in the street address field
        this.removeEmptyLines(this.lines);
        if (this.lines.length > 0) {
            // Korea uses spaces between words, despite being an "asian" locale
            joinString = (this.info && this.info.joinString && this.info.joinString[this.format]) ||
                ((this.format === "asian") ? "" : ", ");
            this.streetAddress = this.lines.join(joinString).trim();
        }

        this.lines = undefined;
    },

    /**
     * @protected
     * Find the named country either at the end or the beginning of the address.
     * The beginning of the first line is checked first, and then the end of
     * the last line.
     *
     * @param {string} name name of the country to look for
     * @return {{text:string,line:number,start:number}|undefined} the matched text
     * as it appears in the address, the index of the line it was found in, and
     * the offset within that line, or undefined if the name was not found
     */
    _findCountry: function(name) {
        var start = -1, match, line = 0;

        if (this.lines.length > 0) {
            start = this.startsWith(this.lines[line], name);
            if (start === -1) {
                line = this.lines.length-1;
                start = this.endsWith(this.lines[line], name);
            }
            if (start !== -1) {
                match = {
                    text: this.lines[line].substring(start, start + name.length),
                    line: line,
                    start: start
                };
            }
        }

        return match;
    },

    /**
     * Test whether the subject ends with the query, ignoring case as
     * determined by toLowerCase(). Unless the match starts at the very
     * beginning of the subject, the character before it must be white space.
     *
     * @param {string} subject string to search in
     * @param {string} query string to search for
     * @return {number} index in the subject where the query starts, or -1
     * if the subject does not end with the query
     */
    endsWith: function (subject, query) {
        var start = subject.length-query.length,
            i,
            pat;

        for (i = 0; i < query.length; i++) {
            // TODO: use case mapper instead of toLowerCase()
            if (subject.charAt(start+i).toLowerCase() !== query.charAt(i).toLowerCase()) {
                return -1;
            }
        }
        if (start > 0) {
            pat = /\s/;
            if (!pat.test(subject.charAt(start-1))) {
                // make sure if we are not at the beginning of the string, that the match is
                // not the end of some other word
                return -1;
            }
        }
        return start;
    },

    /**
     * Test whether the subject starts with the query, ignoring case as
     * determined by toLowerCase().
     *
     * @param {string} subject string to search in
     * @param {string} query string to search for
     * @return {number} 0 if the subject starts with the query, or -1 if not
     */
    startsWith: function (subject, query) {
        var i;

        for (i = 0; i < query.length; i++) {
            // TODO: use case mapper instead of toLowerCase()
            if (subject.charAt(i).toLowerCase() !== query.charAt(i).toLowerCase()) {
                return -1;
            }
        }
        return 0;
    },

    /**
     * Trim every element of the given array in place and remove the elements
     * that are falsy or that are empty after trimming.
     *
     * @param {Array.<string>} arr the array to clean up
     */
    removeEmptyLines: function (arr) {
        var i = 0;

        while (i < arr.length) {
            if (arr[i]) {
                arr[i] = arr[i].trim();
                if (arr[i].length === 0) {
                    arr.splice(i,1);
                } else {
                    i++;
                }
            } else {
                arr.splice(i,1);
            }
        }
    },

    /**
     * Search a line for a regular expression and separate the matched text
     * from the rest of the line. When startAt is 'end', the last match in the
     * line is used, otherwise the first one.
     *
     * @param {Address} address the address being parsed; its format property
     * decides whether a space is left where the match was cut out
     * @param {string} line the line to search
     * @param {RegExp} expression the expression to search for
     * @param {number=} matchGroup index of the capture group that contains the
     * value of the field. Default: 0, the whole match
     * @param {string=} startAt 'end' to prefer the last match in the line
     * @return {{match:string,line:string}|undefined} the cleaned-up field value
     * and the remainder of the line, or undefined if there was no match
     */
    matchRegExp: function(address, line, expression, matchGroup, startAt) {
        var lastMatch,
            match,
            ret = {},
            last;

        // always search from the start of the line, no matter what state a
        // previous search left in the expression
        expression.lastIndex = 0;

        match = expression.exec(line);
        if (startAt === 'end') {
            while (match !== null) {
                lastMatch = match;
                if (!expression.global) {
                    // without the "g" flag, exec() returns the same match every time
                    break;
                }
                if (match[0].length === 0) {
                    // exec() does not advance past a zero-length match by itself
                    expression.lastIndex++;
                }
                match = expression.exec(line);
            }
            match = lastMatch;
        }

        if (match) {
            matchGroup = matchGroup || 0;
            if (match[matchGroup] !== undefined) {
                ret.match = match[matchGroup].trim();
                // no "g" flag: strips one leading dash, or else a trailing run of dashes
                ret.match = ret.match.replace(/^\-|\-+$/, '');
                ret.match = ret.match.replace(/\s+$/, '');
                last = (startAt === 'end') ? line.lastIndexOf(match[matchGroup]) : line.indexOf(match[matchGroup]);
                ret.line = line.slice(0,last);
                if (address.format !== "asian") {
                    ret.line += " ";
                }
                ret.line += line.slice(last+match[matchGroup].length);
                ret.line = ret.line.trim();
                return ret;
            }
        }

        return undefined;
    },

    /**
     * Search a line for one of a fixed set of strings using the compare
     * function of the given address, which tests either the start or the end
     * of the line. The first string in the set that matches wins.
     *
     * @param {Address} address the address being parsed, which supplies the
     * compare function
     * @param {string} line the line to search
     * @param {Array.<string>} pattern the strings to search for
     * @param {number=} matchGroup not used by this matcher
     * @return {{match:string,line:string}|undefined} the matched text and the
     * remainder of the line, or undefined if none of the strings matched
     */
    matchPattern: function(address, line, pattern, matchGroup) {
        var start,
            j,
            ret = {};

        // search an array of possible fixed strings
        for (j = 0; j < pattern.length; j++) {
            start = address.compare(line, pattern[j]);
            if (start !== -1) {
                ret.match = line.substring(start, start+pattern[j].length);
                if (start !== 0) {
                    ret.line = line.substring(0,start).trim();
                } else {
                    ret.line = line.substring(pattern[j].length).trim();
                }
                return ret;
            }
        }

        return undefined;
    }
};

module.exports = Address;