/*
* Address.js - Represent a mailing address
*
* Copyright © 2013-2015, 2018, 2023 JEDLSoft
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*globals console RegExp */
// !data address countries nativecountries ctrynames
var ilib = require("../index.js");
var Utils = require("./Utils.js");
var JSUtils = require("./JSUtils.js");
var Locale = require("./Locale.js");
var CType = require("./CType.js");
var isIdeo = require("./isIdeo.js");
var isAscii = require("./isAscii.js");
var isDigit = require("./isDigit.js");
var IString = require("./IString.js");
/**
* @class
* Create a new Address instance and parse a physical address.<p>
*
* This function parses a physical address written in a free-form string.
* It returns an object with a number of properties from the list below
* that it may have extracted from that address.<p>
*
* The following is a list of properties that the algorithm will return:<p>
*
* <ul>
* <li><i>streetAddress</i>: The street address, including house numbers and all.
* <li><i>locality</i>: The locality of this address (usually a city or town).
* <li><i>region</i>: The region where the locality is located. In the US, this
* corresponds to states. In other countries, this may be provinces,
* cantons, prefectures, etc. In some smaller countries, there are no
* such divisions.
* <li><i>postalCode</i>: Country-specific code for expediting mail. In the US,
* this is the zip code.
* <li><i>country</i>: The country of the address.
* <li><i>countryCode</i>: The ISO 3166 2-letter region code for the destination
* country in this address.
* </ul>
*
* The above properties will not necessarily appear in the instance. For
* any individual property, if the free-form address does not contain
* that property or it cannot be parsed out, then it is left out.<p>
*
* The options parameter may contain any of the following properties:
*
* <ul>
* <li><i>locale</i> - locale or localeSpec to use to parse the address. If not
* specified, this function will use the current ilib locale
*
* <li><i>onLoad</i> - a callback function to call when the address info for the
* locale is fully loaded and the address has been parsed. When the onLoad
* option is given, the address object
* will attempt to load any missing locale data using the ilib loader callback.
* When the constructor is done (even if the data is already preassembled), the
* onLoad function is called with the current instance as a parameter, so this
* callback can be used with preassembled or dynamic loading or a mix of the two.
*
* <li><i>sync</i> - tell whether to load any missing locale data synchronously or
* asynchronously. If this option is given as "false", then the "onLoad"
* callback must be given, as the instance returned from this constructor will
* not be usable for a while.
*
* <li><i>loadParams</i> - an object containing parameters to pass to the
* loader callback function when locale data is missing. The parameters are not
* interpreted or modified in any way. They are simply passed along. The object
* may contain any property/value pairs as long as the calling code is in
* agreement with the loader callback function as to what those parameters mean.
* </ul>
*
* When an address cannot be parsed properly, the entire address will be placed
* into the streetAddress property.<p>
*
* When the freeformAddress is another Address, this will act like a copy
* constructor.<p>
*
*
* @constructor
* @param {string|Address} freeformAddress free-form address to parse, or a
* javascript object containing the fields
* @param {Object} options options to the parser
*/
var Address = function (freeformAddress, options) {
    var address,
        onLoad = options && options.onLoad;

    if (!freeformAddress) {
        // Nothing to parse. The onLoad contract says the callback fires whenever
        // the constructor is done, so honour it here too; otherwise an
        // asynchronous caller would never be notified.
        if (typeof(onLoad) === 'function') {
            onLoad(this);
        }
        return undefined;
    }

    this.sync = true;
    this.loadParams = {};

    if (options) {
        if (options.locale) {
            this.locale = (typeof(options.locale) === 'string') ? new Locale(options.locale) : options.locale;
        }

        if (typeof(options.sync) !== 'undefined') {
            this.sync = !!options.sync;
        }

        if (options.loadParams) {
            this.loadParams = options.loadParams;
        }
    }

    this.locale = this.locale || new Locale();

    // initialize from an already parsed object
    if (typeof(freeformAddress) === 'object') {
        /**
         * The street address, including house numbers and all.
         * @type {string|undefined}
         */
        this.streetAddress = freeformAddress.streetAddress;
        /**
         * The locality of this address (usually a city or town).
         * @type {string|undefined}
         */
        this.locality = freeformAddress.locality;
        /**
         * The region (province, canton, prefecture, state, etc.) where the address is located.
         * @type {string|undefined}
         */
        this.region = freeformAddress.region;
        /**
         * Country-specific code for expediting mail. In the US, this is the zip code.
         * @type {string|undefined}
         */
        this.postalCode = freeformAddress.postalCode;
        /**
         * Optional city-specific code for a particular post office, used to expedite
         * delivery.
         * @type {string|undefined}
         */
        this.postOffice = freeformAddress.postOffice;
        /**
         * The country of the address.
         * @type {string|undefined}
         */
        this.country = freeformAddress.country;
        if (freeformAddress.countryCode) {
            /**
             * The 2 or 3 letter ISO 3166 region code for the destination country in this address.
             * @type {string}
             */
            this.countryCode = freeformAddress.countryCode;
        }
        if (freeformAddress.format) {
            /**
             * private
             * @type {string}
             */
            this.format = freeformAddress.format;
        }

        // No data needs to be loaded for a copy, so the instance is complete now.
        if (typeof(onLoad) === 'function') {
            onLoad(this);
        }
        return this;
    }

    // Collapse runs of blanks, tabs and carriage returns to one space and strip
    // all leading/trailing whitespace (trim() also removes newlines).
    address = freeformAddress.replace(/[ \t\r]+/g, " ").trim();

    //console.log("\n\n-------------\nAddress is '" + address + "'");

    // Split into lines on the ASCII comma, the fullwidth comma (U+FF0C, written
    // as an escape so it cannot be lost to character normalization) and newline.
    this.lines = address.split(/[,\uFF0C\n]/g);
    this.removeEmptyLines(this.lines);

    // Initialize the character-type helpers one after the other, then make sure
    // the native-language country names are available before looking for the
    // destination country.
    isAscii._init(this.sync, this.loadParams, ilib.bind(this, function() {
        isIdeo._init(this.sync, this.loadParams, ilib.bind(this, function() {
            isDigit._init(this.sync, this.loadParams, ilib.bind(this, function() {
                if (typeof(ilib.data.nativecountries) === 'undefined') {
                    Utils.loadData({
                        object: "Address",
                        name: "nativecountries.json", // countries in their own language
                        locale: "-", // only need to load the root file
                        nonlocale: true,
                        sync: this.sync,
                        loadParams: this.loadParams,
                        callback: ilib.bind(this, function(nativecountries) {
                            ilib.data.nativecountries = nativecountries;
                            this._loadCountries(onLoad);
                        })
                    });
                } else {
                    this._loadCountries(onLoad);
                }
            }));
        }));
    }));
};
/** @protected */
Address.prototype = {
    /**
     * Make sure the English country name table is present in
     * ilib.data.countries, loading countries.json when it is not, and then
     * continue by loading the localized country names.
     *
     * @private
     * @param {function(Address):undefined=} onLoad callback handed along the
     * loading chain and eventually called with this instance
     */
    _loadCountries: function(onLoad) {
        if (typeof(ilib.data.countries) === 'undefined') {
            Utils.loadData({
                object: "Address",
                name: "countries.json", // countries in English
                locale: "-", // only need to load the root file
                nonlocale: true,
                sync: this.sync,
                loadParams: this.loadParams,
                callback: ilib.bind(this, function(countries) {
                    ilib.data.countries = countries;
                    this._loadCtrynames(onLoad);
                })
            });
        } else {
            this._loadCtrynames(onLoad);
        }
    },

    /**
     * Load ctrynames.json for this instance's locale, remember the result in
     * this.ctrynames and go on to determine the destination country.
     *
     * @private
     * @param {function(Address):undefined=} onLoad callback handed along the
     * loading chain and eventually called with this instance
     */
    _loadCtrynames: function(onLoad) {
        Utils.loadData({
            name: "ctrynames.json",
            object: "Address",
            locale: this.locale,
            sync: this.sync,
            // NOTE(review): returnOne is interpreted by the loader; presumably it
            // requests a single locale file instead of a merged set -- confirm
            loadParams: JSUtils.merge(this.loadParams, {returnOne: true}),
            callback: ilib.bind(this, function(ctrynames) {
                this.ctrynames = ctrynames;
                this._determineDest(ctrynames, onLoad);
            })
        });
    },

    /**
     * Search one table of country names for the longest name that appears at
     * the start of the first line or at the end of the last line of the
     * address. Each time a longer name is found, this.country is set to the
     * text found in the address and this.countryCode to the table's value for
     * that name.
     *
     * @private
     * @param {Object?} ctrynames table mapping country names to region codes
     * @return {{text:string,line:number,start:number}|undefined} the longest
     * match found, or undefined if no name in the table matched
     */
    _findDest: function (ctrynames) {
        var match;

        for (var countryName in ctrynames) {
            if (countryName && countryName !== "generated") {
                // find the longest match in the current table
                // ctrynames contains the country names mapped to region code
                // for efficiency, only test for things longer than the current match
                if (!match || match.text.length < countryName.length) {
                    var temp = this._findCountry(countryName);
                    if (temp) {
                        match = temp;
                        this.country = match.text;
                        this.countryCode = ctrynames[countryName];
                    }
                }
            }
        }
        return match;
    },

    /**
     * Work out the destination country of the address, cut the country name
     * out of the line it was found in, and start parsing the remainder.
     *
     * @private
     * @param {Object?} localizedCountries country names in the current language
     * @param {function(Address):undefined} callback called with this instance
     * when parsing is finished
     */
    _determineDest: function (localizedCountries, callback) {
        var match;

        /*
         * First, find the name of the destination country, as that determines how to parse
         * the rest of the address. For any address, there are three possible ways
         * that the name of the country could be written:
         * 1. In the current language
         * 2. In its own native language
         * 3. In English
         * We'll try all three.
         */
        var tables = [];
        if (localizedCountries) {
            tables.push(localizedCountries);
        }
        tables.push(ilib.data.nativecountries);
        tables.push(ilib.data.countries);

        for (var i = 0; i < tables.length; i++) {
            match = this._findDest(tables[i]);

            if (match) {
                // remove the country name from the line where it was found
                this.lines[match.line] = this.lines[match.line].substring(0, match.start) + this.lines[match.line].substring(match.start + match.text.length);

                this._init(callback);
                return;
            }
        }

        // no country, so try parsing it as if we were in the same country
        this.country = undefined;
        this.countryCode = this.locale.getRegion();
        this._init(callback);
    },

    /**
     * Load the address format description (address.json) for the destination
     * country, falling back to the one for the "XX" locale when nothing usable
     * was loaded, then parse the address and notify the caller.
     *
     * @private
     * @param {function(Address):undefined} callback called with this instance
     * when parsing is finished
     */
    _init: function(callback) {
        // shared completion step for both the regular and the fallback load
        var finish = ilib.bind(this, function(info) {
            this.info = info;
            this._parseAddress();
            if (typeof(callback) === 'function') {
                callback(this);
            }
        });

        Utils.loadData({
            object: "Address",
            locale: new Locale(this.countryCode),
            name: "address.json",
            sync: this.sync,
            loadParams: this.loadParams,
            callback: ilib.bind(this, function(info) {
                if (!info || JSUtils.isEmpty(info) || !info.fields) {
                    // load the "unknown" locale instead
                    Utils.loadData({
                        object: "Address",
                        locale: new Locale("XX"),
                        name: "address.json",
                        sync: this.sync,
                        loadParams: this.loadParams,
                        callback: finish
                    });
                } else {
                    finish(info);
                }
            })
        });
    },

    /**
     * Extract the fields described by this.info from this.lines and store each
     * one as a property of this instance named after the field. Whatever is
     * left afterwards becomes the streetAddress. this.lines is set to
     * undefined when done.
     *
     * @private
     */
    _parseAddress: function() {
        var i,
            asianChars = 0,
            latinChars = 0,
            startAt,
            infoFields,
            field,
            pattern,
            matchFunction,
            match,
            fieldNumber,
            wholeLine,
            joinString;

        // for locales that support both latin and asian character addresses,
        // decide if we are parsing an asian or latin script address
        if (this.info && this.info.multiformat) {
            for (var j = 0; j < this.lines.length; j++) {
                var line = new IString(this.lines[j]);
                var it = line.charIterator();
                while (it.hasNext()) {
                    var c = it.next();
                    if (isIdeo(c) ||
                            CType.withinRange(c, "hangul") ||
                            CType.withinRange(c, 'katakana') ||
                            CType.withinRange(c, 'hiragana') ||
                            CType.withinRange(c, 'bopomofo')) {
                        asianChars++;
                    } else if (isAscii(c) && !isDigit(c)) {
                        latinChars++;
                    }
                }
            }

            this.format = (asianChars >= latinChars) ? "asian" : "latin";
            startAt = this.info.startAt[this.format];
            infoFields = this.info.fields[this.format];
            // //console.log("multiformat locale: format is now " + this.format);
        } else {
            startAt = (this.info && this.info.startAt) || "end";
            infoFields = (this.info && this.info.fields) || [];
        }
        this.compare = (startAt === "end") ? this.endsWith : this.startsWith;

        //console.log("this.lines is: " + JSON.stringify(this.lines));

        for (i = 0; i < infoFields.length && this.lines.length > 0; i++) {
            field = infoFields[i];
            this.removeEmptyLines(this.lines);
            if (this.lines.length === 0) {
                // the previous field consumed the last remaining text, so there
                // is nothing left to hand to the matchers
                break;
            }
            // never carry a match over from the previous field
            match = undefined;
            //console.log("Searching for field " + field.name);
            if (field.pattern) {
                if (typeof(field.pattern) === 'string') {
                    pattern = new RegExp(field.pattern, "img");
                    matchFunction = this.matchRegExp;
                } else {
                    pattern = field.pattern;
                    matchFunction = this.matchPattern;
                }

                switch (field.line) {
                case 'startAtFirst':
                    for (fieldNumber = 0; fieldNumber < this.lines.length; fieldNumber++) {
                        match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                        if (match) {
                            break;
                        }
                    }
                    break;
                case 'startAtLast':
                    for (fieldNumber = this.lines.length-1; fieldNumber >= 0; fieldNumber--) {
                        match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                        if (match) {
                            break;
                        }
                    }
                    break;
                case 'first':
                    fieldNumber = 0;
                    match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                    break;
                case 'last':
                default:
                    fieldNumber = this.lines.length - 1;
                    match = matchFunction(this, this.lines[fieldNumber], pattern, field.matchGroup, startAt);
                    break;
                }
                if (match) {
                    // //console.log("found match for " + field.name + ": " + JSON.stringify(match));
                    // //console.log("remaining line is " + match.line);
                    this.lines[fieldNumber] = match.line;
                    this[field.name] = match.match;
                }
            } else {
                // if nothing is given, default to taking the whole field.
                // fieldNumber is deliberately the line index left over from the
                // previous pattern field.
                wholeLine = this.lines.splice(fieldNumber, 1)[0];
                if (typeof(wholeLine) === 'string') {
                    this[field.name] = wholeLine.trim();
                }
                //console.log("typeof(this[field.name]) is " + typeof(this[field.name]) + " and value is " + JSON.stringify(this[field.name]));
            }
        }

        // all the left overs go in the street address field
        this.removeEmptyLines(this.lines);
        if (this.lines.length > 0) {
            //console.log("this.lines is " + JSON.stringify(this.lines) + " and splicing to get streetAddress");
            // Korea uses spaces between words, despite being an "asian" locale
            joinString = (this.info && this.info.joinString && this.info.joinString[this.format]) ||
                ((this.format === "asian") ? "" : ", ");
            this.streetAddress = this.lines.join(joinString).trim();
        }

        this.lines = undefined;
        //console.log("final result is " + JSON.stringify(this));
    },

    /**
     * Find the named country either at the end or the beginning of the address.
     * The beginning of the first line is tried first, then the end of the
     * last line.
     *
     * @private
     * @param {string} name country name to look for
     * @return {{text:string,line:number,start:number}|undefined} the text as
     * it appears in the address, the index of the line it is in and its offset
     * in that line, or undefined if the name was not found
     */
    _findCountry: function(name) {
        var start = -1, match, line = 0;

        if (this.lines.length > 0) {
            start = this.startsWith(this.lines[line], name);
            if (start === -1) {
                line = this.lines.length-1;
                start = this.endsWith(this.lines[line], name);
            }
            if (start !== -1) {
                match = {
                    text: this.lines[line].substring(start, start + name.length),
                    line: line,
                    start: start
                };
            }
        }

        return match;
    },

    /**
     * Test whether subject ends with query, ignoring case. Unless the match
     * starts at the very beginning of subject, it must be preceded by a
     * whitespace character.
     *
     * @param {string} subject text to search in
     * @param {string} query text to search for
     * @return {number} the offset in subject where the match starts, or -1
     */
    endsWith: function (subject, query) {
        var start = subject.length-query.length,
            i,
            pat;
        //console.log("endsWith: checking " + query + " against " + subject);
        for (i = 0; i < query.length; i++) {
            // TODO: use case mapper instead of toLowerCase()
            if (subject.charAt(start+i).toLowerCase() !== query.charAt(i).toLowerCase()) {
                return -1;
            }
        }
        if (start > 0) {
            pat = /\s/;
            if (!pat.test(subject.charAt(start-1))) {
                // make sure if we are not at the beginning of the string, that the match is
                // not the end of some other word
                return -1;
            }
        }
        return start;
    },

    /**
     * Test whether subject starts with query, ignoring case. Unlike endsWith,
     * no check is made on the character that follows the match.
     *
     * @param {string} subject text to search in
     * @param {string} query text to search for
     * @return {number} 0 if subject starts with query, or -1 otherwise
     */
    startsWith: function (subject, query) {
        var i;
        // //console.log("startsWith: checking " + query + " against " + subject);
        for (i = 0; i < query.length; i++) {
            // TODO: use case mapper instead of toLowerCase()
            if (subject.charAt(i).toLowerCase() !== query.charAt(i).toLowerCase()) {
                return -1;
            }
        }
        return 0;
    },

    /**
     * Trim every element of the array in place and remove the elements that
     * are falsy or empty after trimming.
     *
     * @param {Array.<string>} arr array of lines to clean up
     */
    removeEmptyLines: function (arr) {
        var i = 0;

        while (i < arr.length) {
            if (arr[i]) {
                arr[i] = arr[i].trim();
                if (arr[i].length === 0) {
                    arr.splice(i,1);
                } else {
                    i++;
                }
            } else {
                arr.splice(i,1);
            }
        }
    },

    /**
     * Search a line with a regular expression and cut the text of the
     * requested match group out of it.
     *
     * @param {Address} address instance being parsed; its format property
     * decides whether a space replaces the removed text
     * @param {string} line the line to search
     * @param {RegExp} expression expression to search with; the "end" mode
     * relies on it having the global flag so that exec() advances
     * @param {number=} matchGroup index of the match group to extract,
     * defaulting to 0 (the whole match)
     * @param {string=} startAt when "end", the last match in the line is used
     * instead of the first
     * @return {{match:string,line:string}|undefined} the extracted text and the
     * remainder of the line, or undefined if there was no match
     */
    matchRegExp: function(address, line, expression, matchGroup, startAt) {
        var lastMatch,
            match,
            ret = {},
            last;

        //console.log("searching for regexp " + expression.source + " in line " + line);

        match = expression.exec(line);
        if (startAt === 'end') {
            // keep going until exec() runs out of matches to find the last one
            while (match !== null && match.length > 0) {
                //console.log("found matches " + JSON.stringify(match));
                lastMatch = match;
                match = expression.exec(line);
            }
            match = lastMatch;
        }

        if (match) {
            //console.log("found matches " + JSON.stringify(match));
            matchGroup = matchGroup || 0;
            if (match[matchGroup] !== undefined) {
                ret.match = match[matchGroup].trim();
                // the global flag is required so that a leading dash AND trailing
                // dashes are both stripped, not just whichever comes first
                ret.match = ret.match.replace(/^\-|\-+$/g, '');
                ret.match = ret.match.replace(/\s+$/, '');
                last = (startAt === 'end') ? line.lastIndexOf(match[matchGroup]) : line.indexOf(match[matchGroup]);
                //console.log("last is " + last);
                ret.line = line.slice(0,last);
                if (address.format !== "asian") {
                    ret.line += " ";
                }
                ret.line += line.slice(last+match[matchGroup].length);
                ret.line = ret.line.trim();
                //console.log("found match " + ret.match + " from matchgroup " + matchGroup + " and rest of line is " + ret.line);
                return ret;
            }
        //} else {
            //console.log("no match");
        }

        return undefined;
    },

    /**
     * Search a line for one of a fixed list of strings using the address
     * instance's compare function (startsWith or endsWith), and cut the first
     * one found out of the line.
     *
     * @param {Address} address instance being parsed, supplying compare()
     * @param {string} line the line to search
     * @param {Array.<string>} pattern the strings to look for, in order
     * @param {number=} matchGroup unused; accepted so that the signature is
     * parallel to matchRegExp
     * @return {{match:string,line:string}|undefined} the text found and the
     * remainder of the line, or undefined if none of the strings matched
     */
    matchPattern: function(address, line, pattern, matchGroup) {
        var start,
            j,
            ret = {};

        //console.log("searching in line " + line);

        // search an array of possible fixed strings
        //console.log("Using fixed set of strings.");
        for (j = 0; j < pattern.length; j++) {
            start = address.compare(line, pattern[j]);
            if (start !== -1) {
                ret.match = line.substring(start, start+pattern[j].length);
                if (start !== 0) {
                    ret.line = line.substring(0,start).trim();
                } else {
                    ret.line = line.substring(pattern[j].length).trim();
                }
                //console.log("found match " + ret.match + " and rest of line is " + ret.line);
                return ret;
            }
        }

        return undefined;
    }
};
module.exports = Address;
Source