From 029b6223c265b4238b45feb9dc10d9a09f7edb42 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 11 Jul 2024 12:04:12 +0200 Subject: [PATCH] fix(dedupe): improved deduplication between USA ZIP vs ZIP+4 properties --- .jshintrc | 2 +- helper/diffPlaces.js | 29 ++++++++++++++++++++++++----- test/unit/helper/diffPlaces.js | 26 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/.jshintrc b/.jshintrc index 6b5af2b22..a84cd32a4 100644 --- a/.jshintrc +++ b/.jshintrc @@ -2,7 +2,7 @@ "node": true, "curly": true, "eqeqeq": true, - "esversion": 9, + "esversion": "2022", "freeze": true, "immed": true, "indent": 2, diff --git a/helper/diffPlaces.js b/helper/diffPlaces.js index 6909034f2..3e0a168ea 100644 --- a/helper/diffPlaces.js +++ b/helper/diffPlaces.js @@ -41,9 +41,13 @@ function isLayerDifferent(item1, item2){ return false; } +function isUSA(item) { + if (!_.isArray(item?.parent?.country_a)) { return false; } + return item.parent.country_a[0] === 'USA'; +} + function isUsState(item) { - if (!_.isArray(item.parent.country_a)) { return false; } - return item.parent.country_a[0] === 'USA' && item.layer === 'region'; + return isUSA(item) && item.layer === 'region'; } // Geonames records in the locality and localadmin layer are parented by themselves @@ -206,7 +210,7 @@ function isAddressDifferent(item1, item2){ // only compare zip if both records have it, otherwise just ignore and assume it's the same // since by this time we've already compared parent hierarchies if( _.has(address1, 'zip') && _.has(address2, 'zip') ){ - if( isPropertyDifferent(address1, address2, 'zip') ){ return true; } + if( isZipDifferent(item1, item2) ){ return true; } } return false; @@ -255,10 +259,25 @@ function isDifferent(item1, item2, requestLanguage){ return false; } +/** + * return true if zip codes are different + * + * note: handle USA ZIP+4 vs ZIP 98036-6119 vs 98036 + */ +function isZipDifferent(item1, item2) { + + if (isUSA(item1) && isUSA(item2)) { + const firstWordOnly = (str) => normalizeString(str).split(' ')[0]; + return isPropertyDifferent(item1, item2, 'zip', firstWordOnly); + } + + return isPropertyDifferent(item1, item2, 'zip'); +} + /** * return true if properties are different */ -function isPropertyDifferent(item1, item2, prop ){ +function isPropertyDifferent(item1, item2, prop, normalizer = normalizeString ){ // if neither item has prop, we consider them the same if( !_.has(item1, prop) && !_.has(item2, prop) ){ return false; } @@ -274,7 +293,7 @@ function isPropertyDifferent(item1, item2, prop ){ let prop1StringValue = field.getStringValue( prop1[i] ); for( let j=0; j