Skip to content

Commit

Permalink
Merge pull request #28 from PDOK/remove_to_lower_case_suggest
Browse files Browse the repository at this point in the history
Remove to lower case suggest
  • Loading branch information
gerdos82 authored Jan 17, 2025
2 parents 00209bf + 30fd7ef commit 3f2c7be
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 57 deletions.
2 changes: 1 addition & 1 deletion internal/etl/etl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func TestImportGeoPackage(t *testing.T) {
{
name: "import everything",
where: "",
count: 72210, // 33030*2 + substitution and synonyms combinations
count: 66338, // 33030*2 + substitution and synonyms combinations
},
{
name: "with where clause",
Expand Down
3 changes: 1 addition & 2 deletions internal/etl/transform/subst_and_synonyms.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ func NewSubstAndSynonyms(substitutionsFile, synonymsFile string) (*SubstAndSynon
func (s SubstAndSynonyms) generate(fieldValuesByName map[string]string) []map[string]string {
var fieldValuesByNameWithAllValues = make(map[string][]string)
for key, value := range fieldValuesByName {
valueLower := strings.ToLower(value)
// Get all substitutions
substitutedValues := extendValues([]string{valueLower}, s.substitutions)
substitutedValues := extendValues([]string{value}, s.substitutions)
// Get all synonyms for these substituted values
// -> one way
synonymsValuesOneWay := extendValues(substitutedValues, s.synonyms)
Expand Down
7 changes: 3 additions & 4 deletions internal/etl/transform/subst_and_synonyms_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@ func Test_generate(t *testing.T) {
want []map[string]string
wantErr assert.ErrorAssertionFunc
}{
{"simple record", args{map[string]string{"component_thoroughfarename": "foo", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "foo", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}}, assert.NoError},
{"single synonym record", args{map[string]string{"component_thoroughfarename": "eerste", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "eerste", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}}, assert.NoError},
{"single synonym with capital", args{map[string]string{"component_thoroughfarename": "Eerste", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "eerste", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}}, assert.NoError},
{"two-way synonym record", args{map[string]string{"component_thoroughfarename": "eerste 2de", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "eerste 2de", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste 2de", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}, {"component_thoroughfarename": "eerste tweede", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste tweede", "component_postaldescriptor": "1234ab", "component_addressareaname": "bar"}}, assert.NoError},
{"simple record", args{map[string]string{"component_thoroughfarename": "foo", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "foo", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}}, assert.NoError},
{"single synonym record", args{map[string]string{"component_thoroughfarename": "eerste", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "eerste", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}}, assert.NoError},
{"two-way synonym record", args{map[string]string{"component_thoroughfarename": "eerste 2de", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"component_thoroughfarename": "eerste 2de", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste 2de", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, {"component_thoroughfarename": "eerste tweede", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}, {"component_thoroughfarename": "1ste tweede", "component_postaldescriptor": "1234AB", "component_addressareaname": "bar"}}, assert.NoError},
{"avoid endless loop for synonyms that contain source value", args{map[string]string{"street": "oude kerkstraat"}, "../testdata/substitutions.csv", "../testdata/synonyms.csv"}, []map[string]string{{"street": "oude kerkstraat"}, {"street": "oud kerkstraat"}}, assert.NoError},
}
for _, tt := range tests {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 1",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/51?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -65,7 +65,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 1",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/52?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -113,7 +113,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/32183?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -161,7 +161,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/53?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -209,7 +209,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/32184?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -257,7 +257,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/54?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -305,7 +305,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 11",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/22549?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -353,7 +353,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 13",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/56?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -401,7 +401,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 13",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/55?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -449,7 +449,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 15",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/57?f=json",
"score": 0.07213475555181503
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 1",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/51?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -65,7 +65,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 1",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/52?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -113,7 +113,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/32183?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -161,7 +161,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/53?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -209,7 +209,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/32184?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -257,7 +257,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/54?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -305,7 +305,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 11",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/22549?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -353,7 +353,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 13",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/56?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -401,7 +401,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 13",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/55?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -449,7 +449,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 15",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/57?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -497,7 +497,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 15",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/58?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -545,7 +545,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 17",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/16128?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -593,7 +593,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 19",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/59?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -641,7 +641,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 19 A",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/23322?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -689,7 +689,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 2",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/16129?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -737,7 +737,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 2",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/16130?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -785,7 +785,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 21",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/60?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -833,7 +833,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 21 A",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/61?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -881,7 +881,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 23",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/16132?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -929,7 +929,7 @@
"collectionId": "buildings",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 23",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/buildings/items/16131?f=json",
"score": 0.07213475555181503
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 1",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/51?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -65,7 +65,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 1",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/52?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -113,7 +113,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/32183?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -161,7 +161,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/53?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -209,7 +209,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/32184?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -257,7 +257,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 10",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/54?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -305,7 +305,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 11",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/22549?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -353,7 +353,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 13",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/56?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -401,7 +401,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 13",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/55?f=json",
"score": 0.07213475555181503
},
Expand Down Expand Up @@ -449,7 +449,7 @@
"collectionId": "addresses",
"collectionVersion": "1",
"displayName": "Abbewaal - Den Burg 15",
"highlight": "abbewaal <b>den</b> burg",
"highlight": "Abbewaal <b>Den</b> Burg",
"href": "https://example.com/ogc/v1/collections/addresses/items/57?f=json",
"score": 0.07213475555181503
},
Expand Down
Loading

0 comments on commit 3f2c7be

Please sign in to comment.