From 12cd6b670bdceb739e4e2fd3205d0cb01684a407 Mon Sep 17 00:00:00 2001 From: mshroom <32199029+mshroom@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:09:46 +0200 Subject: [PATCH 1/3] QDC: Index spatial and temporal coverage --- .../Finna/Record/QdcRecordTrait.php | 89 ++++++++++++++----- .../Finna/Record/QdcTest.php | 33 +++++++ .../fixtures/Finna/record/qdc_dateranges.xml | 10 +++ 3 files changed, 112 insertions(+), 20 deletions(-) diff --git a/src/RecordManager/Finna/Record/QdcRecordTrait.php b/src/RecordManager/Finna/Record/QdcRecordTrait.php index c2df1653..dc43ad6e 100644 --- a/src/RecordManager/Finna/Record/QdcRecordTrait.php +++ b/src/RecordManager/Finna/Record/QdcRecordTrait.php @@ -137,26 +137,10 @@ public function toSolrArray(Database $db = null) } } - foreach ($this->doc->coverage as $coverage) { - $attrs = $coverage->attributes(); - if ($attrs->type == 'geocoding') { - $match = preg_match( - '/([\d\.]+)\s*,\s*([\d\.]+)/', - trim((string)$coverage), - $matches - ); - if ($match) { - if ($attrs->format == 'lon,lat') { - $lon = $matches[1]; - $lat = $matches[2]; - } else { - $lat = $matches[1]; - $lon = $matches[2]; - } - $data['location_geo'][] = "POINT($lon $lat)"; - } - } - } + $data['era'] = $data['era_facet'] = $this->getCoverageByType('temporal'); + $data['geographic'] = $data['geographic_facet'] = $this->getCoverageByType('spatial'); + $data['location_geo'] = $this->getCoverageByType('geocoding'); + if (!empty($data['location_geo'])) { $data['center_coords'] = $this->metadataUtils->getCenterCoordinates($data['location_geo']); @@ -187,6 +171,71 @@ public function toSolrArray(Database $db = null) return $data; } + /** + * Get locations for geocoding + * + * Returns an associative array of primary and secondary locations + * + * @return array + */ + public function getLocations() + { + $locations = []; + // If there is already coordinates in the record, don't return anything for geocoding + if (!$this->getCoverageByType('geocoding')) { + $locations = $this->getCoverageByType('spatial'); + } + return [ + 'primary' => $locations, + 'secondary' => [], + ]; + } + + /** + * Get coverage by type + * + * @param string $type Type attribute + * + * @return array + */ + protected function getCoverageByType(string $type) + { + $result = []; + foreach ($this->doc->coverage as $coverage) { + if ($type !== (string)$coverage->attributes()->type) { + continue; + } + $cov = trim((string)$coverage); + // Check if field contains coordinates + $match = preg_match( + '/([\d\.]+)\s*,\s*([\d\.]+)/', + $cov, + $matches + ); + // If type is geocoding, return only coordinates. + // Other types might contain ill-formatted coordinates which should be discarded. + switch ($type) { + case 'geocoding': + if ($match) { + if ($coverage->attributes()->format == 'lon,lat') { + $lon = $matches[1]; + $lat = $matches[2]; + } else { + $lat = $matches[1]; + $lon = $matches[2]; + } + $result[] = "POINT($lon $lat)"; + } + break; + default: + if (!$match && $stripped = $this->metadataUtils->stripTrailingPunctuation($cov, '.')) { + $result[] = $stripped; + } + } + } + return $result; + } + /** * Check if the needle is found in the haystack using fnmatch for comparison * diff --git a/tests/RecordManagerTest/Finna/Record/QdcTest.php b/tests/RecordManagerTest/Finna/Record/QdcTest.php index 78950d14..85497040 100644 --- a/tests/RecordManagerTest/Finna/Record/QdcTest.php +++ b/tests/RecordManagerTest/Finna/Record/QdcTest.php @@ -88,6 +88,39 @@ public function testDateRanges() $this->assertEquals($expected, $fields); } + /** + * Test coverage. + * + * @return void + */ + public function testCoverage() + { + $spatial = [ + 'Helsinki', + 'Vantaa', + ]; + $temporal = [ + '2010', + '2010-luku', + ]; + $geocoding = ['POINT(27.1826451 63.5694237)']; + $fields = $this->createRecord( + Qdc::class, + 'qdc_dateranges.xml', + [], + 'Finna', + [ + $this->createMock(\RecordManager\Base\Http\ClientManager::class), + ] + ); + $fields = $fields->toSolrArray(); + $this->assertEquals($spatial, $fields['geographic']); + $this->assertEquals($spatial, $fields['geographic_facet']); + $this->assertEquals($geocoding, $fields['location_geo']); + $this->assertEquals($temporal, $fields['era']); + $this->assertEquals($temporal, $fields['era_facet']); + } + /** * Test media types * diff --git a/tests/fixtures/Finna/record/qdc_dateranges.xml b/tests/fixtures/Finna/record/qdc_dateranges.xml index d42b10fd..2669f707 100644 --- a/tests/fixtures/Finna/record/qdc_dateranges.xml +++ b/tests/fixtures/Finna/record/qdc_dateranges.xml @@ -53,4 +53,14 @@ 20221212-20221214 20221210 2 028----00 05 + + + Undefined coverage type + 2010 + 2010-luku. + Helsinki. + Vantaa + 63.0,27.0 + 63.5694237,27.1826451 + Iisalmi From 9eda9445f3aebe12c3c1febf468c79a597279307 Mon Sep 17 00:00:00 2001 From: mshroom <32199029+mshroom@users.noreply.github.com> Date: Tue, 5 Mar 2024 10:31:21 +0200 Subject: [PATCH 2/3] Fix whitespace --- src/RecordManager/Finna/Record/QdcRecordTrait.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RecordManager/Finna/Record/QdcRecordTrait.php b/src/RecordManager/Finna/Record/QdcRecordTrait.php index dc43ad6e..ddbbaf05 100644 --- a/src/RecordManager/Finna/Record/QdcRecordTrait.php +++ b/src/RecordManager/Finna/Record/QdcRecordTrait.php @@ -212,7 +212,7 @@ protected function getCoverageByType(string $type) $cov, $matches ); - // If type is geocoding, return only coordinates. + // If type is geocoding, return only coordinates. // Other types might contain ill-formatted coordinates which should be discarded. switch ($type) { case 'geocoding': From ab61653d046bda2910ec79ee4daddcd4443588ab Mon Sep 17 00:00:00 2001 From: mshroom <32199029+mshroom@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:19:39 +0200 Subject: [PATCH 3/3] small fixes and more test cases --- .../Finna/Record/QdcRecordTrait.php | 32 ++++++++----------- .../Finna/Record/QdcTest.php | 5 ++- .../fixtures/Finna/record/qdc_dateranges.xml | 1 + 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/RecordManager/Finna/Record/QdcRecordTrait.php b/src/RecordManager/Finna/Record/QdcRecordTrait.php index ddbbaf05..fa738a94 100644 --- a/src/RecordManager/Finna/Record/QdcRecordTrait.php +++ b/src/RecordManager/Finna/Record/QdcRecordTrait.php @@ -178,7 +178,7 @@ public function toSolrArray(Database $db = null) * * @return array */ - public function getLocations() + public function getLocations(): array { $locations = []; // If there is already coordinates in the record, don't return anything for geocoding @@ -198,7 +198,7 @@ public function getLocations() * * @return array */ - protected function getCoverageByType(string $type) + protected function getCoverageByType(string $type): array { $result = []; foreach ($this->doc->coverage as $coverage) { @@ -214,23 +214,19 @@ protected function getCoverageByType(string $type) ); // If type is geocoding, return only coordinates. // Other types might contain ill-formatted coordinates which should be discarded. - switch ($type) { - case 'geocoding': - if ($match) { - if ($coverage->attributes()->format == 'lon,lat') { - $lon = $matches[1]; - $lat = $matches[2]; - } else { - $lat = $matches[1]; - $lon = $matches[2]; - } - $result[] = "POINT($lon $lat)"; - } - break; - default: - if (!$match && $stripped = $this->metadataUtils->stripTrailingPunctuation($cov, '.')) { - $result[] = $stripped; + if ('geocoding' === $type) { + if ($match) { + if ($coverage->attributes()->format == 'lon,lat') { + $lon = $matches[1]; + $lat = $matches[2]; + } else { + $lat = $matches[1]; + $lon = $matches[2]; } + $result[] = "POINT($lon $lat)"; + } + } elseif (!$match && $stripped = $this->metadataUtils->stripTrailingPunctuation($cov, '.')) { + $result[] = $stripped; } } return $result; diff --git a/tests/RecordManagerTest/Finna/Record/QdcTest.php b/tests/RecordManagerTest/Finna/Record/QdcTest.php index 85497040..42321de9 100644 --- a/tests/RecordManagerTest/Finna/Record/QdcTest.php +++ b/tests/RecordManagerTest/Finna/Record/QdcTest.php @@ -103,7 +103,10 @@ public function testCoverage() '2010', '2010-luku', ]; - $geocoding = ['POINT(27.1826451 63.5694237)']; + $geocoding = [ + 'POINT(27.1826451 63.5694237)', + 'POINT(20.0 60.0)', + ]; $fields = $this->createRecord( Qdc::class, 'qdc_dateranges.xml', diff --git a/tests/fixtures/Finna/record/qdc_dateranges.xml b/tests/fixtures/Finna/record/qdc_dateranges.xml index 2669f707..1a7299d0 100644 --- a/tests/fixtures/Finna/record/qdc_dateranges.xml +++ b/tests/fixtures/Finna/record/qdc_dateranges.xml @@ -62,5 +62,6 @@ Vantaa 63.0,27.0 63.5694237,27.1826451 + 20.0,60.0 Iisalmi