Skip to content

Commit

Permalink
[FINNA-623] QDC: Add a check for detecting invalid language codes (#60)
Browse files Browse the repository at this point in the history
  • Loading branch information
LuomaJuha authored Feb 16, 2024
1 parent d3940ff commit 509b478
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
28 changes: 28 additions & 0 deletions src/RecordManager/Finna/Record/Qdc.php
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,34 @@ protected function getHierarchyFields(array &$data): void
}
}

/**
 * Get languages
 *
 * Reads every language element of the record, splits its content into
 * whitespace-separated tokens and strips a leading lexvo.org ISO 639 URI
 * prefix from each token. A token is accepted only if it is at most nine
 * characters long and consists solely of lowercase letters; anything else is
 * reported with storeWarning() and skipped. Accepted tokens are cut into
 * chunks of up to three characters, and the collected chunks are passed
 * through metadataUtils->normalizeLanguageStrings().
 *
 * @return array
 */
protected function getLanguages()
{
    $languages = [];
    foreach ($this->doc->language as $language) {
        // Split on any run of whitespace and drop empty tokens, so that an
        // empty element, repeated spaces, tabs or line breaks between codes
        // do not end up as a bogus "unhandled language" warning.
        $parts = preg_split(
            '/\s+/',
            trim((string)$language),
            -1,
            PREG_SPLIT_NO_EMPTY
        ) ?: [];
        foreach ($parts as $part) {
            // Reduce e.g. http://lexvo.org/id/iso639-3/fin to "fin"
            $check = preg_replace(
                '/^http:\/\/lexvo\.org\/id\/iso639-.\/(.*)/',
                '$1',
                $part
            );
            // Check that the language given is in proper form
            if (mb_strlen($check) > 9 || !ctype_lower($check)) {
                $this->storeWarning("unhandled language $check");
                continue;
            }
            // A token may hold several concatenated codes; take them apart
            // in chunks of up to three characters.
            foreach (str_split($check, 3) as $code) {
                $languages[] = $code;
            }
        }
    }
    return $this->metadataUtils->normalizeLanguageStrings($languages);
}

/**
* Get online URLs
*
Expand Down
34 changes: 34 additions & 0 deletions tests/RecordManagerTest/Finna/Record/QdcTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,38 @@ public function testMediaTypes()
$fields['media_type_str_mv']
);
}

/**
 * Test that malformed language codes in the fixture are reported as
 * processing warnings while the valid code still reaches the language field
 *
 * @return void
 */
public function testQdcLanguageWarnings()
{
    $expectedWarnings = [
        'unhandled language Veryodd',
        'unhandled language verylonglanguagehere',
        'unhandled language EnGb',
        'unhandled language caT',
    ];
    $expectedLanguages = ['fi'];

    $qdc = $this->createRecord(
        Qdc::class,
        'qdc_language_warnings.xml',
        [],
        'Finna',
        [$this->createMock(\RecordManager\Base\Http\ClientManager::class)]
    );
    // Build the Solr fields first; the warnings are read only afterwards.
    $solrFields = $qdc->toSolrArray();

    $this->compareArray(
        $expectedWarnings,
        $qdc->getProcessingWarnings(),
        'getProcessingWarnings'
    );
    $this->compareArray(
        $expectedLanguages,
        $solrFields['language'],
        'LanguageCheckAfterWarnings'
    );
}
}
13 changes: 13 additions & 0 deletions tests/fixtures/Finna/record/qdc_language_warnings.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0"?>
<qualifieddc schemaLocation="http://purl.org/dc/terms/ http://dublincore.org/schemas/xmls/qdc/2006/01/06/dcterms.xsd http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2006/01/06/dc.xsd">
<title lang="fi">Language test xml</title>
<language type="iso">Veryodd</language>
<language type="iso">verylonglanguagehere</language>
<language type="iso">EnGb</language>
<language type="iso">caT</language>
<language type="iso">fi</language>
<rights>CC BY-NC-ND 4.0</rights>
<publisher lang="fi">Sanitation Project, Research Institute for Humanity and Nature</publisher>
<permaddress type="doi">http://dx.doi.org/https://doi.org/10.34416/svc.00029</permaddress>
<recordID>10138_331330</recordID>
</qualifieddc>

0 comments on commit 509b478

Please sign in to comment.