Skip to content

Commit

Permalink
[FINNA-623] QDC: Tweak check for detecting invalid language codes (#61)
Browse files Browse the repository at this point in the history
  • Loading branch information
LuomaJuha authored Feb 22, 2024
1 parent 565e600 commit deadb3b
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
13 changes: 10 additions & 3 deletions src/RecordManager/Finna/Record/Qdc.php
Original file line number Diff line number Diff line change
Expand Up @@ -154,14 +154,21 @@ protected function getLanguages()
$languages = [];
foreach ($this->doc->language as $language) {
foreach (explode(' ', trim((string)$language)) as $part) {
// Remove stray commas and spaces from the start and end of a language code
$check = trim($part, ', ');
$check = preg_replace(
'/^http:\/\/lexvo\.org\/id\/iso639-.\/(.*)/',
'$1',
$part
$check
);
// en_US
if (str_contains($check, '_')) {
$check = explode('_', $check)[0];
}
// Check that the language given is in proper form
if (mb_strlen($check) > 9 || !ctype_lower($check)) {
$this->storeWarning("unhandled language $check");
if (!$check || strlen($check) > 9 || !ctype_lower($check)) {
$toLog = $part ?: 'EMPTY_VALUE';
$this->storeWarning("unhandled language $toLog");
continue;
}
foreach (str_split($check, 3) as $code) {
Expand Down
7 changes: 7 additions & 0 deletions tests/RecordManagerTest/Finna/Record/QdcTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,20 @@ public function testQdcLanguageWarnings()
'unhandled language verylonglanguagehere',
'unhandled language EnGb',
'unhandled language caT',
'unhandled language po,tt',
'unhandled language ,',
'unhandled language EMPTY_VALUE',
],
$record->getProcessingWarnings(),
'getProcessingWarnings'
);
$this->compareArray(
[
'fi',
'jp',
'sv',
'en',
'nr',
],
$fields['language'],
'LanguageCheckAfterWarnings'
Expand Down
5 changes: 5 additions & 0 deletions tests/fixtures/Finna/record/qdc_language_warnings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
<language type="iso">EnGb</language>
<language type="iso">caT</language>
<language type="iso">fi</language>
<language type="iso">jp_JP</language>
<language type="iso">sv, en, nr</language><!-- sv, en and nr should each validate as a language -->
<language type="iso">po,tt</language><!-- this should not validate as a language -->
<language type="iso">,</language>
<language type="iso"> </language>
<rights>CC BY-NC-ND 4.0</rights>
<publisher lang="fi">Sanitation Project, Research Institute for Humanity and Nature</publisher>
<permaddress type="doi">http://dx.doi.org/https://doi.org/10.34416/svc.00029</permaddress>
Expand Down

0 comments on commit deadb3b

Please sign in to comment.