Skip to content

Commit

Permalink
Context groups parsing improvements (#87)
Browse files Browse the repository at this point in the history
* Context groups parsing improvements

* refactoring

* Ready to be merged
  • Loading branch information
mauretto78 authored Apr 17, 2024
1 parent 0e1770b commit 7756325
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 9 deletions.
36 changes: 29 additions & 7 deletions src/XliffParser/AbstractXliffParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
use Psr\Log\LoggerInterface;

abstract class AbstractXliffParser {

const MAX_GROUP_RECURSION_LEVEL = 5;

/**
* @var LoggerInterface
*/
Expand Down Expand Up @@ -57,24 +60,42 @@ abstract public function parse( DOMDocument $dom, $output = [] );
/**
 * Extract trans-unit content from the current node.
 *
 * A node is handled when it is either a `group` element or the trans-unit
 * element of the concrete parser (see getTuTagName()); any other node is ignored.
 * For a `group`, the `context-group` elements that are its direct children are
 * appended to the ones inherited from the ancestor groups and handed down to every
 * trans-unit (and nested group) found inside it.
 *
 * Nested groups are followed only while their depth stays below
 * self::MAX_GROUP_RECURSION_LEVEL; deeper groups are skipped together with
 * everything they contain.
 *
 * @param             $childNode                          node to inspect
 * @param             $transUnitIdArrayForUniquenessCheck forwarded by reference to extractTransUnit()
 * @param DOMDocument $dom
 * @param             $output                             result array, filled by extractTransUnit()
 * @param             $i                                  forwarded by reference to extractTransUnit()
 * @param             $j                                  forwarded by reference to extractTransUnit()
 * @param array       $contextGroups                      context-group nodes inherited from the ancestor groups
 * @param int         $recursionLevel                     nesting depth of $childNode (0 for the outermost call)
 */
protected function extractTuFromNode( $childNode, &$transUnitIdArrayForUniquenessCheck, DOMDocument $dom, &$output, &$i, &$j, $contextGroups = [], $recursionLevel = 0 ) {
    if ( $childNode->nodeName === 'group' ) {

        // Collect the context-groups first, in a separate pass, so that they apply
        // to every trans-unit of this group regardless of where they appear among
        // the children. $contextGroups is received by value, so the additions are
        // visible only to this group and its descendants.
        foreach ( $childNode->childNodes as $nestedChildNode ) {
            if ( $nestedChildNode->nodeName === 'context-group' ) {
                $contextGroups[] = $nestedChildNode;
            }
        }

        // Depth of the groups nested directly inside this one. It is computed once,
        // outside the loop: incrementing the counter per iteration would count
        // sibling groups as extra depth and drop groups that are not deep at all.
        $nestedLevel = $recursionLevel + 1;

        foreach ( $childNode->childNodes as $nestedChildNode ) {

            // nested groups
            if ( $nestedChildNode->nodeName === 'group' ) {

                // avoid too deep recursion
                if ( $nestedLevel < self::MAX_GROUP_RECURSION_LEVEL ) {
                    $this->extractTuFromNode( $nestedChildNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j, $contextGroups, $nestedLevel );
                }

            } elseif ( $nestedChildNode->nodeName === $this->getTuTagName() ) {
                $this->extractTransUnit( $nestedChildNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j, $contextGroups );
            }
        }
    } elseif ( $childNode->nodeName === $this->getTuTagName() ) {
        $this->extractTransUnit( $childNode, $transUnitIdArrayForUniquenessCheck, $dom, $output, $i, $j, $contextGroups );
    }
}

Expand All @@ -87,10 +108,11 @@ protected function extractTuFromNode( $childNode, &$transUnitIdArrayForUniquenes
* @param $output
* @param $i
* @param $j
* @param $contextGroups
*
* @return mixed
*/
abstract protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j );
abstract protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j,$contextGroups = [] );

/**
* @param DOMDocument $dom
Expand Down
9 changes: 8 additions & 1 deletion src/XliffParser/XliffParserV1.php
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,11 @@ private function extractReference( DOMElement $file ) {
* @param $output
* @param $i
* @param $j
* @param $contextGroups
*
* @throws Exception
*/
protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j ) {
protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j, $contextGroups = [] ) {
// metadata
$output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ] = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck );

Expand Down Expand Up @@ -184,6 +185,12 @@ protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniqueness
}

// context-group
if(!empty($contextGroups)){
foreach ($contextGroups as $contextGroup){
$output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
}
}

foreach ( $transUnit->getElementsByTagName( 'context-group' ) as $contextGroup ) {
$output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'context-group' ][] = $this->extractTransUnitContextGroup( $dom, $contextGroup );
}
Expand Down
3 changes: 2 additions & 1 deletion src/XliffParser/XliffParserV2.php
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,11 @@ private function extractNotes( DOMElement $file ) {
* @param $output
* @param $i
* @param $j
* @param $contextGroups
*
* @throws \Exception
*/
protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j ) {
protected function extractTransUnit( $transUnit, &$transUnitIdArrayForUniquenessCheck, $dom, &$output, &$i, &$j, $contextGroups = [] ) {
// metadata
$output[ 'files' ][ $i ][ 'trans-units' ][ $j ][ 'attr' ] = $this->extractTransUnitMetadata( $transUnit, $transUnitIdArrayForUniquenessCheck );

Expand Down
23 changes: 23 additions & 0 deletions tests/XliffParserV1Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -638,4 +638,27 @@ public function can_parse_segment_state_attribute()
$this->assertEquals($parsed['files'][1]['trans-units'][$i]['seg-target'][0]['attr']['state'], $states[$i-1]);
}
}

/**
 * A context-group declared on a group must be reported on every trans-unit
 * contained in the groups nested inside it.
 *
 * @test
 */
public function can_parse_context_group()
{
    $parsed = (new XliffParser())->xliffToArray($this->getTestFile('context-group.xlf'));

    // Both trans-units of the fixture sit under the same group, so both must
    // carry the same two contexts. Expected value goes first, actual second.
    $this->assertEquals("PSMS-ID-ec2d50b6-d0ce-4672-b8df-9ea82616d85c::1", $parsed['files'][1]['trans-units'][1]['context-group'][0]['contexts'][0]['raw-content']);
    $this->assertEquals("Translation Context: Admin Portal Title text", $parsed['files'][1]['trans-units'][1]['context-group'][0]['contexts'][1]['raw-content']);
    $this->assertEquals("PSMS-ID-ec2d50b6-d0ce-4672-b8df-9ea82616d85c::1", $parsed['files'][1]['trans-units'][2]['context-group'][0]['contexts'][0]['raw-content']);
    $this->assertEquals("Translation Context: Admin Portal Title text", $parsed['files'][1]['trans-units'][2]['context-group'][0]['contexts'][1]['raw-content']);
}

/**
 * Groups nested beyond the parser's recursion limit are skipped.
 *
 * The fixture holds four trans-units: two buried under a long chain of nested
 * groups and two under a shallow group. Only two are expected in the output
 * (presumably the shallow pair, tu32 and tu33).
 *
 * @test
 */
public function can_skip_too_deep_nested_group()
{
    $xliffParser = new XliffParser();
    $result      = $xliffParser->xliffToArray($this->getTestFile('context-group-nested.xlf'));
    $transUnits  = $result['files'][1]['trans-units'];

    $this->assertCount(2, $transUnits);
}
}
67 changes: 67 additions & 0 deletions tests/files/context-group-nested.xlf
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<xliff
xmlns="urn:oasis:names:tc:xliff:document:1.2"
xmlns:okp="okapi-framework:xliff-extensions" version="1.2"
xmlns:mtc="https://www.matecat.com">
<file datatype="x-mxliff" original="context-group.xlf" source-language="en-US" target-language="it-IT">
<slr:profiles generalProfile="xliff:codepoints"/>
</file>
<file datatype="x-undefined" original="GitFarm-AuthAdminPortalFrontend-8f058d3-uok6BxzR-operations-af-ZA.xlf" source-language="en-US" target-language="it-IT">
<body>
<group id="0">
<group id="1">
<group id="2">
<group id="3">
<group id="4">
<group id="5">
<group id="0">
<group id="1">
<group id="2">
<group id="3">
<group id="4">
<group id="5">
<context-group purpose="information">
<context context-type="x-key">PSMS-ID-ec2d50b6-d0ce-4672-b8df-9ea82616d85c::1</context>
<context context-type="x-key-note">Translation Context: Admin Portal Title text</context>
</context-group>
<group id="32">
<trans-unit id="tu1" xml:space="preserve" translate="yes">
<source xml:lang="en-US">First segment</source>
<target xml:lang="it-IT" state="new">Primo segmento</target>
</trans-unit>
<trans-unit id="tu2" xml:space="preserve" translate="yes">
<source xml:lang="en-US">Second segment</source>
<target xml:lang="it-IT" state="new">Segmento segmento</target>
</trans-unit>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
</group>
<group id="66">
<group id="32">
<trans-unit id="tu32" xml:space="preserve" translate="yes">
<source xml:lang="en-US">First segment</source>
<target xml:lang="it-IT" state="new">Primo segmento</target>
</trans-unit>
<trans-unit id="tu33" xml:space="preserve" translate="yes">
<source xml:lang="en-US">Second segment</source>
<target xml:lang="it-IT" state="new">Segmento segmento</target>
</trans-unit>
</group>
<context-group purpose="information">
<context context-type="x-key">PSMS-ID-ec2d50b6-d0ce-4672-b8df-9ea82616d85c::1</context>
<context context-type="x-key-note">Translation Context: Admin Portal Title text</context>
</context-group>
</group>
</body>
</file>
</xliff>
29 changes: 29 additions & 0 deletions tests/files/context-group.xlf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<xliff
xmlns="urn:oasis:names:tc:xliff:document:1.2"
xmlns:okp="okapi-framework:xliff-extensions" version="1.2"
xmlns:mtc="https://www.matecat.com">
<file datatype="x-mxliff" original="context-group.xlf" source-language="en-US" target-language="it-IT">
<slr:profiles generalProfile="xliff:codepoints"/>
</file>
<file datatype="x-undefined" original="GitFarm-AuthAdminPortalFrontend-8f058d3-uok6BxzR-operations-af-ZA.xlf" source-language="en-US" target-language="it-IT">
<body>
<group id="0">
<context-group purpose="information">
<context context-type="x-key">PSMS-ID-ec2d50b6-d0ce-4672-b8df-9ea82616d85c::1</context>
<context context-type="x-key-note">Translation Context: Admin Portal Title text</context>
</context-group>
<group id="32">
<trans-unit id="tu1" xml:space="preserve" translate="yes">
<source xml:lang="en-US">First segment</source>
<target xml:lang="it-IT" state="new">Primo segmento</target>
</trans-unit>
<trans-unit id="tu2" xml:space="preserve" translate="yes">
<source xml:lang="en-US">Second segment</source>
<target xml:lang="it-IT" state="new">Segmento segmento</target>
</trans-unit>
</group>
</group>
</body>
</file>
</xliff>

0 comments on commit 7756325

Please sign in to comment.