Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace XMLNodeEntry with new XMLElementEntry #1068

Merged
merged 1 commit into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/components/core/building-blocks.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ $rows = array_to_rows([
- [Structure](../../../src/core/etl/src/Flow/ETL/Row/Entry/StructureEntry.php)
- [Uuid](../../../src/core/etl/src/Flow/ETL/Row/Entry/UuidEntry.php)
- [XML](../../../src/core/etl/src/Flow/ETL/Row/Entry/XMLEntry.php)
- [XMLNode](../../../src/core/etl/src/Flow/ETL/Row/Entry/XMLNodeEntry.php)
- [XMLElement](../../../src/core/etl/src/Flow/ETL/Row/Entry/XMLElementEntry.php)

Internally, Flow uses [EntryFactory](../../../src/core/etl/src/Flow/ETL/Row/Factory/NativeEntryFactory.php) to create entries.
It will try to detect and create the most appropriate entry type based on the value.
Expand Down
8 changes: 4 additions & 4 deletions examples/topics/data_source/xml/code.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
__DIR__ . '/input/dataset.xml',
'users/user'
))
->withEntry('id', ref('node')->xpath('@id')->domNodeValue())
->withEntry('name', ref('node')->xpath('name')->domNodeValue())
->withEntry('active', ref('node')->xpath('active')->domNodeValue())
->withEntry('email', ref('node')->xpath('email')->domNodeValue())
->withEntry('id', ref('node')->xpath('@id')->domElementValue())
->withEntry('name', ref('node')->xpath('name')->domElementValue())
->withEntry('active', ref('node')->xpath('active')->domElementValue())
->withEntry('email', ref('node')->xpath('email')->domElementValue())
->drop('node')
->collect()
->write(to_stream(__DIR__ . '/output.txt', truncate: false))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
use function Flow\ETL\DSL\type_string;
use Flow\ETL\Exception\RuntimeException;
use Flow\ETL\PHP\Type\Logical\Structure\StructureElement;
use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, XMLNodeType, XMLType};
use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, XMLElementType, XMLType};
use Flow\ETL\PHP\Type\Native\{ArrayType, EnumType, ObjectType, ScalarType};
use Flow\ETL\Row\Schema;
use Flow\ETL\Row\Schema\Definition;
Expand Down Expand Up @@ -104,7 +104,7 @@ private function convert(Definition $definition) : array
}

$avroType = match ($type::class) {
JsonType::class, UuidType::class, XMLType::class, XMLNodeType::class => ['name' => $definition->entry()->name(), 'type' => \AvroSchema::STRING_TYPE],
JsonType::class, UuidType::class, XMLType::class, XMLElementType::class => ['name' => $definition->entry()->name(), 'type' => \AvroSchema::STRING_TYPE],
EnumType::class => [
'name' => $definition->entry()->name(),
'type' => [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use Flow\ETL\Exception\RuntimeException;
use Flow\ETL\PHP\Type\Logical\Map\{MapKey, MapValue};
use Flow\ETL\PHP\Type\Logical\Structure\StructureElement;
use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, XMLNodeType, XMLType};
use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, XMLElementType, XMLType};
use Flow\ETL\PHP\Type\Native\{ObjectType, ScalarType};
use Flow\ETL\PHP\Type\Type;
use Flow\ETL\Row\{Entry, Schema};
Expand Down Expand Up @@ -67,7 +67,7 @@ private function flowListToParquetList(ListType $type) : ListElement
case JsonType::class:
return ListElement::json();
case XMLType::class:
case XMLNodeType::class:
case XMLElementType::class:
return ListElement::string();
case ObjectType::class:
$class = $element->class;
Expand Down Expand Up @@ -143,7 +143,7 @@ private function flowMapValueToParquetMapValue(MapValue $mapValue) : ParquetSche
case JsonType::class:
return ParquetSchema\MapValue::json();
case XMLType::class:
case XMLNodeType::class:
case XMLElementType::class:
return ParquetSchema\MapValue::string();
case ObjectType::class:
$class = $mapValueType->class;
Expand Down Expand Up @@ -226,7 +226,7 @@ private function flowTypeToParquetType(string $name, Type $type) : Column
case JsonType::class:
return FlatColumn::json($name);
case XMLType::class:
case XMLNodeType::class:
case XMLElementType::class:
return FlatColumn::string($name);
case ObjectType::class:
return $this->flowObjectToParquetFlat($type, $name);
Expand Down
21 changes: 14 additions & 7 deletions src/core/etl/src/Flow/ETL/DSL/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@
use Flow\ETL\PHP\Type\Logical\List\ListElement;
use Flow\ETL\PHP\Type\Logical\Map\{MapKey, MapValue};
use Flow\ETL\PHP\Type\Logical\Structure\StructureElement;
use Flow\ETL\PHP\Type\Logical\{DateTimeType, JsonType, ListType, MapType, StructureType, UuidType, XMLNodeType, XMLType};
use Flow\ETL\PHP\Type\Logical\{DateTimeType,
JsonType,
ListType,
MapType,
StructureType,
UuidType,
XMLElementType,
XMLType};
use Flow\ETL\PHP\Type\Native\{ArrayType, CallableType, EnumType, NullType, ObjectType, ResourceType, ScalarType};
use Flow\ETL\PHP\Type\{Type, TypeDetector};
use Flow\ETL\Row\Factory\NativeEntryFactory;
Expand Down Expand Up @@ -263,9 +270,9 @@ function xml_entry(string $name, \DOMDocument|string|null $value) : Entry\XMLEnt
return new Entry\XMLEntry($name, $value);
}

function xml_node_entry(string $name, ?\DOMNode $value) : Entry\XMLNodeEntry
function xml_element_entry(string $name, \DOMElement|string|null $value) : Entry\XMLElementEntry
{
return new Entry\XMLNodeEntry($name, $value);
return new Entry\XMLElementEntry($name, $value);
}

function entries(Entry ...$entries) : Row\Entries
Expand Down Expand Up @@ -352,9 +359,9 @@ function type_xml(bool $nullable = false) : XMLType
return new XMLType($nullable);
}

function type_xml_node(bool $nullable = false) : XMLNodeType
function type_xml_element(bool $nullable = false) : XMLElementType
{
return new XMLNodeType($nullable);
return new XMLElementType($nullable);
}

function type_uuid(bool $nullable = false) : UuidType
Expand Down Expand Up @@ -1067,9 +1074,9 @@ function xml_schema(string $name, bool $nullable = false, ?Schema\Metadata $meta
return Definition::xml($name, $nullable, $metadata);
}

function xml_node_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition
function xml_element_schema(string $name, bool $nullable = false, ?Schema\Metadata $metadata = null) : Definition
{
return Definition::xml_node($name, $nullable, $metadata);
return Definition::xml_element($name, $nullable, $metadata);
}

function struct_schema(string $name, StructureType $type, ?Schema\Metadata $metadata = null) : Definition
Expand Down
2 changes: 1 addition & 1 deletion src/core/etl/src/Flow/ETL/Formatter/ASCII/ASCIIValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ private function stringValue() : string
if ($val instanceof Entry) {
$this->stringValue = $val->toString();

if ($val instanceof Entry\XMLEntry || $val instanceof Entry\XMLNodeEntry) {
if ($val instanceof Entry\XMLEntry || $val instanceof Entry\XMLElementEntry) {
$this->stringValue = \str_replace("\n", '', $this->stringValue);
}

Expand Down
35 changes: 35 additions & 0 deletions src/core/etl/src/Flow/ETL/Function/DOMElementAttribute.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Function;

use Flow\ETL\Row;

final class DOMElementAttribute extends ScalarFunctionChain
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another good scalar function would be DOMElementHasAttribute, which could be used together with a when() expression, like when(ref('node')->domElementHasAttribute("id"))->then(...), but it's out of the scope of this PR.

{
/**
 * @param ScalarFunction $ref function whose evaluated result is inspected for the attribute
 * @param string $attribute name of the attribute to look up
 */
public function __construct(
    private readonly ScalarFunction $ref,
    private readonly string $attribute,
) {
}

/**
 * Reads the configured attribute from the value produced by the referenced function.
 *
 * @return null|string the node value of the named attribute; null when the reference does not
 *                     evaluate to a \DOMElement, the element has no attributes at all,
 *                     or no attribute with the configured name exists
 */
public function eval(Row $row) : ?string
{
    $element = $this->ref->eval($row);

    // Anything that is not an element, or an element without any attributes, yields no value.
    if (!$element instanceof \DOMElement || !$element->hasAttributes()) {
        return null;
    }

    // getNamedItem() gives back null for a missing attribute, which the nullsafe access passes through.
    return $element->attributes->getNamedItem($this->attribute)?->nodeValue;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use Flow\ETL\Row;

final class DOMNodeValue extends ScalarFunctionChain
final class DOMElementValue extends ScalarFunctionChain
{
public function __construct(private readonly ScalarFunction $ref)
{
Expand All @@ -17,7 +17,7 @@ public function eval(Row $row) : mixed
/** @var mixed $value */
$value = $this->ref->eval($row);

if (!$value instanceof \DOMNode) {
if (!$value instanceof \DOMElement) {
return null;
}

Expand Down
46 changes: 0 additions & 46 deletions src/core/etl/src/Flow/ETL/Function/DOMNodeAttribute.php

This file was deleted.

8 changes: 4 additions & 4 deletions src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,14 @@ public function divide(ScalarFunction $ref) : self
return new Divide($this, $ref);
}

public function domNodeAttribute(string $attribute) : self
public function domElementAttribute(string $attribute) : self
{
return new DOMNodeAttribute($this, $attribute);
return new DOMElementAttribute($this, $attribute);
}

public function domNodeValue() : self
public function domElementValue() : self
{
return new DOMNodeValue($this);
return new DOMElementValue($this);
}

public function endsWith(ScalarFunction $needle) : self
Expand Down
7 changes: 1 addition & 6 deletions src/core/etl/src/Flow/ETL/Function/XPath.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,10 @@ public function __construct(private readonly ScalarFunction $ref, private readon
*/
public function eval(Row $row) : \DOMNode|array|null
{
/** @var mixed $value */
$value = $this->ref->eval($row);

if ($value instanceof \DOMNode && !$value instanceof \DOMDocument) {
$newDom = new \DOMDocument();
$newNode = $newDom->importNode($value, true);
$newDom->append($newNode);

$value = $newDom;
$value = (new \DOMDocument())->importNode($value, true);
}

if (!$value instanceof \DOMDocument) {
Expand Down
15 changes: 12 additions & 3 deletions src/core/etl/src/Flow/ETL/PHP/Type/Logical/List/ListElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,16 @@

namespace Flow\ETL\PHP\Type\Logical\List;

use function Flow\ETL\DSL\{type_boolean, type_datetime, type_float, type_int, type_json, type_object, type_string, type_uuid, type_xml, type_xml_node};
use function Flow\ETL\DSL\{type_boolean,
type_datetime,
type_float,
type_int,
type_json,
type_object,
type_string,
type_uuid,
type_xml,
type_xml_element};
use Flow\ETL\Exception\InvalidArgumentException;
use Flow\ETL\PHP\Type\Logical\{ListType, MapType, StructureType};
use Flow\ETL\PHP\Type\{Type, TypeFactory};
Expand Down Expand Up @@ -96,9 +105,9 @@ public static function xml(bool $nullable = false) : self
return new self(type_xml($nullable));
}

public static function xml_node(bool $nullable = false) : self
public static function xml_element(bool $nullable = false) : self
{
return new self(type_xml_node($nullable));
return new self(type_xml_element($nullable));
}

public function isEqual(mixed $value) : bool
Expand Down
13 changes: 10 additions & 3 deletions src/core/etl/src/Flow/ETL/PHP/Type/Logical/Map/MapValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@

namespace Flow\ETL\PHP\Type\Logical\Map;

use function Flow\ETL\DSL\{type_boolean, type_datetime, type_float, type_int, type_object, type_string, type_xml, type_xml_node};
use function Flow\ETL\DSL\{type_boolean,
type_datetime,
type_float,
type_int,
type_object,
type_string,
type_xml,
type_xml_element};
use Flow\ETL\Exception\InvalidArgumentException;
use Flow\ETL\PHP\Type\Logical\{ListType, MapType};
use Flow\ETL\PHP\Type\{Type, TypeFactory};
Expand Down Expand Up @@ -82,9 +89,9 @@ public static function xml() : self
return new self(type_xml());
}

public static function xmlNode() : self
public static function xmlElement() : self
{
return new self(type_xml_node());
return new self(type_xml_element());
}

public function isEqual(mixed $value) : bool
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use Flow\ETL\PHP\Type\Native\NullType;
use Flow\ETL\PHP\Type\Type;

final class XMLNodeType implements LogicalType
final class XMLElementType implements LogicalType
{
public function __construct(private readonly bool $nullable)
{
Expand All @@ -30,7 +30,7 @@ public function isValid(mixed $value) : bool
return true;
}

if ($value instanceof \DOMNode) {
if ($value instanceof \DOMElement) {
return true;
}

Expand Down Expand Up @@ -58,7 +58,7 @@ public function merge(Type $type) : self
public function normalize() : array
{
return [
'type' => 'xml_node',
'type' => 'xml_element',
'nullable' => $this->nullable,
];
}
Expand All @@ -70,6 +70,6 @@ public function nullable() : bool

public function toString() : string
{
return ($this->nullable ? '?' : '') . 'xml_node';
return ($this->nullable ? '?' : '') . 'xml_element';
}
}
17 changes: 14 additions & 3 deletions src/core/etl/src/Flow/ETL/PHP/Type/TypeDetector.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,18 @@

namespace Flow\ETL\PHP\Type;

use function Flow\ETL\DSL\{type_array, type_boolean, type_datetime, type_float, type_int, type_json, type_null, type_object, type_string, type_uuid, type_xml, type_xml_node};
use function Flow\ETL\DSL\{type_array,
type_boolean,
type_datetime,
type_float,
type_int,
type_json,
type_null,
type_object,
type_string,
type_uuid,
type_xml,
type_xml_element};
use Flow\ETL\Exception\InvalidArgumentException;
use Flow\ETL\PHP\Type\Logical\List\ListElement;
use Flow\ETL\PHP\Type\Logical\Structure\StructureElement;
Expand Down Expand Up @@ -85,8 +96,8 @@ public function detectType(mixed $value) : Type
return type_xml();
}

if (type_xml_node()->isValid($value)) {
return type_xml_node();
if (type_xml_element()->isValid($value)) {
return type_xml_element();
}

return type_object($value::class);
Expand Down
Loading
Loading