forked from datahub-project/datahub
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ingest): add YamlFileUpdater utility (datahub-project#8266)
- Loading branch information
Showing
3 changed files
with
138 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
metadata-ingestion/src/datahub/utilities/yaml_sync_utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import contextlib | ||
import pathlib | ||
from typing import Any, Iterator | ||
|
||
import ruamel.yaml.util | ||
from ruamel.yaml import YAML | ||
|
||
|
||
@contextlib.contextmanager
def YamlFileUpdater(file: pathlib.Path) -> Iterator[Any]:
    """Edit a YAML file in place from inside a ``with`` block.

    The file is loaded with a ``YAML()`` instance that has
    ``preserve_quotes`` enabled, and the loaded document is yielded so the
    caller can mutate it. When the ``with`` body finishes normally, the
    indentation of the file's current text is guessed with
    ``load_yaml_guess_indent`` and the document is dumped back to ``file``
    using that indentation.

    There is no ``try``/``finally`` around the ``yield``, so if the ``with``
    body raises, the write-back code below the ``yield`` does not run.

    Args:
        file: Path of the YAML file to load and then overwrite.

    Yields:
        The document returned by ``yaml.load(file)``; mutate it in place.
    """
    yaml = YAML()
    yaml.preserve_quotes = True  # type: ignore[assignment]

    doc = yaml.load(file)

    # Allow the user to make changes to the doc.
    # TODO: Enable replacing the doc entirely.
    yield doc

    # Guess the indentation already used in the file so the dump can keep it.
    # Only the two indentation values are needed; the re-parsed data is dropped.
    guessed = ruamel.yaml.util.load_yaml_guess_indent(file.read_text())
    indent, block_seq_indent = guessed[1], guessed[2]

    # Very large width; presumably keeps the emitter from re-wrapping long
    # lines on dump (confirm against the ruamel.yaml documentation).
    yaml.width = 2**20  # type: ignore[assignment]

    yaml.sequence_indent = indent
    yaml.block_seq_indent = block_seq_indent

    # TODO: Some folks use a different mapping indent than sequence indent.
    # We should support that, but for now, we just use the sequence indent...
    map_indent = indent
    if indent == 4 and block_seq_indent == 2:
        # ...except that (2, 4, 2) is much more common than (4, 4, 2).
        map_indent = 2
    yaml.map_indent = map_indent

    yaml.dump(doc, file)
91 changes: 91 additions & 0 deletions
91
metadata-ingestion/tests/unit/utilities/test_yaml_sync_utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import pathlib | ||
|
||
from datahub.utilities.yaml_sync_utils import YamlFileUpdater | ||
|
||
|
||
def test_update_yaml_file(tmp_path: pathlib.Path) -> None: | ||
# Checks that edits made to the doc yielded by YamlFileUpdater are written back to the file. | ||
infile = tmp_path / "test.yml" | ||
|
||
# Note - this will drop the leading newline before the comment. | ||
infile.write_text( | ||
""" | ||
# this is a comment | ||
# | ||
obj: | ||
key1: value1 | ||
list_ty: | ||
- foo | ||
- key1: value1 | ||
key2: value2 | ||
""" | ||
) | ||
# Indentation values the updater is expected to guess for the input above. | ||
# ind=4, bsi=2 | ||
|
||
# Add a top-level key, append a list item, and add a key to the mapping inside the list. | ||
with YamlFileUpdater(infile) as doc: | ||
doc["foo"] = "bar" | ||
doc["list_ty"].append("baz") | ||
doc["list_ty"][1]["key1.5"] = "val1.5" | ||
|
||
# Compare the whole file text, so the comment lines, key order and layout are all part of the check. | ||
assert ( | ||
infile.read_text() | ||
== """# this is a comment | ||
# | ||
obj: | ||
key1: value1 | ||
list_ty: | ||
- foo | ||
- key1: value1 | ||
key2: value2 | ||
key1.5: val1.5 | ||
- baz | ||
foo: bar | ||
""" | ||
) |
|
||
|
||
def test_indentation_inference(tmp_path: pathlib.Path) -> None: | ||
# Adds one top-level key and checks the rewritten file text; per the test name, | ||
# the point is that the file's existing indentation is inferred and kept. | ||
infile = tmp_path / "test.yml" | ||
|
||
infile.write_text( | ||
""" | ||
# this is a comment | ||
# | ||
obj: | ||
key1: value1 | ||
list_ty: | ||
- foo | ||
- key1: value1 | ||
key2: value2 | ||
""" | ||
) | ||
# Indentation values the updater is expected to guess for the input above. | ||
# ind=2, bsi=0 | ||
|
||
with YamlFileUpdater(infile) as doc: | ||
doc["foo"] = "bar" | ||
|
||
# Exact comparison of the full file text; as in the input, the leading blank line is not expected back. | ||
assert ( | ||
infile.read_text() | ||
== """# this is a comment | ||
# | ||
obj: | ||
key1: value1 | ||
list_ty: | ||
- foo | ||
- key1: value1 | ||
key2: value2 | ||
foo: bar | ||
""" | ||
) |
|
||
|
||
# TODO: This yaml indentation will fail, because the mapping indent is 2 but the sequence indent is 4. | ||
# The string below is a bare expression that no test uses; it only records the unsupported input. | ||
""" | ||
x: | ||
y: | ||
- b: 1 | ||
- 2 | ||
"""