From 1d180b4bdf8123092e12d514476410002468ab85 Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Fri, 21 Jul 2023 19:31:34 +0200 Subject: [PATCH 1/7] prepare transformer code for second api version Signed-off-by: Janine Olear --- .../connection/connection.py | 14 +++++++ .../transform/transform.py | 39 ++++--------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/src/cloudimagedirectory/connection/connection.py b/src/cloudimagedirectory/connection/connection.py index 94048820..2456a69e 100644 --- a/src/cloudimagedirectory/connection/connection.py +++ b/src/cloudimagedirectory/connection/connection.py @@ -31,6 +31,20 @@ def is_provided_by(self, name: str) -> bool: """Check the origin of the file.""" return f"{name}/" in self.filename + def is_API(self, api) -> bool: + """Check if the file is the actual API entry and not a sub url.""" + path = self.filename.split("/") + if path[1] != api: + return False + slash_count = self.filename.count("/") + if slash_count != 11: + return False + # NOTE: check length of hash value. + if len(path[len(path)-1]) != 40: + return False + + return True + class ConnectionFS: """Handles the connection to the filesystem.""" diff --git a/src/cloudimagedirectory/transform/transform.py b/src/cloudimagedirectory/transform/transform.py index 0011241a..6446eb97 100644 --- a/src/cloudimagedirectory/transform/transform.py +++ b/src/cloudimagedirectory/transform/transform.py @@ -84,9 +84,8 @@ class TransformerIdxListImageLatest(Transformer): # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: Transformer) -> list: # noqa: C901 - """Sort the raw data.""" - # NOTE: Verify that the data is not raw. - entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] + # NOTE: Verify that the data is from api v1. + entries = [x for x in data if x.is_API("v1")] # NOTE: Sort the list of data by date entries.sort( @@ -281,9 +280,8 @@ class TransformerIdxListImageNames(Transformer): # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: type[Transformer]) -> list: - """Sort the raw data.""" - # NOTE: Verify that the data is not raw. - entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] + # NOTE: Verify that the data is from api v1. + entries = [x for x in data if x.is_API("v1")] results = [] @@ -301,9 +299,8 @@ class TransformerV2All(Transformer): # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: type[Transformer]) -> list: - """Sort the raw data.""" - # NOTE: Verify that the data is not raw. - entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] + # NOTE: Verify that the data is from api v1. + entries = [x for x in data if x.is_API("v1")] results = [] @@ -351,7 +348,6 @@ def run(self, data: type[Transformer]) -> list: try: filename = entry.filename.split("/")[3] - print(entry.filename) os = filename.split("_")[0] if os not in os_list: @@ -361,27 +357,7 @@ def run(self, data: type[Transformer]) -> list: except IndexError: print(f"Could not format image, filename: {filename}") - rhel_products = { - "rh-ocp-worker", - "rh-oke-worker", - "rh-opp-worker", - "rh-rhel", - "rhel-arm64", - "rhel-byos", - "rhel-raw", - "rhel-sap-apps", - "rhel-sap-ha", - "rh", - } - - os_list_final: dict[Any, Any] = {} - for os, val in list(os_list.items()): - key = os - if os in rhel_products: - key = "rhel" - os_list_final[key] = os_list_final.get(key, 0) + val - - for os, val in os_list_final.items(): + for os, val in os_list.items(): desc = self.description.get(os, "no description") disp_name = self.display_name.get(os, "no display name") @@ -394,4 +370,5 @@ def run(self, data: type[Transformer]) -> list: results.append(entry_object) + # NOTE: Add /list suffix to prevent collision with "os" folder. return [connection.DataEntry("v2/os/list", results)] From 6dcdc4ee24f029e1911fa446fe955fa93732be1f Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Sun, 23 Jul 2023 23:46:38 +0200 Subject: [PATCH 2/7] build basic structure for second api version --- .../connection/connection.py | 9 +- .../transform/transform.py | 141 ++++++++++++++++-- src/cloudimagedirectory/transformer.py | 26 +++- 3 files changed, 159 insertions(+), 17 deletions(-) diff --git a/src/cloudimagedirectory/connection/connection.py b/src/cloudimagedirectory/connection/connection.py index 2456a69e..c3e74820 100644 --- a/src/cloudimagedirectory/connection/connection.py +++ b/src/cloudimagedirectory/connection/connection.py @@ -34,11 +34,16 @@ def is_provided_by(self, name: str) -> bool: def is_API(self, api) -> bool: """Check if the file is the actual API entry and not a sub url.""" path = self.filename.split("/") - if path[1] != api: + if path[0] != api: return False + + if path[0] == "v1": + return True + slash_count = self.filename.count("/") - if slash_count != 11: + if slash_count != 10: return False + # NOTE: check length of hash value. if len(path[len(path)-1]) != 40: return False diff --git a/src/cloudimagedirectory/transform/transform.py b/src/cloudimagedirectory/transform/transform.py index 6446eb97..02664516 100644 --- a/src/cloudimagedirectory/transform/transform.py +++ b/src/cloudimagedirectory/transform/transform.py @@ -1,6 +1,8 @@ """Transforms the raw data into useful data.""" import copy import os +import hashlib + from datetime import datetime from typing import Any, Callable, no_type_check @@ -84,8 +86,8 @@ class TransformerIdxListImageLatest(Transformer): # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: Transformer) -> list: # noqa: C901 - # NOTE: Verify that the data is from api v1. - entries = [x for x in data if x.is_API("v1")] + # NOTE: Verify that the data is not raw. + entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] # NOTE: Sort the list of data by date entries.sort( @@ -280,8 +282,8 @@ class TransformerIdxListImageNames(Transformer): # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: type[Transformer]) -> list: - # NOTE: Verify that the data is from api v1. - entries = [x for x in data if x.is_API("v1")] + # NOTE: Verify that the data is not raw. + entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] results = [] @@ -293,14 +295,132 @@ def run(self, data: type[Transformer]) -> list: return [connection.DataEntry("v1/idx/list/image-names", results)] +class TransformerAWSV2RHEL(Transformer): + """Transform raw rhel AWS data into the schema.""" + + def run(self, data): + """Transform the raw data.""" + # NOTE: Verify that the data is raw. + entries = [x for x in data if x.is_provided_by("aws") and x.is_raw()] + + results = [] + for e in entries: + entry = copy.deepcopy(e) + + raw = self.src_conn.get_content(entry) + region = os.path.basename(raw.filename).split(".")[0] + + for content in raw.content: + if content["OwnerId"] != config.AWS_RHEL_OWNER_ID: + continue + + image_data = format_aws.image_rhel(content, region) + image_name = image_data["name"].replace(" ", "_").lower() + os_name = "rhel" + provider = "aws" + version = image_data["version"] + # NOTE: Due to consistency issues between the cloud providers and the fact + # that they do not all have unique numbers to identify their images, we decided + # to use this solution instead. + image_id = hashlib.sha1(image_name.encode()).hexdigest() + + # NOTE: example of expected paths + # v2/os/rhel/provider/aws/version/8.6.0/region/eu-west-3/image/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 + # v2/os/rhel/provider/aws/version/8.2.0/region/eu-north-1/image/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 + path = f"v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" + data_entry = connection.DataEntry(path, image_data) + + results.append(data_entry) + return results + + +class TransformerAzureV2RHEL(Transformer): + """Transform raw rhel Azure data into the schema.""" + + def run(self, data): + """Transform the raw data.""" + # NOTE: Verify that the data is raw and provided by azure. + entries = [x for x in data if x.is_provided_by("azure") and x.is_raw()] + + results = [] + for e in entries: + entry = copy.deepcopy(e) + + raw = self.src_conn.get_content(entry) + region = os.path.basename(raw.filename).split(".")[0] + + for content in raw.content: + if content["publisher"] != "RedHat": + continue + + content["hyperVGeneration"] = "unknown" + + image_data = format_azure.image_rhel(content) + image_name = image_data["name"].replace(" ", "_").lower() + os_name = "rhel" + provider = "azure" + version = image_data["version"] + # NOTE: Due to consistency issues between the cloud providers and the fact + # that they do not all have unique numbers to identify their images, we decided + # to use this solution instead. + image_id = hashlib.sha1(image_name.encode()).hexdigest() + + # NOTE: example of expected paths + # /v2/rhel/azure/8.6.0/af-south-1/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 + # /v2/rhel/azure/8.2.0/af-south-1/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 + path = f"/v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" + data_entry = connection.DataEntry(path, image_data) + + results.append(data_entry) + return results + + +class TransformerGoogleV2RHEL(Transformer): + """Transform raw rhel Google data into the schema.""" + + def run(self, data): + """Transform the raw data.""" + # NOTE: Verify that the data is raw and provided by google. + entries = [x for x in data if x.is_provided_by("google") and x.is_raw()] + + results = [] + for e in entries: + entry = copy.deepcopy(e) + + raw = self.src_conn.get_content(entry) + region = os.path.basename(raw.filename).split(".")[0] + + for content in raw.content: + content["creation_timestamp"] = content["creationTimestamp"] + if "rhel" in content["name"]: + image_data = format_google.image_rhel(content) + image_name = image_data["name"].replace(" ", "_").lower() + os_name = "rhel" + provider = "google" + version = image_data["version"] + # NOTE: Due to consistency issues between the cloud providers and the fact + # that they do not all have unique numbers to identify their images, we decided + # to use this solution instead. + image_id = hashlib.sha1(image_name.encode()).hexdigest() + + # NOTE: example of expected paths + # /v2/rhel/google/8.6.0/global/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 + # /v2/rhel/google/8.2.0/global/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 + path = f"/v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" + data_entry = connection.DataEntry(path, image_data) + + results.append(data_entry) + return results + + class TransformerV2All(Transformer): """Genearate list of all image details.""" # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: type[Transformer]) -> list: - # NOTE: Verify that the data is from api v1. - entries = [x for x in data if x.is_API("v1")] + # NOTE: Verify that the data is from api v2. + entries = [x for x in data if x.is_API("v2")] results = [] @@ -311,8 +431,8 @@ def run(self, data: type[Transformer]) -> list: print("warn: could not determine region or provider of image: " + entry.filename) continue - entry.content["provider"] = filename[1] - entry.content["region"] = filename[2] + entry.content["provider"] = filename[4] + entry.content["region"] = filename[8] results.append(entry.content) results.sort(key=lambda x: x["name"], reverse=False) @@ -336,9 +456,8 @@ def display_name(self) -> dict: # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: type[Transformer]) -> list: - """Sort the raw data.""" - # NOTE: Verify that the data is not raw. - entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] + # NOTE: Verify that the data is from api v2. + entries = [x for x in data if x.is_API("v2")] results = [] os_list = {} diff --git a/src/cloudimagedirectory/transformer.py b/src/cloudimagedirectory/transformer.py index 13016727..3000956c 100644 --- a/src/cloudimagedirectory/transformer.py +++ b/src/cloudimagedirectory/transformer.py @@ -53,6 +53,7 @@ def run(origin_path: str, destination_path: str, arg_files: str, filter_until: s filters = [ filter.FilterImageByFilename("test"), filter.FilterImageByFilename("beta"), + filter.FilterImageByFilename("raw"), filter.FilterImageByUniqueName(), ] @@ -64,7 +65,7 @@ def run(origin_path: str, destination_path: str, arg_files: str, filter_until: s filter_after = pd.to_datetime(filter_until) filters.append(filter.FilterImageByLatestUpdate(filter_after)) - pipeline = transform.Pipeline( + pipeline_v1 = transform.Pipeline( origin_connection, [ transform.TransformerAWS, @@ -78,12 +79,29 @@ def run(origin_path: str, destination_path: str, arg_files: str, filter_until: s transform.TransformerIdxListImageLatestGoogle, transform.TransformerIdxListImageLatestAWS, transform.TransformerIdxListImageLatestAZURE, + ], + ) + print("run pipeline v1") + results = pipeline_v1.run(filenames) + + # NOTE: Introducing a second pipeline, to avoid filtering of v1/v2 data + # based on the image filename. + # We do not adapt the filter, since v1 will be removed soon. + pipeline_v2 = transform.Pipeline( + origin_connection, + [ + transform.TransformerAWSV2RHEL, + transform.TransformerAzureV2RHEL, + transform.TransformerGoogleV2RHEL, + ], + filters, + [ transform.TransformerV2All, - transform.TransformerV2ListOS, + #transform.TransformerV2ListOS, ], ) - print("run pipeline") - results = pipeline.run(filenames) + print("run pipeline v2") + results.extend(pipeline_v2.run(filenames)) for result in results: result.filename = destination_path + "/" + result.filename From ed01e82630bb50725c98f64fa43952092a16a029 Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Mon, 24 Jul 2023 21:25:13 +0200 Subject: [PATCH 3/7] fix tests Signed-off-by: Janine Olear --- .../transform/transform.py | 24 +++++++++---------- src/cloudimagedirectory/transformer.py | 2 +- tests/transformer/test_list_os.py | 18 +++++++------- .../transformer/testdata/expected/v2/os/list | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/cloudimagedirectory/transform/transform.py b/src/cloudimagedirectory/transform/transform.py index 02664516..994b1bbd 100644 --- a/src/cloudimagedirectory/transform/transform.py +++ b/src/cloudimagedirectory/transform/transform.py @@ -345,9 +345,7 @@ def run(self, data): results = [] for e in entries: entry = copy.deepcopy(e) - raw = self.src_conn.get_content(entry) - region = os.path.basename(raw.filename).split(".")[0] for content in raw.content: if content["publisher"] != "RedHat": @@ -359,6 +357,7 @@ def run(self, data): image_name = image_data["name"].replace(" ", "_").lower() os_name = "rhel" provider = "azure" + region = "global" version = image_data["version"] # NOTE: Due to consistency issues between the cloud providers and the fact # that they do not all have unique numbers to identify their images, we decided @@ -366,9 +365,9 @@ def run(self, data): image_id = hashlib.sha1(image_name.encode()).hexdigest() # NOTE: example of expected paths - # /v2/rhel/azure/8.6.0/af-south-1/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 - # /v2/rhel/azure/8.2.0/af-south-1/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 - path = f"/v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" + # v2/os/rhel/provider/azure/version/8.6.0/region/southcentralus/image/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 + # v2/os/rhel/provider/azure/version/8.2.0/region/southcentralus/image/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 + path = f"v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" data_entry = connection.DataEntry(path, image_data) results.append(data_entry) @@ -386,15 +385,15 @@ def run(self, data): results = [] for e in entries: entry = copy.deepcopy(e) - raw = self.src_conn.get_content(entry) - region = os.path.basename(raw.filename).split(".")[0] for content in raw.content: content["creation_timestamp"] = content["creationTimestamp"] if "rhel" in content["name"]: + image_data = format_google.image_rhel(content) image_name = image_data["name"].replace(" ", "_").lower() + region = "global" os_name = "rhel" provider = "google" version = image_data["version"] @@ -404,9 +403,9 @@ def run(self, data): image_id = hashlib.sha1(image_name.encode()).hexdigest() # NOTE: example of expected paths - # /v2/rhel/google/8.6.0/global/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 - # /v2/rhel/google/8.2.0/global/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 - path = f"/v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" + # v2/os/rhel/provider/google/version/8.6.0/region/global/image/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 + # v2/os/rhel/provider/google/version/8.2.0/region/global/image/14e4eab326cc5a2ef13cb5c0f36bc9bfa41025d9 + path = f"v2/os/{os_name}/provider/{provider}/version/{version}/region/{region}/image/{image_id}" data_entry = connection.DataEntry(path, image_data) results.append(data_entry) @@ -426,6 +425,7 @@ def run(self, data: type[Transformer]) -> list: for e in entries: entry = copy.deepcopy(e) + filename = entry.filename.split("/") if len(filename) < 3: print("warn: could not determine region or provider of image: " + entry.filename) @@ -464,10 +464,10 @@ def run(self, data: type[Transformer]) -> list: for e in entries: entry = copy.deepcopy(e) + filename = entry.filename.split("/")[10] try: - filename = entry.filename.split("/")[3] - os = filename.split("_")[0] + os = entry.filename.split("/")[2] if os not in os_list: os_list[os] = 1 diff --git a/src/cloudimagedirectory/transformer.py b/src/cloudimagedirectory/transformer.py index 3000956c..7465bc63 100644 --- a/src/cloudimagedirectory/transformer.py +++ b/src/cloudimagedirectory/transformer.py @@ -97,7 +97,7 @@ def run(origin_path: str, destination_path: str, arg_files: str, filter_until: s filters, [ transform.TransformerV2All, - #transform.TransformerV2ListOS, + transform.TransformerV2ListOS, ], ) print("run pipeline v2") diff --git a/tests/transformer/test_list_os.py b/tests/transformer/test_list_os.py index 2857fa09..b48e465a 100644 --- a/tests/transformer/test_list_os.py +++ b/tests/transformer/test_list_os.py @@ -40,34 +40,34 @@ def test_transformerV2ListOS(tmpdir): runner.chunk_size = chunk_size data = [ transformer.connection.DataEntry( - "v1/azure/global/rh-ocp-worker_rh-ocp-worker_x64", + "v2/os/rhel/provider/azure/version/8/region/global/image/dba7673010f19a94af4345453005933fd511bea9", { "date": "2019-01-01", "name": "test1", "arch": "arch1", - "region": "region-1", + "region": "global", }, ), transformer.connection.DataEntry( - "v1/google/global/rhel_9.0_sap_x86_64", + "v2/os/rhel/provider/google/version/9/region/global/image/9054fbe0b622c638224d50d20824d2ff6782e308", { - "date": "2020-01-01", + "date": "2023-03-06T12:57:17.827-08:00", "name": "test2", - "arch": "arch2", - "region": "region-1", + "arch": "ARM64", + "region": "global", }, ), transformer.connection.DataEntry( - "v1/aws/ap-northeast-2/rhel_8.5_hvm_arm64_hourly2", + "v2/os/rhel/provider/aws/version/8/region/ap-south-2/image/9054fbe0b622c638224d50d20824d2ff6782e308", { "date": "2020-01-01", "name": "test2", "arch": "arch2", - "region": "region-1", + "region": "ap-south-2", }, ), transformer.connection.DataEntry( - "v1/aws/some-region-1/unkown_distro", + "v2/os/unkown/provider/aws/version/7/region/some-region-1/image/9054fbe0b622c638224d50d20824d2ff6782e308", { "date": "2020-01-01", "name": "test2", diff --git a/tests/transformer/testdata/expected/v2/os/list b/tests/transformer/testdata/expected/v2/os/list index 2d54aa13..ca92a5df 100644 --- a/tests/transformer/testdata/expected/v2/os/list +++ b/tests/transformer/testdata/expected/v2/os/list @@ -1 +1 @@ -[{"name": "rhel", "display_name": "Red Hat Enterprise Linux", "description": "Red Hat Enterprise Linux", "count": 2}, {"name": "osa", "display_name": "no display name", "description": "no description", "count": 1}] +[{"name": "rhel", "display_name": "Red Hat Enterprise Linux", "description": "Red Hat Enterprise Linux", "count": 3}] From 5e3f76db02ad0cd580b0397009530777a22b324f Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Mon, 24 Jul 2023 23:35:57 +0200 Subject: [PATCH 4/7] add test for TransformerAWSV2RHEL Signed-off-by: Janine Olear --- .../af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/transformer/testdata/expected/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f diff --git a/tests/transformer/testdata/expected/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f b/tests/transformer/testdata/expected/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f new file mode 100644 index 00000000..cdc2c7b0 --- /dev/null +++ b/tests/transformer/testdata/expected/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f @@ -0,0 +1 @@ +{"name": "RHEL 6.10 hvm x86_64 Hourly2", "arch": "x86_64", "version": "6.10", "imageId": "ami-0c22ca1423e1721e7", "date": "2021-03-18T15:22:40.000Z", "virt": "hvm", "selflink": "https://console.aws.amazon.com/ec2/home?region=af-south-1#launchAmi=ami-0c22ca1423e1721e7", "region": "af-south-1"} From a52dd8515c0b3d958aaea28baa7be5d2325171a3 Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Mon, 24 Jul 2023 23:55:38 +0200 Subject: [PATCH 5/7] add test for TransformerAzureV2RHEL Signed-off-by: Janine Olear --- tests/transformer/test_aws_rhel.py | 33 +++++++++++++++++++ tests/transformer/test_azure_rhel.py | 33 +++++++++++++++++++ .../93212c01392a1e372edd399bde5838066089b22c | 1 + 3 files changed, 67 insertions(+) create mode 100644 tests/transformer/test_aws_rhel.py create mode 100644 tests/transformer/test_azure_rhel.py create mode 100644 tests/transformer/testdata/expected/v2/os/rhel/provider/azure/version/311.161/region/global/image/93212c01392a1e372edd399bde5838066089b22c diff --git a/tests/transformer/test_aws_rhel.py b/tests/transformer/test_aws_rhel.py new file mode 100644 index 00000000..b5c3f406 --- /dev/null +++ b/tests/transformer/test_aws_rhel.py @@ -0,0 +1,33 @@ +"""Tests for the v2 AWS RHEL transformer.""" +import filecmp +import os + +from cloudimagedirectory import transformer + + +def test_aws_v2_rhel_transformer_command(runner, tmp_path): + """Verify that we can transform AWS data for RHEL.""" + result = runner.invoke( + transformer.run, + [ + "-f", + "tests/transformer/testdata/input/raw/aws/af-south-1.json", + "-op=.", + f"-dp={tmp_path}", + "--filter.until=none", + ], + ) + + assert result.exit_code == 0, f"expected no error, but got code {result.exit_code} and output:\n{result.output}" + + # Ensure the directory was made. + assert os.path.isdir(f"{tmp_path}/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image") + + # Get current directory + pwd = os.getcwd() + + # Check image data by comparing the expected file and the output file byte by byte. + assert filecmp.cmp( + f"{pwd}/tests/transformer/testdata/expected/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f", + f"{tmp_path}/v2/os/rhel/provider/aws/version/6.10/region/af-south-1/image/4031b089d970c84bf7fad57831ba552e36517a3f", + ) diff --git a/tests/transformer/test_azure_rhel.py b/tests/transformer/test_azure_rhel.py new file mode 100644 index 00000000..5dca149a --- /dev/null +++ b/tests/transformer/test_azure_rhel.py @@ -0,0 +1,33 @@ +"""Tests for the v2 Azure RHEL transformer.""" +import filecmp +import os + +from cloudimagedirectory import transformer + + +def test_aws_v2_rhel_transformer_command(runner, tmp_path): + """Verify that we can transform Azure data for RHEL.""" + result = runner.invoke( + transformer.run, + [ + "-f", + "tests/transformer/testdata/input/raw/azure/eastus.json", + "-op=.", + f"-dp={tmp_path}", + "--filter.until=none", + ], + ) + + assert result.exit_code == 0, f"expected no error, but got code {result.exit_code} and output:\n{result.output}" + + # Ensure the directory was made. + assert os.path.isdir(f"{tmp_path}/v2/os/rhel/provider/azure/version/311.161/region/global/image") + + # Get current directory + pwd = os.getcwd() + + # Check image data by comparing the expected file and the output file byte by byte. + assert filecmp.cmp( + f"{pwd}/tests/transformer/testdata/expected/v2/os/rhel/provider/azure/version/311.161/region/global/image/93212c01392a1e372edd399bde5838066089b22c", + f"{tmp_path}/v2/os/rhel/provider/azure/version/311.161/region/global/image/93212c01392a1e372edd399bde5838066089b22c", + ) diff --git a/tests/transformer/testdata/expected/v2/os/rhel/provider/azure/version/311.161/region/global/image/93212c01392a1e372edd399bde5838066089b22c b/tests/transformer/testdata/expected/v2/os/rhel/provider/azure/version/311.161/region/global/image/93212c01392a1e372edd399bde5838066089b22c new file mode 100644 index 00000000..27ab9921 --- /dev/null +++ b/tests/transformer/testdata/expected/v2/os/rhel/provider/azure/version/311.161/region/global/image/93212c01392a1e372edd399bde5838066089b22c @@ -0,0 +1 @@ +{"name": "osa osa_311 x64", "arch": "x64", "version": "311.161", "imageId": "RedHat:osa:osa_311:311.161.20200115", "date": "2020-01-15", "virt": "unknown"} From 542d4b1182408fb3c3735728e6b2a1c90006da9c Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Tue, 25 Jul 2023 10:00:50 +0200 Subject: [PATCH 6/7] add test for TransformerGoogleV2RHEL Signed-off-by: Janine Olear --- tests/transformer/test_google_rhel.py | 33 +++++++++++++++++++ .../a2f9b1c21e096445099c419aa0c0c9bc32657059 | 1 + 2 files changed, 34 insertions(+) create mode 100644 tests/transformer/test_google_rhel.py create mode 100644 tests/transformer/testdata/expected/v2/os/rhel/provider/google/version/7/region/global/image/a2f9b1c21e096445099c419aa0c0c9bc32657059 diff --git a/tests/transformer/test_google_rhel.py b/tests/transformer/test_google_rhel.py new file mode 100644 index 00000000..fdf4b4f5 --- /dev/null +++ b/tests/transformer/test_google_rhel.py @@ -0,0 +1,33 @@ +"""Tests for the v2 Google RHEL transformer.""" +import filecmp +import os + +from cloudimagedirectory import transformer + + +def test_aws_v2_rhel_transformer_command(runner, tmp_path): + """Verify that we can transform Google data for RHEL.""" + result = runner.invoke( + transformer.run, + [ + "-f", + "tests/transformer/testdata/input/raw/google/all.json", + "-op=.", + f"-dp={tmp_path}", + "--filter.until=none", + ], + ) + + assert result.exit_code == 0, f"expected no error, but got code {result.exit_code} and output:\n{result.output}" + + # Ensure the directory was made. + assert os.path.isdir(f"{tmp_path}/v2/os/rhel/provider/google/version/7/region/global/image") + + # Get current directory + pwd = os.getcwd() + + # Check image data by comparing the expected file and the output file byte by byte. + assert filecmp.cmp( + f"{pwd}/tests/transformer/testdata/expected/v2/os/rhel/provider/google/version/7/region/global/image/a2f9b1c21e096445099c419aa0c0c9bc32657059", + f"{tmp_path}/v2/os/rhel/provider/google/version/7/region/global/image/a2f9b1c21e096445099c419aa0c0c9bc32657059", + ) diff --git a/tests/transformer/testdata/expected/v2/os/rhel/provider/google/version/7/region/global/image/a2f9b1c21e096445099c419aa0c0c9bc32657059 b/tests/transformer/testdata/expected/v2/os/rhel/provider/google/version/7/region/global/image/a2f9b1c21e096445099c419aa0c0c9bc32657059 new file mode 100644 index 00000000..094e4897 --- /dev/null +++ b/tests/transformer/testdata/expected/v2/os/rhel/provider/google/version/7/region/global/image/a2f9b1c21e096445099c419aa0c0c9bc32657059 @@ -0,0 +1 @@ +{"name": "RHEL 7 X86_64", "arch": "X86_64", "version": "7", "imageId": "https://www.googleapis.com/compute/v1/projects/rhel-cloud/global/images/rhel-7-v20230306", "date": "2023-03-06T12:57:17.210-08:00", "selflink": "https://console.cloud.google.com/compute/imagesDetail/projects/rhel-cloud/global/images/rhel-7-v20230306"} From 6088c9fb2e94dc0ccfa38ccc5929b49b5393f775 Mon Sep 17 00:00:00 2001 From: Janine Olear Date: Tue, 25 Jul 2023 10:25:42 +0200 Subject: [PATCH 7/7] fix linting errors Signed-off-by: Janine Olear --- .../connection/connection.py | 4 ++-- src/cloudimagedirectory/transform/transform.py | 18 ++++++++---------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/cloudimagedirectory/connection/connection.py b/src/cloudimagedirectory/connection/connection.py index c3e74820..8c3ee3f6 100644 --- a/src/cloudimagedirectory/connection/connection.py +++ b/src/cloudimagedirectory/connection/connection.py @@ -31,7 +31,7 @@ def is_provided_by(self, name: str) -> bool: """Check the origin of the file.""" return f"{name}/" in self.filename - def is_API(self, api) -> bool: + def is_API(self, api: str) -> bool: """Check if the file is the actual API entry and not a sub url.""" path = self.filename.split("/") if path[0] != api: @@ -45,7 +45,7 @@ def is_API(self, api) -> bool: return False # NOTE: check length of hash value. - if len(path[len(path)-1]) != 40: + if len(path[len(path) - 1]) != 40: return False return True diff --git a/src/cloudimagedirectory/transform/transform.py b/src/cloudimagedirectory/transform/transform.py index 994b1bbd..ca242906 100644 --- a/src/cloudimagedirectory/transform/transform.py +++ b/src/cloudimagedirectory/transform/transform.py @@ -1,8 +1,7 @@ """Transforms the raw data into useful data.""" import copy -import os import hashlib - +import os from datetime import datetime from typing import Any, Callable, no_type_check @@ -86,7 +85,7 @@ class TransformerIdxListImageLatest(Transformer): # TODO: Mypy says that 'data' below is not iterable. This needs to be fixed later. @no_type_check def run(self, data: Transformer) -> list: # noqa: C901 - # NOTE: Verify that the data is not raw. + # NOTE: Verify that the data is not raw. entries = [x for x in data if not x.is_raw() and not x.is_provided_by("idx")] # NOTE: Sort the list of data by date @@ -298,7 +297,7 @@ def run(self, data: type[Transformer]) -> list: class TransformerAWSV2RHEL(Transformer): """Transform raw rhel AWS data into the schema.""" - def run(self, data): + def run(self, data: list) -> list: """Transform the raw data.""" # NOTE: Verify that the data is raw. entries = [x for x in data if x.is_provided_by("aws") and x.is_raw()] @@ -322,7 +321,7 @@ def run(self, data): # NOTE: Due to consistency issues between the cloud providers and the fact # that they do not all have unique numbers to identify their images, we decided # to use this solution instead. - image_id = hashlib.sha1(image_name.encode()).hexdigest() + image_id = hashlib.sha1(image_name.encode()).hexdigest() # noqa: S324 # NOTE: example of expected paths # v2/os/rhel/provider/aws/version/8.6.0/region/eu-west-3/image/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 @@ -337,7 +336,7 @@ def run(self, data): class TransformerAzureV2RHEL(Transformer): """Transform raw rhel Azure data into the schema.""" - def run(self, data): + def run(self, data: list) -> list: """Transform the raw data.""" # NOTE: Verify that the data is raw and provided by azure. entries = [x for x in data if x.is_provided_by("azure") and x.is_raw()] @@ -362,7 +361,7 @@ def run(self, data): # NOTE: Due to consistency issues between the cloud providers and the fact # that they do not all have unique numbers to identify their images, we decided # to use this solution instead. - image_id = hashlib.sha1(image_name.encode()).hexdigest() + image_id = hashlib.sha1(image_name.encode()).hexdigest() # noqa: S324 # NOTE: example of expected paths # v2/os/rhel/provider/azure/version/8.6.0/region/southcentralus/image/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4 @@ -377,7 +376,7 @@ def run(self, data): class TransformerGoogleV2RHEL(Transformer): """Transform raw rhel Google data into the schema.""" - def run(self, data): + def run(self, data: list) -> list: """Transform the raw data.""" # NOTE: Verify that the data is raw and provided by google. entries = [x for x in data if x.is_provided_by("google") and x.is_raw()] @@ -390,7 +389,6 @@ def run(self, data): for content in raw.content: content["creation_timestamp"] = content["creationTimestamp"] if "rhel" in content["name"]: - image_data = format_google.image_rhel(content) image_name = image_data["name"].replace(" ", "_").lower() region = "global" @@ -400,7 +398,7 @@ def run(self, data): # NOTE: Due to consistency issues between the cloud providers and the fact # that they do not all have unique numbers to identify their images, we decided # to use this solution instead. - image_id = hashlib.sha1(image_name.encode()).hexdigest() + image_id = hashlib.sha1(image_name.encode()).hexdigest() # noqa: S324 # NOTE: example of expected paths # v2/os/rhel/provider/google/version/8.6.0/region/global/image/71d0a7aaa1f0dc06840e46f6ce316a7acfb022d4