From 6a8d6c64b373ebe9ffd2016a1b52e31a2f55b50b Mon Sep 17 00:00:00 2001
From: Marek Kulik
Date: Mon, 6 Jun 2022 00:19:45 +0200
Subject: [PATCH] Changing srpm mapping method

Signed-off-by: Martin Curlej
---
Review notes (placed below the '---' separator, so they are commentary and
not part of the commit message):

* _map_srpm_files: when the name search reaches byte_c == 625 it sets
  byte_c = -1 and then `continue`s. The next iteration increments the
  counter back to 0 before either break can run, so the scan is never
  limited to the 2500 bytes the comment describes and the
  `if byte_c == -1` branch after the loop cannot be taken. Replacing that
  `continue` with `break` would enforce the window -- TODO confirm that
  2500 bytes is enough for real SRPMs before doing so.
* _map_srpm_files: `name.decode()` is not guarded, so bytes that are not
  valid UTF-8 would raise instead of being logged and skipped like the
  other failure paths in this function.
* Typos in the added comments: "accour" (occur), "We gonna keep reading".
* Indentation and the position of blank context lines in this copy were
  re-derived from the hunk line counts; re-check them against the tree
  before applying.

 .gitignore                            |  5 +-
 MANIFEST.in                           |  6 ++
 module-build.spec                     |  2 +-
 module_build/builders/mock_builder.py | 81 ++++++++++++++++++---------
 setup.py                              |  2 +-
 tests/builders/test_mock_builder.py   | 29 ++++++++++
 6 files changed, 93 insertions(+), 32 deletions(-)
 create mode 100644 MANIFEST.in

diff --git a/.gitignore b/.gitignore
index 74082c9..2044a9b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -139,9 +139,10 @@ cython_debug/
 .ionide
 
 # Support for Project snippet scope
-.vscode/*.code-snippets
+.vscode
+.code-snippets
 
 # Ignore code-workspaces
 *.code-workspace
 
-# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
\ No newline at end of file
+# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..72595c9
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+recursive-include tests *.py
+recursive-include tests/data *
+include README
+include LICENSE
+include requirements.txt
+include test-requirements.txt
diff --git a/module-build.spec b/module-build.spec
index 4a47333..a1a6965 100644
--- a/module-build.spec
+++ b/module-build.spec
@@ -1,5 +1,5 @@
 Name: module-build
-Version: 0.1.0
+Version: 0.2.0
 Release: 1%{?dist}
 Summary: Tool/library for building module streams locally
 License: MIT
diff --git a/module_build/builders/mock_builder.py b/module_build/builders/mock_builder.py
index 4f8728c..c300171 100644
--- a/module_build/builders/mock_builder.py
+++ b/module_build/builders/mock_builder.py
@@ -2,16 +2,14 @@
 import os
 import shutil
 import subprocess
-import tempfile
 from collections import OrderedDict
 from multiprocessing import Manager, Pool
 from pathlib import Path
 from sys import stdout
 from time import sleep
 
-import libarchive
 import mockbuild.config
-from module_build.constants import SPEC_EXTENSION, SRPM_EXTENSION
+from module_build.constants import SRPM_EXTENSION
 from module_build.log import logger
 from module_build.metadata import (generate_and_populate_output_mmd,
                                    generate_module_stream_version, mmd_to_str)
@@ -218,9 +216,8 @@ def _create_workers_pool(self, processess):
     def _map_srpm_files(self, srpm_dir):
         """
         Function responsible for mapping srpm names to modules names.
-        It extracts .spec file from the rpm and looks for 'Name:'
-        line with an actual name. All results are stored in mock_info
-        variable inside class object.
+        It reads name directly from SRPM header. All results are stored
+        in mock_info variable inside class object.
 
         Args:
             srpm_dir (str, Path): Path to directory with SRPM files
@@ -232,33 +229,61 @@ def _map_srpm_files(self, srpm_dir):
         for file in srpm_dir.glob(f"*.{SRPM_EXTENSION}"):
             logger.info(f"SRPM: Mapping component for '{file.name}' file")
 
-            with libarchive.file_reader(str(file.resolve())) as archive:
-                for entry in archive:
-                    # check for spec file
-                    if not all((entry.isfile, entry.pathname.endswith(SPEC_EXTENSION))):
-                        continue
+            with open(str(file.resolve()), "rb") as f:
+                # (s)RPM is 4 bytes aligned (at least) so we start reading here..
+                # We gonna keep reading until we find magic 'number' for SRPM Header
+                while (byte := f.read(4)):
+                    if byte == b"\x8e\xad\xe8\x01":
+                        break
+                # EOF, we found nothing. It's not critical so let's skip this file.
+                else:
+                    logger.warning(f"SRPM: Mapping name for: '{file.name}' failed because of unknown format?")
+                    continue
 
-                    logger.info(f"SRPM: Located .spec file: '{entry.pathname}'")
+                # Now let's read package name from RPMTAG_NAME.
+                # We cannot go by offset because size is not static..
+                # but the name tag is always first so it makes things easier.
+                # There is additional counter of 2500 bytes because some major
+                # changes might accour in scheme..(unrealistically) so to avoid
+                # reading incorrect bytes we add boundary.
+                byte_c = 0
+                # We cannot combine multiple conditions with Walrus Operator
+                while (byte := f.read(4)):
+                    if byte_c == 625:
+                        byte_c = -1
+                        continue
 
-                    # read content of spec file and look for "Name:"
-                    with tempfile.NamedTemporaryFile() as tmp:
-                        for block in entry.get_blocks():
-                            tmp.write(block)
+                    byte_c += 1
+                    # This is special sequence that should be unique before first tag
+                    if byte[:2] == b"\x43\x00":
+                        break
+                else:
+                    logger.warning(f"SRPM: Mapping name for: '{file.name}' failed during searching for name, EOF.")
+                    continue
 
-                        # Reset fd
-                        tmp.flush()
-                        tmp.seek(0)
+                if byte_c == -1:
+                    logger.warning(f"SRPM: Mapping name for: '{file.name}' failed during searching for name.")
+                    continue
 
-                        for line in tmp:
-                            # we are still in bytes
-                            line_str = line.decode("utf-8")
+                # We found the name, let's extract it. Last 2 bytes read
+                # previously contain beginning of our name.
+                # There are no packages with len() < than 2.
+                name = bytearray(byte[-2:])
+
+                # We don't know length of the name so we read it byte by byte.
+                while (byte := f.read(1)):
+                    # That's end of the name.
+                    if byte == b"\x00":
+                        break
+                    name += bytearray(byte)
+                else:
+                    logger.warning(f"SRPM: Mapping name for: '{file.name}' failed during name reading.")
+                    continue
 
-                            if line_str.startswith("Name:"):
-                                component_name = line_str.split(":", 1)[1].strip()
-                                logger.info(f"SRPM: Found SRPM: '{file.name}' for component: '{component_name}'")
-                                self.mock_info.add_srpm(component_name, srpm_dir / file.name)
-                                break
-                    break
+                # Convert to string
+                name = name.decode()
+                self.mock_info.add_srpm(name, srpm_dir / file.name)
+                logger.info(f"SRPM: Found SRPM: '{file.name}' for component: '{name}'")
 
     def _precheck_rpm_mapping(self, context_to_build):
         """Checks if all components have a proper SRPM file.
diff --git a/setup.py b/setup.py
index 4b00e1e..f440e8d 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@ def read_requirements(filename):
 setup(
     name="module-build",
     description="A library and CLI tool for building module streams.",
-    version="0.1.0",
+    version="0.2.0",
     classifiers=[
         "Programming Language :: Python",
         "Topic :: Software Development :: Build Tools"
diff --git a/tests/builders/test_mock_builder.py b/tests/builders/test_mock_builder.py
index b0c78b9..a1ac463 100644
--- a/tests/builders/test_mock_builder.py
+++ b/tests/builders/test_mock_builder.py
@@ -5,6 +5,7 @@
 import pytest
 from module_build.builders.mock_builder import MockBuilder, MockBuildPool
 from module_build.metadata import load_modulemd_file_from_path
+from module_build.mock.info import MockBuildInfoSRPM
 from module_build.stream import ModuleStream
 from tests import (assert_modular_dependencies, fake_buildroot_run,
                    fake_call_createrepo_c_on_dir, fake_get_artifacts,
@@ -466,6 +467,34 @@ def test_srpm_build_with_missing_sources(self, mock_config, srpms, tmpdir, worke
         assert "Missing SRPM for" in err_msg
         assert "flatpak" in err_msg
 
+    @pytest.mark.parametrize("create_fake_srpm", [({"name": "nginx"}, {"name": "rustc"}, {"name": "php"}), ], indirect=True)
+    def test_srpm_mapping_with_invalid_files(self, tmpdir, create_fake_srpm, workers):
+        """
+        Test SRPM mapping with invalid SRPM files.
+        """
+
+        # Add garbage files to SRPM directory
+        garbage_file_names = ("struck.src.rpm", "nginx-1.33.src.rpm", "not_a_file", "magic.rpm")
+        for idx, f in enumerate(garbage_file_names, start=1):
+            with open(Path(create_fake_srpm) / f, "wb") as fd:
+                fd.write(os.urandom(1024 * 10 * idx))
+
+        # Validate created Fake SRPMs
+        fake_srpm_files = Path(create_fake_srpm).glob('*.rpm')
+        fake_srpm_files = [x for x in fake_srpm_files if x.is_file()]
+        assert 6 == len(fake_srpm_files)
+
+        cwd = tmpdir.mkdir("workdir").strpath
+        srpm_dir = str(Path(create_fake_srpm).resolve())
+        rootdir = None
+        mock_cfg_path = get_full_data_path("mock_cfg/fedora-35-x86_64.cfg")
+        external_repos = []
+
+        builder = MockBuilder(mock_cfg_path, cwd, external_repos, rootdir, srpm_dir, workers)
+
+        assert 3 == builder.mock_info.get_srpm_count()
+        assert isinstance(builder.mock_info._if_srpm_present("nginx")[0], MockBuildInfoSRPM)
+
 
 class TestMockBuilderAsync:
     @patch("module_build.builders.mock_builder.MockBuildPool.add_job")