From 818dacfe89ab9124c7fa78170e26b1013481cf5f Mon Sep 17 00:00:00 2001
From: Michal Mielewczyk
Date: Wed, 2 Oct 2024 10:40:53 +0200
Subject: [PATCH 1/3] pyocf: Disarm error volumes in teardown

This prevents obfuscating the actual test errors

Signed-off-by: Michal Mielewczyk
---
NOTE(review): line structure rebuilt from a whitespace-collapsed copy.
Indentation and blank context lines are inferred from Python syntax and the
hunk line counts; check them against the tree before applying.

 tests/functional/pyocf/types/ctx.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/functional/pyocf/types/ctx.py b/tests/functional/pyocf/types/ctx.py
index e6fa60ac..46ddabbc 100644
--- a/tests/functional/pyocf/types/ctx.py
+++ b/tests/functional/pyocf/types/ctx.py
@@ -1,5 +1,6 @@
 #
 # Copyright(c) 2019-2022 Intel Corporation
+# Copyright(c) 2024 Huawei Technologies
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
@@ -113,6 +114,10 @@ def cleanup_volume_types(self):
 
     def stop_caches(self):
         for cache in self.caches[:]:
+            try:
+                cache.get_volume().disarm()
+            except AttributeError:
+                pass
             cache.stop()
 
     def exit(self):

From 6ec4f6cceb2e848663911650637c5b161e64ff70 Mon Sep 17 00:00:00 2001
From: Michal Mielewczyk
Date: Wed, 2 Oct 2024 14:43:20 +0200
Subject: [PATCH 2/3] pyocf: rio: reads and writes accounting

Signed-off-by: Michal Mielewczyk
---
NOTE(review): line structure rebuilt from a whitespace-collapsed copy.
Indentation and blank context lines are inferred from Python syntax and the
hunk line counts; check them against the tree before applying.

 tests/functional/pyocf/rio.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/functional/pyocf/rio.py b/tests/functional/pyocf/rio.py
index ec9e3fd2..c811eea5 100644
--- a/tests/functional/pyocf/rio.py
+++ b/tests/functional/pyocf/rio.py
@@ -87,6 +87,8 @@ def __init__(self, jobspec: JobSpec, queue):
             self.ios = Size(0)
             self.io_target = 0
             self.finish_time = None
+            self.submitted_reads = 0
+            self.submitted_writes = 0
 
             self.qd_condition = Condition()
             self.qd = 0
@@ -159,6 +161,12 @@ def run(self):
                 io.callback = self.get_io_cb()
                 self.ios += self.jobspec.bs
                 io.submit()
+
+                if iodir is IoDir.WRITE:
+                    self.submitted_writes += 1
+                if iodir is IoDir.READ:
+                    self.submitted_reads += 1
+
                 with self.qd_condition:
                     self.qd += 1
 
@@ -172,6 +180,8 @@ def __init__(self):
         self._threads = []
         self.errors = {}
         self.error_count = 0
+        self.submitted_reads = 0
+        self.submitted_writes = 0
 
     def copy(self):
         r = copy.copy(self)
@@ -254,6 +264,8 @@ def wait_for_completion(self):
             thread.join()
             self.errors.update({thread.name: thread.errors})
             self.error_count += len(thread.errors)
+            self.submitted_reads += thread.submitted_reads
+            self.submitted_writes += thread.submitted_writes
 
         self.global_jobspec.target.close()
 

From a681f7ec7e27f0095092acf828b25558642d4fa9 Mon Sep 17 00:00:00 2001
From: Michal Mielewczyk
Date: Tue, 1 Oct 2024 14:32:13 +0200
Subject: [PATCH 3/3] pyocf: Test for reporting and accounting IO errors

Test for bug fixed in 237f6c708a16aca66f4c6eeefec70a63f9e2beab

Signed-off-by: Michal Mielewczyk
---
NOTE(review): line structure rebuilt from a whitespace-collapsed copy.
Indentation and blank lines are inferred from Python syntax and the hunk
line counts; check them against the tree before applying.

Changes relative to the original submission:
- pass cache_line_size=cls to Cache.start_on_device() so that the "cls"
  parametrization is actually exercised (it was accepted but never used);
  assumes the Cache constructor takes that keyword - confirm
- build error_sectors with set(range(...)) instead of a generator expression
- add one-line docstrings to both tests (148 -> 150 lines; the blob id on
  the "index" line below was not regenerated)

 tests/functional/tests/engine/test_errors.py | 150 +++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 tests/functional/tests/engine/test_errors.py

diff --git a/tests/functional/tests/engine/test_errors.py b/tests/functional/tests/engine/test_errors.py
new file mode 100644
index 00000000..ece545be
--- /dev/null
+++ b/tests/functional/tests/engine/test_errors.py
@@ -0,0 +1,150 @@
+#
+# Copyright(c) 2024 Huawei Technologies
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+import pytest
+
+from pyocf.types.cache import Cache, CacheMode
+from pyocf.types.core import Core
+from pyocf.types.volume import RamVolume, ErrorDevice
+from pyocf.types.volume_core import CoreVolume
+from pyocf.types.shared import CacheLineSize
+from pyocf.utils import Size
+from pyocf.rio import Rio, ReadWrite
+
+BLOCK_SIZES = [Size(512), Size.from_KiB(1), Size.from_KiB(4), Size.from_KiB(64), Size.from_KiB(256)]
+
+
+@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
+@pytest.mark.parametrize("cache_mode", [c for c in CacheMode if not c.lazy_write()])
+@pytest.mark.parametrize("rio_bs", BLOCK_SIZES)
+def test_strict_engine_errors(pyocf_ctx, cache_mode: CacheMode, cls: CacheLineSize, rio_bs: Size):
+    """Armed cache-volume errors must not reach the user in non-lazy-write cache modes."""
+    cache_vol_size = Size.from_MiB(50)
+    ram_cache_volume = RamVolume(cache_vol_size)
+    error_sectors = set(range(0, cache_vol_size, 512))
+    error_device = ErrorDevice(ram_cache_volume, error_sectors, armed=False)
+    core_device = RamVolume(Size.from_MiB(50))
+
+    cache = Cache.start_on_device(error_device, cache_mode=cache_mode, cache_line_size=cls)
+    core = Core.using_device(core_device)
+    queue = cache.get_default_queue()
+
+    cache.add_core(core)
+    core_volume = CoreVolume(core)
+    core_volume.open()
+
+    error_device.reset_stats()
+    error_device.arm()
+
+    rio_size = Size.from_MiB(3) if rio_bs > Size(4096) else Size.from_MiB(1)
+
+    read_rio_stats = (
+        Rio()
+        .target(core_volume)
+        .njobs(1)
+        .readwrite(ReadWrite.RANDREAD)
+        .size(rio_size)
+        .bs(rio_bs)
+        .qd(16)
+        .continue_on_error()
+        .run([queue])
+    )
+
+    # FIXME: Get rid of the second Rio instance, once the real RANDRW support is
+    # implemented in Rio
+    write_rio_stats = (
+        Rio()
+        .target(core_volume)
+        .njobs(1)
+        .readwrite(ReadWrite.RANDWRITE)
+        .size(rio_size)
+        .bs(rio_bs)
+        .qd(16)
+        .continue_on_error()
+        .run([queue])
+    )
+
+    cache.settle()
+
+    assert cache.get_stats()["usage"]["occupancy"]["value"] == 0
+
+    assert read_rio_stats.error_count == 0
+    assert write_rio_stats.error_count == 0
+
+    if cache_mode is CacheMode.PT:
+        expected_cache_write_errors = 0
+    else:
+        expected_cache_write_errors = write_rio_stats.submitted_writes
+
+    actual_cache_write_errors = cache.get_stats()["errors"]["cache_volume_wr"]["value"]
+
+    assert actual_cache_write_errors >= expected_cache_write_errors
+
+    error_device.disarm()
+
+
+@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
+@pytest.mark.parametrize("cache_mode", [c for c in CacheMode if c.lazy_write()])
+@pytest.mark.parametrize("rio_bs", BLOCK_SIZES)
+def test_lazy_engine_errors(pyocf_ctx, cache_mode: CacheMode, cls: CacheLineSize, rio_bs: Size):
+    """Armed cache-volume errors must fail every write in lazy-write cache modes."""
+    cache_vol_size = Size.from_MiB(50)
+    ram_cache_volume = RamVolume(cache_vol_size)
+    error_sectors = set(range(0, cache_vol_size, 512))
+    error_device = ErrorDevice(ram_cache_volume, error_sectors, armed=False)
+    core_device = RamVolume(Size.from_MiB(50))
+
+    cache = Cache.start_on_device(error_device, cache_mode=cache_mode, cache_line_size=cls)
+    core = Core.using_device(core_device)
+    queue = cache.get_default_queue()
+
+    cache.add_core(core)
+    core_volume = CoreVolume(core)
+    core_volume.open()
+
+    error_device.reset_stats()
+    error_device.arm()
+
+    rio_size = Size.from_MiB(3) if rio_bs > Size(4096) else Size.from_MiB(1)
+
+    read_rio_stats = (
+        Rio()
+        .target(core_volume)
+        .njobs(1)
+        .readwrite(ReadWrite.RANDREAD)
+        .size(rio_size)
+        .bs(rio_bs)
+        .qd(16)
+        .continue_on_error()
+        .run([queue])
+    )
+
+    # FIXME: Get rid of the second Rio instance, once the real RANDRW support is
+    # implemented in Rio
+    write_rio_stats = (
+        Rio()
+        .target(core_volume)
+        .njobs(1)
+        .readwrite(ReadWrite.RANDWRITE)
+        .size(rio_size)
+        .bs(rio_bs)
+        .qd(16)
+        .continue_on_error()
+        .run([queue])
+    )
+
+    cache.settle()
+
+    assert cache.get_stats()["usage"]["occupancy"]["value"] == 0
+
+    assert read_rio_stats.error_count == 0
+    assert write_rio_stats.error_count == write_rio_stats.submitted_writes
+
+    expected_cache_write_errors = write_rio_stats.submitted_writes
+    actual_cache_write_errors = cache.get_stats()["errors"]["cache_volume_wr"]["value"]
+
+    assert actual_cache_write_errors >= expected_cache_write_errors
+
+    error_device.disarm()