From 0d06b3a597f08daff9da7360b693e96b925f76ba Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Thu, 21 Nov 2024 21:26:00 +0100 Subject: [PATCH 1/2] Fix race condition during cache attach After attaching new cache device handle all the IOs in Pass-Through mode until all the d2c requests are completed. Signed-off-by: Robert Baldyga --- src/engine/cache_engine.c | 5 +++++ src/mngt/ocf_mngt_cache.c | 23 ++++++++++++++++++++--- src/ocf_cache_priv.h | 2 ++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/engine/cache_engine.c b/src/engine/cache_engine.c index f7f160e0..2ab79725 100644 --- a/src/engine/cache_engine.c +++ b/src/engine/cache_engine.c @@ -162,6 +162,11 @@ void ocf_resolve_effective_cache_mode(ocf_cache_t cache, return; } + if (env_atomic_read(&cache->attach_pt)) { + req->cache_mode = ocf_req_cache_mode_pt; + return; + } + if (cache->pt_unaligned_io && !ocf_req_is_4k(req->addr, req->bytes)) { req->cache_mode = ocf_req_cache_mode_pt; return; diff --git a/src/mngt/ocf_mngt_cache.c b/src/mngt/ocf_mngt_cache.c index d1c8ad76..d9261c35 100644 --- a/src/mngt/ocf_mngt_cache.c +++ b/src/mngt/ocf_mngt_cache.c @@ -1901,18 +1901,35 @@ static void _ocf_mngt_attach_shutdown_status(ocf_pipeline_t pipeline, _ocf_mngt_attach_shutdown_status_complete, context); } + +static void _ocf_mngt_attach_post_init_finish(void *priv) +{ + struct ocf_cache_attach_context *context = priv; + ocf_cache_t cache = context->cache; + + ocf_refcnt_unfreeze(&cache->refcnt.d2c); + + env_atomic_set(&cache->attach_pt, 0); + + ocf_cache_log(cache, log_debug, "Cache attached\n"); + + ocf_pipeline_next(context->pipeline); +} + static void _ocf_mngt_attach_post_init(ocf_pipeline_t pipeline, void *priv, ocf_pipeline_arg_t arg) { struct ocf_cache_attach_context *context = priv; ocf_cache_t cache = context->cache; + env_atomic_set(&cache->attach_pt, 1); + ocf_cleaner_refcnt_unfreeze(cache); ocf_refcnt_unfreeze(&cache->refcnt.metadata); - ocf_cache_log(cache, log_debug, "Cache attached\n"); - - ocf_pipeline_next(pipeline); + ocf_refcnt_freeze(&cache->refcnt.d2c); + ocf_refcnt_register_zero_cb(&cache->refcnt.d2c, + _ocf_mngt_attach_post_init_finish, context); } static void _ocf_mngt_attach_handle_error( diff --git a/src/ocf_cache_priv.h b/src/ocf_cache_priv.h index 9a8e02d3..b4ab8b67 100644 --- a/src/ocf_cache_priv.h +++ b/src/ocf_cache_priv.h @@ -106,6 +106,8 @@ struct ocf_cache { env_atomic flush_in_progress; env_mutex flush_mutex; + env_atomic attach_pt; + struct ocf_cleaner cleaner; struct list_head io_queues; From b850727d174c0602eec01020672438d222422454 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Thu, 21 Nov 2024 21:29:23 +0100 Subject: [PATCH 2/2] tests: Fix d2c test Cache attach operation is not supposed to complete unless all the d2c requests are completed, thus need to handle it asynchronously. Signed-off-by: Robert Baldyga --- tests/functional/pyocf/types/cache.py | 35 +++++++++++++++++++---- tests/functional/pyocf/types/shared.py | 5 +++- tests/functional/tests/engine/test_d2c.py | 18 +++++++++--- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/tests/functional/pyocf/types/cache.py b/tests/functional/pyocf/types/cache.py index 4c6dd943..b5ece0f2 100644 --- a/tests/functional/pyocf/types/cache.py +++ b/tests/functional/pyocf/types/cache.py @@ -569,7 +569,7 @@ def alloc_device_config(self, device, perform_test=True): def free_device_config(self, cfg): lib = OcfLib.getInstance().ocf_volume_destroy(cfg._volume) - def attach_device( + def attach_device_async( self, device, force=False, @@ -593,14 +593,39 @@ def attach_device( self.write_lock() - c = OcfCompletion([("cache", c_void_p), ("priv", c_void_p), ("error", c_int)]) + def callback(c): + self.write_unlock() + self.free_device_config(device_config) + + c = OcfCompletion( + [("cache", c_void_p), ("priv", c_void_p), ("error", c_int)], + callback=callback + ) self.owner.lib.ocf_mngt_cache_attach(self.cache_handle, byref(attach_cfg), c, None) - c.wait() - self.write_unlock() + return c - self.free_device_config(device_config) + def attach_device( + self, + device, + force=False, + perform_test=False, + cache_line_size=None, + open_cores=False, + disable_cleaner=False, + ): + + c = self.attach_device_async( + device, + force, + perform_test, + cache_line_size, + open_cores, + disable_cleaner + ) + + c.wait() if c.results["error"]: raise OcfError( diff --git a/tests/functional/pyocf/types/shared.py b/tests/functional/pyocf/types/shared.py index a4cd19a3..4f1c5de5 100644 --- a/tests/functional/pyocf/types/shared.py +++ b/tests/functional/pyocf/types/shared.py @@ -83,7 +83,7 @@ def __getitem__(self, key): except KeyError: raise KeyError(f"No completion argument {key} specified") - def __init__(self, completion_args: list, context=None): + def __init__(self, completion_args: list, context=None, callback=None): """ Provide ctypes arg list, and optionally index of status argument in completion function which will be extracted (default - last argument). @@ -95,6 +95,7 @@ def __init__(self, completion_args: list, context=None): self.results = OcfCompletion.CompletionResult(completion_args) self._as_parameter_ = self.callback self.context = context + self.user_callback = callback @property def callback(self): @@ -102,6 +103,8 @@ def callback(self): def complete(*args): self.results.results = args self.e.set() + if self.user_callback: + self.user_callback(self) return complete diff --git a/tests/functional/tests/engine/test_d2c.py b/tests/functional/tests/engine/test_d2c.py index 5fb67107..f1a0fde5 100644 --- a/tests/functional/tests/engine/test_d2c.py +++ b/tests/functional/tests/engine/test_d2c.py @@ -4,6 +4,7 @@ # +from time import sleep import pytest @@ -18,7 +19,6 @@ CORE_SIZE = 4096 -@pytest.mark.xfail(reason="Data corruption when switching from D2C") def test_d2c_io(pyocf_ctx): """ Start cache in D2C @@ -46,7 +46,8 @@ def test_d2c_io(pyocf_ctx): d2c_data.write(b"a" * CORE_SIZE, CORE_SIZE) d2c_io.set_data(d2c_data) - cache.attach_device(cache_device) + c = cache.attach_device_async(cache_device) + sleep(1) wt_io = vol.new_io(queue, 0, CORE_SIZE, IoDir.WRITE, 0, 0) wt_data = Data(CORE_SIZE) @@ -55,11 +56,19 @@ def test_d2c_io(pyocf_ctx): wt_completion = Sync(wt_io).submit() assert int(wt_completion.results["err"]) == 0 - assert cache.get_stats()["req"]["wr_full_misses"]["value"] == 1 d2c_completion = Sync(d2c_io).submit() assert int(d2c_completion.results["err"]) == 0 - assert cache.get_stats()["req"]["wr_pt"]["value"] == 1 + + c.wait() + + if c.results["error"]: + raise OcfError( + f"Attaching cache device failed", + c.results["error"], + ) + + assert cache.get_stats()["req"]["wr_pt"]["value"] == 2 read_io = vol.new_io(queue, 0, CORE_SIZE, IoDir.READ, 0, 0) read_data = Data(CORE_SIZE) @@ -67,6 +76,7 @@ def test_d2c_io(pyocf_ctx): read_completion = Sync(read_io).submit() assert int(read_completion.results["err"]) == 0 + assert cache.get_stats()["req"]["rd_full_misses"]["value"] == 1 cache.stop()