From a384cb9b81e8e060808c0cac392022499887824a Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 16 Jul 2014 16:43:49 +0900 Subject: [PATCH 1/2] Fix bitcask_pulse model to know when a keydir may have been frozen. * Rename `bitcask_nifs:keydir_remove/3` -> * `bitcask_nifs:keydir_remove_int/3` * Use a compile-time -ifdef magic to add `pulse_log_call()` wrappers around all keydir NIF calls that can mutate the keydir. * Make several changes to the bitcask_pulse model: * Filter out bad reads from either kind of bad folds * If a bad read happens, that's bad * If a bad fold happens, then figure out during what period of at-last-one-folder-is-running-time that the bad result appeared. Then figure out if a keydir mutating NIF was called; if yes, then it's possible that the keydir was frozen ... we don't know for sure, but we won't cause the model to fail in this case. --- c_src/bitcask_nifs.c | 2 +- src/bitcask_nifs.erl | 28 ++++++++++++-- test/bitcask_pulse.erl | 85 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 104 insertions(+), 11 deletions(-) diff --git a/c_src/bitcask_nifs.c b/c_src/bitcask_nifs.c index 41f7b230..c0c29ea1 100644 --- a/c_src/bitcask_nifs.c +++ b/c_src/bitcask_nifs.c @@ -386,7 +386,7 @@ static ErlNifFunc nif_funcs[] = {"keydir_put_int", 10, bitcask_nifs_keydir_put_int}, {"keydir_get_int", 3, bitcask_nifs_keydir_get_int}, {"keydir_get_epoch", 1, bitcask_nifs_keydir_get_epoch}, - {"keydir_remove", 3, bitcask_nifs_keydir_remove}, + {"keydir_remove_int", 3, bitcask_nifs_keydir_remove}, {"keydir_remove_int", 6, bitcask_nifs_keydir_remove}, {"keydir_copy", 1, bitcask_nifs_keydir_copy}, {"keydir_itr_int", 4, bitcask_nifs_keydir_itr}, diff --git a/src/bitcask_nifs.erl b/src/bitcask_nifs.erl index 12486e1d..27f27eac 100644 --- a/src/bitcask_nifs.erl +++ b/src/bitcask_nifs.erl @@ -213,11 +213,13 @@ keydir_put(Ref, Key, FileId, TotalSz, Offset, Tstamp, NowSec, OldFileId, OldOffs keydir_put(Ref, Key, FileId, TotalSz, Offset, Tstamp, NowSec, NewestPutB, OldFileId, OldOffset) -> + pulse_log_call({keydir_mutate,put,Key}, fun() -> keydir_put_int(Ref, Key, FileId, TotalSz, <>, Tstamp, NowSec, if not NewestPutB -> 0; true -> 1 end, - OldFileId, <>). + OldFileId, <>) + end). keydir_put_int(_Ref, _Key, _FileId, _TotalSz, _Offset, _Tstamp, _NowSec, _NewestPutI, _OldFileId, _OldOffset) -> @@ -244,12 +246,19 @@ keydir_get_epoch(_Ref) -> keydir_remove(Ref, Key) -> keydir_remove(Ref, Key, bitcask_time:tstamp()). -keydir_remove(_Ref, _Key, _TStamp) -> +keydir_remove(Ref, Key, TStamp) -> + pulse_log_call({keydir_mutate,remove,Key}, fun() -> + keydir_remove_int(Ref, Key, TStamp) + end). + +keydir_remove_int(_Ref, _Key, _TStamp) -> erlang:nif_error({error, not_loaded}). keydir_remove(Ref, Key, Tstamp, FileId, Offset) -> - keydir_remove_int(Ref, Key, Tstamp, FileId, <>, - bitcask_time:tstamp()). + pulse_log_call({keydir_mutate,remove,Key}, fun() -> + keydir_remove_int(Ref, Key, Tstamp, FileId, <>, + bitcask_time:tstamp()) + end). keydir_remove_int(_Ref, _Key, _Tstamp, _FileId, _Offset, _TStamp) -> erlang:nif_error({error, not_loaded}). @@ -347,6 +356,13 @@ keydir_wait_ready(N) -> end, keydir_wait_ready(N-1) end. + +pulse_log_call(Tag, Fun) -> + event_logger:event({call, self(), Tag}), + __Result = Fun(), + event_logger:event({result, self(), __Result}), + __Result. + -else. keydir_wait_ready() -> receive @@ -355,6 +371,10 @@ keydir_wait_ready() -> error -> {error, shutdown} end. + +pulse_log_call(_Tag, Fun) -> + Fun(). + -endif. keydir_info(_Ref) -> diff --git a/test/bitcask_pulse.erl b/test/bitcask_pulse.erl index abdd9701..7bb5cbe0 100644 --- a/test/bitcask_pulse.erl +++ b/test/bitcask_pulse.erl @@ -574,7 +574,7 @@ check_trace(Trace) -> {ok, U} -> U end, case lists:member(V, Vs) of true -> []; %% V is a good result - false -> [{bad, Pid, {get, K, Vs, V}}] %% V is a bad result! + false -> [{bad_get, Pid, {K, Vs, V}}] %% V is a bad result! end; %% Check a call to fold_keys ({fold_keys, Pid, Vals}, {result, Pid, Keys}) -> @@ -583,7 +583,7 @@ check_trace(Trace) -> %% K not in Keys ==> not_found in Vals[K] case check_fold_keys_result(orddict:to_list(Vals), lists:sort(Keys)) of true -> []; - false -> [{bad, Pid, {fold_keys, orddict:to_list(Vals), lists:sort(Keys)}}] + false -> [{bad_fold_keys, Pid, {orddict:to_list(Vals), lists:sort(Keys)}}] end; %% Check a call to fold ({fold, Pid, Vals}, {result, Pid, KVs}) -> @@ -592,20 +592,83 @@ check_trace(Trace) -> %% K not in KVs ==> not_found in Vals[K] case check_fold_result(orddict:to_list(Vals), lists:sort(KVs)) of true -> []; - false -> [{bad, Pid, {fold, orddict:to_list(Vals), lists:sort(KVs)}}] + false -> [{bad_fold, Pid, {orddict:to_list(Vals), lists:sort(KVs)}}] end end, eqc_temporal:union(Events, eqc_temporal:map(fun(D) -> {values, D} end, ValueDict))), %% Filter out the bad stuff from the Reads relation. - Bad = eqc_temporal:map(fun(X={bad, _, _}) -> X end, Reads), + BadGets = eqc_temporal:map(fun(X={bad_get, _, _}) -> X end, Reads), + BadFolds = eqc_temporal:map(fun(X={bad_fold_keys, _, _}) -> X; + (X={bad_fold, _, _}) -> X end, Reads), + + FoldOps = eqc_temporal:stateful( + fun({call, Pid, {fold_keys, _H}}) -> {folding, Pid}; + ({call, Pid, {fold, _H}}) -> {folding, Pid} end, + fun({folding, Pid}, {result, Pid, _Keys}) -> + [] + end, Events), + %% The keydir *may* be frozen during any unbroken overlapping + %% sequence of folds. Let's figure out when those time periods are. + FoldingR = eqc_temporal:map(fun({folding,_}) -> folding_N_in_progress end, + FoldOps), + Mutations = eqc_temporal:stateful( + fun({call, Pid, {keydir_mutate, _How, K}}) -> + {mutation_in_progress, Pid, K} + end, + fun({mutation_in_progress, Pid, _K}, {result, Pid, _R}) -> + [] + end, Events), + + BadFoldStartTimes = [Start || {Start, _End, Facts} <- BadFolds, + Fact <- Facts, + element(1, Fact) == bad_fold_keys + orelse + element(1, Fact) == bad_fold], + BadFoldExceptions = + [begin + %% Construct a pseudo-relation that contains the known + %% at-least-one-folder for each of our bad folds. + Foldz = [X || X = {Start,End,_Facts} <- FoldingR, + is_integer(Start) andalso + is_integer(End) andalso + Start =< BadStart andalso + BadStart =< End], + %% Fix that pseudo-relation so that eqc_temporal likes it. + Foldz2 = temporal_ize(Foldz), + %% If there was a mutation that happened during this + %% at-least-one-folder time, then we'll discover it with + %% this intersection() trick: if the intersection is + %% not empty, then there was no mutation that could + %% have frozen the keydir, so frozen-ness cannot explain + %% this observed fold problem. + FoldzMap = eqc_temporal:map(fun(_) -> i_hope end, + Foldz2), + MutationsMap = eqc_temporal:map(fun(_) -> i_hope end, + Mutations), + Intersection = eqc_temporal:intersection(FoldzMap, MutationsMap), + case eqc_temporal:is_false(Intersection) of + true -> bummer_the_intersection_was_empty; + false -> no_exception_this_time + end + end || BadStart <- BadFoldStartTimes], + ?WHENFAIL( begin ?QC_FMT("Time: ~p ~p\n", [date(), time()]), ?QC_FMT("Events:\n~p\n", [Events]), - ?QC_FMT("Bad:\n~p\n", [Bad]) + ?QC_FMT("Calls:\n~p\n", [Calls]), + ?QC_FMT("FoldingR:\n~p\n", [FoldingR]), + FoldMutations = eqc_temporal:unions([FoldingR, Mutations, BadGets, BadFolds]), + ?QC_FMT("FoldMutations:\n~p\n", [FoldMutations]), + ?QC_FMT("BadGets:\n~p\n", [BadGets]), + ?QC_FMT("BadFolds:\n~p\n", [BadFolds]), + ?QC_FMT("BadFoldExceptions:\n~p\n", [BadFoldExceptions]) end, - eqc_temporal:is_false(Bad)). + eqc_temporal:is_false(BadGets) andalso + (eqc_temporal:is_false(BadFolds) + orelse + lists:usort(BadFoldExceptions) == [no_exception_this_time])). clean_events(Events) -> lists:map(fun pprint_ref_pid/1, Events). @@ -1106,6 +1169,16 @@ mangle_temporal_relation_with_finite_time([{Start, infinity, [_|_]=L}]) -> mangle_temporal_relation_with_finite_time([H|T]) -> [H|mangle_temporal_relation_with_finite_time(T)]. +temporal_ize(L) -> + temporal_ize(L, 0). + +temporal_ize([], Start) -> + [{Start, infinity, []}]; +temporal_ize([{_, infinity, _}]=Only, _Start) -> + Only; +temporal_ize([{T1, T2, _}=X|Tail], Start) -> + [{Start, T1, []}, X|temporal_ize(Tail, T2)]. + %% This version of check_no_tombstones() plus the fold_keys %% implementation has a flaw that this QuickCheck model isn't smart %% enough to figure out: From 9aaa165ff1881882fbd6df3a2620212d5e4ec276 Mon Sep 17 00:00:00 2001 From: Scott Lystig Fritchie Date: Wed, 16 Jul 2014 19:08:02 +0900 Subject: [PATCH 2/2] Fix up this branch's PULSE model with a couple more things 1. Use `keydir_call` and `keydir_return` for the trace labels for NIF mutations. 2. Only consider *successful* mutations when checking for keydir frozenness. --- src/bitcask_nifs.erl | 4 ++-- test/bitcask_pulse.erl | 24 +++++++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/bitcask_nifs.erl b/src/bitcask_nifs.erl index 27f27eac..cadec112 100644 --- a/src/bitcask_nifs.erl +++ b/src/bitcask_nifs.erl @@ -358,9 +358,9 @@ keydir_wait_ready(N) -> end. pulse_log_call(Tag, Fun) -> - event_logger:event({call, self(), Tag}), + event_logger:event({keydir_call, self(), Tag}), __Result = Fun(), - event_logger:event({result, self(), __Result}), + event_logger:event({keydir_result, self(), __Result}), __Result. -else. diff --git a/test/bitcask_pulse.erl b/test/bitcask_pulse.erl index 7bb5cbe0..c8af3fca 100644 --- a/test/bitcask_pulse.erl +++ b/test/bitcask_pulse.erl @@ -459,7 +459,10 @@ copy_bitcask_app() -> %% or the new value until the operation has finished. Likewise, a get %% (or a fold/fold_keys) may see any of the potential values if the %% operation overlaps with a put/delete. -check_trace(Trace) -> +check_trace(Trace0) -> + %% Amend the trace to include future return value for keydir_mutate + Trace = add_keydir_mutate_results_to_trace(lists:sort(Trace0)), + %% Turn the trace into a temporal relation Events0 = eqc_temporal:from_timed_list(Trace), %% convert pids and refs into something the reader won't choke on, @@ -613,10 +616,10 @@ check_trace(Trace) -> FoldingR = eqc_temporal:map(fun({folding,_}) -> folding_N_in_progress end, FoldOps), Mutations = eqc_temporal:stateful( - fun({call, Pid, {keydir_mutate, _How, K}}) -> + fun({keydir_call, Pid, {keydir_mutate, _How, K, will_be, ok}}) -> {mutation_in_progress, Pid, K} end, - fun({mutation_in_progress, Pid, _K}, {result, Pid, _R}) -> + fun({mutation_in_progress, Pid, _K}, {keydir_result, Pid, ok}) -> [] end, Events), @@ -670,6 +673,21 @@ check_trace(Trace) -> orelse lists:usort(BadFoldExceptions) == [no_exception_this_time])). +add_keydir_mutate_results_to_trace( + [{TS, {keydir_call, Pid, {keydir_mutate, X, Y}}}|Rest]) -> + Res = trace_lookahead_pid(Pid, Rest), + New = {TS, {call, Pid, {keydir_mutate, X, Y, will_be, Res}}}, + [New|add_keydir_mutate_results_to_trace(Rest)]; +add_keydir_mutate_results_to_trace([X|Rest]) -> + [X|add_keydir_mutate_results_to_trace(Rest)]; +add_keydir_mutate_results_to_trace([]) -> + []. + +trace_lookahead_pid(Pid, [{_TS, {keydir_result, Pid, Res}}|_]) -> + Res; +trace_lookahead_pid(Pid, [_H|T]) -> + trace_lookahead_pid(Pid, T). + clean_events(Events) -> lists:map(fun pprint_ref_pid/1, Events).