From 3d3cee0b25621d8c7d8b575e84a84e38943e09d3 Mon Sep 17 00:00:00 2001
From: "Engel A. Sanchez"
Date: Thu, 31 Jan 2013 11:44:07 -0500
Subject: [PATCH 001/139] Verify fix to writes on reads when LWW+Bitcask

This verifies the fix to issue basho/riak_kv#334

The test needs to run with bitcask:
* It sets last_write_wins on a bucket
* Writes an object
* Repeatedly reads it
* Verifies that the write/read repair count doesn't change
---
 tests/verify_no_writes_on_read.erl | 49 ++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tests/verify_no_writes_on_read.erl

diff --git a/tests/verify_no_writes_on_read.erl b/tests/verify_no_writes_on_read.erl
new file mode 100644
index 000000000..ca4e95d31
--- /dev/null
+++ b/tests/verify_no_writes_on_read.erl
@@ -0,0 +1,49 @@
+-module(verify_no_writes_on_read).
+-behaviour(riak_test).
+-export([confirm/0]).
+-compile(export_all).
+-include_lib("eunit/include/eunit.hrl").
+
+-define(NUM_NODES, 3).
+-define(BUCKET, <<"bucket">>).
+
+confirm() ->
+    Backend = proplists:get_value(backend, riak_test_runner:metadata()),
+    lager:info("Running with backend ~p", [Backend]),
+    ?assertEqual(bitcask, Backend),
+    [Node1 | _Rest] = _Nodes = rt:build_cluster(?NUM_NODES),
+    PBC = rt:pbc(Node1),
+    lager:info("Setting last write wins on bucket"),
+    B = ?BUCKET,
+    ?assertMatch(ok, rpc:call(Node1, riak_core_bucket, set_bucket, [B, [{last_write_wins, true}]])),
+    BProps = rpc:call(Node1, riak_core_bucket, get_bucket, [B]),
+    lager:info("Bucket properties ~p", [BProps]),
+    K = <<"Key">>,
+    V = <<"Value">>,
+    Obj = riakc_obj:new(B, K, V),
+    lager:info("Writing a simple object"),
+    riakc_pb_socket:put(PBC,Obj),
+    lager:info("Waiting some time to let the stats update"),
+    timer:sleep(10000),
+    OrigStats = get_write_stats(Node1),
+    lager:info("Stats are now ~p", [OrigStats]),
+    Read1 = fun(_N) ->
+                    ?assertMatch({ok,_O}, riakc_pb_socket:get(PBC, B, K))
+            end,
+    lager:info("Repeatedly read that object. There should be no writes"),
+    lists:foreach(Read1, lists:seq(1,100)),
+    lager:info("Waiting some time to let the stats update"),
+    timer:sleep(10000),
+    Stats = get_write_stats(Node1),
+    lager:info("Stats are now ~p", [Stats]),
+    ?assertEqual(OrigStats, Stats),
+    riakc_pb_socket:stop(PBC),
+    pass.
+
+
+get_write_stats(Node) ->
+    Stats = rpc:call(Node, riak_kv_stat, get_stats, []),
+    Puts = proplists:get_value(vnode_puts, Stats),
+    ReadRepairs = proplists:get_value(read_repairs, Stats),
+    [{puts, Puts}, {read_repairs, ReadRepairs}].
+

From b62a568089ffeb46875fa915567b2d7e118c6356 Mon Sep 17 00:00:00 2001
From: Kelly McLaughlin
Date: Mon, 1 Jul 2013 14:04:24 -0600
Subject: [PATCH 002/139] Add option to allow tests in a directory to be
 explicitly skipped

Add a skip option for use with the dir option so that specified tests in
a directory can be skipped during a test run. This is useful when a
directory contains a large number of tests of which only a small number
are appropriate to run in certain circumstances. A good example is
providing a way for open source users to build and run the RiakCS tests
while avoiding the failure that would occur when trying to run the
replication tests.
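As a sketch of the intended usage, where the flags come from the option
table in the diff below and the directory and test names are only
illustrative:

    riak_test -d ebin -x replication,replication2

Internally the skip list is simply the comma-separated argument split on
commas, e.g. in an Erlang shell:

    1> string:tokens("replication,replication2", [$,]).
    ["replication","replication2"]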
--- src/riak_test_escript.erl | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/riak_test_escript.erl b/src/riak_test_escript.erl index 95f288f90..145b2acba 100644 --- a/src/riak_test_escript.erl +++ b/src/riak_test_escript.erl @@ -35,6 +35,7 @@ cli_options() -> {tests, $t, "tests", string, "specifies which tests to run"}, {suites, $s, "suites", string, "which suites to run"}, {dir, $d, "dir", string, "run all tests in the specified directory"}, + {skip, $x, "skip", string, "list of tests to skip in a directory"}, {verbose, $v, "verbose", undefined, "verbose output"}, {outdir, $o, "outdir", string, "output directory"}, {backend, $b, "backend", atom, "backend to test [memory | bitcask | eleveldb]"}, @@ -168,7 +169,8 @@ parse_command_line_tests(ParsedArgs) -> [code:add_patha(CodePath) || CodePath <- CodePaths, CodePath /= "."], Dirs = proplists:get_all_values(dir, ParsedArgs), - DirTests = lists:append([load_tests_in_dir(Dir) || Dir <- Dirs]), + SkipTests = string:tokens(proplists:get_value(skip, ParsedArgs, []), [$,]), + DirTests = lists:append([load_tests_in_dir(Dir, SkipTests) || Dir <- Dirs]), lists:foldl(fun(Test, Tests) -> [{ list_to_atom(Test), @@ -268,7 +270,7 @@ run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> {ok, Base} -> %% Now push up the artifacts [ giddyup:post_artifact(Base, File) || File <- rt:get_node_logs() ] - end + end end, SingleTestResult. @@ -324,14 +326,28 @@ results_filter(Result) -> true end. -load_tests_in_dir(Dir) -> +load_tests_in_dir(Dir, SkipTests) -> case filelib:is_dir(Dir) of true -> code:add_path(Dir), - lists:sort([ string:substr(Filename, 1, length(Filename) - 5) || Filename <- filelib:wildcard("*.beam", Dir)]); + lists:sort( + lists:foldl(load_tests_folder(SkipTests), + [], + filelib:wildcard("*.beam", Dir))); _ -> io:format("~s is not a dir!~n", [Dir]) end. +load_tests_folder(SkipTests) -> + fun(X, Acc) -> + Test = string:substr(X, 1, length(X) - 5), + case lists:member(Test, SkipTests) of + true -> + Acc; + false -> + [Test | Acc] + end + end. + so_kill_riak_maybe() -> io:format("~n~nSo, we find ourselves in a tricky situation here. ~n"), io:format("You've run a single test, and it has failed.~n"), From d326a7c267d0b552c65f93cf8fb70c656cf00fb2 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Fri, 6 Dec 2013 15:40:11 -0500 Subject: [PATCH 003/139] add shell completion for test names --- utils/riak_test | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 utils/riak_test diff --git a/utils/riak_test b/utils/riak_test new file mode 100644 index 000000000..ecd930538 --- /dev/null +++ b/utils/riak_test @@ -0,0 +1,19 @@ +# bash_completion for riak_test +_riak_test() +{ + local cur prev + _get_comp_words_by_ref cur prev + + case $prev in + riak_test) + COMPREPLY=( $( compgen -W "-h -c -t -s -d -v -o -b -u -r" -- "$cur" ) ) + ;; + -t) + RT_TESTS=`grep -l confirm ./tests/*.erl | xargs basename -s .erl` + COMPREPLY=( $( compgen -W "$RT_TESTS" -- "$cur") ) + ;; + + esac +} +complete -F _riak_test riak_test + From 8d13052d870ded5949e47927ea9150aab52a42bc Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Fri, 6 Dec 2013 15:48:34 -0500 Subject: [PATCH 004/139] updated readme w/ shell completion doc --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 146481f29..9caa61715 100644 --- a/README.md +++ b/README.md @@ -374,3 +374,9 @@ the config survive restarts and are essentially always in play. 
 A user can also manually add an intercept by making an `rpc` call from the
 test code to the remote node. This method is ephemeral and the intercept
 will not survive restarts.
+
+
+#### Shell Completion
+
+To have bash shell complete test names, source the `utils/riak_test` file.
+

From a0ff4a20e4feb838b2341fef5e6af3d00dd506cc Mon Sep 17 00:00:00 2001
From: Jon Anderson
Date: Wed, 27 Nov 2013 13:26:22 -0500
Subject: [PATCH 005/139] added checks of rtq pending after each test on all
 nodes

merged in missing functions
added back siblings setting
took out duplicate functions from merge
---
 tests/rt_cascading.erl | 72 ++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 16 deletions(-)

diff --git a/tests/rt_cascading.erl b/tests/rt_cascading.erl
index 27e817e1e..911f638dc 100644
--- a/tests/rt_cascading.erl
+++ b/tests/rt_cascading.erl
@@ -22,7 +22,7 @@
 confirm() ->
     %% test requires allow_mult=false b/c of rt:systest_read
     rt:set_conf(all, [{"buckets.default.siblings", "off"}]),
-    
+
     case eunit:test(?MODULE, [verbose]) of
         ok ->
             pass;
@@ -110,8 +110,12 @@ simple_test_() ->
             riakc_pb_socket:stop(Client),
             ?assertEqual(Bin, maybe_eventually_exists(State#simple_state.middle, ?bucket, Bin)),
             ?assertEqual(Bin, maybe_eventually_exists(State#simple_state.ending, ?bucket, Bin))
-        end}
-
+        end},
+        {"check pendings", fun() ->
+            wait_until_pending_count_zero([State#simple_state.middle,
+                                           State#simple_state.beginning,
+                                           State#simple_state.ending])
+        end}
     ] end}}.

 big_circle_test_() ->
@@ -224,8 +228,10 @@ big_circle_test_() ->
            % so, by adding 4 clusters, we've added 2 overlaps.
            % best guess based on what's above is:
            % NumDuplicateWrites = ceil(NumClusters/2 - 1.5)
-        end}
-
+        end},
+        {"check pendings", fun() ->
+            wait_until_pending_count_zero(Nodes)
+        end}
     ] end}}.

 circle_test_() ->
@@ -287,8 +293,10 @@ circle_test_() ->
             Status = rpc:call(Two, riak_repl2_rt, status, []),
             [SinkData] = proplists:get_value(sinks, Status, [[]]),
             ?assertEqual(2, proplists:get_value(expect_seq, SinkData))
-        end}
-
+        end},
+        {"check pendings", fun() ->
+            wait_until_pending_count_zero(Nodes)
+        end}
     ] end}}.

 pyramid_test_() ->
@@ -339,8 +347,10 @@ pyramid_test_() ->
                 ?debugFmt("Checking ~p", [N]),
                 ?assertEqual(Bin, maybe_eventually_exists(N, Bucket, Bin))
             end, Nodes)
-        end}
-
+        end},
+        {"check pendings", fun() ->
+            wait_until_pending_count_zero(Nodes)
+        end}
     ] end}}.

 diamond_test_() ->
@@ -431,8 +441,10 @@ diamond_test_() ->
             [Sink2] = proplists:get_value(sinks, Status2, [[]]),
             GotSeq = proplists:get_value(expect_seq, Sink2),
             ?assertEqual(ExpectSeq, GotSeq)
-        end}
-
+        end},
+        {"check pendings", fun() ->
+            wait_until_pending_count_zero(Nodes)
+        end}
     ] end}}.

 circle_and_spurs_test_() ->
@@ -505,8 +517,10 @@ circle_and_spurs_test_() ->
                 ?debugFmt("Checking ~p", [N]),
                 ?assertEqual({error, notfound}, maybe_eventually_exists(N, Bucket, Bin))
             end || N <- Nodes, N =/= NorthSpur]
-        end}
-
+        end},
+        {"check pendings", fun() ->
+            wait_until_pending_count_zero(Nodes)
+        end}
     ] end}}.

 mixed_version_clusters_test_() ->
@@ -688,7 +702,10 @@ Reses)]),
             end,
             [MakeTest(Node, N) || Node <- Nodes, N <- lists:seq(1, 3)]
         end
-    }}
+    }},
+    {"check pendings", fun() ->
+        wait_until_pending_count_zero(Nodes)
+    end}
     ] end}}.
@@ -776,8 +793,10 @@ new_to_old_test_dep() -> riakc_pb_socket:stop(Client), ?assertEqual(Bin, maybe_eventually_exists(New3, ?bucket, Bin)), ?assertEqual({error, notfound}, maybe_eventually_exists(New1, ?bucket, Bin)) - end} - + end}, + {"check pendings", fun() -> + wait_until_pending_count_zero(["new1", "old2", "new3"]) + end} ] end}}. ensure_ack_test_() -> @@ -1133,3 +1152,24 @@ maybe_skip_teardown(TearDownFun) -> end end. +wait_until_pending_count_zero(Nodes) -> + WaitFun = fun() -> + {Statuses, _} = rpc:multicall(Nodes, riak_repl2_rtq, status, []), + Out = [check_status(S) || S <- Statuses], + not lists:member(false, Out) + end, + ?assertEqual(ok, rt:wait_until(WaitFun)), + ok. + +check_status(Status) -> + case proplists:get_all_values(consumers, Status) of + undefined -> + true; + [] -> + true; + Cs -> + PendingList = [proplists:lookup_all(pending, C) || {_, C} <- lists:flatten(Cs)], + PendingCount = lists:sum(proplists:get_all_values(pending, lists:flatten(PendingList))), + ?debugFmt("RTQ status pending on test node:~p", [PendingCount]), + PendingCount == 0 + end. From 27456146d4159cf2b46f74829829cf68e950c374 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Fri, 6 Dec 2013 16:13:04 -0500 Subject: [PATCH 006/139] added zsh completion from jrwest Source: https://gist.github.com/jrwest/cfad8b14eb286a0bb93d --- README.md | 8 +++++++- utils/{riak_test => riak_test.bash} | 0 utils/riak_test.zsh | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) rename utils/{riak_test => riak_test.bash} (100%) create mode 100644 utils/riak_test.zsh diff --git a/README.md b/README.md index 9caa61715..ba0fe5b12 100644 --- a/README.md +++ b/README.md @@ -378,5 +378,11 @@ intercept will not survive restarts. #### Shell Completion -To have bash shell complete test names, source the `utils/riak_test` file. +##### Bash + +To have bash shell complete test names, source the `utils/riak_test.bash` file. + +##### Zsh + +put `utils/riak_test.zsh` somewhere on `$fpath`. diff --git a/utils/riak_test b/utils/riak_test.bash similarity index 100% rename from utils/riak_test rename to utils/riak_test.bash diff --git a/utils/riak_test.zsh b/utils/riak_test.zsh new file mode 100644 index 000000000..4083b5cae --- /dev/null +++ b/utils/riak_test.zsh @@ -0,0 +1,16 @@ +#compdef riak_test + +_riak_test() { + local curcontext="$curcontext" state line + typeset -A opt_args + + TESTS=$(ls ./tests/*.erl | xargs basename -s .erl | tr '\n' ' ') + CONFIGS=$(cat ~/.riak_test.config | grep \^{ | sed s/{// | tr ', [\n' ' ') + + _arguments \ + "(-t -c -s -d -v -o -b -r)-h[print usage page]" \ + "-c+[specify the project configuraiton file]:config:($CONFIGS)" \ + "-t+[specify which tests to run]:tests:($TESTS)" +} + +_riak_test "$@" From eaf4b2095e71e661acf245a08eb60a47670d563b Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 6 Dec 2013 20:57:43 -0500 Subject: [PATCH 007/139] init ~/rt/riak even if releases aren't built This change allows initialization of ~/rt/riak to proceed even if releases aren't built, or if you run this script outside of your test-releases directory. It will report this to the user however. This is useful if you only want to run riak tests against current. New users of riak test and people working on bugs on crazy branches will especially want this since they likely do not want to take the time to build multiple releases that may not even be relevant to the bug. 
--- bin/rtdev-setup-releases.sh | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/bin/rtdev-setup-releases.sh b/bin/rtdev-setup-releases.sh index 2266aad99..a692e5a21 100755 --- a/bin/rtdev-setup-releases.sh +++ b/bin/rtdev-setup-releases.sh @@ -15,19 +15,31 @@ echo " - Creating $RT_DEST_DIR" rm -rf $RT_DEST_DIR mkdir -p $RT_DEST_DIR -for rel in */dev; do - vsn=$(dirname "$rel") - echo " - Initializing $RT_DEST_DIR/$vsn" - mkdir -p "$RT_DEST_DIR/$vsn" - cp -p -P -R "$rel" "$RT_DEST_DIR/$vsn" -done + +count=$(ls */dev 2> /dev/null | wc -l) +if [ "$count" -ne "0" ] +then + for rel in */dev; do + vsn=$(dirname "$rel") + echo " - Initializing $RT_DEST_DIR/$vsn" + mkdir -p "$RT_DEST_DIR/$vsn" + cp -p -P -R "$rel" "$RT_DEST_DIR/$vsn" + done +else + # This is useful when only testing with 'current' + # The repo still needs to be initialized for current + # and we don't want to bomb out if */dev doesn't exist + touch $RT_DEST_DIR/.current_init + echo "No devdirs found. Not copying any releases." +fi + cd $RT_DEST_DIR -echo " - Creating the git repository" -git init > /dev/null 2>&1 +git init ## Some versions of git and/or OS require these fields git config user.name "Riak Test" git config user.email "dev@basho.com" git add . -git commit -a -m "riak_test init" > /dev/null 2>&1 +git commit -a -m "riak_test init" > /dev/null +echo " - Successfully completed initial git commit of $RT_DEST_DIR" From 28f0eb9bf7588f737e95a52cd998cdc000bd273d Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 16 Dec 2013 13:21:33 -0800 Subject: [PATCH 008/139] Port rt:wait_until_aae_trees_built/1 to master --- src/rt.erl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/rt.erl b/src/rt.erl index 2e3b58429..4b7b32930 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -121,6 +121,7 @@ wait_for_control/2, wait_until/2, wait_until/1, + wait_until_aae_trees_built/1, wait_until_all_members/1, wait_until_all_members/2, wait_until_capability/3, @@ -752,6 +753,28 @@ wait_until_nodes_agree_about_ownership(Nodes) -> Results = [ wait_until_owners_according_to(Node, Nodes) || Node <- Nodes ], ?assert(lists:all(fun(X) -> ok =:= X end, Results)). +%% AAE support +wait_until_aae_trees_built([AnyNode|_]=Nodes) -> + lager:info("Wait until AAE builds all partition trees across ~p", [Nodes]), + %% Wait until all nodes report no undefined trees + rt:wait_until(AnyNode, + fun(_) -> + Busy = lists:foldl( + fun(Node,Busy1) -> + %% will be false when all trees are built on Node + lists:keymember(undefined, + 2, + rpc:call(Node, + riak_kv_entropy_info, + compute_tree_info, + [])) + or Busy1 + end, + false, + Nodes), + not Busy + end). + %%%=================================================================== %%% Ring Functions %%%=================================================================== From 7d1ba9f477a4537fe982148a776afb519c635b87 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 16 Dec 2013 13:23:10 -0800 Subject: [PATCH 009/139] Use additional pmaps for faster testing --- src/rtdev.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rtdev.erl b/src/rtdev.erl index 9ddb1f894..63e7c5a21 100644 --- a/src/rtdev.erl +++ b/src/rtdev.erl @@ -243,7 +243,7 @@ get_backends() -> end. get_backends(DevPath) -> - [get_backend(AppConfig) || AppConfig <- all_the_app_configs(DevPath)]. + rt:pmap(fun get_backend/1, all_the_app_configs(DevPath)). 
get_backend(AppConfig) -> lager:info("get_backend(~s)", [AppConfig]), @@ -395,7 +395,7 @@ stop_all(DevPath) -> end, lager:info("Stopped Node... ~s ~~ ~s.", [Cmd, Status]) end, - [Stop(D) || D <- Devs]; + rt:pmap(Stop, Devs); _ -> lager:info("~s is not a directory.", [DevPath]) end, ok. @@ -587,7 +587,7 @@ get_version() -> teardown() -> rt_cover:maybe_stop_on_nodes(), %% Stop all discoverable nodes, not just nodes we'll be using for this test. - [stop_all(X ++ "/dev") || X <- devpaths()]. + rt:pmap(fun(X) -> stop_all(X ++ "/dev") end, devpaths()). whats_up() -> io:format("Here's what's running...~n"), From cfc606a0d83e8d8cb6a31cbc118b23d1d2db8296 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 16 Dec 2013 13:26:12 -0800 Subject: [PATCH 010/139] Add gh_riak_kv_765 test (AAE improvements) --- tests/gh_riak_kv_765.erl | 115 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 tests/gh_riak_kv_765.erl diff --git a/tests/gh_riak_kv_765.erl b/tests/gh_riak_kv_765.erl new file mode 100644 index 000000000..391bd7428 --- /dev/null +++ b/tests/gh_riak_kv_765.erl @@ -0,0 +1,115 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% This module tests the various AAE additions made in +%% https://github.com/basho/riak_kv/pull/765 + +-module(gh_riak_kv_765). +-compile(export_all). +-include_lib("eunit/include/eunit.hrl"). + +confirm() -> + pass = check_empty_build(), + pass = check_throttle_and_expiration(), + pass. + +check_empty_build() -> + Config = [{riak_core, [{vnode_management_timer, 1000}, + {ring_creation_size, 4}]}], + Nodes = rt:build_cluster(1, Config), + Node = hd(Nodes), + timer:sleep(2000), + Self = self(), + spawn(fun() -> + time_build(Node), + Self ! done + end), + Result = receive + done -> pass + after + 10000 -> + lager:info("Failed. Empty AAE trees were not built instantly"), + fail + end, + rt:clean_cluster(Nodes), + Result. + +check_throttle_and_expiration() -> + Config = [{riak_kv, [{anti_entropy_build_limit, {100, 1000}}, + {anti_entropy_concurrency, 100}, + {anti_entropy_tick, 1000}, + {anti_entropy, {off, []}}]}, + {riak_core, [{vnode_management_timer, 1000}, + {ring_creation_size, 4}]}], + Nodes = rt:build_cluster(1, Config), + Node = hd(Nodes), + timer:sleep(2000), + + lager:info("Write 1000 keys"), + rt:systest_write(Node, 1000), + enable_aae(Node), + time_build(Node), + Duration1 = rebuild(Node, 30000, 1000), + Duration2 = rebuild(Node, 30000, 3500), + ?assert(Duration2 > (2 * Duration1)), + + %% Test manual expiration + lager:info("Disabling automatic expiration"), + rpc:call(Node, application, set_env, + [riak_kv, anti_entropy_expire, never]), + lager:info("Manually expiring hashtree for partition 0"), + expire_tree(Node, 0), + pass. 
+ +time_build(Node) -> + T0 = erlang:now(), + rt:wait_until_aae_trees_built([Node]), + Duration = timer:now_diff(erlang:now(), T0), + lager:info("Build took ~b us", [Duration]), + Duration. + +rebuild(Node, Limit, Wait) -> + rpc:call(Node, application, set_env, + [riak_kv, anti_entropy_build_throttle, {Limit, Wait}]), + rpc:call(Node, application, set_env, + [riak_kv, anti_entropy_expire, 0]), + timer:sleep(1500), + disable_aae(Node), + rpc:call(Node, ets, delete_all_objects, [ets_riak_kv_entropy]), + enable_aae(Node), + time_build(Node). + +enable_aae(Node) -> + rpc:call(Node, riak_kv_entropy_manager, enable, []). + +disable_aae(Node) -> + rpc:call(Node, riak_kv_entropy_manager, disable, []). + +expire_tree(Node, Partition) -> + Now = erlang:now(), + {ok, Tree} = rpc:call(Node, riak_kv_vnode, hashtree_pid, [Partition]), + rpc:call(Node, riak_kv_index_hashtree, expire, [Tree]), + rt:wait_until(Node, + fun(_) -> + Info = rpc:call(Node, riak_kv_entropy_info, compute_tree_info, []), + {0, Built} = lists:keyfind(0, 1, Info), + Built > Now + end), + ok. From bee5ec58b040fbefd44e7727ecc811b55705d7fa Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Thu, 19 Dec 2013 17:23:54 -0500 Subject: [PATCH 011/139] Test options for SSL cipher order, TLS protocol and CRL checking --- tests/pb_cipher_suites.erl | 223 +++++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 tests/pb_cipher_suites.erl diff --git a/tests/pb_cipher_suites.erl b/tests/pb_cipher_suites.erl new file mode 100644 index 000000000..3d5ddd050 --- /dev/null +++ b/tests/pb_cipher_suites.erl @@ -0,0 +1,223 @@ +-module(pb_cipher_suites). + +-behavior(riak_test). +-export([confirm/0]). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("riakc/include/riakc.hrl"). + +-define(assertDenied(Op), ?assertMatch({error, <<"Permission",_/binary>>}, Op)). 
+ +confirm() -> + application:start(crypto), + application:start(asn1), + application:start(public_key), + application:start(ssl), + application:start(inets), + + CertDir = rt_config:get(rt_scratch_dir) ++ "/certs", + + %% make a bunch of crypto keys + make_certs:rootCA(CertDir, "rootCA"), + make_certs:intermediateCA(CertDir, "intCA", "rootCA"), + make_certs:intermediateCA(CertDir, "revokedCA", "rootCA"), + make_certs:endusers(CertDir, "intCA", ["site1.basho.com", "site2.basho.com"]), + make_certs:endusers(CertDir, "rootCA", ["site3.basho.com", "site4.basho.com", "site5.basho.com"]), + make_certs:enduser(CertDir, "revokedCA", "site6.basho.com"), + make_certs:revoke(CertDir, "rootCA", "site5.basho.com"), + make_certs:revoke(CertDir, "rootCA", "revokedCA"), + + %% start a HTTP server to serve the CRLs + inets:start(httpd, [{port, 8000}, {server_name, "localhost"}, + {server_root, "/tmp"}, + {document_root, CertDir}, + {modules, [mod_get]}]), + + lager:info("Deploy some nodes"), + Conf = [ + {riak_api, [ + {certfile, filename:join([CertDir,"site3.basho.com/cert.pem"])}, + {keyfile, filename:join([CertDir, "site3.basho.com/key.pem"])}, + {cacertfile, filename:join([CertDir, "site3.basho.com/cacerts.pem"])} + ]}, + {riak_core, [ + {security, true} + ]}, + {riak_search, [ + {enabled, true} + ]} + ], + + Nodes = rt:build_cluster(4, Conf), + Node = hd(Nodes), + + [_, {pb, {"127.0.0.1", Port}}] = rt:connection_info(Node), + + lager:info("Creating user"), + %% grant the user credentials + ok = rpc:call(Node, riak_core_console, add_user, [["user", "password=password"]]), + + lager:info("Setting password mode on user"), + %% require password on localhost + ok = rpc:call(Node, riak_core_console, add_source, [["user", "127.0.0.1/32", + "password"]]), + + CipherList = "ECDHE-RSA-AES128-SHA256:RC4-SHA", + %% set a simple default cipher list, one good one a and one shitty one + rpc:call(Node, riak_core_security, set_ciphers, + [CipherList]), + + [ECDHE, RC4] = ParsedCiphers = [begin + %% this includes the pseudo random function, which apparently + %% we don't want + {A, B, C, _D} = ssl_cipher:suite_definition(E), + {A, B, C} + end || + E <- element(1, + riak_core_ssl_util:parse_ciphers(CipherList))], + + lager:info("Check that the server's preference for ECDHE-RSA-AES128-SHA256" + "is honored"), + ?assertEqual({ok, {'tlsv1.2', ECDHE}}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{ciphers, + lists:reverse(ParsedCiphers)}]} + ])), + + lager:info("disabling honor_cipher_info"), + rpc:call(Node, application, set_env, [riak_api, honor_cipher_order, + false]), + + lager:info("Check that the client's preference for RC4-SHA" + "is honored"), + ?assertEqual({ok, {'tlsv1.2', RC4}}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{ciphers, + lists:reverse(ParsedCiphers)}]} + ])), + + lager:info("check that connections trying to use tls 1.1 fail"), + ?assertError({badmatch, _}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{versions, ['tlsv1.1']}]} + ])), + + lager:info("check that connections trying to use tls 1.0 fail"), + ?assertError({badmatch, _}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{versions, ['tlsv1']}]} + ])), + 
lager:info("check that connections trying to use ssl 3.0 fail"), + ?assertError({badmatch, _}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{versions, ['sslv3']}]} + ])), + + lager:info("Enable ssl 3.0, tls 1.0 and tls 1.1 and disable tls 1.2"), + rpc:call(Node, application, set_env, [riak_api, tls_protocols, + [sslv3, tlsv1, 'tlsv1.1']]), + + lager:info("check that connections trying to use tls 1.2 fail"), + ?assertError({badmatch, _}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{versions, ['tls1.2']}]} + ])), + + lager:info("check tls 1.1 works"), + ?assertMatch({ok, {'tlsv1.1', _}}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{versions, ['tlsv1.1']}]} + ])), + + lager:info("check tls 1.0 works"), + ?assertMatch({ok, {'tlsv1', _}}, + pb_connection_info(Port, + [{credentials, "user", + "password"}, {cacertfile, + filename:join([CertDir, + "rootCA/cert.pem"])}, + {ssl_opts, [{versions, ['tlsv1']}]} + ])), + + lager:info("Reset tls protocols back to the default"), + rpc:call(Node, application, set_env, [riak_api, tls_protocols, + ['tlsv1.2']]), + + lager:info("checking CRLs are checked for client certificates by" + " default"), + + ok = rpc:call(Node, riak_core_console, add_user, [["site5.basho.com"]]), + + %% require certificate auth on localhost + ok = rpc:call(Node, riak_core_console, add_source, [["site5.basho.com", + "127.0.0.1/32", + "certificate"]]), + + lager:info("Checking revoked certificates are denied"), + ?assertMatch({error, {tcp, _Reason}}, riakc_pb_socket:start("127.0.0.1", Port, + [{credentials, "site5.basho.com", + "password"}, + {cacertfile, filename:join([CertDir, "rootCA/cert.pem"])}, + {certfile, filename:join([CertDir, "site5.basho.com/cert.pem"])}, + {keyfile, filename:join([CertDir, "site5.basho.com/key.pem"])} + ])), + + lager:info("Disable CRL checking"), + rpc:call(Node, application, set_env, [riak_api, check_crl, + false]), + + lager:info("Checking revoked certificates are allowed"), + {ok, PB} = riakc_pb_socket:start("127.0.0.1", Port, + [{credentials, "site5.basho.com", + ""}, + {cacertfile, filename:join([CertDir, "rootCA/cert.pem"])}, + {certfile, filename:join([CertDir, "site5.basho.com/cert.pem"])}, + {keyfile, filename:join([CertDir, "site5.basho.com/key.pem"])} + ]), + ?assertEqual(pong, riakc_pb_socket:ping(PB)), + riakc_pb_socket:stop(PB), + ok. + +pb_get_socket(PB) -> + %% XXX this peeks into the pb_socket internal state and plucks out the + %% socket. If the internal representation ever changes, this will break. + element(6, sys:get_state(PB)). + +pb_connection_info(Port, Config) -> + {ok, PB} = riakc_pb_socket:start("127.0.0.1", Port, Config), + ?assertEqual(pong, riakc_pb_socket:ping(PB)), + + ConnInfo = ssl:connection_info(pb_get_socket(PB)), + + riakc_pb_socket:stop(PB), + ConnInfo. + + From 8aee9d666e32179462fc1261faf95b93cfb864cc Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 30 Dec 2013 12:33:54 -0800 Subject: [PATCH 012/139] Update gh_riak_kv_765 test Make test more deterministic on slow hardware. Add note about test not being designed for Giddyup/CI. 
--- tests/gh_riak_kv_765.erl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/gh_riak_kv_765.erl b/tests/gh_riak_kv_765.erl index 391bd7428..a8f070cdd 100644 --- a/tests/gh_riak_kv_765.erl +++ b/tests/gh_riak_kv_765.erl @@ -21,6 +21,14 @@ %% This module tests the various AAE additions made in %% https://github.com/basho/riak_kv/pull/765 +%% !!! DO NOT ADD TO GIDDYUP +%% +%% This module is not meant to be used as an automated CI test. It +%% exists for development/code review purposes to ensure the changes +%% made in basho/riak_kv#765 work as the pull-request claims. +%% +%% !!! DO NOT ADD TO GIDDYUP + -module(gh_riak_kv_765). -compile(export_all). -include_lib("eunit/include/eunit.hrl"). @@ -67,7 +75,7 @@ check_throttle_and_expiration() -> enable_aae(Node), time_build(Node), Duration1 = rebuild(Node, 30000, 1000), - Duration2 = rebuild(Node, 30000, 3500), + Duration2 = rebuild(Node, 30000, 5500), ?assert(Duration2 > (2 * Duration1)), %% Test manual expiration From 0964abb5e3477596d0f0fe26f542d642f54a19ef Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Tue, 17 Dec 2013 23:21:29 -0500 Subject: [PATCH 013/139] Add an exhaustive test for AAE based replication. Ensure that AAE replication is tested using all possible failure cases when dealing with the riak_kv_index_hashtrees and failed connections. First, use intercepts on riak_kv_vnode and riak_kv_index_hashtree to ensure that we simulate errors on a per node basis, starting with the source cluster and moving to the sink. Simulate ownership transfers, locked and incomplete hashtrees. Verify partitions generate the correct error count, after using a bounded set of retries, and finally remove all intercepts and verify that the fullsync completes and all keys have been migrated between the two clusters. --- .../riak_kv_index_hashtree_intercepts.erl | 16 +++ intercepts/riak_kv_vnode_intercepts.erl | 5 + src/rt.erl | 23 ---- tests/repl_aae_fullsync.erl | 120 ++++++++++++++++-- tests/repl_aae_fullsync_custom_n.erl | 9 +- tests/repl_aae_fullsync_util.erl | 31 +---- tests/repl_util.erl | 32 ++--- 7 files changed, 151 insertions(+), 85 deletions(-) create mode 100644 intercepts/riak_kv_index_hashtree_intercepts.erl diff --git a/intercepts/riak_kv_index_hashtree_intercepts.erl b/intercepts/riak_kv_index_hashtree_intercepts.erl new file mode 100644 index 000000000..9ee9578fe --- /dev/null +++ b/intercepts/riak_kv_index_hashtree_intercepts.erl @@ -0,0 +1,16 @@ +-module(riak_kv_index_hashtree_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +-define(M, riak_kv_index_hashtree_orig). + +%% @doc When attempting to get the lock on a hashtree, return the +%% not_built atom which means the tree has not been computed yet. +not_built(_TreePid, _Type) -> + not_built. + +%% @doc When attempting to get the lock on a hashtree, return the +%% already_locked atom which means the tree is locked by another +%% process. +already_locked(_TreePid, _Type) -> + already_locked. diff --git a/intercepts/riak_kv_vnode_intercepts.erl b/intercepts/riak_kv_vnode_intercepts.erl index 0db0634b5..cf2f79d92 100644 --- a/intercepts/riak_kv_vnode_intercepts.erl +++ b/intercepts/riak_kv_vnode_intercepts.erl @@ -17,6 +17,11 @@ slow_handle_command(Req, Sender, State) -> timer:sleep(500), ?M:handle_command_orig(Req, Sender, State). +%% @doc Return wrong_node error because ownership transfer is happening +%% when trying to get the hashtree pid for a partition. +wrong_node(_Partition) -> + {error, wrong_node}. 
+ %% @doc Make all KV vnode coverage commands take abnormally long. slow_handle_coverage(Req, Filter, Sender, State) -> random:seed(erlang:now()), diff --git a/src/rt.erl b/src/rt.erl index d4b3de1d3..ad5392a8b 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -124,7 +124,6 @@ wait_until/3, wait_until/2, wait_until/1, - wait_until_aae_trees_built/1, wait_until_all_members/1, wait_until_all_members/2, wait_until_capability/3, @@ -756,28 +755,6 @@ wait_until_nodes_agree_about_ownership(Nodes) -> Results = [ wait_until_owners_according_to(Node, Nodes) || Node <- Nodes ], ?assert(lists:all(fun(X) -> ok =:= X end, Results)). -%% AAE support -wait_until_aae_trees_built([AnyNode|_]=Nodes) -> - lager:info("Wait until AAE builds all partition trees across ~p", [Nodes]), - %% Wait until all nodes report no undefined trees - rt:wait_until(AnyNode, - fun(_) -> - Busy = lists:foldl( - fun(Node,Busy1) -> - %% will be false when all trees are built on Node - lists:keymember(undefined, - 2, - rpc:call(Node, - riak_kv_entropy_info, - compute_tree_info, - [])) - or Busy1 - end, - false, - Nodes), - not Busy - end). - %%%=================================================================== %%% Ring Functions %%%=================================================================== diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 50a65d99a..eb6297390 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -1,7 +1,8 @@ %% @doc -%% This module implements a riak_test to exercise the Active Anti-Entropy Fullsync replication. -%% It sets up two clusters, runs a fullsync over all partitions, and verifies the missing keys -%% were replicated to the sink cluster. +%% This module implements a riak_test to exercise the Active +%% Anti-Entropy Fullsync replication. It sets up two clusters, runs a +%% fullsync over all partitions, and verifies the missing keys were +%% replicated to the sink cluster. -module(repl_aae_fullsync). -behavior(riak_test). @@ -14,6 +15,12 @@ confirm() -> NumKeysAOnly = 10000, %% how many keys on A that are missing on B NumKeysBoth = 10000, %% number of common keys on both A and B Conf = [ %% riak configuration + {riak_core, + [ + {ring_creation_size, 8}, + {default_bucket_props, [{n_val, 1}]} + ] + }, {riak_kv, [ %% Specify fast building of AAE trees @@ -31,18 +38,25 @@ confirm() -> ], %% build clusters - {ANodes, BNodes} = repl_aae_fullsync_util:make_clusters(NumNodesWanted, ClusterASize, Conf), + {ANodes, BNodes} = repl_aae_fullsync_util:make_clusters( + NumNodesWanted, ClusterASize, Conf), - %% run test + %% run normal aae repl test aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes), + pass. 
aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes) -> %% populate them with data TestHash = list_to_binary([io_lib:format("~2.16.0b", [X]) || <> <= erlang:md5(term_to_binary(os:timestamp()))]), + TestBucket = <>, - repl_aae_fullsync_util:prepare_cluster_data(TestBucket, NumKeysAOnly, NumKeysBoth, ANodes, BNodes), + repl_aae_fullsync_util:prepare_cluster_data(TestBucket, + NumKeysAOnly, + NumKeysBoth, + ANodes, + BNodes), AFirst = hd(ANodes), BFirst = hd(BNodes), @@ -54,19 +68,99 @@ aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes) -> %% keys: 1..NumKeysAOnly %%--------------------------------------------------------- - rt:log_to_nodes(AllNodes, "Test fullsync from cluster A leader ~p to cluster B", [LeaderA]), - lager:info("Test fullsync from cluster A leader ~p to cluster B", [LeaderA]), + rt:log_to_nodes(AllNodes, + "Test fullsync from cluster A leader ~p to cluster B", + [LeaderA]), + lager:info("Test fullsync from cluster A leader ~p to cluster B", + [LeaderA]), repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes), - {Time,_} = timer:tc(repl_util,start_and_wait_until_fullsync_complete,[LeaderA]), - lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), + + TargetA = hd(ANodes -- [LeaderA]), + TargetB = hd(BNodes), + %% find out how many indices the first node owns + NumIndiciesA = length(rpc:call(TargetA, riak_core_ring, my_indices, + [rt:get_ring(TargetA)])), + NumIndiciesB = length(rpc:call(TargetB, riak_core_ring, my_indices, + [rt:get_ring(TargetB)])), + + lager:info("~p owns ~p indices", [TargetA, NumIndiciesA]), + lager:info("~p owns ~p indices", [TargetB, NumIndiciesB]), + + %% BLOOD FOR THE BLOOD GOD + ?assertEqual([], repl_util:do_write(AFirst, 1, 2000, + <<"scarificial">>, 1)), + + %% Before enabling fullsync, ensure trees on one source node return + %% not_built to defer fullsync process. + validate_fullsync(TargetA, + {riak_kv_index_hashtree, [{{get_lock, 2}, not_built}]}, + LeaderA, + NumIndiciesA), + + validate_fullsync(TargetB, + {riak_kv_index_hashtree, [{{get_lock, 2}, not_built}]}, + LeaderA, + NumIndiciesB), + + %% Before enabling fullsync, ensure trees on one source node return + %% not_built to defer fullsync process. + validate_fullsync(TargetA, + {riak_kv_index_hashtree, [{{get_lock, 2}, already_locked}]}, + LeaderA, + NumIndiciesA), + + validate_fullsync(TargetB, + {riak_kv_index_hashtree, [{{get_lock, 2}, already_locked}]}, + LeaderA, + NumIndiciesB), + + %% emulate the partitoons are changing ownership + validate_fullsync(TargetA, + {riak_kv_vnode, [{{hashtree_pid, 1}, wrong_node}]}, + LeaderA, + NumIndiciesA), + + %% emulate the partitoons are changing ownership on sink + validate_fullsync(TargetB, + {riak_kv_vnode, [{{hashtree_pid, 1}, wrong_node}]}, + LeaderA, + NumIndiciesB), %% verify data is replicated to B - rt:log_to_nodes(AllNodes, "Verify: Reading ~p keys repl'd from A(~p) to B(~p)", - [NumKeysAOnly, LeaderA, BFirst]), + repl_util:wait_until_aae_trees_built([TargetA]), + check_fullsync(LeaderA, 0), + rt:log_to_nodes(AllNodes, + "Verify: Reading ~p keys repl'd from A(~p) to B(~p)", + [NumKeysAOnly, LeaderA, BFirst]), lager:info("Verify: Reading ~p keys repl'd from A(~p) to B(~p)", [NumKeysAOnly, LeaderA, BFirst]), - ?assertEqual(0, repl_util:wait_for_reads(BFirst, 1, NumKeysAOnly, TestBucket, 2)), + ?assertEqual(0, repl_util:wait_for_reads( + BFirst, 1, NumKeysAOnly, TestBucket, 1)), ok. 
+check_fullsync(Node, ExpectedFailures) -> + {Time,_} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [Node]), + lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), + + Status = rpc:call(Node, riak_repl_console, status, [quiet]), + [{_Name, Props}] = proplists:get_value(fullsync_coordinator, Status), + %% check that the expected number of partitions failed to sync + ?assertEqual(ExpectedFailures, proplists:get_value(error_exits, Props)), + %% check that we retried each of them 5 times + ?assert(proplists:get_value(retry_exits, Props) >= ExpectedFailures * 5), + ok. + +reboot(Node) -> + rt:stop_and_wait(Node), + rt:start_and_wait(Node), + rt:wait_for_service(Node, riak_kv). + +validate_fullsync(TargetA, Intercept, LeaderA, NumIndicies) -> + ok = rt_intercept:add(TargetA, Intercept), + check_fullsync(LeaderA, NumIndicies), + reboot(TargetA), + repl_util:wait_until_aae_trees_built([TargetA]). diff --git a/tests/repl_aae_fullsync_custom_n.erl b/tests/repl_aae_fullsync_custom_n.erl index a57970f04..a8294bcd2 100644 --- a/tests/repl_aae_fullsync_custom_n.erl +++ b/tests/repl_aae_fullsync_custom_n.erl @@ -19,6 +19,12 @@ confirm() -> NumKeysAOnly = 10000, %% how many keys on A that are missing on B NumKeysBoth = 10000, %% number of common keys on both A and B Conf = [ %% riak configuration + {riak_core, + [ + {ring_creation_size, 8}, + {default_bucket_props, [{n_val, 1}]} + ] + }, {riak_kv, [ %% Specify fast building of AAE trees @@ -81,7 +87,8 @@ aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes) -> [NumKeysAOnly, LeaderA, BFirst]), lager:info("Verify: Reading ~p keys repl'd from A(~p) to B(~p)", [NumKeysAOnly, LeaderA, BFirst]), - ?assertEqual(0, repl_util:wait_for_reads(BFirst, 1, NumKeysAOnly, TestBucket, 2)), + ?assertEqual(0, repl_util:wait_for_reads(BFirst, 1, NumKeysAOnly, + TestBucket, 1)), ok. diff --git a/tests/repl_aae_fullsync_util.erl b/tests/repl_aae_fullsync_util.erl index 69453ab5e..e89772c19 100644 --- a/tests/repl_aae_fullsync_util.erl +++ b/tests/repl_aae_fullsync_util.erl @@ -30,7 +30,7 @@ make_clusters(NumNodesWanted, ClusterSize, Conf) -> repl_util:make_cluster(BNodes), {ANodes, BNodes}. 
-prepare_cluster_data(TestBucket, NumKeysAOnly, NumKeysBoth, [AFirst|_] = ANodes, [BFirst|_] = BNodes) ->
+prepare_cluster_data(TestBucket, NumKeysAOnly, _NumKeysBoth, [AFirst|_] = ANodes, [BFirst|_] = BNodes) ->
     AllNodes = ANodes ++ BNodes,
     log_to_nodes(AllNodes, "Starting AAE Fullsync test"),

@@ -68,39 +68,14 @@ prepare_cluster_data(TestBucket, NumKeysAOnly, _NumKeysBoth, [AFirst|_] = ANodes,
     %%---------------------------------------------------

     lager:info("Writing ~p keys to A(~p)", [NumKeysAOnly, AFirst]),
-    ?assertEqual([], repl_util:do_write(AFirst, 1, NumKeysAOnly, TestBucket, 2)),
+    ?assertEqual([], repl_util:do_write(AFirst, 1, NumKeysAOnly, TestBucket, 1)),

     %% check that the keys we wrote initially aren't replicated yet, because
     %% we've disabled fullsync_on_connect
     lager:info("Check keys written before repl was connected are not present"),
-    Res2 = rt:systest_read(BFirst, 1, NumKeysAOnly, TestBucket, 2),
+    Res2 = rt:systest_read(BFirst, 1, NumKeysAOnly, TestBucket, 1),
     ?assertEqual(NumKeysAOnly, length(Res2)),

-    %%-----------------------------------------------
-    %% TEST: write data, replicated by RT
-    %% keys: NumKeysAOnly+1..NumKeysAOnly+NumKeysBoth
-    %%-----------------------------------------------
-    %% Enable and start Real-time replication
-    repl_util:enable_realtime(LeaderA, "B"),
-    rt:wait_until_ring_converged(ANodes),
-    repl_util:start_realtime(LeaderA, "B"),
-    rt:wait_until_ring_converged(ANodes),
-
-    log_to_nodes(AllNodes, "Write data to A, verify replication to B via realtime"),
-    %% write some data on A
-    lager:info("Writing ~p more keys to A(~p)", [NumKeysBoth, LeaderA]),
-    ?assertEqual([], repl_util:do_write(LeaderA,
-                                        NumKeysAOnly+1,
-                                        NumKeysAOnly+NumKeysBoth,
-                                        TestBucket, 2)),
-
-    %% verify data is replicated to B
-    lager:info("Verify: Reading ~p keys written to ~p from ~p", [NumKeysBoth, LeaderA, BFirst]),
-    ?assertEqual(0, repl_util:wait_for_reads(BFirst,
-                                             NumKeysAOnly+1,
-                                             NumKeysAOnly+NumKeysBoth,
-                                             TestBucket, 2)),
-
     %% wait for the AAE trees to be built so that we don't get a not_built error
     repl_util:wait_until_aae_trees_built(ANodes),
     repl_util:wait_until_aae_trees_built(BNodes),
diff --git a/tests/repl_util.erl b/tests/repl_util.erl
index bdd63ae1a..8240c2cf8 100644
--- a/tests/repl_util.erl
+++ b/tests/repl_util.erl
@@ -261,26 +261,18 @@ nodes_all_have_version(Nodes, Version) ->
     Nodes == nodes_with_version(Nodes, Version).

 %% AAE support
-wait_until_aae_trees_built([AnyNode|_]=Nodes) ->
-    lager:info("Wait until AAE builds all partition trees across ~p", [Nodes]),
-    %% Wait until all nodes report no undefined trees
-    rt:wait_until(AnyNode,
-                  fun(_) ->
-                          Busy = lists:foldl(
-                                   fun(Node,Busy1) ->
-                                           %% will be false when all trees are built on Node
-                                           lists:keymember(undefined,
-                                                           2,
-                                                           rpc:call(Node,
-                                                                    riak_kv_entropy_info,
-                                                                    compute_tree_info,
-                                                                    []))
-                                           or Busy1
-                                   end,
-                                   false,
-                                   Nodes),
-                          not Busy
-                  end).
+wait_until_aae_trees_built(Cluster) ->
+    lager:info("Check if all trees built for nodes ~p", [Cluster]),
+    F = fun(Node) ->
+                Info = rpc:call(Node,
+                                riak_kv_entropy_info,
+                                compute_tree_info,
+                                []),
+                NotBuilt = [X || {_,undefined}=X <- Info],
+                NotBuilt == []
+        end,
+    [rt:wait_until(Node, F) || Node <- Cluster],
+    ok.

 %% Return the number of partitions in the cluster where Node is a member.
 num_partitions(Node) ->

From b5358e8544ae47b236894d36d3ae2c0bcc237f50 Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Fri, 27 Dec 2013 14:22:17 -0500
Subject: [PATCH 014/139] Update test to default the max_fssource_retries value.
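For reference, the riak_repl section of the test's configuration after
this patch looks like the following (shape taken from the diff below;
the retry count now matches the test's assumption of five retries per
failed partition):

    {riak_repl, [
        {fullsync_strategy, aae},
        {fullsync_on_connect, false},
        {fullsync_interval, disabled},
        {max_fssource_retries, 5}
    ]}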
---
 tests/repl_aae_fullsync.erl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index eb6297390..a35facf56 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -33,7 +33,8 @@ confirm() ->
              [
               {fullsync_strategy, aae},
               {fullsync_on_connect, false},
-              {fullsync_interval, disabled}
+              {fullsync_interval, disabled},
+              {max_fssource_retries, 5}
              ]}
            ],

From 931abb7823f258257898270932f62d3827e823ee Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Fri, 27 Dec 2013 15:45:29 -0500
Subject: [PATCH 015/139] Improve tests.

Improve test infrastructure by extracting common code into functions
that can be used to build out further test cases. Label our existing
unidirectional test as a "simple" test, and begin building out a more
exhaustive test case.

The exhaustive test case is not complete and currently a *work in
progress*. However, it currently configures three clusters and sets up
replication from one source to two sinks. This is as far as the test
gets, because it currently *fails* in setting up the initial connections
to the two clusters, causing fullsync to abort completely.

It appears, though this is unconfirmed, that a crash is causing
cluster_conn to fail and lose information about how to connect to the
clusters, due to the async storage of the remote cluster information to
the ring. (I believe this to be the issue, as this bug was discovered by
Kelly and me during debugging of another repl-related race condition
during startup.)

Removing the second cluster connection call, and letting it proceed with
a connection to only one cluster, allows the test to proceed.
---
 tests/repl_aae_fullsync.erl | 421 +++++++++++++++++++++++++-----------
 1 file changed, 296 insertions(+), 125 deletions(-)

diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index a35facf56..f3bd04701 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -9,69 +9,84 @@
 -export([confirm/0]).
 -include_lib("eunit/include/eunit.hrl").

-confirm() ->
-    NumNodesWanted = 6,         %% total number of nodes needed
-    ClusterASize = 3,           %% how many to allocate to cluster A
-    NumKeysAOnly = 10000,       %% how many keys on A that are missing on B
-    NumKeysBoth = 10000,        %% number of common keys on both A and B
-    Conf = [                    %% riak configuration
-            {riak_core,
-             [
-              {ring_creation_size, 8},
-              {default_bucket_props, [{n_val, 1}]}
-             ]
-            },
-            {riak_kv,
-             [
-              %% Specify fast building of AAE trees
-              {anti_entropy, {on, []}},
-              {anti_entropy_build_limit, {100, 1000}},
-              {anti_entropy_concurrency, 100}
-             ]
-            },
-            {riak_repl,
-             [
-              {fullsync_strategy, aae},
-              {fullsync_on_connect, false},
-              {fullsync_interval, disabled},
-              {max_fssource_retries, 5}
-             ]}
-           ],
-
-    %% build clusters
-    {ANodes, BNodes} = repl_aae_fullsync_util:make_clusters(
-                         NumNodesWanted, ClusterASize, Conf),
-
-    %% run normal aae repl test
-    aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes),
+-import(rt, [deploy_nodes/2]).
+
+-define(TEST_BUCKET, <<"repl-aae-fullsync-systest_a">>).
+-define(NUM_KEYS, 1000).
+
+-define(CONF, [
+        {riak_core,
+            [
+             {ring_creation_size, 8},
+             {default_bucket_props, [{n_val, 1}]}
+            ]
+        },
+        {riak_kv,
+            [
+             %% Specify fast building of AAE trees
+             {anti_entropy, {on, []}},
+             {anti_entropy_build_limit, {100, 1000}},
+             {anti_entropy_concurrency, 100}
+            ]
+        },
+        {riak_repl,
+         [
+          {fullsync_strategy, aae},
+          {fullsync_on_connect, false},
+          {fullsync_interval, disabled},
+          {max_fssource_retries, 5}
+         ]}
+        ]).
+confirm() -> + simple_test(), + exhaustive_test(), pass. -aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes) -> - %% populate them with data - TestHash = list_to_binary([io_lib:format("~2.16.0b", [X]) || - <> <= erlang:md5(term_to_binary(os:timestamp()))]), +simple_test() -> + %% Deploy 6 nodes. + Nodes = deploy_nodes(6, ?CONF), + + %% Break up the 6 nodes into three clustes. + {ANodes, BNodes} = lists:split(3, Nodes), - TestBucket = <>, - repl_aae_fullsync_util:prepare_cluster_data(TestBucket, - NumKeysAOnly, - NumKeysBoth, - ANodes, - BNodes), + lager:info("ANodes: ~p", [ANodes]), + lager:info("BNodes: ~p", [BNodes]), + + lager:info("Building two clusters."), + [repl_util:make_cluster(N) || N <- [ANodes, BNodes]], AFirst = hd(ANodes), BFirst = hd(BNodes), - AllNodes = ANodes ++ BNodes, - LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), - %%--------------------------------------------------------- - %% TEST: fullsync, check that non-RT'd keys get repl'd to B - %% keys: 1..NumKeysAOnly - %%--------------------------------------------------------- + lager:info("Naming clusters."), + repl_util:name_cluster(AFirst, "A"), + repl_util:name_cluster(BFirst, "B"), + + lager:info("Waiting for convergence."), + rt:wait_until_ring_converged(ANodes), + rt:wait_until_ring_converged(BNodes), + + lager:info("Get leaders."), + LeaderA = get_leader(AFirst), + LeaderB = get_leader(BFirst), + + lager:info("Finding connection manager ports."), + BPort = get_port(LeaderB), + + lager:info("Connecting cluster A to B"), + connect_cluster(LeaderA, BPort, "B"), + + %% Write keys prior to fullsync. + write_to_cluster(AFirst, 1, ?NUM_KEYS), + + %% Read keys prior to fullsync. + read_from_cluster(BFirst, 1, ?NUM_KEYS, ?NUM_KEYS), + + %% Wait for trees to compute. + repl_util:wait_until_aae_trees_built(ANodes), + repl_util:wait_until_aae_trees_built(BNodes), - rt:log_to_nodes(AllNodes, - "Test fullsync from cluster A leader ~p to cluster B", - [LeaderA]), lager:info("Test fullsync from cluster A leader ~p to cluster B", [LeaderA]), repl_util:enable_fullsync(LeaderA, "B"), @@ -79,89 +94,245 @@ aae_fs_test(NumKeysAOnly, NumKeysBoth, ANodes, BNodes) -> TargetA = hd(ANodes -- [LeaderA]), TargetB = hd(BNodes), - %% find out how many indices the first node owns - NumIndiciesA = length(rpc:call(TargetA, riak_core_ring, my_indices, - [rt:get_ring(TargetA)])), - NumIndiciesB = length(rpc:call(TargetB, riak_core_ring, my_indices, - [rt:get_ring(TargetB)])), - lager:info("~p owns ~p indices", [TargetA, NumIndiciesA]), - lager:info("~p owns ~p indices", [TargetB, NumIndiciesB]), + %% Flush AAE trees to disk. + perform_sacrifice(AFirst), - %% BLOOD FOR THE BLOOD GOD - ?assertEqual([], repl_util:do_write(AFirst, 1, 2000, - <<"scarificial">>, 1)), + %% Validate replication from A -> B is fault-tolerant regardless of + %% errors occurring on the source or destination. + validate_intercepted_fullsync(TargetA, LeaderA, "B"), + validate_intercepted_fullsync(TargetB, LeaderA, "B"), - %% Before enabling fullsync, ensure trees on one source node return - %% not_built to defer fullsync process. - validate_fullsync(TargetA, - {riak_kv_index_hashtree, [{{get_lock, 2}, not_built}]}, - LeaderA, - NumIndiciesA), + %% Verify data is replicated from A -> B successfully once the + %% intercepts are removed. + validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS), - validate_fullsync(TargetB, - {riak_kv_index_hashtree, [{{get_lock, 2}, not_built}]}, - LeaderA, - NumIndiciesB), + pass. 
- %% Before enabling fullsync, ensure trees on one source node return - %% not_built to defer fullsync process. - validate_fullsync(TargetA, - {riak_kv_index_hashtree, [{{get_lock, 2}, already_locked}]}, - LeaderA, - NumIndiciesA), - - validate_fullsync(TargetB, - {riak_kv_index_hashtree, [{{get_lock, 2}, already_locked}]}, - LeaderA, - NumIndiciesB), - - %% emulate the partitoons are changing ownership - validate_fullsync(TargetA, - {riak_kv_vnode, [{{hashtree_pid, 1}, wrong_node}]}, - LeaderA, - NumIndiciesA), - - %% emulate the partitoons are changing ownership on sink - validate_fullsync(TargetB, - {riak_kv_vnode, [{{hashtree_pid, 1}, wrong_node}]}, - LeaderA, - NumIndiciesB), - - %% verify data is replicated to B - repl_util:wait_until_aae_trees_built([TargetA]), - check_fullsync(LeaderA, 0), - rt:log_to_nodes(AllNodes, - "Verify: Reading ~p keys repl'd from A(~p) to B(~p)", - [NumKeysAOnly, LeaderA, BFirst]), - lager:info("Verify: Reading ~p keys repl'd from A(~p) to B(~p)", - [NumKeysAOnly, LeaderA, BFirst]), - ?assertEqual(0, repl_util:wait_for_reads( - BFirst, 1, NumKeysAOnly, TestBucket, 1)), +exhaustive_test() -> + %% Deploy 6 nodes. + Nodes = deploy_nodes(6, ?CONF), - ok. + %% Break up the 6 nodes into three clustes. + {ANodes, Rest} = lists:split(2, Nodes), + {BNodes, CNodes} = lists:split(2, Rest), + + lager:info("ANodes: ~p", [ANodes]), + lager:info("BNodes: ~p", [BNodes]), + lager:info("BNodes: ~p", [CNodes]), + + lager:info("Building three clusters."), + [repl_util:make_cluster(N) || N <- [ANodes, BNodes, CNodes]], + + AFirst = hd(ANodes), + BFirst = hd(BNodes), + CFirst = hd(CNodes), + + lager:info("Naming clusters."), + repl_util:name_cluster(AFirst, "A"), + repl_util:name_cluster(BFirst, "B"), + repl_util:name_cluster(CFirst, "C"), + + lager:info("Waiting for convergence."), + rt:wait_until_ring_converged(ANodes), + rt:wait_until_ring_converged(BNodes), + rt:wait_until_ring_converged(CNodes), + + lager:info("Get leaders."), + LeaderA = get_leader(AFirst), + LeaderB = get_leader(BFirst), + LeaderC = get_leader(CFirst), + + lager:info("Finding connection manager ports."), + APort = get_port(LeaderA), + BPort = get_port(LeaderB), + CPort = get_port(LeaderC), + + lager:info("Connecting all clusters into fully connected topology."), + connect_cluster(LeaderA, BPort, "B"), + connect_cluster(LeaderA, CPort, "C"), + connect_cluster(LeaderB, APort, "A"), + connect_cluster(LeaderB, CPort, "C"), + connect_cluster(LeaderC, APort, "A"), + connect_cluster(LeaderC, BPort, "B"), + + %% Write keys to cluster A, verify B and C do not have them. + write_to_cluster(AFirst, 1, ?NUM_KEYS), + read_from_cluster(BFirst, 1, ?NUM_KEYS, ?NUM_KEYS), + read_from_cluster(CFirst, 1, ?NUM_KEYS, ?NUM_KEYS), -check_fullsync(Node, ExpectedFailures) -> - {Time,_} = timer:tc(repl_util, - start_and_wait_until_fullsync_complete, - [Node]), + %% Enable fullsync from A to B. + lager:info("Enabling fullsync from A to B"), + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + + %% Enable fullsync from A to C. + %% TODO: This causes the test to fail and fullsync to stall. + lager:info("Enabling fullsync from A to C"), + repl_util:enable_fullsync(LeaderA, "C"), + rt:wait_until_ring_converged(ANodes), + + %% Flush AAE trees to disk. + perform_sacrifice(AFirst), + + %% Verify data is replicated from A -> B successfully once the + %% intercepts are removed. 
+ validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS), + + %% Verify data is replicated from A -> B successfully once the + %% intercepts are removed. + validate_completed_fullsync(LeaderA, CFirst, "C", 1, ?NUM_KEYS), + + pass. + +%% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE +%% trees to flush to disk. +perform_sacrifice(Node) -> + ?assertEqual([], repl_util:do_write(Node, 1, 2000, + <<"scarificial">>, 1)). + +%% @doc Validate fullsync completed and all keys are available. +validate_completed_fullsync(ReplicationLeader, + DestinationNode, + DestinationCluster, + Start, + End) -> + ok = check_fullsync(ReplicationLeader, DestinationCluster, 0), + lager:info("Verify: Reading ~p keys repl'd from A(~p) to B(~p)", + [?NUM_KEYS, ReplicationLeader, DestinationNode]), + ?assertEqual(0, + repl_util:wait_for_reads(DestinationNode, + Start, + End, + ?TEST_BUCKET, + 1)). + +%% @doc Assert we can perform one fullsync cycle, and that the number of +%% expected failures is correct. +check_fullsync(Node, Cluster, ExpectedFailures) -> + {Time, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [Node]), lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), Status = rpc:call(Node, riak_repl_console, status, [quiet]), - [{_Name, Props}] = proplists:get_value(fullsync_coordinator, Status), + + Props = case proplists:get_value(fullsync_coordinator, Status, undefined) of + [{_Name, Props0}] -> + Props0; + Multiple -> + {_Name, Props0} = lists:keyfind(Cluster, 1, Multiple), + Props0 + end, + %% check that the expected number of partitions failed to sync - ?assertEqual(ExpectedFailures, proplists:get_value(error_exits, Props)), + ?assertEqual(ExpectedFailures, + proplists:get_value(error_exits, Props)), + %% check that we retried each of them 5 times - ?assert(proplists:get_value(retry_exits, Props) >= ExpectedFailures * 5), + ?assert( + proplists:get_value(retry_exits, Props) >= ExpectedFailures * 5), + ok. -reboot(Node) -> - rt:stop_and_wait(Node), - rt:start_and_wait(Node), - rt:wait_for_service(Node, riak_kv). +%% @doc Validate fullsync handles errors for all possible intercept +%% combinations. +validate_intercepted_fullsync(InterceptTarget, + ReplicationLeader, + ReplicationCluster) -> + NumIndicies = length(rpc:call(InterceptTarget, + riak_core_ring, + my_indices, + [rt:get_ring(InterceptTarget)])), + lager:info("~p owns ~p indices", + [InterceptTarget, NumIndicies]), + + %% Before enabling fullsync, ensure trees on one source node return + %% not_built to defer fullsync process. + validate_intercepted_fullsync(InterceptTarget, + {riak_kv_index_hashtree, + [{{get_lock, 2}, not_built}]}, + ReplicationLeader, + ReplicationCluster, + NumIndicies), + + %% Before enabling fullsync, ensure trees on one source node return + %% already_locked to defer fullsync process. + validate_intercepted_fullsync(InterceptTarget, + {riak_kv_index_hashtree, + [{{get_lock, 2}, already_locked}]}, + ReplicationLeader, + ReplicationCluster, + NumIndicies), + + %% Emulate in progress ownership transfers. + validate_intercepted_fullsync(InterceptTarget, + {riak_kv_vnode, + [{{hashtree_pid, 1}, wrong_node}]}, + ReplicationLeader, + ReplicationCluster, + NumIndicies). + +%% @doc Add an intercept on a target node to simulate a given failure +%% mode, and then enable fullsync replication and verify completes +%% a full cycle. Subsequently reboot the node. 
+validate_intercepted_fullsync(InterceptTarget,
+ Intercept,
+ ReplicationLeader,
+ ReplicationCluster,
+ NumIndicies) ->
+ lager:info("Validating intercept ~p on ~p.",
+ [Intercept, InterceptTarget]),
+
+ %% Add intercept.
+ ok = rt_intercept:add(InterceptTarget, Intercept),
+
+ %% Verify fullsync.
+ ok = check_fullsync(ReplicationLeader,
+ ReplicationCluster,
+ NumIndicies),
+
+ %% Reboot node.
+ rt:stop_and_wait(InterceptTarget),
+ rt:start_and_wait(InterceptTarget),
+
+ %% Wait for riak_kv and riak_repl to initialize.
+ rt:wait_for_service(InterceptTarget, riak_kv),
+ rt:wait_for_service(InterceptTarget, riak_repl),
+
+ %% Wait until AAE trees are computed on the rebooted node.
+ repl_util:wait_until_aae_trees_built([InterceptTarget]).
+
+%% @doc Given a node, find the port that the cluster manager is
+%% listening on.
+get_port(Node) ->
+ {ok, {_IP, Port}} = rpc:call(Node,
+ application,
+ get_env,
+ [riak_core, cluster_mgr]),
+ Port.
+
+%% @doc Given a node, find out who the current replication leader in its
+%% cluster is.
+get_leader(Node) ->
+ rpc:call(Node, riak_core_cluster_mgr, get_leader, []).
+
+%% @doc Connect two clusters using a given name.
+connect_cluster(Source, Port, Name) ->
+ lager:info("Connecting ~p to ~p for cluster ~p.",
+ [Source, Port, Name]),
+ repl_util:connect_cluster(Source, "127.0.0.1", Port),
+ ?assertEqual(ok, repl_util:wait_for_connection(Source, Name)).
+
+%% @doc Write a series of keys and ensure they are all written.
+write_to_cluster(Node, Start, End) ->
+ lager:info("Writing ~p keys to node ~p.", [End - Start, Node]),
+ ?assertEqual([],
+ repl_util:do_write(Node, Start, End, ?TEST_BUCKET, 1)).

-validate_fullsync(TargetA, Intercept, LeaderA, NumIndicies) ->
- ok = rt_intercept:add(TargetA, Intercept),
- check_fullsync(LeaderA, NumIndicies),
- reboot(TargetA),
- repl_util:wait_until_aae_trees_built([TargetA]).
+%% @doc Read from cluster a series of keys, asserting a certain number
+%% of errors.
+read_from_cluster(Node, Start, End, Errors) ->
+ lager:info("Reading ~p keys from node ~p.", [End - Start, Node]),
+ Res2 = rt:systest_read(Node, Start, ?NUM_KEYS, ?TEST_BUCKET, 1),
+ ?assertEqual(Errors, length(Res2)).

From ab59896b244eb5b713c76191eb4291c30b9c7cb7 Mon Sep 17 00:00:00 2001
From: Andrew Thompson
Date: Mon, 30 Dec 2013 13:14:40 -0500
Subject: [PATCH 016/139] Fix several bugs in the test refactor, and adjust
 some assumptions

---
 tests/repl_aae_fullsync.erl | 47 +++++++++++++++++++++++++++----------
 tests/repl_util.erl | 22 +++++++++++++++--
 2 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index f3bd04701..3337016ab 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -14,7 +14,7 @@
 -define(TEST_BUCKET, <<"repl-aae-fullsync-systest_a">>).
 -define(NUM_KEYS, 1000).

--define(CONF, [
+-define(CONF(Retries), [
 {riak_core,
 [
 {ring_creation_size, 8},
@@ -34,7 +34,7 @@
 {fullsync_strategy, aae},
 {fullsync_on_connect, false},
 {fullsync_interval, disabled},
- {max_fssource_retries, 5}
+ {max_fssource_retries, Retries}
 ]}
 ]).

@@ -45,7 +45,7 @@ confirm() ->

 simple_test() ->
 %% Deploy 6 nodes.
- Nodes = deploy_nodes(6, ?CONF),
+ Nodes = deploy_nodes(6, ?CONF(5)),

 %% Break up the 6 nodes into two clusters.
 {ANodes, BNodes} = lists:split(3, Nodes),
@@ -107,11 +107,13 @@ simple_test() ->
 %% intercepts are removed.
 validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS),

+ rt:clean_cluster(Nodes),
+
 pass.

 exhaustive_test() ->
 %% Deploy 6 nodes.
- Nodes = deploy_nodes(6, ?CONF),
+ Nodes = deploy_nodes(6, ?CONF(infinity)),

 %% Break up the 6 nodes into three clusters.
 {ANodes, Rest} = lists:split(2, Nodes),
@@ -172,24 +174,42 @@ exhaustive_test() ->
 repl_util:enable_fullsync(LeaderA, "C"),
 rt:wait_until_ring_converged(ANodes),

+ %% Wait for trees to compute.
+ repl_util:wait_until_aae_trees_built(ANodes),
+ repl_util:wait_until_aae_trees_built(BNodes),
+ repl_util:wait_until_aae_trees_built(CNodes),
+
 %% Flush AAE trees to disk.
 perform_sacrifice(AFirst),

- %% Verify data is replicated from A -> B successfully once the
- %% intercepts are removed.
+ %% Verify data is replicated from A -> B successfully
 validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS),

- %% Verify data is replicated from A -> B successfully once the
- %% intercepts are removed.
+ %% Verify data is replicated from A -> C successfully
 validate_completed_fullsync(LeaderA, CFirst, "C", 1, ?NUM_KEYS),
+
+ write_to_cluster(AFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS),
+ read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS),
+ read_from_cluster(CFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS),
+
+
+ %% Verify that duelling fullsyncs eventually complete
+ {Time, _} = timer:tc(repl_util,
+ start_and_wait_until_fullsync_complete,
+ [LeaderA]),
+
+ read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, 0),
+ read_from_cluster(CFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, 0),
+ lager:info("Fullsync A->B and A->C completed in ~p seconds", [Time/1000/1000]),
+
 pass.

 %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE
 %% trees to flush to disk.
 perform_sacrifice(Node) ->
 ?assertEqual([], repl_util:do_write(Node, 1, 2000,
- <<"scarificial">>, 1)).
+ <<"sacrificial">>, 1)).

 %% @doc Validate fullsync completed and all keys are available.
 validate_completed_fullsync(ReplicationLeader,
@@ -198,8 +218,9 @@ validate_completed_fullsync(ReplicationLeader,
 Start,
 End) ->
 ok = check_fullsync(ReplicationLeader, DestinationCluster, 0),
- lager:info("Verify: Reading ~p keys repl'd from A(~p) to B(~p)",
- [?NUM_KEYS, ReplicationLeader, DestinationNode]),
+ lager:info("Verify: Reading ~p keys repl'd from A(~p) to ~p(~p)",
+ [?NUM_KEYS, ReplicationLeader,
+ DestinationCluster, DestinationNode]),
 ?assertEqual(0,
 repl_util:wait_for_reads(DestinationNode,
 Start,
@@ -212,7 +233,7 @@ validate_completed_fullsync(ReplicationLeader,
 check_fullsync(Node, Cluster, ExpectedFailures) ->
 {Time, _} = timer:tc(repl_util,
 start_and_wait_until_fullsync_complete,
- [Node]),
+ [Node, Cluster]),
 lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]),

 Status = rpc:call(Node, riak_repl_console, status, [quiet]),
@@ -334,5 +355,5 @@ write_to_cluster(Node, Start, End) ->
 %% of errors.
 read_from_cluster(Node, Start, End, Errors) ->
 lager:info("Reading ~p keys from node ~p.", [End - Start, Node]),
- Res2 = rt:systest_read(Node, Start, ?NUM_KEYS, ?TEST_BUCKET, 1),
+ Res2 = rt:systest_read(Node, Start, End, ?TEST_BUCKET, 1),
 ?assertEqual(Errors, length(Res2)).
diff --git a/tests/repl_util.erl b/tests/repl_util.erl
index 8240c2cf8..3501c0355 100644
--- a/tests/repl_util.erl
+++ b/tests/repl_util.erl
@@ -15,6 +15,7 @@
 wait_until_aae_trees_built/1,
 wait_for_reads/5,
 start_and_wait_until_fullsync_complete/1,
+ start_and_wait_until_fullsync_complete/2,
 connect_cluster/3,
 disconnect_cluster/2,
 wait_for_connection/2,
@@ -152,13 +153,30 @@ get_fs_coord_status_item(Node, SinkName, ItemName) ->
 proplists:get_value(ItemName, ClusterProps).
start_and_wait_until_fullsync_complete(Node) -> + start_and_wait_until_fullsync_complete(Node, undefined). + +start_and_wait_until_fullsync_complete(Node, Cluster) -> Status0 = rpc:call(Node, riak_repl_console, status, [quiet]), - Count = proplists:get_value(server_fullsyncs, Status0) + 1, + Count0 = proplists:get_value(server_fullsyncs, Status0), + Count = case Cluster of + undefined -> + %% count the # of fullsync enabled clusters + Count0 + length(string:tokens(proplists:get_value(fullsync_enabled, + Status0), ", ")); + _ -> + Count0 + 1 + end, lager:info("waiting for fullsync count to be ~p", [Count]), lager:info("Starting fullsync on ~p (~p)", [Node, rtdev:node_version(rtdev:node_id(Node))]), - rpc:call(Node, riak_repl_console, fullsync, [["start"]]), + Args = case Cluster of + undefined -> + ["start"]; + _ -> + ["start", Cluster] + end, + rpc:call(Node, riak_repl_console, fullsync, [Args]), %% sleep because of the old bug where stats will crash if you call it too %% soon after starting a fullsync timer:sleep(500), From 5fc11e5a3cf33836b576fe4c5dadbd64c2f12670 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 30 Dec 2013 15:59:16 -0500 Subject: [PATCH 017/139] Change test name. --- tests/repl_aae_fullsync.erl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 3337016ab..42a72058f 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -40,7 +40,7 @@ confirm() -> simple_test(), - exhaustive_test(), + dual_test(), pass. simple_test() -> @@ -111,7 +111,7 @@ simple_test() -> pass. -exhaustive_test() -> +dual_test() -> %% Deploy 6 nodes. Nodes = deploy_nodes(6, ?CONF(infinity)), @@ -169,7 +169,6 @@ exhaustive_test() -> rt:wait_until_ring_converged(ANodes), %% Enable fullsync from A to C. - %% TODO: This causes the test to fail and fullsync to stall. lager:info("Enabling fullsync from A to C"), repl_util:enable_fullsync(LeaderA, "C"), rt:wait_until_ring_converged(ANodes), @@ -188,11 +187,12 @@ exhaustive_test() -> %% Verify data is replicated from A -> C successfully validate_completed_fullsync(LeaderA, CFirst, "C", 1, ?NUM_KEYS), - - write_to_cluster(AFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), - read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), - read_from_cluster(CFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), - + write_to_cluster(AFirst, + ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), + read_from_cluster(BFirst, + ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), + read_from_cluster(CFirst, + ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), %% Verify that duelling fullsyncs eventually complete {Time, _} = timer:tc(repl_util, @@ -201,7 +201,8 @@ exhaustive_test() -> read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, 0), read_from_cluster(CFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, 0), - lager:info("Fullsync A->B and A->C completed in ~p seconds", [Time/1000/1000]), + lager:info("Fullsync A->B and A->C completed in ~p seconds", + [Time/1000/1000]), pass. 
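The expected-count arithmetic in repl_util:start_and_wait_until_fullsync_complete/2 above is easy to misread: when no sink is named, a plain "fullsync start" kicks off one fullsync per fullsync-enabled sink, so the target is derived from the comma-separated fullsync_enabled status string. A minimal sketch of that calculation, assuming a status proplist shaped like the one riak_repl_console:status(quiet) returns (the sample values are illustrative):

    %% One additional fullsync per enabled sink when no cluster is
    %% named; exactly one more when a single sink is targeted.
    expected_fullsyncs(Status, undefined) ->
        Enabled = proplists:get_value(fullsync_enabled, Status),
        proplists:get_value(server_fullsyncs, Status)
            + length(string:tokens(Enabled, ", "));
    expected_fullsyncs(Status, _Cluster) ->
        proplists:get_value(server_fullsyncs, Status) + 1.

    %% expected_fullsyncs([{server_fullsyncs, 4},
    %%                     {fullsync_enabled, "B, C"}], undefined) =:= 6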
@@ -238,7 +239,7 @@ check_fullsync(Node, Cluster, ExpectedFailures) ->

 Status = rpc:call(Node, riak_repl_console, status, [quiet]),

- Props = case proplists:get_value(fullsync_coordinator, Status, undefined) of
+ Props = case proplists:get_value(fullsync_coordinator, Status) of
 [{_Name, Props0}] ->
 Props0;
 Multiple ->

From bdc5297c5f9bd8df230293486c7196ba1d557f81 Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Mon, 30 Dec 2013 17:26:09 -0500
Subject: [PATCH 018/139] Add bidirectional test.

---
 tests/repl_aae_fullsync.erl | 76 +++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index 42a72058f..5cc8c7a6a 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -206,6 +206,82 @@ dual_test() ->

 pass.

+bidirectional_test() ->
+ %% Deploy 6 nodes.
+ Nodes = deploy_nodes(6, ?CONF(5)),
+
+ %% Break up the 6 nodes into two clusters.
+ {ANodes, BNodes} = lists:split(3, Nodes),
+
+ lager:info("ANodes: ~p", [ANodes]),
+ lager:info("BNodes: ~p", [BNodes]),
+
+ lager:info("Building two clusters."),
+ [repl_util:make_cluster(N) || N <- [ANodes, BNodes]],
+
+ AFirst = hd(ANodes),
+ BFirst = hd(BNodes),
+
+ lager:info("Naming clusters."),
+ repl_util:name_cluster(AFirst, "A"),
+ repl_util:name_cluster(BFirst, "B"),
+
+ lager:info("Waiting for convergence."),
+ rt:wait_until_ring_converged(ANodes),
+ rt:wait_until_ring_converged(BNodes),
+
+ lager:info("Get leaders."),
+ LeaderA = get_leader(AFirst),
+ LeaderB = get_leader(BFirst),
+
+ lager:info("Finding connection manager ports."),
+ APort = get_port(LeaderA),
+ BPort = get_port(LeaderB),
+
+ lager:info("Connecting cluster A to B"),
+ connect_cluster(LeaderA, BPort, "B"),
+
+ lager:info("Connecting cluster B to A"),
+ connect_cluster(LeaderB, APort, "A"),
+
+ %% Write keys to cluster A, verify B does not have them.
+ write_to_cluster(AFirst, 1, ?NUM_KEYS),
+ read_from_cluster(BFirst, 1, ?NUM_KEYS, ?NUM_KEYS),
+
+ %% Enable fullsync from A to B.
+ lager:info("Enabling fullsync from A to B"),
+ repl_util:enable_fullsync(LeaderA, "B"),
+ rt:wait_until_ring_converged(ANodes),
+
+ %% Enable fullsync from B to A.
+ lager:info("Enabling fullsync from B to A"),
+ repl_util:enable_fullsync(LeaderB, "A"),
+ rt:wait_until_ring_converged(BNodes),
+
+ %% Wait for trees to compute.
+ repl_util:wait_until_aae_trees_built(ANodes),
+
+ %% Flush AAE trees to disk.
+ perform_sacrifice(AFirst),
+
+ %% Verify A replicated to B.
+ validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS),
+
+ %% Write keys to cluster B, verify A does not have them.
+ write_to_cluster(AFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS),
+ read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS),
+
+ %% Wait for trees to compute.
+ repl_util:wait_until_aae_trees_built(BNodes),
+
+ %% Flush AAE trees to disk.
+ perform_sacrifice(BFirst),
+
+ %% Verify B replicated to A.
+ validate_completed_fullsync(LeaderB, AFirst, "A", ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS),
+
+ pass.
+
 %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE
 %% trees to flush to disk.
 perform_sacrifice(Node) ->

From d4c49417507d2ebc62f265fba94d3d4b96c3f994 Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Mon, 30 Dec 2013 17:34:00 -0500
Subject: [PATCH 019/139] Add clean.
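A note on the check_fullsync/3 change above: the fullsync_coordinator status entry changes shape depending on topology. With a single sink it is a one-element list; with several sinks it carries one {Name, Props} pair per sink, and the right one has to be picked by cluster name. A hedged sketch of that lookup (the status value is invented for illustration):

    %% Pick one sink's coordinator proplist out of a status entry that
    %% may describe one or many sinks (sample shape, not real output).
    coordinator_props(Status, Cluster) ->
        case proplists:get_value(fullsync_coordinator, Status) of
            [{_Name, Props}] ->
                Props;
            Multiple when is_list(Multiple) ->
                {_, Props} = lists:keyfind(Cluster, 1, Multiple),
                Props
        end.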
--- tests/repl_aae_fullsync.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 5cc8c7a6a..2313aba45 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -280,6 +280,9 @@ bidirectional_test() -> %% Verify B replicated to A. validate_completed_fullsync(LeaderB, AFirst, "A", ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), + %% Clean. + rt:clean_cluster(Nodes), + pass. %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE From 19f534b78ee61bf3f922ae9078c7d367ee36c655 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 30 Dec 2013 18:25:18 -0500 Subject: [PATCH 020/139] Reorder tree building code. --- tests/repl_aae_fullsync.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 2313aba45..dc9bd041d 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -258,12 +258,12 @@ bidirectional_test() -> repl_util:enable_fullsync(LeaderB, "A"), rt:wait_until_ring_converged(BNodes), - %% Wait for trees to compute. - repl_util:wait_until_aae_trees_built(ANodes), - %% Flush AAE trees to disk. perform_sacrifice(AFirst), + %% Wait for trees to compute. + repl_util:wait_until_aae_trees_built(ANodes), + %% Verify A replicated to B. validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS), @@ -271,12 +271,12 @@ bidirectional_test() -> write_to_cluster(AFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), - %% Wait for trees to compute. - repl_util:wait_until_aae_trees_built(BNodes), - %% Flush AAE trees to disk. perform_sacrifice(BFirst), + %% Wait for trees to compute. + repl_util:wait_until_aae_trees_built(BNodes), + %% Verify B replicated to A. validate_completed_fullsync(LeaderB, AFirst, "A", ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), From 8936c5959796a10de17c8e1035be76e468ccc789 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 30 Dec 2013 18:53:30 -0500 Subject: [PATCH 021/139] Wait for transfers to prevent race in tree ownership. 
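The race being closed here: AAE hashtrees are owned per partition, so a tree built before ownership handoff settles can be thrown away when its partition moves, and a fullsync that depended on it stalls. Waiting for transfers before building trees removes that window. A sketch of the underlying polling idiom, assuming rt:wait_until/2 retries a predicate until it returns true (the riak_core_status call and its result shape are assumptions here):

    %% Illustrative poll: succeed once the node reports no unreachable
    %% nodes and no in-flight transfers.
    wait_until_no_transfers(Node) ->
        rt:wait_until(Node,
                      fun(N) ->
                              case rpc:call(N, riak_core_status, transfers, []) of
                                  {[], []} -> true;
                                  _ -> false
                              end
                      end).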
--- tests/repl_aae_fullsync.erl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index dc9bd041d..d284a3683 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -67,6 +67,10 @@ simple_test() -> rt:wait_until_ring_converged(ANodes), rt:wait_until_ring_converged(BNodes), + lager:info("Waiting for transfers to complete."), + rt:wait_until_transfers_complete(ANodes), + rt:wait_until_transfers_complete(BNodes), + lager:info("Get leaders."), LeaderA = get_leader(AFirst), LeaderB = get_leader(BFirst), @@ -140,6 +144,11 @@ dual_test() -> rt:wait_until_ring_converged(BNodes), rt:wait_until_ring_converged(CNodes), + lager:info("Waiting for transfers to complete."), + rt:wait_until_transfers_complete(ANodes), + rt:wait_until_transfers_complete(BNodes), + rt:wait_until_transfers_complete(CNodes), + lager:info("Get leaders."), LeaderA = get_leader(AFirst), LeaderB = get_leader(BFirst), @@ -230,6 +239,10 @@ bidirectional_test() -> rt:wait_until_ring_converged(ANodes), rt:wait_until_ring_converged(BNodes), + lager:info("Waiting for transfers to complete."), + rt:wait_until_transfers_complete(ANodes), + rt:wait_until_transfers_complete(BNodes), + lager:info("Get leaders."), LeaderA = get_leader(AFirst), LeaderB = get_leader(BFirst), From 36006a9a68968dd4c1a60b59f9079882dfaff89c Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 2 Jan 2014 13:51:44 -0500 Subject: [PATCH 022/139] Fix typo. --- tests/repl_aae_fullsync.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index d284a3683..35f7ae41b 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -125,7 +125,7 @@ dual_test() -> lager:info("ANodes: ~p", [ANodes]), lager:info("BNodes: ~p", [BNodes]), - lager:info("BNodes: ~p", [CNodes]), + lager:info("CNodes: ~p", [CNodes]), lager:info("Building three clusters."), [repl_util:make_cluster(N) || N <- [ANodes, BNodes, CNodes]], From b928d2d72be2520642cc42f43010fb9287b46e37 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 2 Jan 2014 17:00:27 -0500 Subject: [PATCH 023/139] Ensure we call bidirectional test. --- tests/repl_aae_fullsync.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 35f7ae41b..4b6f87cf4 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -41,6 +41,7 @@ confirm() -> simple_test(), dual_test(), + bidirectional_test(), pass. simple_test() -> From 8076099d64d675d30eee02978d6a16559880ba18 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 2 Jan 2014 17:11:36 -0500 Subject: [PATCH 024/139] Run bidirectional first because it cleans post-run. --- tests/repl_aae_fullsync.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 4b6f87cf4..34c2e0e34 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -40,8 +40,8 @@ confirm() -> simple_test(), - dual_test(), bidirectional_test(), + dual_test(), pass. simple_test() -> From 05403e42287c8050b987750086598e881f656547 Mon Sep 17 00:00:00 2001 From: Sean Cribbs Date: Fri, 3 Jan 2014 14:51:48 -0600 Subject: [PATCH 025/139] Post the test output log separately from the test result, fixes bug when non-UTF8 chars are in logs. 
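The root cause behind this fix: mochijson2 cannot encode log bytes that are not valid UTF-8, so the log has to leave the JSON payload entirely and travel as a plain artifact. lists:keytake/3 does the split; a small illustration with a made-up result proplist:

    %% keytake returns the matched tuple plus the list without it.
    {value, {log, Log}, TestResult} =
        lists:keytake(log, 1, [{test, demo}, {log, <<255,0,1>>}, {status, pass}]),
    %% Log =:= <<255,0,1>>, TestResult =:= [{test, demo}, {status, pass}]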
--- src/riak_test_escript.erl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/riak_test_escript.erl b/src/riak_test_escript.erl index 5876ce6c6..69b679744 100644 --- a/src/riak_test_escript.erl +++ b/src/riak_test_escript.erl @@ -280,12 +280,14 @@ run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> case Report of undefined -> ok; _ -> - case giddyup:post_result(SingleTestResult) of + {value, {log, L}, TestResult} = lists:keytake(log, 1, SingleTestResult), + case giddyup:post_result(TestResult) of error -> woops; {ok, Base} -> - %% Now push up the artifacts + %% Now push up the artifacts, starting with the test log + giddyup:post_artifact(Base, {"riak_test.log", L}), [ giddyup:post_artifact(Base, File) || File <- rt:get_node_logs() ], - ResultPlusGiddyUp = SingleTestResult ++ [{giddyup_url, list_to_binary(Base)}], + ResultPlusGiddyUp = TestResult ++ [{giddyup_url, list_to_binary(Base)}], [ rt:post_result(ResultPlusGiddyUp, WebHook) || WebHook <- get_webhooks() ] end end, From 8bc9858548b85391eab4249daf09848e793ca47f Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Fri, 3 Jan 2014 21:46:17 -0500 Subject: [PATCH 026/139] Address key offset confusion. --- tests/repl_aae_fullsync.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl index 34c2e0e34..d3492e24e 100644 --- a/tests/repl_aae_fullsync.erl +++ b/tests/repl_aae_fullsync.erl @@ -198,19 +198,19 @@ dual_test() -> validate_completed_fullsync(LeaderA, CFirst, "C", 1, ?NUM_KEYS), write_to_cluster(AFirst, - ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), + ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS), read_from_cluster(BFirst, - ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), + ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), read_from_cluster(CFirst, - ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), + ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), %% Verify that duelling fullsyncs eventually complete {Time, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), - read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, 0), - read_from_cluster(CFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, 0), + read_from_cluster(BFirst, ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS, 0), + read_from_cluster(CFirst, ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS, 0), lager:info("Fullsync A->B and A->C completed in ~p seconds", [Time/1000/1000]), @@ -282,8 +282,8 @@ bidirectional_test() -> validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS), %% Write keys to cluster B, verify A does not have them. - write_to_cluster(AFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), - read_from_cluster(BFirst, ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), + write_to_cluster(AFirst, ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS), + read_from_cluster(BFirst, ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS, ?NUM_KEYS), %% Flush AAE trees to disk. perform_sacrifice(BFirst), @@ -292,7 +292,7 @@ bidirectional_test() -> repl_util:wait_until_aae_trees_built(BNodes), %% Verify B replicated to A. - validate_completed_fullsync(LeaderB, AFirst, "A", ?NUM_KEYS, ?NUM_KEYS + ?NUM_KEYS), + validate_completed_fullsync(LeaderB, AFirst, "A", ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS), %% Clean. 
rt:clean_cluster(Nodes), From a363890b1bddba2f0146594f8032f6419875a182 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 6 Jan 2014 11:18:10 -0500 Subject: [PATCH 027/139] fixed markdown around intercept example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 146481f29..ec5a9fc12 100644 --- a/README.md +++ b/README.md @@ -317,7 +317,7 @@ them with. The example above would result in all calls to To add the `dropped_put` intercept manually you would do the following. - `rt_intercept:add(Node, {riak_kv_vnode, [{{put,7}, dropped_put}]})` + rt_intercept:add(Node, {riak_kv_vnode, [{{put,7}, dropped_put}]}) ### How Does it Work? From 07687596ce9150025f9e67759f79a69ce40c5d73 Mon Sep 17 00:00:00 2001 From: Sean Cribbs Date: Mon, 6 Jan 2014 11:43:59 -0600 Subject: [PATCH 028/139] Be more defensive when posting the result. * Catch all classes of errors in rt:post_result/2. * Avoid running mochijson2:encode/1 inside the catch block, return `error` from the block. * Match on the return value of rt:post_result/2 in giddyup:post_result/1. --- src/giddyup.erl | 12 ++++++++---- src/rt.erl | 7 ++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/giddyup.erl b/src/giddyup.erl index 08ead69b3..1902d0f8a 100644 --- a/src/giddyup.erl +++ b/src/giddyup.erl @@ -82,10 +82,14 @@ post_result(TestResult) -> URL = "http://" ++ Host ++ "/test_results", lager:info("giddyup url: ~s", [URL]), rt:check_ibrowse(), - {ok, RC, Headers} = rt:post_result(TestResult, #rt_webhook{name="GiddyUp", url=URL, headers=[basic_auth()]}), - {_, Location} = lists:keyfind("Location", 1, Headers), - lager:info("Test Result successfully POSTed to GiddyUp! ResponseCode: ~s, URL: ~s", [RC, Location]), - {ok, Location}. + case rt:post_result(TestResult, #rt_webhook{name="GiddyUp", url=URL, headers=[basic_auth()]}) of + {ok, RC, Headers} -> + {_, Location} = lists:keyfind("Location", 1, Headers), + lager:info("Test Result successfully POSTed to GiddyUp! ResponseCode: ~s, URL: ~s", [RC, Location]), + {ok, Location}; + error -> + error + end. post_artifact(TRURL, {FName, Body}) -> %% First compute the path of where to post the artifact diff --git a/src/rt.erl b/src/rt.erl index d4b3de1d3..bf6656bcf 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -1274,9 +1274,10 @@ post_result(TestResult, #rt_webhook{url=URL, headers=HookHeaders, name=Name}) -> lager:warning("Some error POSTing test result: ~p", [X]), error catch - Throws -> - lager:error("Error reporting to ~s. ~p", [Name, Throws]), - lager:error("Payload: ~s", [mochijson2:encode(TestResult)]) + Class:Reason -> + lager:error("Error reporting to ~s. ~p:~p", [Name, Class, Reason]), + lager:error("Payload: ~p", [TestResult]), + error end. 
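%% A note on the catch clause above: a bare pattern in an Erlang catch
%% clause defaults to class throw, so the old `catch Throws ->` would
%% miss an error or exit raised inside the HTTP call. Matching
%% Class:Reason covers all three classes. A minimal sketch (risky/0 is
%% hypothetical):

    demo() ->
        %% Only traps throw(...); error(badarg) would still crash:
        R1 = try risky() catch Thrown -> {caught, Thrown} end,
        %% Traps throw, error and exit alike:
        R2 = try risky() catch Class:Reason -> {caught, Class, Reason} end,
        {R1, R2}.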
%%%=================================================================== From 2b2a9bf8599b7ea5f79b3b8ff9c71e08147caf91 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Mon, 6 Jan 2014 16:31:27 -0500 Subject: [PATCH 029/139] Renew certificates and handle new return value of wait_until --- priv/certs/cacert.org/ca-cert.pem | 53 ++++++++++++++---------- priv/certs/cacert.org/ca-key.pem | 67 ++++++++++++++++++++++++------- priv/certs/cacert.org/ny-cert.pem | 53 ++++++++++++++---------- priv/certs/cacert.org/ny-key.pem | 67 ++++++++++++++++++++++++------- tests/replication2_ssl.erl | 11 +++-- tests/replication_ssl.erl | 10 ++--- 6 files changed, 176 insertions(+), 85 deletions(-) diff --git a/priv/certs/cacert.org/ca-cert.pem b/priv/certs/cacert.org/ca-cert.pem index 25c4572d5..19d1540d8 100644 --- a/priv/certs/cacert.org/ca-cert.pem +++ b/priv/certs/cacert.org/ca-cert.pem @@ -1,26 +1,35 @@ -----BEGIN CERTIFICATE----- -MIIEVjCCAj6gAwIBAgIDDU7jMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jv +MIIGKzCCBBOgAwIBAgIDDlBlMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jv b3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZ Q0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9y -dEBjYWNlcnQub3JnMB4XDTEzMDYwMzEzNTk1MVoXDTEzMTEzMDEzNTk1MVowJDEi -MCAGA1UEAxMZY2EuY2F0YWNseXNtLXNvZnR3YXJlLm5ldDCBnzANBgkqhkiG9w0B -AQEFAAOBjQAwgYkCgYEA03ZAGq21/E22WvtAAeQ2fzd+J3n1TmWw9Fr+qGBOLOXm -NnGbWD5seICYdmOV6A6tMx2dDF/4l4/iLJHV1B3NdfugLzVGwRH7wPpmCMmpzLYz -y/2UZOCOuHybE7eHglUnK3uSnGlebR/rVn6Nw4pochQ9+BV4qAbaIGNfIzWmGLUC -AwEAAaOBvzCBvDAMBgNVHRMBAf8EAjAAMA4GA1UdDwEB/wQEAwIDqDA0BgNVHSUE -LTArBggrBgEFBQcDAgYIKwYBBQUHAwEGCWCGSAGG+EIEAQYKKwYBBAGCNwoDAzAz -BggrBgEFBQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmNhY2VydC5v -cmcvMDEGA1UdHwQqMCgwJqAkoCKGIGh0dHA6Ly9jcmwuY2FjZXJ0Lm9yZy9yZXZv -a2UuY3JsMA0GCSqGSIb3DQEBBQUAA4ICAQAw8KFHyQgS7u/v4PuzK2MBxT812MRG -PrRzXFfU9T3XIw8PqFLw85VDz42ZMCTqE7VdyYRWh/Wj7UjxSrseaMIZ79Pe3q7S -1zYN9d9GevfUCzZY3rcvfdjwiIZUgHBqqasA5pa+MufyDWJrct0xwNE9Xf4nFpah -5PnarkQEHnjKmK3VVOZLysHrtpGLIS9nYC8sLg6vrkogppOkdtmc1z8M/89dKyDm -ydgU59jDOWres5Rf5p/7wRbyxrH5dBFemkdemhoH5Y4jh/X9szQz1HkE+RjAGuYd -jkjKTQQWCjFRZc08FueJx8ZEJ0UbKfciYi0TziN9ZOEx+7koxyA4GU7Gf7bq0+wZ -65eu2IWBUdIrzV9ZhZhsrw+Ly2y1FpsNllNLXLRHwNThmlsbh0Qy6Pxhj1yLvp2p -zGmqqit6lHnrK7Ob9Zhb1s0/HHbSDtWmPgf9Ju1L9MZWmsdidjcnRFxUWn6mdfcc -jTDC6ORjQ25L6rwVl7p8Z6UYO7DUENwnZCH2YD60VeMqVpuLaWTvHCKMs/VkW5H+ -IAuLR0RQuEitnT+f8cAqom9s7FYVohIPO5PtvUd5cFw5LbTzOthsXNWEG2NPlgYI -XhkhNWi/mDqkDBWhBp6z5lD57MPGCHfUVndPpn0exGJ9qHRMsnHaxcbHjJi6R8x9 -4eayUbS5bM7tng== +dEBjYWNlcnQub3JnMB4XDTE0MDEwNjE3NTEzMVoXDTE0MDcwNTE3NTEzMVowJDEi +MCAGA1UEAxMZY2EuY2F0YWNseXNtLXNvZnR3YXJlLm5ldDCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBALUCZ9vl+2MUip9979h8b8CRSLh2yHj5A4+j7Hg2 +/0C3zLPRuNjNYA453W2ob+JF6ANAOLykh9a6R5FLD/xbLUfPeycvV5Gnz/MGuStV +leM3ef+lg/GMsxMCl4OflB3Ui4Lr7IziwtuMOQ9Jg5p3xESPfkE2EqbfmLbqNZ8n +uGtmHLK/Mgyav+TFUJExn0OeNtANB199jl2tH4VNIRKkUhKPliVjt+Y7g/5Nvk0m +YWEQqzfQiGrEVeNuDOZDF9+hG4mxtNtGdEoTyhig7Ixznz7w3wRw0MLA2hW14r1L +N8W+Tkwa+hgwDTjlGwGFoUWbvGv1tzxnU816cFkSGRq/r+yDDpTBt1cjJX0hhDXg +hdByfeTmwKctuFpIpW684bwWiSqx8K1vWpWH1xhQbOq/6BK4nv2xZGLI3gqofh/W +/hJA+/9Yi1xwiMtC9ZYbIIJcCUtYASrrH0oh0emdHjoUdNfTmeJwadql4X6SS6vt +5qMZodtt47AwC4TzoiZ2pIoVRslIrG0Ov0xhujxKiW6wyek3loAQAyQPBUCqAyYq +BPs0lvGPWOPIvONK4P8XCDAg2IwDKFfpFv+hsB7u8MDlWtnwF0EGk3g11fdoGECQ +BIZyUJmlozQnDq51VAcsmzvQEncZEnIMzCp1kd+zar4TayZao20RQGeBupH73kxz +0fkbAgMBAAGjggEPMIIBCzAMBgNVHRMBAf8EAjAAMA4GA1UdDwEB/wQEAwIDqDA0 +BgNVHSUELTArBggrBgEFBQcDAgYIKwYBBQUHAwEGCWCGSAGG+EIEAQYKKwYBBAGC 
+NwoDAzAzBggrBgEFBQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmNh +Y2VydC5vcmcvMDEGA1UdHwQqMCgwJqAkoCKGIGh0dHA6Ly9jcmwuY2FjZXJ0Lm9y +Zy9yZXZva2UuY3JsME0GA1UdEQRGMESCGWNhLmNhdGFjbHlzbS1zb2Z0d2FyZS5u +ZXSgJwYIKwYBBQUHCAWgGwwZY2EuY2F0YWNseXNtLXNvZnR3YXJlLm5ldDANBgkq +hkiG9w0BAQUFAAOCAgEAJ3sFvIAfTQGk8tEVH5dSSWpc5h02qmJj9L8DxxU4ZYMl +SZvej9EhCUqx3X1HTdTNFdmAcdeEqi41Npmj58J1o8pF1Dm6hMcJfHEvIe6IQYnQ +/KxAfi0uHOT2viNZPjNgf0DN33vEBMdpDhHTExmtrZM/afYnbh3lmpE1Zxjnsib0 +8/RunbU7sUweQoGQH/g3Lz8faJS4XN436+N/mC7mHYPy33CRtgYqLh3/+xVM4dXL +1tw61JJvjLSMDxwBY/N7SxrYl6eNvhX//FDyMSG3+a9IqoR8HPGaQTPehcDTWNbp +UakaZW1dBSP6oFE/MIAFrR5+62rjbAiUn7+fIXiulHyBQz2q+anvlLe3s8qdhTHp +3zG2i3GeY4E8/tChmeWSX3J0HDXVa52UYXScXdosAdaY/L9uBmpuHOrNvqwbbEPJ +RriQMEKsOS6YQkWHfP8VWRX6VGMyCQakIxWGyA4BEho7EvIP8JMNLWBIQFlSSibe +XHBADHAJwgx6t4mF6eCFPpemx0G87ukaQjj1FS/W7pTnRH+FbSxN/KE44R22qAFK +nooqpK61p3xrGYC/6Npuix+q0+vltwlYlo4OyQLBKvE2WiYwYxzdQmQQzr/NyAkp +mRyB9mjklDV9x1HTz2Rb1ATwRnP0olcmJOGOReml05FSsSk5HUkoAZ9INNWF/3I= -----END CERTIFICATE----- diff --git a/priv/certs/cacert.org/ca-key.pem b/priv/certs/cacert.org/ca-key.pem index a0231e5c0..380966419 100644 --- a/priv/certs/cacert.org/ca-key.pem +++ b/priv/certs/cacert.org/ca-key.pem @@ -1,15 +1,52 @@ ------BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQDTdkAarbX8TbZa+0AB5DZ/N34nefVOZbD0Wv6oYE4s5eY2cZtY -Pmx4gJh2Y5XoDq0zHZ0MX/iXj+IskdXUHc11+6AvNUbBEfvA+mYIyanMtjPL/ZRk -4I64fJsTt4eCVScre5KcaV5tH+tWfo3DimhyFD34FXioBtogY18jNaYYtQIDAQAB -AoGAHvJESWM4qdbZpBD2+g/i5dVCMhbIDpQVRH14nT2S00FF4pvXpx9jknMHXPQQ -ASj8selYjXISGGQnLmydRXCjG6fD/eTYSTge0A1nlAZWQ27yxHe1HHnS8zE8RFS/ -7WnfmWGn5w4q1WLuirkCBYhr+BSsublOXxt3WypyNeOn76ECQQD863HuYLFPSKzI -1EX8rwyTmLHwyow4Wz7cBbkLtZyN/xJtrEClO8xVOORiOYDqF1vVsJz8AoV8ywdb -kaw+pcSNAkEA1gmMRn5WAJHsVk6X2yUkn62CBzHHY6U8EkBFzD5QmnlyhJ3sx73a -KJLaHQ6AHK/GjW5o2EcerF9DQxMl019eyQJADa9HmNhOf7W086DYsJVgHtRSPG8Z -Kyt0uT7uBj4rXFuCycBaNxaHNc1APjhSS1g8NdG6jMAuLNRnb4LwSOt7OQJATDni -R0i/ADz32lTrKBS2UlfMsxs7U3WJS0awlOisdBdEfY5wwUIP69tA6pA7d/vxznS/ -yiap+eGDZuPu2LRiYQJBAPEUvSvcc678wFoDq6AE3EG3PPJWey6Oe6y2cSqbnSot -Gg1V8GTkwMXFffawi87sKhnc+ygCk3C5cdEMMJ+q6qA= ------END RSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC1Amfb5ftjFIqf +fe/YfG/AkUi4dsh4+QOPo+x4Nv9At8yz0bjYzWAOOd1tqG/iRegDQDi8pIfWukeR +Sw/8Wy1Hz3snL1eRp8/zBrkrVZXjN3n/pYPxjLMTApeDn5Qd1IuC6+yM4sLbjDkP +SYOad8REj35BNhKm35i26jWfJ7hrZhyyvzIMmr/kxVCRMZ9DnjbQDQdffY5drR+F +TSESpFISj5YlY7fmO4P+Tb5NJmFhEKs30IhqxFXjbgzmQxffoRuJsbTbRnRKE8oY +oOyMc58+8N8EcNDCwNoVteK9SzfFvk5MGvoYMA045RsBhaFFm7xr9bc8Z1PNenBZ +Ehkav6/sgw6UwbdXIyV9IYQ14IXQcn3k5sCnLbhaSKVuvOG8FokqsfCtb1qVh9cY +UGzqv+gSuJ79sWRiyN4KqH4f1v4SQPv/WItccIjLQvWWGyCCXAlLWAEq6x9KIdHp +nR46FHTX05nicGnapeF+kkur7eajGaHbbeOwMAuE86ImdqSKFUbJSKxtDr9MYbo8 +SolusMnpN5aAEAMkDwVAqgMmKgT7NJbxj1jjyLzjSuD/FwgwINiMAyhX6Rb/obAe +7vDA5VrZ8BdBBpN4NdX3aBhAkASGclCZpaM0Jw6udVQHLJs70BJ3GRJyDMwqdZHf +s2q+E2smWqNtEUBngbqR+95Mc9H5GwIDAQABAoICAALUu80bprfg/V4LOTIYmCLZ +4tpHuVDzDBQ2Cx/CNHHNrhzt3cc+Rw9R5l2tsbpHP561ZW3F4P4BwdbK5sAaqPWj +kBiHzAXSKxVw+HpuBz8bggP2kb64ZaCZfMcpOsbQqTHuALAfZ87JPac4jh9fWil8 +WQwqDVagzqGlvhr0IxVC7pX5GY5K6U9pqp3qucPz+IV+fqwGqbBbYc/S6f6BpWr2 +5fFFwv2N1r2md6B7RjCfcx4XW9rXoURCV19Ok1QDEAwyRFLdojn6X0HNWoXD1SzF +1acbIJi7LW4Dfz1YrNGh7VWRYnH4ZmyKYMbAUUpmaicCG9CNCSmAjbtG5i+d6eFx +WpgMvge6p7tDXGYDyBzRscDNG1kGkvN2vYp6EQ5ud3jVClcvF084mhY0zpQiMvqu +KsUXhXNb8DtR1TRdAgh7N4J8DM8rtBUje3XQUMsY7JldeX25MXT8k0cddCdxudBi +Rr1dtLtFUO9LQ/MkrE6z2l3TQpl0CAbECKfV3DXAtl1810GNfQWbCuOREGgg9qA/ +I9t+YLCGsjWuyMsOmyDXl6GEUggcIAv0vh23fz0fQfOQc4lknc5udL1ubC3Mj6Fu 
+aPZ/zXmwBN7mNXKlYvp/3rmEf7GshOIQxsLAGP0+7j4nem92VRkvGm7xcggwLutb +3IrwVp2kZ7EuYueRhCHhAoIBAQDeYi05yOb2MJp8TAbdjGIrKy339VwPEo6knboJ +Lcd97w5LaiV2H04x5r9L431Zt51y07GAoz3qDK11c5KeOtxHKRmM3ztruPiJr2wD +MRs/WNPuVHsyjiHBm67H5x2uUDxh99xXdENyKrPw6SnQKjchinTxinoOfLaMieyr +AokIqRds3oOuv2FL2s6Q/ci0qvRXsc/LOQqvR1eA7xaSm3RIo9qqBXw5sHdBrjVh +vynenxnovXNFFEp6cwzOo0pfK6YhRH4omk1IHuyLfJg6+bYHoj57+MiL4pcpQAkf +SqNqLNUikZQXLbsWPRl667yEI8Wu1WCz0PyZfX8y/qvRxA8LAoIBAQDQXyEECksK +YA0UsfiFEMH+wDvWh9OBjo8G7q0OuxLvio0piriRdaTEQREdUP9P7s1mZlTb85/n +KXaoi6BmXm/bWxbblViWG0n3UFSbp56g4thMvR7w94NHa4FUxf81IFV9Vy+yCRPR +gjC7DH13CUC4otg3zyr7+YC2qV3hmZvq3pTV38u3xTO/lVlBq1G5f09YP/DlliyC +Ri7YCdxKsNhpdW9SVQub/yonJq9UpAfiDMirAElBDq8X9esfppIP8UPIOy6SDcZn +NPCeFRNE5uCwTppLlGYO64IHMFVAlHF0wqVHhuBD5vwvOUofRucIIkdAL0DX7Uyu +dh8AzChlMEgxAoIBAQDN0w5KylBCElAZtQAxs8n0bH2aXwR6itFtOPLDphRa0ZRo +ZnEPt//Wndv2MDKHhDN84MQL7IrUgjFVYMeWHaJdVoPoZ4CGvmeCf9M3zQ6L/foB +Pz3bV8OUfzQwnw9Qk8CfaBoO83OtSirLUfK6USg/qfd8y48Ws4T95HYg/TK5yk79 +G6HDbixuWbEQb2OBHC0UsWVCkaP7AMvTcPZycOEajlVHQdZtSo/GQaf0tUraWmVP +jXEPwa0BUlTLOEUQesTDS7J8f/wBQWXr4EDBbPd2b0rn0SZEmk8QBc2roZhIIACQ +gy8OmOvstFZN97u+jaFcpO9g3o+8vHOrThzXBGBrAoIBAQCdxhoCrCSLs5TUAbnS +T5EEatwkIW5l3+KudhR1XfMFi5w9QztjcHeU3kuuLgFTrda63WsNrFd8OeoNzsNi +H8mEh2E0XM3ZwsnryFh4D6BN49WxrVV7UX6GmoEwBkeqG1R/9n83yCXIclKhnuB1 +PH1UEIG1KFk8t3vdUzqxspTbbzO1CaKAAIKhYt1VmvnjgeE7ShcGUAJdlfSsQjMa +HvOOTow/BAQpVzi8sk75wMXzRIjA6hONfifX9a3VplNcXOtc9mrZzjJXPT+gAwc3 +GbYx6SJtbe0lCO/Ir7Etl/icVzG8igTPhVuYCkH0ksQmZYYbtGvd02FHjEsHWI6R +p9BRAoIBAERmTNo8F+qKnChJ/n+EzV1LlO3n3UOZPND7NBScfDsKlQq4yVCa7qta +7sTN3wQoUyuwsF+w2AM6K4vtosANgGLnX4/rw0D6+Tn0D7z9pVtIJFqWZMcrj9pE +6nIWpLLcGruBJXxHfZ4J7E6LVXpM6B7TjQmxcsvs9F1+w1ReweJHwn6OGn4Tp/AO +a4z5d232eXQKVnQkYIqSXqTUnjUspXrcwPOWkuPVD610yudrASLrskaQJifL5ovJ +6E2KCniYYO9H9IqyrmqW4pOyvoTtFKRsWYUYyrx0ZVLZPwG7180SJR6pObG8dP8v +A/TGb9Z4jj8keWYxRRRnZutEcTbVD/M= +-----END PRIVATE KEY----- diff --git a/priv/certs/cacert.org/ny-cert.pem b/priv/certs/cacert.org/ny-cert.pem index dd5eead11..d348be6be 100644 --- a/priv/certs/cacert.org/ny-cert.pem +++ b/priv/certs/cacert.org/ny-cert.pem @@ -1,26 +1,35 @@ -----BEGIN CERTIFICATE----- -MIIEVjCCAj6gAwIBAgIDDU7kMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jv +MIIGKzCCBBOgAwIBAgIDDlBjMA0GCSqGSIb3DQEBBQUAMHkxEDAOBgNVBAoTB1Jv b3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEiMCAGA1UEAxMZ Q0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJARYSc3VwcG9y -dEBjYWNlcnQub3JnMB4XDTEzMDYwMzEzNTk1OVoXDTEzMTEzMDEzNTk1OVowJDEi -MCAGA1UEAxMZbnkuY2F0YWNseXNtLXNvZnR3YXJlLm5ldDCBnzANBgkqhkiG9w0B -AQEFAAOBjQAwgYkCgYEAtPMzngDQJdM8JcOVcsVKU8yHCUv9SoervEaPAgsFaBFl -qwSDpLAVH3khEBOkG5Ue9+tCi7c75/XQtgEkM05mwAXNuHPPLqDHMvfymkp7tqHJ -zH1eeedZEKl0YsWxFT0hINZKxuwafZ6uPWgTS1YXP2hTjEOP2k/4Gv9ZzNWicGEC -AwEAAaOBvzCBvDAMBgNVHRMBAf8EAjAAMA4GA1UdDwEB/wQEAwIDqDA0BgNVHSUE -LTArBggrBgEFBQcDAgYIKwYBBQUHAwEGCWCGSAGG+EIEAQYKKwYBBAGCNwoDAzAz -BggrBgEFBQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmNhY2VydC5v -cmcvMDEGA1UdHwQqMCgwJqAkoCKGIGh0dHA6Ly9jcmwuY2FjZXJ0Lm9yZy9yZXZv -a2UuY3JsMA0GCSqGSIb3DQEBBQUAA4ICAQBlhNk3y5hD92X8OZWsvXAug4hiRzgo -ZZX5Jai6CXk4gJztQftzQwTgyNXEM47EbZq+o8tQEoAxPhkoU34lW2gSBTdiQ0+C -2mXGmlSUNxE43lV8q8mxhXbmDExPmoIbrpUyguvw/wikk5JnatsVPQ42eXu/7rZw -HJFoXmoqEJ43JeFJoF41mK0mcZvQCqp93s8u9H/x6f+GwqVSn5zCOK/gk1MXUPVT -ncUJwK2Tq24MaqA/uqo8JRAKeod2aLi1rJyoIUf/BJ79dr/ZG4/ZzCVhBOrEeK7i -MTSOrvTHISkkpZFh8aa43Xb9WfY57Gf5uu/myZK6IJxVm5OyR7zrkquGhHQFJxMd -rgCu2soWNyftfgUW8A6QMSPFr83DW/wzmYJW0sBkrK2ihXjFjsG7gIUJzKonsodT -iZ7WjkFvF73xZGXjndIQjCL0Q5Zp6mua+8HxY2RRZI+rlyemz7LnsQ+/5vLNW+ob 
-bYAsbQVu81ruKggM4ZQVVNuuJa6BVkMbcYGIGz3ybsz3iMh7F06F9xoPx1khFz2F -b3DfeY73HSxzOXO9rS9Z4Cf9KsWkmJ8k38U1ZvxuYD4DK73LL0hyjXTnJqZJUmSs -EEyXDz2O8er0A6eqHLg+iATDQ+e/iCCWWiDloOk4UWyHt7qb2wJtbVIC7/I4fqIO -8pzAEDzg/8Quyw== +dEBjYWNlcnQub3JnMB4XDTE0MDEwNjE3NTAyOVoXDTE0MDcwNTE3NTAyOVowJDEi +MCAGA1UEAxMZbnkuY2F0YWNseXNtLXNvZnR3YXJlLm5ldDCCAiIwDQYJKoZIhvcN +AQEBBQADggIPADCCAgoCggIBAL49A8Cyw0qv8kNuvfBd3jjzPMDVVMvfwg+DAKCf +c9giVWbS5x4IoEBWGDbFuwO4mEbAzJuHhVs3kH99DNplSuRpopThYHW6pqRYJRou +9Y9ZqoBAClC4k1vR7jf52rgoW+BwO0pSa9KV/RWbGJ+bvpkjDwVHcrxDvp45H1mw +hzgszZnfMnP1GoLVLSGS/QTmqdLDDhr8AeAenD+3nKiLtLYHDB3DBKmuOxZoku7y +qmGRerme055hsDxqWwNsIIAJtkH3mXqHXqCz2AgwtPmdwSrVNsDJFrLzW/U7WJlj +7g1nvdDEcXgRRkeTig87EVTZDkOYSmBHvPCeaxCzXwzDDpeEulnZVs/y77Xr1vIG +CN0khVWKFkaZTiqR3N8cBw0YoImjhrMGbN22zPBAPuEV4oSluBl1g8HIUCnfzVPa +mobberyOcslrOys6SXKNkMfzEeMqf7e0Mgnma1kvXhF+OfCEnpddD5hEO6SRwXy3 +/58tZtou/OK0g5hgUNxrseaAhnbaH/0xfgiRltXe3qNa0a6+bQ/h54b7d9nxVb88 +PTlel9JiqGLXzixTQFvKWH8QcHNQWCxBpL5x826Tx1Om37DpntbAIb1Tp3uIyW3c +HQOHS1F7GwK1e7zFko9+tuZ8l/phTTCb5/SmU5n07niipW+V61px4vU7jpgUYALk +Gw5VAgMBAAGjggEPMIIBCzAMBgNVHRMBAf8EAjAAMA4GA1UdDwEB/wQEAwIDqDA0 +BgNVHSUELTArBggrBgEFBQcDAgYIKwYBBQUHAwEGCWCGSAGG+EIEAQYKKwYBBAGC +NwoDAzAzBggrBgEFBQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmNh +Y2VydC5vcmcvMDEGA1UdHwQqMCgwJqAkoCKGIGh0dHA6Ly9jcmwuY2FjZXJ0Lm9y +Zy9yZXZva2UuY3JsME0GA1UdEQRGMESCGW55LmNhdGFjbHlzbS1zb2Z0d2FyZS5u +ZXSgJwYIKwYBBQUHCAWgGwwZbnkuY2F0YWNseXNtLXNvZnR3YXJlLm5ldDANBgkq +hkiG9w0BAQUFAAOCAgEAb3VGTCfuDkpVNwJjF8UtAQ9g3wIzkwMYE0gY0TzVyuAn +SkyTY4W8PDbXBNmwksucpsPnqowEAT+zOegh9TKM8EFMBzyAR4jecbL/gtFQQ09P +MPeMKkrdaEkKuViP7RWfWSzOTe5HW4dKAYo2Cjgp7a2KHFg7PFVxoI/T7NMZg/rz ++PnBkZYm5fQDxoEbTfsaUPMxSC7GU5IqFWS5xtgUAyRYWitkS+tktr6YmNRpRsS8 +5wMqaK1Y1lEycUzeJgSV5Uwo2FocdfLBD4jSBJ4LPzDTz+uWaf5HEH7Ta4/mH7ro +ITwjbtESbSgwf1TUwhame/pDy3iBT8FQxF2RNJn1WgpBcInpS5oLKECcTBtHd7YC +oH0MGSxyV1zAm41bXSZDZ33DkF04K8eKhUmPTAFQz0IAL9X/WtuhCJvwpRipEoNr +EuwBhElm9bzOcl16d8Ls89vSa7CVqU4nl2DR4PqS4/fDpHX0oHlNgocLp0LjtDzY ++sGEySZVPu+AL+mLRA8+xNU+VuYV6/vckBUGAvdDCGRpNQV6/NUb49AsC6W0H7sS +dahCWRyFOT1TYHvc39qnqmBfBj74feRNK2mrzj0EJ0PKnxWlTpDcx+h9ZMkDuoiy +lDfBe3CArkQ647sUIlYPsAf+/IX1vmaevlPuw3Hyii4vJPMy5VsG2cBPZsvrXq0= -----END CERTIFICATE----- diff --git a/priv/certs/cacert.org/ny-key.pem b/priv/certs/cacert.org/ny-key.pem index c3426aa80..d9f5cd4d8 100644 --- a/priv/certs/cacert.org/ny-key.pem +++ b/priv/certs/cacert.org/ny-key.pem @@ -1,15 +1,52 @@ ------BEGIN RSA PRIVATE KEY----- -MIICXAIBAAKBgQC08zOeANAl0zwlw5VyxUpTzIcJS/1Kh6u8Ro8CCwVoEWWrBIOk -sBUfeSEQE6QblR7360KLtzvn9dC2ASQzTmbABc24c88uoMcy9/KaSnu2ocnMfV55 -51kQqXRixbEVPSEg1krG7Bp9nq49aBNLVhc/aFOMQ4/aT/ga/1nM1aJwYQIDAQAB -AoGANrfcggko6GdKJvlP9hbm9RRCOXN7MtB4BEUp9aozhlbMcby5ulFs8cmFUAFA -fP7JEJtZ0jokqW5SINeThNhOhXyHtnEsHK01SV5VDaKBTNVTMQW5JbS4E6OU3EyM -Kr40wQ23nTPnqY/02yIPhANtqb1vsp3nn3y2zY/R5+eBeuECQQDcM3j4p40VrM3Z -GlosOE10ahIS2hSp/i9Ink8Auo2SLvuVHW88jLzh5lzCUs7Sb1O7Ti+tBYsWEP40 -k0SHzpNNAkEA0l4kmeQ0ECDl4zY9LcCXJ9hLzeB0C37+RC28vRPI0a3fd8itVdfU -ONXkVH38YsWkR38LV3/hRIRrOCsPdHwfZQJAZ7Hdj8rnR4O+1DPyrsw8vUuOA06r -WHoYrivOrFste9+gHdJkpbDo6cBfgisYtkQyvAvPq7zmiJer/foz1XhUKQJAa3Og -2GZC3ZYgNl1nBG6iBr0pgyBJxfF46NQ/2og3hDp+sY10VCuo/9rpBOtRNhZj4g0X -evsb0kadCZSdEU+xLQJBAJAqVzOb1disH+H499HijDccWdDAhdcAAHZqxA37uwra -EWMkElLknGvtocyswnmJwcLnSHVYeUtbO6zYskiIGLs= ------END RSA PRIVATE KEY----- +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQC+PQPAssNKr/JD +br3wXd448zzA1VTL38IPgwCgn3PYIlVm0uceCKBAVhg2xbsDuJhGwMybh4VbN5B/ 
+fQzaZUrkaaKU4WB1uqakWCUaLvWPWaqAQApQuJNb0e43+dq4KFvgcDtKUmvSlf0V +mxifm76ZIw8FR3K8Q76eOR9ZsIc4LM2Z3zJz9RqC1S0hkv0E5qnSww4a/AHgHpw/ +t5yoi7S2BwwdwwSprjsWaJLu8qphkXq5ntOeYbA8alsDbCCACbZB95l6h16gs9gI +MLT5ncEq1TbAyRay81v1O1iZY+4NZ73QxHF4EUZHk4oPOxFU2Q5DmEpgR7zwnmsQ +s18Mww6XhLpZ2VbP8u+169byBgjdJIVVihZGmU4qkdzfHAcNGKCJo4azBmzdtszw +QD7hFeKEpbgZdYPByFAp381T2pqG23q8jnLJazsrOklyjZDH8xHjKn+3tDIJ5mtZ +L14RfjnwhJ6XXQ+YRDukkcF8t/+fLWbaLvzitIOYYFDca7HmgIZ22h/9MX4IkZbV +3t6jWtGuvm0P4eeG+3fZ8VW/PD05XpfSYqhi184sU0Bbylh/EHBzUFgsQaS+cfNu +k8dTpt+w6Z7WwCG9U6d7iMlt3B0Dh0tRexsCtXu8xZKPfrbmfJf6YU0wm+f0plOZ +9O54oqVvletaceL1O46YFGAC5BsOVQIDAQABAoICAGrHMoEJlaYpRdDlqsTs2xua +I4KU+0iHqpQsUsDVXx90ffz96e2op9IUwPiVWZgIF6i29OFjWs3NTVTRqtI2wqec +Rz5LlmneP0TFmOWub/RrPtFHoMKYJ1YFajGPE14EgTVjqVPZGV4VA/4JlPOZZozr +6bVr2hGj8K08oMUl2XAFbc2tQtNlQuEmqHNM3qnOkomTJCqvQpo6dDcrZ0oPSHpX +UEywNsDK4617OuXV3R5wmrLvXBZs/I+cnsM4ALH9QYibYlEtBEctStemBx6IlnDl +RhtFg3EkOXxXOC64sVhi3177tDjRcWIhhP9xhxPZ1TDJnQ2Xlox0TuIL3BD8twYK +39prNHQX9k7LxoxXOZO4xKWdZDvdQvhH09oWh+hAoJkSMPXwqYX5Ds+Ju8sqjuwa +DRzpMcASJ9NqClCWhMbZAtX9sRwK2ItDrvP/X9keQke2D9aIXjdp4p0wSXLbRuoy +gVk3uREa69nezTApmQoDCpcg80uFaFs3Hmg1rSp/Lw1TZgwpMhNGIRHQEKqxRVHs +nDZ612ZwPUdBn/rGcZmp6tOGvS4+obbqHSBHbGZxhbU7ORL33yceW+pMXRudSq4/ +gDHIMUpfh+IEl6Jo1z68t89lgG8Zi5i8dZQAjLGJcsYyRKhGGvYRwUPFdC2KZ/+F +9gu9eTIl4VUFpcmmDIvtAoIBAQDnLU78o+N59gwK3nKpl+TIILuH2YwZ8c6njdF8 +z6KkkcS9mQCZGtyhxd3UxgvGoFNF2yMtI0NrZ+b2wUDSdDrDmkUjZTutMk/ex5av +v0egLwkhZWCjkGFAxDKk2155XnIT0yE3jhBNhTIRQwwT+OXSS8/cWjz6JniOyN7J +qv8B36gk6oRVUByQ1OMM2+E7tPYHTJZ9/+mFNvQFmxSUFk9CVhgHqd7Uyc5luJ5W +D7PuSJX1YSCMQWYktZeVhBTcz+ArQgFtzG6XwQdpcjdGwVsfzAePKr+UTKS4rjd7 +s1TL7W1cFNsnOK+voLZCzvfMjYhSC3edbN6CtsMUfeMq7bhrAoIBAQDSql37+aSE +UPMVbY2YNXYH1Y5oP6D3lbikptNCEovx7PoBqUyrf7yZFwTGkxzLLuktos2N7l2A +YT0n5+4V9FVdVyMvdZHyMqds1aUQ8ykGW7cP+ZQI0ye+wVE1ppEboUhUI6jNnUBd +rTwKWC9F4O/8fSxNND9vN6vxHYuKZbeljrtCoJ1Pvq4jyeO5fVeUxiOI9ufCIqKt +T5tUwhboHqyCbj5u5J65HayeVuO73RPU19kBlz+zyVy7NMYvNkQm9c+9fVhu6GkY +5z4/KUnUZ7/JuYmslOqdGrqZhkVcHBmpO0CXqO61oLPFxa0RGQf2TCsswjFLBz/r +wSGbubyYawQ/AoIBAE4z+5Zncppda7F5ktxgL9qriATqyQQ10qzHwugv7VsynJVu +3s7sUVrAbsx9upoUvlbRTNMGLNZ2raEnut/2xbJJy72vj0KF1JMOfaEyWe6MOtB7 +VPe/7cZVGLTAKPNxybF/KKt8eXNf7ZH9VmajOkurrcNjG2GcBgh3VzZtjBp8KItk +ciPSF/e+rJg7AbSLAPdzJy20lrCj6GTFE28NJP7R1Qsyodjl89ZhX97JLRscnu4F +T1djAETG0WKY7d3MzI6Vo5obIAZiHDnFk4FyA3t8L8p0sZ5fVxHKjaZhAfyCcw5F +V+WqvXPyL/XlwdoGvujSTEKLA7LAVLnn5CMvi8UCggEAUXoI3YKhCXfRfFEhuWO7 +4a3L5S/uzrL+ddf7zwsjSw8vZmMXPyEpTUBk3XRaZ48eKkNUIve0/AkfwQZnw9/7 +AB3KgvmC19yVendq3xJcpJPmSmxpv7wDHDjictCYQMpxEfG0wMl38oMkvI98wRcN +WmcDMlzpx9cNTjTyGjXbAFIixNMf5I7IBRl5VO1QaT700W1FF4WTy1oFppNmjqIG +cG+4/+S2Krx6XccXTzpjCCfvSzk3Lw7LKbBwkiP32eWqUgxfno2O0E3nekA6yMbT +m26B+9igoOCMmQIgY06O8i/zfHSe1mpv/SbGF93JgY3WCPqWXAFSzJimqfb9JSoG +uwKCAQEAqk+U1OnbVUgMehetXOT+s/WEv9tFIqXNversJ8cV5sb/An0fSApekF5t +POXgJZ7y2+p1KYyI+F01zvm3D5ZW+/qokhgfIILsqYv0EHlVoSnryy3h65YmMkVd +g1jo9FElszAODIOq3y7d2srdIowq2oGroGadG5X0yCR1odULkfvZgKHwc1hj2Fus +uf6PwQ+JCBMauuXUN2IEMH3t+ilv/aLygddE6mNGNon0MZ1pjHzDAnuMfFXf6y1r +k7+GT7vIJ/OxJGfK12iU/+NlHXv6GhWWE+/Tg8/L16U/i2rM01NfHE06Se53lKsT +IQ6gcTCkJj2klPdyIGONMpsv5RJl8A== +-----END PRIVATE KEY----- diff --git a/tests/replication2_ssl.erl b/tests/replication2_ssl.erl index fea11a167..e2390007b 100644 --- a/tests/replication2_ssl.erl +++ b/tests/replication2_ssl.erl @@ -188,24 +188,23 @@ confirm() -> rt:wait_for_service(Node2, riak_repl), - lager:info("===testing basic connectivity"), rt:log_to_nodes([Node1, Node2], "Basic connectivity test"), ?assertEqual(ok, test_connection({Node1, 
BaseConf}, {Node2, BaseConf})), lager:info("===testing you can't connect to a server with a cert with the same common name"), rt:log_to_nodes([Node1, Node2], "Testing identical cert is disallowed"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, {Node2, merge_config(SSLConfig1, BaseConf)})), lager:info("===testing you can't connect when peer doesn't support SSL"), rt:log_to_nodes([Node1, Node2], "Testing missing ssl on peer fails"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, {Node2, BaseConf})), lager:info("===testing you can't connect when local doesn't support SSL"), rt:log_to_nodes([Node1, Node2], "Testing missing ssl locally fails"), - ?assertEqual(fail, test_connection({Node1, BaseConf}, + ?assertMatch({fail, _}, test_connection({Node1, BaseConf}, {Node2, merge_config(SSLConfig2, BaseConf)})), lager:info("===testing simple SSL connectivity"), @@ -225,7 +224,7 @@ confirm() -> lager:info("===testing disallowing intermediate CAs disallows connections"), rt:log_to_nodes([Node1, Node2], "Disallowing intermediate CA test 2"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig3A, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig3A, BaseConf)}, {Node2, merge_config(SSLConfig1, BaseConf)})), lager:info("===testing wildcard and strict ACLs with cacert.org certs"), @@ -235,7 +234,7 @@ confirm() -> lager:info("===testing expired certificates fail"), rt:log_to_nodes([Node1, Node2], "expired certificates test"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig5, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig5, BaseConf)}, {Node2, merge_config(SSLConfig7, BaseConf)})), lager:info("Connectivity tests passed"), diff --git a/tests/replication_ssl.erl b/tests/replication_ssl.erl index c2a2c998f..e41588254 100644 --- a/tests/replication_ssl.erl +++ b/tests/replication_ssl.erl @@ -165,17 +165,17 @@ confirm() -> lager:info("===testing you can't connect to a server with a cert with the same common name"), rt:log_to_nodes([Node1, Node2], "Testing identical cert is disallowed"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, {Node2, merge_config(SSLConfig1, BaseConf)})), lager:info("===testing you can't connect when peer doesn't support SSL"), rt:log_to_nodes([Node1, Node2], "Testing missing ssl on peer fails"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig1, BaseConf)}, {Node2, BaseConf})), lager:info("===testing you can't connect when local doesn't support SSL"), rt:log_to_nodes([Node1, Node2], "Testing missing ssl locally fails"), - ?assertEqual(fail, test_connection({Node1, BaseConf}, + ?assertMatch({fail, _}, test_connection({Node1, BaseConf}, {Node2, merge_config(SSLConfig2, BaseConf)})), lager:info("===testing simple SSL connectivity"), @@ -195,7 +195,7 @@ confirm() -> lager:info("===testing disallowing intermediate CAs disallows connections"), rt:log_to_nodes([Node1, Node2], "Disallowing intermediate CA test 2"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig3A, BaseConf)}, + ?assertMatch({fail, _}, 
test_connection({Node1, merge_config(SSLConfig3A, BaseConf)}, {Node2, merge_config(SSLConfig1, BaseConf)})), lager:info("===testing wildcard and strict ACLs with cacert.org certs"), @@ -205,7 +205,7 @@ confirm() -> lager:info("===testing expired certificates fail"), rt:log_to_nodes([Node1, Node2], "expired certificates test"), - ?assertEqual(fail, test_connection({Node1, merge_config(SSLConfig5, BaseConf)}, + ?assertMatch({fail, _}, test_connection({Node1, merge_config(SSLConfig5, BaseConf)}, {Node2, merge_config(SSLConfig7, BaseConf)})), lager:info("Connectivity tests passed"), From 8ec844e9ec343bc75e04f57efc64a6d702bfca6a Mon Sep 17 00:00:00 2001 From: Jon Anderson Date: Mon, 6 Jan 2014 18:23:29 -0500 Subject: [PATCH 030/139] fixed whitespace issues --- tests/rt_cascading.erl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/rt_cascading.erl b/tests/rt_cascading.erl index 911f638dc..58243c69c 100644 --- a/tests/rt_cascading.erl +++ b/tests/rt_cascading.erl @@ -22,7 +22,7 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read rt:set_conf(all, [{"buckets.default.siblings", "off"}]), - + case eunit:test(?MODULE, [verbose]) of ok -> pass; @@ -115,7 +115,7 @@ simple_test_() -> wait_until_pending_count_zero([State#simple_state.middle, State#simple_state.beginning, State#simple_state.ending]) - end} + end} ] end}}. big_circle_test_() -> @@ -231,7 +231,7 @@ big_circle_test_() -> end}, {"check pendings", fun() -> wait_until_pending_count_zero(Nodes) - end} + end} ] end}}. circle_test_() -> @@ -296,7 +296,7 @@ circle_test_() -> end}, {"check pendings", fun() -> wait_until_pending_count_zero(Nodes) - end} + end} ] end}}. pyramid_test_() -> @@ -350,7 +350,7 @@ pyramid_test_() -> end}, {"check pendings", fun() -> wait_until_pending_count_zero(Nodes) - end} + end} ] end}}. diamond_test_() -> @@ -444,7 +444,7 @@ diamond_test_() -> end}, {"check pendings", fun() -> wait_until_pending_count_zero(Nodes) - end} + end} ] end}}. circle_and_spurs_test_() -> @@ -520,7 +520,7 @@ circle_and_spurs_test_() -> end}, {"check pendings", fun() -> wait_until_pending_count_zero(Nodes) - end} + end} ] end}}. mixed_version_clusters_test_() -> @@ -705,7 +705,7 @@ Reses)]), }}, {"check pendings", fun() -> wait_until_pending_count_zero(Nodes) - end} + end} ] end}}. @@ -796,7 +796,7 @@ new_to_old_test_dep() -> end}, {"check pendings", fun() -> wait_until_pending_count_zero(["new1", "old2", "new3"]) - end} + end} ] end}}. ensure_ack_test_() -> From cab96502125b006cff4992a68557d9dd1b426c9e Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Wed, 8 Jan 2014 12:07:32 -0500 Subject: [PATCH 031/139] Disable allow_mult for SSL tests --- tests/replication2_ssl.erl | 4 ++++ tests/replication_ssl.erl | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tests/replication2_ssl.erl b/tests/replication2_ssl.erl index e2390007b..152c9f088 100644 --- a/tests/replication2_ssl.erl +++ b/tests/replication2_ssl.erl @@ -5,6 +5,10 @@ -include_lib("eunit/include/eunit.hrl"). confirm() -> + + %% test requires allow_mult=false + rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), diff --git a/tests/replication_ssl.erl b/tests/replication_ssl.erl index e41588254..7a9e56067 100644 --- a/tests/replication_ssl.erl +++ b/tests/replication_ssl.erl @@ -5,6 +5,9 @@ -include_lib("eunit/include/eunit.hrl"). 
confirm() -> + %% test requires allow_mult=false + rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), From f863c75cd2ec6c07ed48de6d7d86df09cfdcf8e3 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 6 Jan 2014 15:26:20 -0500 Subject: [PATCH 032/139] simplify repl scheduled fs test --- intercepts/riak_repl_util_intercepts.erl | 46 ++++++ tests/replication2_fsschedule.erl | 192 ++++++++--------------- 2 files changed, 113 insertions(+), 125 deletions(-) create mode 100644 intercepts/riak_repl_util_intercepts.erl diff --git a/intercepts/riak_repl_util_intercepts.erl b/intercepts/riak_repl_util_intercepts.erl new file mode 100644 index 000000000..e5b677dbe --- /dev/null +++ b/intercepts/riak_repl_util_intercepts.erl @@ -0,0 +1,46 @@ +-module(riak_repl_util_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +-define(M, riak_repl_util_orig). + + +%% intercept calls to riak_repl_util:start_fullsync_timer/3, +%% which is used for v3 repl +%% don't sleep, but see if the specified interval is correct +%% run fullsync after checking interval +interval_check_v3(Pid, FullsyncIvalMins, Cluster) -> + io:format(user, "Scheduled fullsync from ~p ~p ~p~n",[Pid, + FullsyncIvalMins, + Cluster]), + %% fs to B should always be 1 minute + %% fs to C should always be 2 minutes + %% the fs schedule test that doesn't specify + %% a cluster uses 99 + case Cluster of + "B" when FullsyncIvalMins =/= 1 + andalso FullsyncIvalMins =/= 99 + -> throw("Invalid interval for cluster"); + "C" when FullsyncIvalMins =/= 2 + andalso FullsyncIvalMins =/= 99 + -> throw("Invalid interval for cluster"); + _ -> gen_server:cast(Pid, start_fullsync) + end. + + +%% intercept calls to riak_repl_util:schedule_fullsync, +%% which is used for v2 repl +%% don't sleep, but see if the interval in app:env is correct +%% the test that uses this intercept specifies a single +%% interval (99 minutes) for all sink clusters. +%% run fullsync after checking interval +interval_check_v2(Pid) -> + %328:Scheduled v2 fullsync in [{"B",1},{"C",2}] minutes + {ok, Interval} = application:get_env(riak_repl, fullsync_interval), + io:format(user, "Scheduled v2 fullsync in ~p minutes~n", [Interval]), + case Interval of + 99 -> riak_repl_keylist_server:start_fullsync(Pid), + ok; + _ -> throw("Invalid interval specified for v2 replication") + end. + diff --git a/tests/replication2_fsschedule.erl b/tests/replication2_fsschedule.erl index 73d6da080..9e12b7752 100644 --- a/tests/replication2_fsschedule.erl +++ b/tests/replication2_fsschedule.erl @@ -3,20 +3,22 @@ -include_lib("eunit/include/eunit.hrl"). -import(rt, [deploy_nodes/2, -join/2, + join/2, wait_until_nodes_ready/1, wait_until_no_pending_changes/1]). 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% This tests fullsync scheduling in 1.2 and 1.3 Advanced Replication
+%% This tests fullsync scheduling in 1.4+ Advanced Replication.
+%% The intercept gets called w/ the v3 test too; let it.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-setup_repl_clusters(Conf) ->
+setup_repl_clusters(Conf, InterceptSetup) ->
 NumNodes = 6,
 lager:info("Deploy ~p nodes", [NumNodes]),
 Nodes = deploy_nodes(NumNodes, Conf),
+ InterceptSetup(Nodes),
 lager:info("Nodes = ~p", [Nodes]),
 {[AFirst|_] = ANodes, Rest} = lists:split(2, Nodes),
@@ -74,7 +76,6 @@ setup_repl_clusters(Conf) ->
 ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "C")),
 rt:wait_until_ring_converged(ANodes),

- %% write some data on A
 ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")),
 ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")),
@@ -85,10 +86,8 @@

 test_multiple_schedules() ->
- TestHash = erlang:md5(term_to_binary(os:timestamp())),
- TestBucket = <>,
-
 Conf = [
+ {riak_core, [{ring_creation_size, 4}]},
 {riak_repl,
 [
 {fullsync_on_connect, false},
@@ -96,108 +95,50 @@
 ]}
 ],
 {LeaderA, _ANodes, _BNodes, _CNodes, AllNodes} =
- setup_repl_clusters(Conf),
- rt:log_to_nodes(AllNodes, "Test multiple fullsync schedules from A -> [B,C]"),
-
- lager:info("Writing 500 keys to ~p", [LeaderA]),
- ?assertEqual([], repl_util:do_write(LeaderA, 0, 500, TestBucket, 1)),
-
- Status0 = rpc:call(LeaderA, riak_repl_console, status, [quiet]),
- Count = proplists:get_value(server_fullsyncs, Status0),
- ?assertEqual(0, Count),
-
- Start = riak_core_util:moment(),
- lager:info("Note: Waiting for fullsyncs can take several minutes"),
- wait_until_n_bnw_fullsyncs(LeaderA, "B", 3),
- Finish = riak_core_util:moment(),
- Diff = Finish - Start,
- Minutes = Diff / 60,
- %% Why 5? 1 minute for repl to B to start, 3 fullsyncs + room for slow boxes
- ?assert(Minutes =< 5),
-
- {_AFirst, BFirst, CFirst} = get_firsts(AllNodes),
- %% verify data is replicated to B
- lager:info("Reading 500 keys written to ~p from ~p", [LeaderA, BFirst]),
- ?assertEqual(0, repl_util:wait_for_reads(BFirst, 0, 500, TestBucket, 2)),
- %% verify data is replicated to C
- lager:info("Reading 500 keys written to ~p from ~p", [LeaderA, CFirst]),
- ?assertEqual(0, repl_util:wait_for_reads(CFirst, 0, 500, TestBucket, 2)),
-
- FSCountToC = get_cluster_fullsyncs(LeaderA, "C"),
- %% Why 2? 1 minute for repl to C to start, 1 fullsync
- ?assert(FSCountToC =< 2),
+ setup_repl_clusters(Conf, fun install_v3_intercepts/1),
+ lager:info("Waiting for fullsyncs"),
+ wait_until_fullsyncs(LeaderA, "B", 5),
+ wait_until_fullsyncs(LeaderA, "C", 5),
 rt:clean_cluster(AllNodes),
 pass.
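%% The intercepts installed above hard-code the intervals they expect,
%% so each sub-test's config has to match: a per-sink schedule list for
%% this test, one shared interval for test_single_schedule/0 below. The
%% two riak_repl config shapes being validated look roughly like this
%% (the per-sink list is elided from the hunk above, so it is a sketch
%% inferred from the intercept's checks):

    %% One schedule per sink: fullsync to B every minute, to C every two.
    {riak_repl, [{fullsync_on_connect, false},
                 {fullsync_interval, [{"B", 1}, {"C", 2}]}]}

    %% One shared schedule for every sink (the 99 the intercepts accept).
    {riak_repl, [{fullsync_on_connect, false},
                 {fullsync_interval, 99}]}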
test_single_schedule() -> - TestHash = erlang:md5(term_to_binary(os:timestamp())), - TestBucket = <>, - Conf = [ + {riak_core, [{ring_creation_size, 4}]}, {riak_repl, [ {fullsync_on_connect, false}, - {fullsync_interval, 1} + {fullsync_interval, 99} ]} ], {LeaderA, _ANodes, _BNodes, _CNodes, AllNodes} = - setup_repl_clusters(Conf), - rt:log_to_nodes(AllNodes, "Test single fullsync schedule from A -> [B,C]"), - - lager:info("Writing 500 keys to ~p", [LeaderA]), - ?assertEqual([], repl_util:do_write(LeaderA, 0, 500, TestBucket, 1)), - - Status0 = rpc:call(LeaderA, riak_repl_console, status, [quiet]), - Count = proplists:get_value(server_fullsyncs, Status0), - ?assertEqual(0, Count), - - Start = riak_core_util:moment(), - lager:info("Note: Waiting for fullsyncs can take several minutes"), - wait_until_n_bnw_fullsyncs(LeaderA, "B", 3), - Finish = riak_core_util:moment(), - Diff = Finish - Start, - Minutes = Diff / 60, - ?assert(Minutes =< 5 andalso Minutes >= 3), - - {_AFirst, BFirst, CFirst} = get_firsts(AllNodes), - %% verify data is replicated to B - lager:info("Reading 500 keys written to ~p from ~p", [LeaderA, BFirst]), - ?assertEqual(0, repl_util:wait_for_reads(BFirst, 0, 500, TestBucket, 2)), - - %% verify data is replicated to C - lager:info("Reading 500 keys written to ~p from ~p", [LeaderA, CFirst]), - ?assertEqual(0, repl_util:wait_for_reads(CFirst, 0, 500, TestBucket, 2)), - - FSCountToC = get_cluster_fullsyncs(LeaderA, "C"), - %% Why 2? 1 minute for repl to C to start, 1 fullsync - ?assert(FSCountToC =< 5 andalso FSCountToC >= 3), + setup_repl_clusters(Conf, fun install_v3_intercepts/1), + rt:log_to_nodes(AllNodes, "Test shared fullsync schedule from A -> [B,C]"), + %% let some msgs queue up, doesn't matter how long we wait + lager:info("Waiting for fullsyncs"), + wait_until_fullsyncs(LeaderA, "B", 10), + wait_until_fullsyncs(LeaderA, "C", 10), rt:clean_cluster(AllNodes), pass. - test_mixed_12_13() -> - TestHash = erlang:md5(term_to_binary(os:timestamp())), - TestBucket = <>, - Conf = [ + {riak_core, [{ring_creation_size, 4}]}, {riak_repl, [ {fullsync_on_connect, false}, - {fullsync_interval, 1} + {fullsync_interval, 99} ]} ], {LeaderA, ANodes, BNodes, CNodes, AllNodes} = - setup_repl_clusters(Conf), + setup_repl_clusters(Conf, fun install_mixed_intercepts/1), - {AFirst, BFirst, _CFirst} = get_firsts(AllNodes), + {_AFirst, BFirst, _CFirst} = get_firsts(AllNodes), repl_util:wait_until_leader_converge(ANodes), repl_util:wait_until_leader_converge(BNodes), repl_util:wait_until_leader_converge(CNodes), - lager:info("Writing 500 keys to ~p", [LeaderA]), - ?assertEqual([], repl_util:do_write(LeaderA, 0, 500, TestBucket, 1)), - lager:info("Adding repl listener to cluster A"), ListenerArgs = [[atom_to_list(LeaderA), "127.0.0.1", "9010"]], Res = rpc:call(LeaderA, riak_repl_console, add_listener, ListenerArgs), @@ -207,38 +148,38 @@ test_mixed_12_13() -> SiteArgs = ["127.0.0.1", "9010", "rtmixed"], Res = rpc:call(BFirst, riak_repl_console, add_site, [SiteArgs]), - lager:info("Waiting until scheduled fullsync occurs. Go grab a beer, this may take awhile."), - - wait_until_n_bnw_fullsyncs(LeaderA, "B", 3), - wait_until_n_bnw_fullsyncs(LeaderA, "C", 3), - %% 1.3 fullsyncs increment the 1.2 fullsync counter, backwards - %% compatability is a terrible thing + lager:info("Waiting for v2 repl to catch up. 
Good time to light up a cold can of Tab."), + wait_until_fullsyncs(LeaderA, "B", 3), + wait_until_fullsyncs(LeaderA, "C", 3), wait_until_12_fs_complete(LeaderA, 9), - - Status0 = rpc:call(LeaderA, riak_repl_console, status, [quiet]), - Count0 = proplists:get_value(server_fullsyncs, Status0), - FS_B = get_cluster_fullsyncs(AFirst, "B"), - FS_C = get_cluster_fullsyncs(AFirst, "C"), - %% count the actual 1.2 fullsyncs - Count = Count0 - (FS_B + FS_C), - - lager:info("1.2 Count = ~p", [Count]), - lager:info("1.3 B Count = ~p", [FS_B]), - lager:info("1.3 C Count = ~p", [FS_C]), - - ?assert(Count >= 3 andalso Count =< 6), - ?assert(FS_B >= 3 andalso FS_B =< 6), - ?assert(FS_C >= 3 andalso FS_C =< 6), + rt:clean_cluster(AllNodes), pass. confirm() -> - AllTests = [test_multiple_schedules(), test_single_schedule(), test_mixed_12_13()], + AllTests = [test_mixed_12_13(), test_multiple_schedules(), test_single_schedule()], case lists:all(fun (Result) -> Result == pass end, AllTests) of true -> pass; false -> sadtrombone end. +wait_until_fullsyncs(Node, ClusterName, N) -> + Res = rt:wait_until(Node, + fun(_) -> + FS = get_cluster_fullsyncs(Node, ClusterName), + case FS of + {badrpc, _} -> + false; + undefined -> + false; + X when X >= N -> + true; + _ -> + false + end + end), + ?assertEqual(ok, Res). + wait_until_12_fs_complete(Node, N) -> rt:wait_until(Node, fun(_) -> @@ -258,26 +199,27 @@ get_firsts(Nodes) -> get_cluster_fullsyncs(Node, ClusterName) -> Status = rpc:call(Node, riak_repl2_fscoordinator, status, []), - % let it fail if keys are missing - ClusterData = proplists:get_value(ClusterName, Status), - proplists:get_value(fullsyncs_completed, ClusterData). + case proplists:lookup(ClusterName, Status) of + none -> 0; + {_, ClusterData} -> + case proplists:lookup(fullsyncs_completed, ClusterData) of + none -> 0; + FSC -> FSC + end + end. + +%% skip v2 repl interval checks +install_v3_intercepts(Nodes) -> + [rt_intercept:add(Node, {riak_repl_util, [{{start_fullsync_timer,3}, + interval_check_v3} + ]}) + || Node <- Nodes]. + +%% check v2 + v3 intervals +install_mixed_intercepts(Nodes) -> + [rt_intercept:add(Node, {riak_repl_util, [{{start_fullsync_timer,3}, + interval_check_v3}, + {{schedule_fullsync,1}, + interval_check_v2}]}) + || Node <- Nodes]. -wait_until_n_bnw_fullsyncs(Node, DestCluster, N) -> - lager:info("Waiting for fullsync count for ~p to be ~p", [DestCluster, N]), - Res = rt:wait_until(Node, - fun(_) -> - Fullsyncs = get_cluster_fullsyncs(Node, DestCluster), - case Fullsyncs of - C when C >= N -> - true; - _Other -> - %% keep this in for tracing - %%lager:info("Total fullsyncs = ~p", [Other]), - %% sleep a while so the default 3 minute time out - %% doesn't screw us - timer:sleep(20000), - false - end - end), - ?assertEqual(ok, Res), - lager:info("Fullsync on ~p complete", [Node]). From d73d4b0ff361b755ef43043caa246e33d34f12c8 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Wed, 8 Jan 2014 21:59:56 -0500 Subject: [PATCH 033/139] removed a stray comment --- intercepts/riak_repl_util_intercepts.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/intercepts/riak_repl_util_intercepts.erl b/intercepts/riak_repl_util_intercepts.erl index e5b677dbe..aba688484 100644 --- a/intercepts/riak_repl_util_intercepts.erl +++ b/intercepts/riak_repl_util_intercepts.erl @@ -35,7 +35,6 @@ interval_check_v3(Pid, FullsyncIvalMins, Cluster) -> %% interval (99 minutes) for all sink clusters. 
%% run fullsync after checking interval interval_check_v2(Pid) -> - %328:Scheduled v2 fullsync in [{"B",1},{"C",2}] minutes {ok, Interval} = application:get_env(riak_repl, fullsync_interval), io:format(user, "Scheduled v2 fullsync in ~p minutes~n", [Interval]), case Interval of From dc0eb72b65d0f7a42af0c0317de1c7ec8af5da56 Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Tue, 7 Jan 2014 15:56:20 -0700 Subject: [PATCH 034/139] Changes to loaded_upgrade test to improve reliability * Wait for services after upgrading a node * Remove hardcoded sleep call * Reduce time between upgrades from 300 to 120 seconds * Refactor worker functions to accept a report pid parameter instead of assuming a loaded_upgrade named process. * Remove the mapreduce load. The results of the mapreduce queries during node shutdown are not predictable enough at this time to rely on for this test. --- src/rt_worker_sup.erl | 13 +-- tests/loaded_upgrade.erl | 141 +++++++++++++--------------- tests/loaded_upgrade_worker_sup.erl | 114 +++++++++++----------- 3 files changed, 132 insertions(+), 136 deletions(-) diff --git a/src/rt_worker_sup.erl b/src/rt_worker_sup.erl index c4d82d3ee..90b0f6b68 100644 --- a/src/rt_worker_sup.erl +++ b/src/rt_worker_sup.erl @@ -23,11 +23,11 @@ -behavior(supervisor). %% Helper macro for declaring children of supervisor --define(CHILD(Id, Mod, Node, Backend, Vsn), { - list_to_atom(atom_to_list(Node) ++ "_loader_" ++ integer_to_list(Id)), - { Mod, - start_link, - [list_to_atom(atom_to_list(Node) ++ "_loader_" ++ integer_to_list(Id)), Node, Backend, Vsn]}, +-define(CHILD(Id, Mod, Node, Backend, Vsn, ReportPid), { + list_to_atom(atom_to_list(Node) ++ "_loader_" ++ integer_to_list(Id)), + { Mod, + start_link, + [list_to_atom(atom_to_list(Node) ++ "_loader_" ++ integer_to_list(Id)), Node, Backend, Vsn, ReportPid]}, permanent, 5000, worker, [Mod]}). -export([init/1]). @@ -41,9 +41,10 @@ init(Props) -> Node = proplists:get_value(node, Props), Backend = proplists:get_value(backend, Props), Vsn = proplists:get_value(version, Props), + ReportPid = proplists:get_value(report_pid, Props), ChildSpecs = [ - ?CHILD(Num, loaded_upgrade_worker_sup, Node, Backend, Vsn) + ?CHILD(Num, loaded_upgrade_worker_sup, Node, Backend, Vsn, ReportPid) || Num <- lists:seq(1, WorkersPerNode)], lager:info("Starting ~p workers to ~p", [WorkersPerNode, Node]), diff --git a/tests/loaded_upgrade.erl b/tests/loaded_upgrade.erl index da65e32fb..9dd09fbdc 100644 --- a/tests/loaded_upgrade.erl +++ b/tests/loaded_upgrade.erl @@ -25,14 +25,14 @@ -export([kv_valgen/1, bucket/1, erlang_mr/0, int_to_key/1]). --define(TIME_BETWEEN_UPGRADES, 300). %% Seconds! +-define(TIME_BETWEEN_UPGRADES, 120). %% Seconds! confirm() -> case whereis(loaded_upgrade) of undefined -> meh; _ -> unregister(loaded_upgrade) - end, + end, register(loaded_upgrade, self()), %% Build Cluster TestMetaData = riak_test_runner:metadata(), @@ -46,50 +46,43 @@ confirm() -> Nodes = rt:build_cluster(Vsns), seed_cluster(Nodes), + %% Now we have a cluster! %% Let's spawn workers against it. 
- timer:sleep(10000), - Concurrent = rt_config:get(load_workers, 10), - Sups = [ - {rt_worker_sup:start_link([ - {concurrent, Concurrent}, - {node, Node}, - {backend, Backend}, - {version, OldVsn} - ]), Node} - || Node <- Nodes], + Sups = [{rt_worker_sup:start_link([{concurrent, Concurrent}, + {node, Node}, + {backend, Backend}, + {version, OldVsn}, + {report_pid, self()}]), Node} || Node <- Nodes], upgrade_recv_loop(), [begin - exit(Sup, normal), - lager:info("Upgrading ~p", [Node]), - rt:upgrade(Node, current), - {ok, NewSup} = rt_worker_sup:start_link([ - {concurrent, Concurrent}, - {node, Node}, - {backend, Backend}, - {version, current} - ]), - - _NodeMon = init_node_monitor(Node, NewSup, self()), - upgrade_recv_loop() - - end || {{ok, Sup}, Node} <- Sups], - + exit(Sup, normal), + lager:info("Upgrading ~p", [Node]), + rt:upgrade(Node, current), + rt:wait_for_service(Node, [riak_search,riak_kv,riak_pipe]), + {ok, NewSup} = rt_worker_sup:start_link([{concurrent, Concurrent}, + {node, Node}, + {backend, Backend}, + {version, current}, + {report_pid, self()}]), + _NodeMon = init_node_monitor(Node, NewSup, self()), + upgrade_recv_loop() + end || {{ok, Sup}, Node} <- Sups], pass. upgrade_recv_loop() -> {SMega, SSec, SMicro} = os:timestamp(), EndSecs = SSec + ?TIME_BETWEEN_UPGRADES, EndTime = case EndSecs > 1000000 of - true -> - {SMega + 1, EndSecs - 1000000, SMicro}; - _ -> - {SMega, EndSecs, SMicro} - end, + true -> + {SMega + 1, EndSecs - 1000000, SMicro}; + _ -> + {SMega, EndSecs, SMicro} + end, upgrade_recv_loop(EndTime). %% TODO: Collect error message counts in ets table @@ -99,23 +92,23 @@ upgrade_recv_loop(EndTime) -> true -> lager:info("Done waiting 'cause ~p > ~p", [Now, EndTime]); _ -> - receive - {mapred, Node, bad_result} -> - ?assertEqual(true, {mapred, Node, bad_result}); - {kv, Node, not_equal} -> - ?assertEqual(true, {kv, Node, bad_result}); - {kv, Node, {notfound, Key}} -> - ?assertEqual(true, {kv, Node, {notfound, Key}}); - {listkeys, Node, not_equal} -> - ?assertEqual(true, {listkeys, Node, not_equal}); - {search, Node, bad_result} -> - ?assertEqual(true, {search, Node, bad_result}); - Msg -> - lager:debug("Received Mesg ~p", [Msg]), - upgrade_recv_loop(EndTime) - after timer:now_diff(EndTime, Now) div 1000 -> - lager:info("Done waiting 'cause ~p is up", [?TIME_BETWEEN_UPGRADES]) - end + receive + {mapred, Node, bad_result} -> + ?assertEqual(true, {mapred, Node, bad_result}); + {kv, Node, not_equal} -> + ?assertEqual(true, {kv, Node, bad_result}); + {kv, Node, {notfound, Key}} -> + ?assertEqual(true, {kv, Node, {notfound, Key}}); + {listkeys, Node, not_equal} -> + ?assertEqual(true, {listkeys, Node, not_equal}); + {search, Node, bad_result} -> + ?assertEqual(true, {search, Node, bad_result}); + Msg -> + lager:debug("Received Mesg ~p", [Msg]), + upgrade_recv_loop(EndTime) + after timer:now_diff(EndTime, Now) div 1000 -> + lager:info("Done waiting 'cause ~p is up", [?TIME_BETWEEN_UPGRADES]) + end end. 
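upgrade_recv_loop/1 drains worker reports until a wall-clock deadline derived
from os:timestamp(); seconds that overflow 1,000,000 are carried into the
megaseconds field, and timer:now_diff/2 returns microseconds, hence the
div 1000 before it is used as a receive timeout. A self-contained sketch of
the same carry arithmetic, with a hypothetical helper name:

    %% Hypothetical helper mirroring the deadline math above: add N
    %% seconds to an os:timestamp() tuple, carrying overflow from the
    %% seconds field into the megaseconds field.
    add_seconds({Mega, Secs, Micro}, N) ->
        Total = Secs + N,
        {Mega + Total div 1000000, Total rem 1000000, Micro}.

    %% e.g. add_seconds({1389, 999950, 0}, 120) =:= {1390, 70, 0}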
seed_cluster(Nodes=[Node1|_]) -> @@ -127,9 +120,9 @@ seed_cluster(Nodes=[Node1|_]) -> ?assertEqual([], rt:systest_read(Node1, 100, 1)), seed(Node1, 0, 100, fun(Key) -> - Bin = iolist_to_binary(io_lib:format("~p", [Key])), - riakc_obj:new(<<"objects">>, Bin, Bin) - end), + Bin = iolist_to_binary(io_lib:format("~p", [Key])), + riakc_obj:new(<<"objects">>, Bin, Bin) + end), %% For KV kv_seed(Node1), @@ -155,9 +148,9 @@ seed_search(Node) -> Pid = rt:pbc(Node), SpamDir = rt_config:get(spam_dir), Files = case SpamDir of - undefined -> undefined; - _ -> filelib:wildcard(SpamDir ++ "/*") - end, + undefined -> undefined; + _ -> filelib:wildcard(SpamDir ++ "/*") + end, seed_search(Pid, Files), riakc_pb_socket:stop(Pid). @@ -169,8 +162,8 @@ seed_search(Pid, [File|Files]) -> kv_seed(Node) -> ValFun = fun(Key) -> - riakc_obj:new(bucket(kv), iolist_to_binary(io_lib:format("~p", [Key])), kv_valgen(Key)) - end, + riakc_obj:new(bucket(kv), iolist_to_binary(io_lib:format("~p", [Key])), kv_valgen(Key)) + end, seed(Node, 0, 7999, ValFun). kv_valgen(Key) -> @@ -184,36 +177,36 @@ int_to_key(KInt) -> %% bin_plustwo -> [<<"Key + 2">>] twoi_seed(Node) -> ValFun = fun(Key) -> - Obj = riakc_obj:new(bucket(twoi), iolist_to_binary(io_lib:format("~p", [Key])), kv_valgen(Key)), - MD1 = riakc_obj:get_update_metadata(Obj), - MD2 = riakc_obj:set_secondary_index(MD1, [ - {{integer_index, "plusone"}, [Key + 1, Key + 10000]}, - {{binary_index, "plustwo"}, [int_to_key(Key + 2)]} - ]), - riakc_obj:update_metadata(Obj, MD2) - end, + Obj = riakc_obj:new(bucket(twoi), iolist_to_binary(io_lib:format("~p", [Key])), kv_valgen(Key)), + MD1 = riakc_obj:get_update_metadata(Obj), + MD2 = riakc_obj:set_secondary_index(MD1, [ + {{integer_index, "plusone"}, [Key + 1, Key + 10000]}, + {{binary_index, "plustwo"}, [int_to_key(Key + 2)]} + ]), + riakc_obj:update_metadata(Obj, MD2) + end, seed(Node, 0, 7999, ValFun). erlang_mr() -> [{map, {modfun, riak_kv_mapreduce, map_object_value}, none, false}, - {reduce, {modfun, riak_kv_mapreduce, reduce_count_inputs}, none, true}]. + {reduce, {modfun, riak_kv_mapreduce, reduce_count_inputs}, none, true}]. mr_seed(Node) -> -%% to be used along with sequential_int keygen to populate known -%% mapreduce set + %% to be used along with sequential_int keygen to populate known + %% mapreduce set ValFun = fun(Key) -> - Value = iolist_to_binary(io_lib:format("~p", [Key])), - riakc_obj:new(bucket(mapred), Value, Value) - end, + Value = iolist_to_binary(io_lib:format("~p", [Key])), + riakc_obj:new(bucket(mapred), Value, Value) + end, seed(Node, 0, 9999, ValFun). seed(Node, Start, End, ValFun) -> PBC = rt:pbc(Node), [ begin - Obj = ValFun(Key), - riakc_pb_socket:put(PBC, Obj, [{w,3}]) - end || Key <- lists:seq(Start, End)], + Obj = ValFun(Key), + riakc_pb_socket:put(PBC, Obj, [{w,3}]) + end || Key <- lists:seq(Start, End)], riakc_pb_socket:stop(PBC). diff --git a/tests/loaded_upgrade_worker_sup.erl b/tests/loaded_upgrade_worker_sup.erl index f307d4213..3e62c440c 100644 --- a/tests/loaded_upgrade_worker_sup.erl +++ b/tests/loaded_upgrade_worker_sup.erl @@ -27,33 +27,33 @@ %% API -export([assert_equal/2]). --export([list_keys_tester/4, kv_tester/4, mapred_tester/4, - twoi_tester/4, search_tester/4, tester_start_link/3]). +-export([list_keys_tester/5, kv_tester/5, mapred_tester/5, + twoi_tester/5, search_tester/5, tester_start_link/4]). -export([init/1]). --export([start_link/4]). +-export([start_link/5]). 
%% Helper macro for declaring children of supervisor --define(CHILD(Name, FunName, Node, Vsn), { +-define(CHILD(Name, FunName, Node, Vsn, ReportPid), { list_to_atom(atom_to_list(Name) ++ "_" ++ atom_to_list(FunName)), - { ?MODULE, - tester_start_link, - [FunName, Node, Vsn]}, + { ?MODULE, + tester_start_link, + [FunName, Node, Vsn, ReportPid]}, permanent, 5000, worker, [?MODULE]}). -start_link(Name, Node, Backend, Vsn) -> - supervisor:start_link(?MODULE, [Name, Node, Backend, Vsn]). +start_link(Name, Node, Backend, Vsn, ReportPid) -> + supervisor:start_link(?MODULE, [Name, Node, Backend, Vsn, ReportPid]). -init([Name, Node, Backend, Vsn]) -> +init([Name, Node, Backend, Vsn, ReportPid]) -> rt:wait_for_service(Node, [riak_search,riak_kv,riak_pipe]), - ChildSpecs1 = [ - ?CHILD(Name, FunName, Node, Vsn) - || FunName <- [list_keys_tester, mapred_tester, kv_tester, search_tester]], + ChildSpecs1 = [ + ?CHILD(Name, FunName, Node, Vsn, ReportPid) + || FunName <- [list_keys_tester, kv_tester, search_tester]], ChildSpecs = case Backend of eleveldb -> - [?CHILD(Name, twoi_tester, Node, Vsn) | ChildSpecs1]; + [?CHILD(Name, twoi_tester, Node, Vsn, ReportPid) | ChildSpecs1]; _ -> ChildSpecs1 end, {ok, {{one_for_one, 1000, 60}, ChildSpecs}}. @@ -63,10 +63,10 @@ init([Name, Node, Backend, Vsn]) -> %%% Internal functions %%%=================================================================== -tester_start_link(Function, Node, Vsn) -> - {ok, spawn_link(?MODULE, Function, [Node, 0, undefined, Vsn])}. +tester_start_link(Function, Node, Vsn, ReportPid) -> + {ok, spawn_link(?MODULE, Function, [Node, 0, undefined, Vsn, ReportPid])}. -list_keys_tester(Node, Count, Pid, Vsn) -> +list_keys_tester(Node, Count, Pid, Vsn, ReportPid) -> PBC = pb_pid_recycler(Pid, Node), case riakc_pb_socket:list_keys(PBC, <<"objects">>) of {ok, Keys} -> @@ -74,42 +74,43 @@ list_keys_tester(Node, Count, Pid, Vsn) -> ExpectedKeys = lists:usort([loaded_upgrade:int_to_key(K) || K <- lists:seq(0, 100)]), case assert_equal(ExpectedKeys, ActualKeys) of true -> cool; - _ -> loaded_upgrade ! {listkeys, Node, not_equal} + _ -> ReportPid ! {listkeys, Node, not_equal} end; {error, timeout} -> - loaded_upgrade ! {listkeys, Node, timeout}; + ReportPid ! {listkeys, Node, timeout}; {error, {timeout, _}} -> - loaded_upgrade ! {listkeys, Node, timeout}; + ReportPid ! {listkeys, Node, timeout}; Unexpected -> - loaded_upgrade ! {listkeys, Node, Unexpected} + ReportPid ! {listkeys, Node, Unexpected} end, - list_keys_tester(Node, Count + 1, PBC, Vsn). + list_keys_tester(Node, Count + 1, PBC, Vsn, ReportPid). -kv_tester(Node, Count, Pid, Vsn) -> +kv_tester(Node, Count, Pid, Vsn, ReportPid) -> PBC = pb_pid_recycler(Pid, Node), Key = Count rem 8000, case riakc_pb_socket:get(PBC, loaded_upgrade:bucket(kv), loaded_upgrade:int_to_key(Key)) of {ok, Val} -> case loaded_upgrade:kv_valgen(Key) == riakc_obj:get_value(Val) of true -> cool; - _ -> loaded_upgrade ! {kv, Node, not_equal} + _ -> ReportPid ! {kv, Node, not_equal} end; {error, disconnected} -> ok; {error, notfound} -> - loaded_upgrade ! {kv, Node, {notfound, Key}}; + ReportPid ! {kv, Node, {notfound, Key}}; Unexpected -> - loaded_upgrade ! {kv, Node, Unexpected} + ReportPid ! {kv, Node, Unexpected} end, - kv_tester(Node, Count + 1, PBC, Vsn). + kv_tester(Node, Count + 1, PBC, Vsn, ReportPid). 
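Every tester in this module follows the same loop: recycle the protobuf
connection, issue one operation, post anything unexpected to ReportPid as a
{Kind, Node, Reason} message, and recurse with Count + 1. A minimal sketch of
that shape for a hypothetical ping-based tester (ping_tester is invented here
to show the pattern; riakc_pb_socket:ping/1 and pb_pid_recycler/2 are the
real calls):

    %% Hypothetical tester showing the common loop shape: one
    %% operation per iteration, failures reported to ReportPid.
    ping_tester(Node, Count, Pid, Vsn, ReportPid) ->
        PBC = pb_pid_recycler(Pid, Node),
        case riakc_pb_socket:ping(PBC) of
            pong -> ok;
            Unexpected -> ReportPid ! {ping, Node, Unexpected}
        end,
        ping_tester(Node, Count + 1, PBC, Vsn, ReportPid).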
-mapred_tester(Node, Count, Pid, Vsn) -> +mapred_tester(Node, Count, Pid, Vsn, ReportPid) -> PBC = pb_pid_recycler(Pid, Node), case riakc_pb_socket:mapred(PBC, loaded_upgrade:bucket(mapred), loaded_upgrade:erlang_mr()) of {ok, [{1, [10000]}]} -> ok; - {ok, _R} -> - loaded_upgrade ! {mapred, Node, bad_result}; + {ok, R} -> + lager:warning("Bad MR result: ~p", [R]), + ReportPid ! {mapred, Node, bad_result}; {error, disconnected} -> ok; %% Finkmaster Flex says timeouts are ok @@ -138,16 +139,16 @@ mapred_tester(Node, Count, Pid, Vsn) -> {error, <<"{\"phase\":0,\"error\":\"[{vnode_down,noproc}]", _/binary>>} -> ok; Unexpected -> - loaded_upgrade ! {mapred, Node, Unexpected} + ReportPid ! {mapred, Node, Unexpected} end, - mapred_tester(Node, Count + 1, PBC, Vsn). + mapred_tester(Node, Count + 1, PBC, Vsn, ReportPid). -twoi_tester(Node, 0, undefined, legacy) -> +twoi_tester(Node, 0, undefined, legacy, ReportPid) -> lager:warning("Legacy nodes do not have 2i load applied"), - twoi_tester(Node, 1, undefined, legacy); -twoi_tester(Node, Count, Pid, legacy) -> - twoi_tester(Node, Count + 1, Pid, legacy); -twoi_tester(Node, Count, Pid, Vsn) -> + twoi_tester(Node, 1, undefined, legacy, ReportPid); +twoi_tester(Node, Count, Pid, legacy, ReportPid) -> + twoi_tester(Node, Count + 1, Pid, legacy, ReportPid); +twoi_tester(Node, Count, Pid, Vsn, ReportPid) -> PBC = pb_pid_recycler(Pid, Node), Key = Count rem 8000, ExpectedKeys = [loaded_upgrade:int_to_key(Key)], @@ -158,38 +159,40 @@ twoi_tester(Node, Count, Pid, Vsn) -> {binary_index, "plustwo"}, loaded_upgrade:int_to_key(Key + 2)), riakc_pb_socket:get_index( - PBC, + PBC, loaded_upgrade:bucket(twoi), {integer_index, "plusone"}, Key + 1) - } of + } of {{ok, ?INDEX_RESULTS{keys=BinKeys}}, {ok, ?INDEX_RESULTS{keys=IntKeys}}} -> case {assert_equal(ExpectedKeys, BinKeys), assert_equal(ExpectedKeys, IntKeys)} of {true, true} -> cool; {false, false} -> - loaded_upgrade ! {twoi, Node, bolth_no_match}; + ReportPid ! {twoi, Node, bolth_no_match}; {false, true} -> - loaded_upgrade ! {twoi, Node, bin_no_match}; + ReportPid ! {twoi, Node, bin_no_match}; {true, false} -> - loaded_upgrade ! {twoi, Node, int_no_match} + ReportPid ! {twoi, Node, int_no_match} end; {{error, Reason}, _} -> - loaded_upgrade ! {twoi, Node, {error, Reason}}; + ReportPid ! {twoi, Node, {error, Reason}}; {_, {error, Reason}} -> - loaded_upgrade ! {twoi, Node, {error, Reason}}; + ReportPid ! {twoi, Node, {error, Reason}}; Unexpected -> - loaded_upgrade ! {twoi, Node, Unexpected} + ReportPid ! {twoi, Node, Unexpected} end, - twoi_tester(Node, Count + 1, PBC, Vsn). + twoi_tester(Node, Count + 1, PBC, Vsn, ReportPid). -search_tester(Node, Count, Pid, Vsn) -> +search_tester(Node, Count, Pid, Vsn, ReportPid) -> PBC = pb_pid_recycler(Pid, Node), {Term, Size} = search_check(Count), case riakc_pb_socket:search(PBC, loaded_upgrade:bucket(search), Term) of {ok, Result} -> case Size == Result#search_results.num_found of true -> ok; - _ -> loaded_upgrade ! {search, Node, bad_result} + _ -> + lager:warning("Bad search result: ~p Expected: ~p", [Result#search_results.num_found, Size]), + ReportPid ! {search, Node, bad_result} end; {error, disconnected} -> %% oh well, reconnect @@ -197,24 +200,24 @@ search_tester(Node, Count, Pid, Vsn) -> {error, <<"Error processing incoming message: throw:{timeout,range_loop}:[{riak_search_backend", _/binary>>} -> case rt:is_mixed_cluster(Node) of - true -> + true -> ok; _ -> - loaded_upgrade ! {search, Node, {timeout, range_loop}} + ReportPid ! 
{search, Node, {timeout, range_loop}} end; {error,<<"Error processing incoming message: error:{case_clause,", _/binary>>} -> %% although it doesn't say so, this is the infamous badfun case rt:is_mixed_cluster(Node) of - true -> + true -> ok; _ -> - loaded_upgrade ! {search, Node, {error, badfun}} + ReportPid ! {search, Node, {error, badfun}} end; Unexpected -> - loaded_upgrade ! {search, Node, Unexpected} + ReportPid ! {search, Node, Unexpected} end, - search_tester(Node, Count + 1, PBC, Vsn). + search_tester(Node, Count + 1, PBC, Vsn, ReportPid). search_check(Count) -> case Count rem 6 of @@ -227,7 +230,7 @@ search_check(Count) -> end. assert_equal(Expected, Actual) -> - case Expected -- Actual of + case Expected -- Actual of [] -> ok; Diff -> lager:info("Expected -- Actual: ~p", [Diff]) end, @@ -243,4 +246,3 @@ pb_pid_recycler(Pid, Node) -> riakc_pb_socket:stop(Pid), rt:pbc(Node) end. - From 96aeb6e70698d7e6e2bbfcc620cfaadd8cd0884a Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Thu, 9 Jan 2014 12:39:34 -0500 Subject: [PATCH 035/139] added a test to verify riak-repl script parameters --- intercepts/riak_repl_console_intercepts.erl | 137 ++++++++++++++++++++ src/rt.erl | 7 + src/rtdev.erl | 17 +++ tests/replication2_console_tests.erl | 127 ++++++++++++++++++ 4 files changed, 288 insertions(+) create mode 100644 intercepts/riak_repl_console_intercepts.erl create mode 100644 tests/replication2_console_tests.erl diff --git a/intercepts/riak_repl_console_intercepts.erl b/intercepts/riak_repl_console_intercepts.erl new file mode 100644 index 000000000..b9fe82e9c --- /dev/null +++ b/intercepts/riak_repl_console_intercepts.erl @@ -0,0 +1,137 @@ +-module(riak_repl_console_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +-define(M, riak_repl_console_orig). + +%% Hello - if you mess with the riak-repl script, this test might help you +%% out. It intercepts (registered) calls to riak_repl_console and checks that +%% parameters are received correctly. Tests using these intercepts will +%% fail in ?PASS *isn't* returned. + +%% Please see ./tests/replication2_console_tests.erl for more information! + +%% these *strings* are passed back out as IO from the riak-repl shell script +%% The IO from this script is used in asserts in +%% replication2_console_tests.erl +-define(PASS, io:format("pass", [])). +-define(FAIL, io:format("fail", [])). + +verify_clusterstats(Val) -> + case Val of + [] -> ?PASS; + ["cluster_mgr"] -> ?PASS; + ["192.168.1.1:5555"] -> ?PASS; + _ -> ?FAIL + end. + +verify_clustername(Val) -> + case Val of + ["foo"] -> ?PASS; + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_max_fssource_node(Val) -> + case Val of + "" -> ?PASS; + ["99"] -> ?PASS; + _ -> ?FAIL + end. + +verify_max_fssource_cluster(Val) -> + case Val of + "" -> ?PASS; + ["99"] -> ?PASS; + _ -> ?FAIL + end. + +verify_max_fssink_node(Val) -> + case Val of + "" -> ?PASS; + ["99"] -> ?PASS; + _ -> ?FAIL + end. + +verify_fullsync(Val) -> + case Val of + ["enable","foo"] -> ?PASS; + ["disable","bar"] -> ?PASS; + _ -> ?FAIL + end. + +verify_realtime(Val) -> + case Val of + ["enable","foo"] -> ?PASS; + ["disable","bar"] -> ?PASS; + ["cascades"] -> ?PASS; %% display current cascades info, no additional + %% params + ["cascades","always"] -> ?PASS; + _ -> ?FAIL + end. + +verify_proxy_get(Val) -> + case Val of + ["enable","foo"] -> ?PASS; + ["disable","bar"] -> ?PASS; + _ -> ?FAIL + end. + +verify_add_nat_map(Val) -> + case Val of + ["1.2.3.4:4321","192.168.1.1"] -> ?PASS; + _ -> ?FAIL + end. 
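Each verify_* intercept pattern-matches the token list that the riak-repl
shell script hands to riak_repl_console: a bare command arrives as [], and
every argument arrives as a string, even numeric ones. As a sketch, an
intercept for a hypothetical "riak-repl frobnicate 42" subcommand would take
the same shape (frobnicate is not a real command; it only illustrates the
pattern):

    %% Hypothetical intercept: note the numeric argument shows up as
    %% the string "42", never the integer 42.
    verify_frobnicate(Val) ->
        case Val of
            [] -> ?PASS;
            ["42"] -> ?PASS;
            _ -> ?FAIL
        end.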
+ +verify_del_nat_map(Val) -> + case Val of + ["1.2.3.4:4321","192.168.1.1"] -> ?PASS; + _ -> ?FAIL + end. + +verify_show_nat_map(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_modes(Val) -> + case Val of + [] -> ?PASS; + ["mode_repl12"] -> ?PASS; + ["mode_repl12","mode_repl13"] -> ?PASS; + _ -> ?FAIL + end. + +verify_full_objects(Val) -> + case Val of + [] -> ?PASS; + ["always"] -> ?PASS; + ["99"] -> ?PASS; + _ -> ?FAIL + end. + +verify_add_block_provider_redirect(Val) -> + case Val of + ["a","b"] -> ?PASS; + _ -> ?FAIL + end. + +verify_show_block_provider_redirect(Val) -> + case Val of + ["a"] -> ?PASS; + _ -> ?FAIL + end. + +verify_delete_block_provider_redirect(Val) -> + case Val of + ["a"] -> ?PASS; + _ -> ?FAIL + end. + +verify_show_local_cluster_id(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + diff --git a/src/rt.erl b/src/rt.erl index ad5392a8b..3a8650008 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -87,6 +87,7 @@ priv_dir/0, remove/2, riak/2, + riak_repl/2, rpc_get_env/2, set_backend/1, set_backend/2, @@ -1078,6 +1079,11 @@ admin(Node, Args) -> riak(Node, Args) -> ?HARNESS:riak(Node, Args). + +%% @doc Call 'bin/riak-repl' command on `Node' with arguments `Args' +riak_repl(Node, Args) -> + ?HARNESS:riak_repl(Node, Args). + search_cmd(Node, Args) -> {ok, Cwd} = file:get_cwd(), rpc:call(Node, riak_search_cmd, command, [[Cwd | Args]]). @@ -1373,3 +1379,4 @@ wait_for_control(Vsn, Node) when is_atom(Node) -> %% @doc Wait for Riak Control to start on a series of nodes. wait_for_control(VersionedNodes) when is_list(VersionedNodes) -> [wait_for_control(Vsn, Node) || {Vsn, Node} <- VersionedNodes]. + diff --git a/src/rtdev.erl b/src/rtdev.erl index 63e7c5a21..487731b83 100644 --- a/src/rtdev.erl +++ b/src/rtdev.erl @@ -33,6 +33,9 @@ get_deps() -> riakcmd(Path, N, Cmd) -> io_lib:format("~s/dev/dev~b/bin/riak ~s", [Path, N, Cmd]). +riakreplcmd(Path, N, Cmd) -> + io_lib:format("~s/dev/dev~b/bin/riak-repl ~s", [Path, N, Cmd]). + gitcmd(Path, Cmd) -> io_lib:format("git --git-dir=\"~s/.git\" --work-tree=\"~s/\" ~s", [Path, Path, Cmd]). @@ -73,6 +76,12 @@ run_riak(N, Path, Cmd) -> R end. +run_riak_repl(N, Path, Cmd) -> + lager:info("Running: ~s", [riakcmd(Path, N, Cmd)]), + os:cmd(riakreplcmd(Path, N, Cmd)). + %% don't mess with intercepts and/or coverage, + %% they should already be setup at this point + setup_harness(_Test, _Args) -> Path = relpath(root), %% Stop all discoverable nodes, not just nodes we'll be using for this test. @@ -516,6 +525,14 @@ riak(Node, Args) -> lager:info("~s", [Result]), {ok, Result}. + +riak_repl(Node, Args) -> + N = node_id(Node), + Path = relpath(node_version(N)), + Result = run_riak_repl(N, Path, Args), + lager:info("~s", [Result]), + {ok, Result}. + node_id(Node) -> NodeMap = rt_config:get(rt_nodes), orddict:fetch(Node, NodeMap). diff --git a/tests/replication2_console_tests.erl b/tests/replication2_console_tests.erl new file mode 100644 index 000000000..b9175d753 --- /dev/null +++ b/tests/replication2_console_tests.erl @@ -0,0 +1,127 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
You may obtain
+%% a copy of the License at
+%%
+%%   http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing,
+%% software distributed under the License is distributed on an
+%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+%% KIND, either express or implied. See the License for the
+%% specific language governing permissions and limitations
+%% under the License.
+%%
+%% -------------------------------------------------------------------
+-module(replication2_console_tests).
+-include_lib("eunit/include/eunit.hrl").
+
+%% This test checks to see if the riak-repl *shell script*
+%% communicates its command line args to riak_repl_console
+%% correctly. This test needs to be exercised on all supported
+%% Riak platforms. This test helped fix a problem on Ubuntu
+%% where "riak-repl cascades" failed due to a shift error in
+%% the script. Hopefully, this script will catch similar errors
+%% with future changes to riak-repl.
+%% Note, this test is more about verifying parameter *arity* in
+%% riak_repl_console than verifying all valid combinations
+%% of arguments for each command.
+%%
+%% test flow:
+%% riak_test -> riak_repl (shell script) -> intercept
+%% a) if input received by riak-repl is correct,
+%%    display "pass" to the console. Test will
+%%    pass via assert in check_cmd/2.
+%% b) if input received by riak-repl is unexpected,
+%%    display "fail" to the console; test will fail
+%%    via assert in check_cmd/2
+%% c) if the intercept isn't called, "pass" won't be printed
+%%    to stdout; test will fail via assert in check_cmd/2
+
+-export([confirm/0]).
+
+confirm() ->
+    %% Deploy a node to test against
+    lager:info("Deploy node to test riak-repl command line"),
+    [Node] = rt:deploy_nodes(1),
+    ?assertEqual(ok, rt:wait_until_nodes_ready([Node])),
+    rt_intercept:add(Node,
+                     {riak_repl_console,
+                      [
+                       {{clustername,1}, verify_clustername},
+                       {{modes,1}, verify_modes},
+                       {{clusterstats,1}, verify_clusterstats},
+                       {{max_fssource_node,1}, verify_max_fssource_node},
+                       {{max_fssource_cluster,1}, verify_max_fssource_cluster},
+                       {{max_fssink_node,1}, verify_max_fssink_node},
+                       {{fullsync,1}, verify_fullsync},
+                       {{proxy_get,1}, verify_proxy_get},
+                       {{add_nat_map,1}, verify_add_nat_map},
+                       {{del_nat_map,1}, verify_del_nat_map},
+                       {{show_nat_map,1}, verify_show_nat_map},
+                       {{realtime,1}, verify_realtime},
+                       {{full_objects,1}, verify_full_objects},
+                       {{add_block_provider_redirect,1}, verify_add_block_provider_redirect},
+                       {{show_block_provider_redirect,1}, verify_show_block_provider_redirect},
+                       {{delete_block_provider_redirect,1}, verify_delete_block_provider_redirect},
+                       {{show_local_cluster_id,1}, verify_show_local_cluster_id}
+                      ]}),
+
+    %% test different parameter arities
+    check_cmd(Node, "clusterstats"),
+    check_cmd(Node, "clusterstats cluster_mgr"),
+    check_cmd(Node, "clusterstats 192.168.1.1:5555"),
+
+    check_cmd(Node, "modes"),
+    check_cmd(Node, "modes mode_repl12"),
+    check_cmd(Node, "modes mode_repl12 mode_repl13"),
+
+    check_cmd(Node, "clustername"),
+    check_cmd(Node, "clustername foo"),
+
+    check_cmd(Node, "realtime cascades"),
+    check_cmd(Node, "realtime cascades always"),
+
+    check_cmd(Node, "fullsync max_fssource_node"),
+    check_cmd(Node, "fullsync max_fssource_node 99"),
+
+    check_cmd(Node, "fullsync max_fssource_cluster"),
+    check_cmd(Node, "fullsync max_fssource_cluster 99"),
+
+    check_cmd(Node, "fullsync max_fssink_node"),
+    check_cmd(Node, "fullsync max_fssink_node 99"),
+
+    check_cmd(Node, "fullsync enable foo"),
+
check_cmd(Node, "fullsync disable bar"), + + check_cmd(Node, "realtime enable foo"), + check_cmd(Node, "realtime disable bar"), + + check_cmd(Node, "proxy_get enable foo"), + check_cmd(Node, "proxy_get disable bar"), + + check_cmd(Node, "nat-map show"), + check_cmd(Node, "nat-map add 1.2.3.4:4321 192.168.1.1"), + check_cmd(Node, "nat-map del 1.2.3.4:4321 192.168.1.1"), + + check_cmd(Node, "full_objects"), + check_cmd(Node, "full_objects always"), + check_cmd(Node, "full_objects 99"), + + check_cmd(Node, "add-block-provider-redirect a b"), + check_cmd(Node, "show-block-provider-redirect a"), + check_cmd(Node, "delete-block-provider-redirect a"), + check_cmd(Node, "show-local-cluster-id"), + + pass. + +check_cmd(Node, Cmd) -> + lager:info("Testing riak-repl ~s on ~s", [Cmd, Node]), + {ok, Out} = rt:riak_repl(Node, [Cmd]), + ?assertEqual("pass", Out). + From 6736eef17a60f2988c9647d3d33ebf6639c476c0 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 13 Jan 2014 12:14:25 -0500 Subject: [PATCH 036/139] typo --- intercepts/riak_repl_console_intercepts.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intercepts/riak_repl_console_intercepts.erl b/intercepts/riak_repl_console_intercepts.erl index b9fe82e9c..0b4b91df6 100644 --- a/intercepts/riak_repl_console_intercepts.erl +++ b/intercepts/riak_repl_console_intercepts.erl @@ -7,7 +7,7 @@ %% Hello - if you mess with the riak-repl script, this test might help you %% out. It intercepts (registered) calls to riak_repl_console and checks that %% parameters are received correctly. Tests using these intercepts will -%% fail in ?PASS *isn't* returned. +%% fail if ?PASS *isn't* returned. %% Please see ./tests/replication2_console_tests.erl for more information! From 2abd5c76d20842e6813224fde296ae178befca58 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 13 Jan 2014 16:22:50 -0500 Subject: [PATCH 037/139] add an additional intercept for realtime_cascades --- intercepts/riak_repl_console_intercepts.erl | 9 +++++++-- tests/replication2_console_tests.erl | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/intercepts/riak_repl_console_intercepts.erl b/intercepts/riak_repl_console_intercepts.erl index 0b4b91df6..25547114b 100644 --- a/intercepts/riak_repl_console_intercepts.erl +++ b/intercepts/riak_repl_console_intercepts.erl @@ -64,9 +64,14 @@ verify_realtime(Val) -> case Val of ["enable","foo"] -> ?PASS; ["disable","bar"] -> ?PASS; - ["cascades"] -> ?PASS; %% display current cascades info, no additional + _ -> ?FAIL + end. + +verify_realtime_cascades(Val) -> + case Val of + [] -> ?PASS; %% display current cascades info, no additional %% params - ["cascades","always"] -> ?PASS; + ["always"] -> ?PASS; _ -> ?FAIL end. 
diff --git a/tests/replication2_console_tests.erl b/tests/replication2_console_tests.erl index b9175d753..3791e7719 100644 --- a/tests/replication2_console_tests.erl +++ b/tests/replication2_console_tests.erl @@ -56,6 +56,7 @@ confirm() -> {{clustername,1}, verify_clustername}, {{modes,1}, verify_modes}, {{clusterstats,1}, verify_clusterstats}, + {{realtime_cascades,1}, verify_realtime_cascades}, {{max_fssource_node,1}, verify_max_fssource_node}, {{max_fssource_cluster,1}, verify_max_fssource_cluster}, {{max_fssink_node,1}, verify_max_fssink_node}, From 114fb9f7e2491d7b407512e90db6626337fe3e8c Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Fri, 10 May 2013 13:54:50 -0700 Subject: [PATCH 038/139] Add test for vnode + FSM overload protection --- src/rt.erl | 2 +- src/rtdev.erl | 16 ++-- tests/overload.erl | 199 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 9 deletions(-) create mode 100644 tests/overload.erl diff --git a/src/rt.erl b/src/rt.erl index 824e1bced..d5c99a613 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -1362,7 +1362,7 @@ wait_for_control(Vsn, Node) when is_atom(Node) -> case lists:keyfind(GuiResource, 2, Routes) of false -> - lager:info("Control routes not found yet: ~p ~p.", + lager:info("Control routes not found yet: ~p ~p.", [Vsn, Routes]), false; _ -> diff --git a/src/rtdev.erl b/src/rtdev.erl index 63e7c5a21..be0e8ee8c 100644 --- a/src/rtdev.erl +++ b/src/rtdev.erl @@ -165,7 +165,7 @@ get_riak_conf(Node) -> append_to_conf_file(File, NameValuePairs) -> Settings = lists:flatten( - [io_lib:format("~n~s = ~s~n", [Name, Value]) || {Name, Value} <- NameValuePairs]), + [io_lib:format("~n~s = ~s~n", [Name, Value]) || {Name, Value} <- NameValuePairs]), file:write_file(File, Settings, [append]). all_the_files(DevPath, File) -> @@ -176,7 +176,7 @@ all_the_files(DevPath, File) -> _ -> lager:debug("~s is not a directory.", [DevPath]), [] - end. + end. all_the_app_configs(DevPath) -> AppConfigs = all_the_files(DevPath, "etc/app.config"), @@ -201,23 +201,23 @@ update_app_config(Node, Config) when is_atom(Node) -> %% If there's an app.config, do it old style %% if not, use cuttlefish's adavnced.config case filelib:is_file(AppConfigFile) of - true -> + true -> update_app_config_file(AppConfigFile, Config); _ -> update_app_config_file(AdvConfigFile, Config) - end; + end; update_app_config(DevPath, Config) -> [update_app_config_file(AppConfig, Config) || AppConfig <- all_the_app_configs(DevPath)]. 
update_app_config_file(ConfigFile, Config) -> lager:info("rtdev:update_app_config_file(~s, ~p)", [ConfigFile, Config]), - + BaseConfig = case file:consult(ConfigFile) of {ok, [ValidConfig]} -> ValidConfig; {error, enoent} -> [] - end, + end, MergeA = orddict:from_list(Config), MergeB = orddict:from_list(BaseConfig), NewConfig = @@ -263,7 +263,7 @@ get_backend(AppConfig) -> %% ConfigFileOutputLine looks like this: %% -config /path/to/app.config -args_file /path/to/vm.args -vm_args /path/to/vm.args - Files =[ Filename || Filename <- string:tokens(ConfigFileOutputLine, "\s"), + Files =[ Filename || Filename <- string:tokens(ConfigFileOutputLine, "\s"), ".config" == filename:extension(Filename) ], case Files of @@ -283,7 +283,7 @@ get_backend(AppConfig) -> end, case file:consult(ConfigFile) of - {ok, [Config]} -> + {ok, [Config]} -> kvc:path('riak_kv.storage_backend', Config); E -> lager:error("Error reading ~s, ~p", [ConfigFile, E]), diff --git a/tests/overload.erl b/tests/overload.erl new file mode 100644 index 000000000..726320ba3 --- /dev/null +++ b/tests/overload.erl @@ -0,0 +1,199 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +-module(overload). +-compile(export_all). +-include_lib("eunit/include/eunit.hrl"). + +-define(NUM_REQUESTS, 1000). +-define(THRESHOLD, 500). +-define(BUCKET, <<"test">>). +-define(KEY, <<"hotkey">>). + +confirm() -> + Config = [{riak_core, [{ring_creation_size, 8}, + {enable_health_checks, false}, + {vnode_overload_threshold, undefined}]}, + {riak_kv, [{fsm_limit, undefined}, + {storage_backend, riak_kv_memory_backend}, + {anti_entropy, {off, []}}]}], + Nodes = rt:build_cluster(2, Config), + [_Node1, Node2] = Nodes, + + Ring = rt:get_ring(Node2), + Hash = riak_core_util:chash_std_keyfun({?BUCKET, ?KEY}), + PL = lists:sublist(riak_core_ring:preflist(Hash, Ring), 3), + Victim = hd([Idx || {Idx, Node} <- PL, + Node =:= Node2]), + RO = riak_object:new(?BUCKET, ?KEY, <<"test">>), + + lager:info("Testing with no overload protection"), + {NumProcs, QueueLen} = run_test(Nodes, Victim, RO), + ?assert(NumProcs >= (2*?NUM_REQUESTS * 0.9)), + ?assert(QueueLen >= (?NUM_REQUESTS * 0.9)), + + ok = test_vnode_protection(Nodes, Victim, RO), + ok = test_fsm_protection(Nodes, Victim, RO), + pass. + +test_vnode_protection(Nodes, Victim, RO) -> + [Node1, Node2] = Nodes, + lager:info("Testing with vnode queue protection enabled"), + lager:info("Setting vnode overload threshold to ~b", [?THRESHOLD]), + Config2 = [{riak_core, [{vnode_overload_threshold, ?THRESHOLD}]}], + rt:pmap(fun(Node) -> + rt:update_app_config(Node, Config2) + end, Nodes), + {NumProcs2, QueueLen2} = run_test(Nodes, Victim, RO), + ?assert(NumProcs2 =< (2*?THRESHOLD * 1.5)), + ?assert(QueueLen2 =< (?THRESHOLD * 1.1)), + + %% This stats check often fails. 
Manual testing shows stats + %% always incrementing properly. Plus, if I add code to Riak + %% to log when the dropped stat is incremented I see it called + %% the correct number of times. This looks like a stats bug + %% that is outside the scope of this test. Punting for now. + %% + %% ShouldDrop = ?NUM_REQUESTS - ?THRESHOLD, + %% ok = rt:wait_until(Node2, fun(Node) -> + %% dropped_stat(Node) =:= ShouldDrop + %% end), + + CheckInterval = ?THRESHOLD div 2, + Dropped = read_until_success(Node1), + lager:info("Unnecessary dropped requests: ~b", [Dropped]), + ?assert(Dropped =< CheckInterval), + + lager:info("Suspending vnode proxy for ~b", [Victim]), + Pid = suspend_vnode_proxy(Node2, Victim), + {NumProcs3, QueueLen3} = run_test(Nodes, Victim, RO), + Pid ! resume, + ?assert(NumProcs3 >= (2*?NUM_REQUESTS * 0.9)), + ?assert(QueueLen3 =< (?THRESHOLD * 1.1)), + ok. + +test_fsm_protection(Nodes, Victim, RO) -> + lager:info("Testing with coordinator protection enabled"), + lager:info("Setting FSM limit to ~b", [?THRESHOLD]), + Config3 = [{riak_kv, [{fsm_limit, ?THRESHOLD}]}], + rt:pmap(fun(Node) -> + rt:update_app_config(Node, Config3) + end, Nodes), + {NumProcs4, QueueLen4} = run_test(Nodes, Victim, RO), + ?assert(NumProcs4 =< (?THRESHOLD * 1.1)), + ?assert(QueueLen4 =< (?THRESHOLD * 1.1)), + ok. + +run_test(Nodes, Victim, RO) -> + [Node1, Node2] = Nodes, + rt:wait_for_cluster_service(Nodes, riak_kv), + lager:info("Sleeping for 10s to let process count stablize"), + timer:sleep(10000), + rt:load_modules_on_nodes([?MODULE], Nodes), + lager:info("Suspending vnode ~p/~p", [Node2, Victim]), + Suspended = suspend_vnode(Node2, Victim), + NumProcs1 = process_count(Node1), + lager:info("Initial process count on ~p: ~b", [Node1, NumProcs1]), + lager:info("Sending ~b write requests", [?NUM_REQUESTS]), + write_once(Node1, RO), + Writes = spawn_reads(Node1, ?NUM_REQUESTS), + timer:sleep(5000), + NumProcs2 = process_count(Node1), + QueueLen = vnode_queue_len(Node2, Victim), + + lager:info("Final process count on ~p: ~b", [Node1, NumProcs2]), + lager:info("Final vnode queue length: ~b", [QueueLen]), + + resume_vnode(Suspended), + rt:wait_until(Node2, fun(Node) -> + vnode_queue_len(Node, Victim) =:= 0 + end), + + kill_writes(Writes), + {NumProcs2 - NumProcs1, QueueLen}. + +write_once(Node, RO) -> + {ok, C} = riak:client_connect(Node), + C:put(RO, 3). + +read_until_success(Node) -> + {ok, C} = riak:client_connect(Node), + read_until_success(C, 0). + +read_until_success(C, Count) -> + case C:get(?BUCKET, ?KEY) of + {error, overload} -> + read_until_success(C, Count+1); + _ -> + Count + end. + +spawn_reads(Node, Num) -> + [spawn(fun() -> + {ok, C} = riak:client_connect(Node), + riak_client:get(?BUCKET, ?KEY, C) + end) || _ <- lists:seq(1,Num)]. + +kill_writes(Pids) -> + [exit(Pid, kill) || Pid <- Pids]. + +suspend_vnode(Node, Idx) -> + Pid = rpc:call(Node, ?MODULE, remote_suspend_vnode, [Idx], infinity), + Pid. + +remote_suspend_vnode(Idx) -> + spawn(fun() -> + {ok, Pid} = riak_core_vnode_manager:get_vnode_pid(Idx, riak_kv_vnode), + erlang:suspend_process(Pid, []), + receive resume -> + erlang:resume_process(Pid) + end + end). + +suspend_vnode_proxy(Node, Idx) -> + Pid = rpc:call(Node, ?MODULE, remote_suspend_vnode_proxy, [Idx], infinity), + Pid. + +remote_suspend_vnode_proxy(Idx) -> + spawn(fun() -> + Name = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Idx), + Pid = whereis(Name), + erlang:suspend_process(Pid, []), + receive resume -> + erlang:resume_process(Pid) + end + end). + +resume_vnode(Pid) -> + Pid ! 
resume. + +process_count(Node) -> + rpc:call(Node, erlang, system_info, [process_count]). + +vnode_queue_len(Node, Idx) -> + rpc:call(Node, ?MODULE, remote_vnode_queue, [Idx]). + +dropped_stat(Node) -> + Stats = rpc:call(Node, riak_core_stat, get_stats, []), + proplists:get_value(dropped_vnode_requests_total, Stats). + +remote_vnode_queue(Idx) -> + {ok, Pid} = riak_core_vnode_manager:get_vnode_pid(Idx, riak_kv_vnode), + {message_queue_len, Len} = process_info(Pid, message_queue_len), + Len. From 02502fb29dc649d3f045fd4ff0924ca23950b4a4 Mon Sep 17 00:00:00 2001 From: rzezeski Date: Tue, 14 Jan 2014 19:21:32 +0000 Subject: [PATCH 039/139] Fix verify_handoff Need to activate a bucket type before it can be used. Use the function built into `rt` for creating bucket types. --- tests/verify_handoff.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/verify_handoff.erl b/tests/verify_handoff.erl index e2d05f7d4..f8dd56639 100644 --- a/tests/verify_handoff.erl +++ b/tests/verify_handoff.erl @@ -84,9 +84,8 @@ run_test(TestMode, NTestItems, NTestNodes, HandoffEncoding) -> lager:info("Populating root node."), rt:systest_write(RootNode, NTestItems), %% write one object with a bucket type - ok = rpc:call(RootNode, riak_core_bucket_type, create, [<<"type">>, []]), + rt:create_and_activate_bucket_type(RootNode, <<"type">>, []), %% allow cluster metadata some time to propogate - timer:sleep(1000), rt:systest_write(RootNode, 1, 2, {<<"type">>, <<"bucket">>}, 2), %% Test handoff on each node: From 202e22495f2282d9760930c1b7d52ffc82e8091b Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 15 Jan 2014 09:58:24 -0500 Subject: [PATCH 040/139] Assume integer. All of the replication tests assume that num_nodes will be an integer, not a list. Fix partition repair to do similar. Also, change ho_concurrency to be a integer as well. --- tests/partition_repair.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/partition_repair.erl b/tests/partition_repair.erl index 8b4eddc93..62ba5806c 100644 --- a/tests/partition_repair.erl +++ b/tests/partition_repair.erl @@ -35,8 +35,8 @@ confirm() -> TestMetaData = riak_test_runner:metadata(), KVBackend = proplists:get_value(backend, TestMetaData), - NumNodes = list_to_integer(rt_config:config_or_os_env(num_nodes, "4")), - HOConcurrency = list_to_integer(rt_config:config_or_os_env(ho_concurrency, "2")), + NumNodes = rt_config:config_or_os_env(num_nodes, 4), + HOConcurrency = rt_config:config_or_os_env(ho_concurrency, 2), {_KVBackendMod, KVDataDir} = backend_mod_dir(KVBackend), Bucket = <<"scotts_spam">>, From dccbbbb205d20b3dc80ae042ed21ea636f0d4737 Mon Sep 17 00:00:00 2001 From: Chris Tilt Date: Fri, 22 Feb 2013 17:15:13 -0800 Subject: [PATCH 041/139] Adding test for simultaneous replication and riak object reformatting. Currently failing --- tests/replication_object_reformat.erl | 167 ++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 tests/replication_object_reformat.erl diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl new file mode 100644 index 000000000..0470700bb --- /dev/null +++ b/tests/replication_object_reformat.erl @@ -0,0 +1,167 @@ +%% Riak test for replication combined with binary riak object cluster downgrading +%% +%% Strategy: run realtime and fullsync replication while doing a cluster downgrade + +-module(replication_object_reformat). +-behavior(riak_test). +-export([confirm/0]). +-include_lib("eunit/include/eunit.hrl"). 
+ +confirm() -> + NumNodes = rt:config(num_nodes, 6), + ClusterASize = rt:config(cluster_a_size, 3), + + lager:info("Deploy ~p nodes", [NumNodes]), + Conf = [ + {riak_kv, + [ + {anti_entropy, {off, []}} + ] + }, + {riak_repl, + [ + {fullsync_on_connect, false}, + {fullsync_interval, disabled} + ]} + ], + + Nodes = rt:deploy_nodes(NumNodes, Conf), + + {ANodes, BNodes} = lists:split(ClusterASize, Nodes), + lager:info("ANodes: ~p", [ANodes]), + lager:info("BNodes: ~p", [BNodes]), + + lager:info("Build cluster A"), + repl_util:make_cluster(ANodes), + + lager:info("Build cluster B"), + repl_util:make_cluster(BNodes), + + replication(ANodes, BNodes, false), + pass. + +replication([AFirst|_] = ANodes, [BFirst|_] = BNodes, Connected) -> + + DowngradeVsn = previous, %% TODO: make configurable + + AllNodes = ANodes ++ BNodes, + rt:log_to_nodes(AllNodes, "Starting replication-object-reformat test"), + + TestHash = erlang:md5(term_to_binary(os:timestamp())), + TestBucket = <>, +%% FullsyncOnly = <>, +%% RealtimeOnly = <>, +%% NoRepl = <>, + + case Connected of + false -> + %% clusters are not connected, connect them + + %% write some initial data to A + lager:info("Writing 100 keys to ~p", [AFirst]), + ?assertEqual([], repl_util:do_write(AFirst, 1, 100, TestBucket, 2)), + + repl_util:name_cluster(AFirst, "A"), + repl_util:name_cluster(BFirst, "B"), + rt:wait_until_ring_converged(ANodes), + rt:wait_until_ring_converged(BNodes), + + %% TODO: we'll need to wait for cluster names before continuing + + %% get the leader for the first cluster + repl_util:wait_until_leader(AFirst), + LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), + + {ok, {_IP, Port}} = rpc:call(BFirst, application, get_env, + [riak_core, cluster_mgr]), + repl_util:connect_cluster(LeaderA, "127.0.0.1", Port), + + ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), + repl_util:enable_realtime(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + repl_util:start_realtime(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes); + _ -> + lager:info("waiting for leader to converge on cluster A"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), + lager:info("waiting for leader to converge on cluster B"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), + %% get the leader for the first cluster + LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), + lager:info("Leader on cluster A is ~p", [LeaderA]), + {ok, {_IP, _Port}} = rpc:call(BFirst, application, get_env, + [riak_core, cluster_mgr]) + end, + + %% verify data in correct format on A and B + confirm_object_format(AllNodes, v1), + + rt:log_to_nodes(AllNodes, + "Write data to A, downgade, and verify replication to B via realtime"), + + %% perform downgrade of riak binary object format on sink cluster + Nodes = BNodes, + N = length(Nodes), + %% key ranges for successive writes to node A, based on how many trips in the loop... 
+ Firsts = lists:seq(1001,(N*1000)+1,1000), + Lasts = lists:seq(2000,((N+1)*1000),1000), + lager:info("BNodes: ~p, Nodes: ~p", [BNodes, Nodes]), + [begin + %% write some data on A + ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), + lager:info("Writing 1000 more keys to ~p", [LeaderA]), + ?assertEqual([], repl_util:do_write(LeaderA, First, Last, TestBucket, 2)), + + %% reformat objects on cluster + lager:info("Reformatting objects and downgrading ~p", [Node]), + run_reformat(Node, Node =:= BFirst), %% wait for handoffs on one node, kill on rest + rt:wait_until_ring_converged(Nodes), + confirm_object_format(Nodes, v0), + rt:upgrade(Node, DowngradeVsn), %% use upgrade to downgrade + rt:wait_for_service(Node, riak_kv), + + %% make sure cluster is still connected + lager:info("Ensure clusters connected"), + ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), + + %% verify data is replicated to B + lager:info("Reading 1000 keys written to ~p from ~p", [LeaderA, BFirst]), + ?assertEqual(0, repl_util:wait_for_reads(BFirst, First, Last, TestBucket, 2)) + + end || {Node,First,Last} <- lists:zip3(Nodes,Firsts,Lasts)], + + %% ensure all BNodes are online + lager:info("Check all BNodes are pingable and kv service available"), + [begin + rt:wait_until_pingable(Node), + rt:wait_for_service(Node, riak_kv) + end || Node <- BNodes], + + case Connected of + false -> + %% check that the keys we wrote initially aren't replicated yet, because + %% we've disabled fullsync_on_connect + lager:info("Check keys written before fullsync are not present on B"), + Res2 = rt:systest_read(BFirst, 1, 100, TestBucket, 2), + ?assertEqual(100, length(Res2)), + + rt:log_to_nodes(AllNodes, "Fullsync from leader ~p", [LeaderA]), + repl_util:start_and_wait_until_fullsync_complete(LeaderA), + + lager:info("Check keys written before fullsync are now present on B"), + ?assertEqual(0, repl_util:wait_for_reads(BFirst, 1, 100, TestBucket, 2)); + _ -> + ok + end, + + lager:info("Test passed"), + fin. + +run_reformat(Node, KillHandoffs) -> + verify_riak_object_reformat:run_reformat(Node, KillHandoffs). + +confirm_object_format(Node, Version) -> + verify_riak_object_reformat:confirm_object_format(Node, Version). + From c0e44156143bd87e0ed4d0c54cf56117c1adfa4d Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 15 Jan 2014 16:44:03 -0500 Subject: [PATCH 042/139] Verify object format differences between cluster. Verify that replication from a v1 object format cluster to a v0 object format cluster succeeds. --- src/rt.erl | 90 +++++++++- tests/replication_object_reformat.erl | 238 +++++++++----------------- 2 files changed, 170 insertions(+), 158 deletions(-) diff --git a/src/rt.erl b/src/rt.erl index 824e1bced..763cee3d9 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -40,6 +40,7 @@ capability/2, capability/3, check_singleton_node/1, + check_fullsync/3, check_ibrowse/0, claimant_according_to/1, clean_cluster/1, @@ -85,8 +86,12 @@ pmap/2, post_result/2, priv_dir/0, + read_from_cluster/5, remove/2, riak/2, + repl_get_leader/1, + repl_get_port/1, + repl_connect_cluster/3, rpc_get_env/2, set_backend/1, set_backend/2, @@ -115,6 +120,7 @@ update_app_config/2, upgrade/2, upgrade/3, + validate_completed_fullsync/6, versions/0, wait_for_cluster_service/2, wait_for_cmd/1, @@ -142,7 +148,8 @@ wait_until_transfers_complete/1, wait_until_unpingable/1, wait_until_bucket_type_status/3, - whats_up/0 + whats_up/0, + write_to_cluster/4 ]). -define(HARNESS, (rt_config:get(rt_harness))). 
@@ -734,6 +741,7 @@ wait_until_capability(Node, Capability, Value, Default) -> rt:wait_until(Node, fun(_) -> Cap = capability(Node, Capability, Default), + io:format("capability is ~p ~p",[Node, Cap]), cap_equal(Value, Cap) end). @@ -1374,3 +1382,83 @@ wait_for_control(Vsn, Node) when is_atom(Node) -> %% @doc Wait for Riak Control to start on a series of nodes. wait_for_control(VersionedNodes) when is_list(VersionedNodes) -> [wait_for_control(Vsn, Node) || {Vsn, Node} <- VersionedNodes]. + +%% @doc Connect two clusters using a given name. +repl_connect_cluster(Source, Port, Name) -> + lager:info("Connecting ~p to ~p for cluster ~p.", + [Source, Port, Name]), + repl_util:connect_cluster(Source, "127.0.0.1", Port), + ?assertEqual(ok, repl_util:wait_for_connection(Source, Name)). + +%% @doc Given a node, find the port that the cluster manager is +%% listening on. +repl_get_port(Node) -> + {ok, {_IP, Port}} = rpc:call(Node, + application, + get_env, + [riak_core, cluster_mgr]), + Port. + +%% @doc Given a node, find out who the current replication leader in its +%% cluster is. +repl_get_leader(Node) -> + rpc:call(Node, riak_core_cluster_mgr, get_leader, []). + +%% @doc Validate fullsync completed and all keys are available. +validate_completed_fullsync(ReplicationLeader, + DestinationNode, + DestinationCluster, + Start, + End, + Bucket) -> + ok = check_fullsync(ReplicationLeader, DestinationCluster, 0), + lager:info("Verify: Reading ~p keys repl'd from A(~p) to ~p(~p)", + [End - Start, ReplicationLeader, + DestinationCluster, DestinationNode]), + ?assertEqual(0, + repl_util:wait_for_reads(DestinationNode, + Start, + End, + Bucket, + 1)). + +%% @doc Write a series of keys and ensure they are all written. +write_to_cluster(Node, Start, End, Bucket) -> + lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), + ?assertEqual([], + repl_util:do_write(Node, Start, End, Bucket, 1)). + +%% @doc Read from cluster a series of keys, asserting a certain number +%% of errors. +read_from_cluster(Node, Start, End, Bucket, Errors) -> + lager:info("Reading ~p keys from node ~p.", [End - Start, Node]), + Res2 = rt:systest_read(Node, Start, End, Bucket, 1), + ?assertEqual(Errors, length(Res2)). + +%% @doc Assert we can perform one fullsync cycle, and that the number of +%% expected failures is correct. +check_fullsync(Node, Cluster, ExpectedFailures) -> + {Time, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [Node, Cluster]), + lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), + + Status = rpc:call(Node, riak_repl_console, status, [quiet]), + + Props = case proplists:get_value(fullsync_coordinator, Status) of + [{_Name, Props0}] -> + Props0; + Multiple -> + {_Name, Props0} = lists:keyfind(Cluster, 1, Multiple), + Props0 + end, + + %% check that the expected number of partitions failed to sync + ?assertEqual(ExpectedFailures, + proplists:get_value(error_exits, Props)), + + %% check that we retried each of them 5 times + ?assert( + proplists:get_value(retry_exits, Props) >= ExpectedFailures * 5), + + ok. diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index 0470700bb..363501a8a 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -1,167 +1,91 @@ -%% Riak test for replication combined with binary riak object cluster downgrading -%% -%% Strategy: run realtime and fullsync replication while doing a cluster downgrade - -module(replication_object_reformat). -behavior(riak_test). 
-export([confirm/0]). -include_lib("eunit/include/eunit.hrl"). +-import(rt, [deploy_nodes/2]). + +-define(TEST_BUCKET, <<"repl-aae-fullsync-systest_a">>). +-define(NUM_KEYS, 1000). + +-define(CONF(Retries), [ + {riak_core, + [ + {ring_creation_size, 8}, + {default_bucket_props, [{n_val, 1}]} + ] + }, + {riak_kv, + [ + {object_format, v1}, + {anti_entropy, {on, []}}, + {anti_entropy_build_limit, {100, 1000}}, + {anti_entropy_concurrency, 100} + ] + }, + {riak_repl, + [ + {fullsync_strategy, aae}, + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + {max_fssource_retries, Retries} + ]} + ]). + confirm() -> - NumNodes = rt:config(num_nodes, 6), - ClusterASize = rt:config(cluster_a_size, 3), - - lager:info("Deploy ~p nodes", [NumNodes]), - Conf = [ - {riak_kv, - [ - {anti_entropy, {off, []}} - ] - }, - {riak_repl, - [ - {fullsync_on_connect, false}, - {fullsync_interval, disabled} - ]} - ], - - Nodes = rt:deploy_nodes(NumNodes, Conf), - - {ANodes, BNodes} = lists:split(ClusterASize, Nodes), + Nodes = deploy_nodes(6, ?CONF(5)), + + {ANodes, BNodes} = lists:split(3, Nodes), + lager:info("ANodes: ~p", [ANodes]), lager:info("BNodes: ~p", [BNodes]), - lager:info("Build cluster A"), - repl_util:make_cluster(ANodes), - - lager:info("Build cluster B"), - repl_util:make_cluster(BNodes), - - replication(ANodes, BNodes, false), - pass. - -replication([AFirst|_] = ANodes, [BFirst|_] = BNodes, Connected) -> - - DowngradeVsn = previous, %% TODO: make configurable - - AllNodes = ANodes ++ BNodes, - rt:log_to_nodes(AllNodes, "Starting replication-object-reformat test"), - - TestHash = erlang:md5(term_to_binary(os:timestamp())), - TestBucket = <>, -%% FullsyncOnly = <>, -%% RealtimeOnly = <>, -%% NoRepl = <>, - - case Connected of - false -> - %% clusters are not connected, connect them - - %% write some initial data to A - lager:info("Writing 100 keys to ~p", [AFirst]), - ?assertEqual([], repl_util:do_write(AFirst, 1, 100, TestBucket, 2)), - - repl_util:name_cluster(AFirst, "A"), - repl_util:name_cluster(BFirst, "B"), - rt:wait_until_ring_converged(ANodes), - rt:wait_until_ring_converged(BNodes), - - %% TODO: we'll need to wait for cluster names before continuing - - %% get the leader for the first cluster - repl_util:wait_until_leader(AFirst), - LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), - - {ok, {_IP, Port}} = rpc:call(BFirst, application, get_env, - [riak_core, cluster_mgr]), - repl_util:connect_cluster(LeaderA, "127.0.0.1", Port), - - ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), - repl_util:enable_realtime(LeaderA, "B"), - rt:wait_until_ring_converged(ANodes), - repl_util:start_realtime(LeaderA, "B"), - rt:wait_until_ring_converged(ANodes), - repl_util:enable_fullsync(LeaderA, "B"), - rt:wait_until_ring_converged(ANodes); - _ -> - lager:info("waiting for leader to converge on cluster A"), - ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), - lager:info("waiting for leader to converge on cluster B"), - ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), - %% get the leader for the first cluster - LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), - lager:info("Leader on cluster A is ~p", [LeaderA]), - {ok, {_IP, _Port}} = rpc:call(BFirst, application, get_env, - [riak_core, cluster_mgr]) - end, - - %% verify data in correct format on A and B - confirm_object_format(AllNodes, v1), - - rt:log_to_nodes(AllNodes, - "Write data to A, downgade, and verify replication to B via realtime"), - - %% perform downgrade of 
riak binary object format on sink cluster - Nodes = BNodes, - N = length(Nodes), - %% key ranges for successive writes to node A, based on how many trips in the loop... - Firsts = lists:seq(1001,(N*1000)+1,1000), - Lasts = lists:seq(2000,((N+1)*1000),1000), - lager:info("BNodes: ~p, Nodes: ~p", [BNodes, Nodes]), - [begin - %% write some data on A - ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), - lager:info("Writing 1000 more keys to ~p", [LeaderA]), - ?assertEqual([], repl_util:do_write(LeaderA, First, Last, TestBucket, 2)), - - %% reformat objects on cluster - lager:info("Reformatting objects and downgrading ~p", [Node]), - run_reformat(Node, Node =:= BFirst), %% wait for handoffs on one node, kill on rest - rt:wait_until_ring_converged(Nodes), - confirm_object_format(Nodes, v0), - rt:upgrade(Node, DowngradeVsn), %% use upgrade to downgrade - rt:wait_for_service(Node, riak_kv), - - %% make sure cluster is still connected - lager:info("Ensure clusters connected"), - ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), - - %% verify data is replicated to B - lager:info("Reading 1000 keys written to ~p from ~p", [LeaderA, BFirst]), - ?assertEqual(0, repl_util:wait_for_reads(BFirst, First, Last, TestBucket, 2)) - - end || {Node,First,Last} <- lists:zip3(Nodes,Firsts,Lasts)], - - %% ensure all BNodes are online - lager:info("Check all BNodes are pingable and kv service available"), - [begin - rt:wait_until_pingable(Node), - rt:wait_for_service(Node, riak_kv) - end || Node <- BNodes], - - case Connected of - false -> - %% check that the keys we wrote initially aren't replicated yet, because - %% we've disabled fullsync_on_connect - lager:info("Check keys written before fullsync are not present on B"), - Res2 = rt:systest_read(BFirst, 1, 100, TestBucket, 2), - ?assertEqual(100, length(Res2)), - - rt:log_to_nodes(AllNodes, "Fullsync from leader ~p", [LeaderA]), - repl_util:start_and_wait_until_fullsync_complete(LeaderA), - - lager:info("Check keys written before fullsync are now present on B"), - ?assertEqual(0, repl_util:wait_for_reads(BFirst, 1, 100, TestBucket, 2)); - _ -> - ok - end, - - lager:info("Test passed"), - fin. - -run_reformat(Node, KillHandoffs) -> - verify_riak_object_reformat:run_reformat(Node, KillHandoffs). - -confirm_object_format(Node, Version) -> - verify_riak_object_reformat:confirm_object_format(Node, Version). 
+ lager:info("Building two clusters."), + [repl_util:make_cluster(N) || N <- [ANodes, BNodes]], + + lager:info("Updating app config to force v0 on sink cluster."), + [rt:update_app_config(N, [{riak_kv, [{object_format, v0}]}]) + || N <- BNodes], + + AFirst = hd(ANodes), + BFirst = hd(BNodes), + + lager:info("Naming clusters."), + repl_util:name_cluster(AFirst, "A"), + repl_util:name_cluster(BFirst, "B"), + + lager:info("Waiting for convergence."), + rt:wait_until_ring_converged(ANodes), + rt:wait_until_ring_converged(BNodes), + + lager:info("Waiting for transfers to complete."), + rt:wait_until_transfers_complete(ANodes), + rt:wait_until_transfers_complete(BNodes), + + lager:info("Wait for v1 capability on source cluster."), + [rt:wait_until_capability(N, {riak_kv, object_format}, v1, v0) + || N <- ANodes], + + lager:info("Wait for v0 capability on sink cluster."), + [rt:wait_until_capability(N, {riak_kv, object_format}, v0, v0) + || N <- BNodes], + + lager:info("Get leaders."), + LeaderA = rt:repl_get_leader(AFirst), + LeaderB = rt:repl_get_leader(BFirst), + + lager:info("Finding connection manager ports."), + BPort = rt:repl_get_port(LeaderB), + + lager:info("Connecting cluster A to B"), + rt:repl_connect_cluster(LeaderA, BPort, "B"), + + lager:info("Write keys, assert they are not available yet."), + rt:write_to_cluster(AFirst, 1, ?NUM_KEYS, ?TEST_BUCKET), + rt:read_from_cluster(BFirst, 1, ?NUM_KEYS, ?NUM_KEYS, ?TEST_BUCKET), + + lager:info("Enabling fullsync from A to B"), + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + rt:validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS, ?TEST_BUCKET). From 12fcf22cb77c29449f1ef216d0bb6309c9e55052 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 15 Jan 2014 18:49:51 -0500 Subject: [PATCH 043/139] Clean up code, refactor into functions. --- tests/replication_object_reformat.erl | 48 +++++++++++++++++++-------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index 363501a8a..94a91d7ce 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -5,7 +5,7 @@ -import(rt, [deploy_nodes/2]). --define(TEST_BUCKET, <<"repl-aae-fullsync-systest_a">>). +-define(TEST_BUCKET, <<"object-reformat">>). -define(NUM_KEYS, 1000). -define(CONF(Retries), [ @@ -17,7 +17,6 @@ }, {riak_kv, [ - {object_format, v1}, {anti_entropy, {on, []}}, {anti_entropy_build_limit, {100, 1000}}, {anti_entropy_concurrency, 100} @@ -43,10 +42,6 @@ confirm() -> lager:info("Building two clusters."), [repl_util:make_cluster(N) || N <- [ANodes, BNodes]], - lager:info("Updating app config to force v0 on sink cluster."), - [rt:update_app_config(N, [{riak_kv, [{object_format, v0}]}]) - || N <- BNodes], - AFirst = hd(ANodes), BFirst = hd(BNodes), @@ -62,12 +57,30 @@ confirm() -> rt:wait_until_transfers_complete(ANodes), rt:wait_until_transfers_complete(BNodes), - lager:info("Wait for v1 capability on source cluster."), - [rt:wait_until_capability(N, {riak_kv, object_format}, v1, v0) + verify_replication({ANodes, v0}, {BNodes, v1}, 1). 
+ +verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start) -> + AFirst = hd(ANodes), + BFirst = hd(BNodes), + + lager:info("Updating app config to force ~p on source cluster.", + [AVersion]), + [rt:update_app_config(N, [{riak_kv, + [{object_format, AVersion}]}]) || N <- ANodes], - lager:info("Wait for v0 capability on sink cluster."), - [rt:wait_until_capability(N, {riak_kv, object_format}, v0, v0) + lager:info("Updating app config to force ~p on sink cluster.", + [BVersion]), + [rt:update_app_config(N, [{riak_kv, + [{object_format, BVersion}]}]) + || N <- BNodes], + + lager:info("Wait for capability on source cluster."), + [rt:wait_until_capability(N, {riak_kv, object_format}, AVersion, v0) + || N <- ANodes], + + lager:info("Wait for capability on sink cluster."), + [rt:wait_until_capability(N, {riak_kv, object_format}, BVersion, v0) || N <- BNodes], lager:info("Get leaders."), @@ -81,11 +94,20 @@ confirm() -> rt:repl_connect_cluster(LeaderA, BPort, "B"), lager:info("Write keys, assert they are not available yet."), - rt:write_to_cluster(AFirst, 1, ?NUM_KEYS, ?TEST_BUCKET), - rt:read_from_cluster(BFirst, 1, ?NUM_KEYS, ?NUM_KEYS, ?TEST_BUCKET), + rt:write_to_cluster(AFirst, Start, ?NUM_KEYS, ?TEST_BUCKET), + rt:read_from_cluster(BFirst, Start, ?NUM_KEYS, ?TEST_BUCKET, ?NUM_KEYS), + + %% Flush AAE trees to disk. + perform_sacrifice(AFirst), lager:info("Enabling fullsync from A to B"), repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes), - rt:validate_completed_fullsync(LeaderA, BFirst, "B", 1, ?NUM_KEYS, ?TEST_BUCKET). + rt:validate_completed_fullsync(LeaderA, BFirst, "B", Start, ?NUM_KEYS, ?TEST_BUCKET). + +%% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE +%% trees to flush to disk. +perform_sacrifice(Node) -> + ?assertEqual([], repl_util:do_write(Node, 1, 2000, + <<"sacrificial">>, 1)). From 38d2660d86009540cb36e65a01856b4aa1ce978e Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 15 Jan 2014 19:09:19 -0500 Subject: [PATCH 044/139] Add v1 -> v0 test. --- tests/replication_object_reformat.erl | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index 94a91d7ce..206b707f6 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -57,9 +57,24 @@ confirm() -> rt:wait_until_transfers_complete(ANodes), rt:wait_until_transfers_complete(BNodes), - verify_replication({ANodes, v0}, {BNodes, v1}, 1). + lager:info("Get leaders."), + LeaderA = rt:repl_get_leader(AFirst), + LeaderB = rt:repl_get_leader(BFirst), + + lager:info("Finding connection manager ports."), + BPort = rt:repl_get_port(LeaderB), -verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start) -> + lager:info("Connecting cluster A to B"), + rt:repl_connect_cluster(LeaderA, BPort, "B"), + + lager:info("Enabling fullsync from A to B"), + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + + verify_replication({ANodes, v0}, {BNodes, v1}, 1, ?NUM_KEYS), + verify_replication({ANodes, v0}, {BNodes, v1}, ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS). 
+ +verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> AFirst = hd(ANodes), BFirst = hd(BNodes), @@ -90,21 +105,17 @@ verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start) -> lager:info("Finding connection manager ports."), BPort = rt:repl_get_port(LeaderB), - lager:info("Connecting cluster A to B"), + lager:info("Ensuring connection from cluster A to B"), rt:repl_connect_cluster(LeaderA, BPort, "B"), lager:info("Write keys, assert they are not available yet."), - rt:write_to_cluster(AFirst, Start, ?NUM_KEYS, ?TEST_BUCKET), - rt:read_from_cluster(BFirst, Start, ?NUM_KEYS, ?TEST_BUCKET, ?NUM_KEYS), + rt:write_to_cluster(AFirst, Start, End, ?TEST_BUCKET), + rt:read_from_cluster(BFirst, Start, End, ?TEST_BUCKET, ?NUM_KEYS), %% Flush AAE trees to disk. perform_sacrifice(AFirst), - lager:info("Enabling fullsync from A to B"), - repl_util:enable_fullsync(LeaderA, "B"), - rt:wait_until_ring_converged(ANodes), - - rt:validate_completed_fullsync(LeaderA, BFirst, "B", Start, ?NUM_KEYS, ?TEST_BUCKET). + rt:validate_completed_fullsync(LeaderA, BFirst, "B", Start, End, ?TEST_BUCKET). %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE %% trees to flush to disk. From d788e31a6230f924563e4eb71a22b897ad2112fb Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 16 Jan 2014 12:38:57 -0500 Subject: [PATCH 045/139] Use the keylist strategy. --- tests/replication_object_reformat.erl | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index 206b707f6..cb02939b2 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -15,16 +15,9 @@ {default_bucket_props, [{n_val, 1}]} ] }, - {riak_kv, - [ - {anti_entropy, {on, []}}, - {anti_entropy_build_limit, {100, 1000}}, - {anti_entropy_concurrency, 100} - ] - }, {riak_repl, [ - {fullsync_strategy, aae}, + {fullsync_strategy, keylist}, {fullsync_on_connect, false}, {fullsync_interval, disabled}, {max_fssource_retries, Retries} @@ -71,8 +64,7 @@ confirm() -> repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes), - verify_replication({ANodes, v0}, {BNodes, v1}, 1, ?NUM_KEYS), - verify_replication({ANodes, v0}, {BNodes, v1}, ?NUM_KEYS + 1, ?NUM_KEYS + ?NUM_KEYS). + verify_replication({ANodes, v0}, {BNodes, v1}, 1, ?NUM_KEYS). verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> AFirst = hd(ANodes), @@ -112,13 +104,4 @@ verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> rt:write_to_cluster(AFirst, Start, End, ?TEST_BUCKET), rt:read_from_cluster(BFirst, Start, End, ?TEST_BUCKET, ?NUM_KEYS), - %% Flush AAE trees to disk. - perform_sacrifice(AFirst), - rt:validate_completed_fullsync(LeaderA, BFirst, "B", Start, End, ?TEST_BUCKET). - -%% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE -%% trees to flush to disk. -perform_sacrifice(Node) -> - ?assertEqual([], repl_util:do_write(Node, 1, 2000, - <<"sacrificial">>, 1)). From 127faef697e2b45c4ba223a84a83f1d4aed3ebc9 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 16 Jan 2014 14:29:27 -0500 Subject: [PATCH 046/139] Additional debugging; use AAE strategy. 
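For readers tracking the back-and-forth between patches 045 and 046: the knob being toggled is riak_repl's fullsync_strategy, and the aae strategy additionally requires the riak_kv anti-entropy settings that patch 045 removed and this patch restores. A minimal sketch of the two configuration shapes, restricted to values that actually appear in these diffs:

    %% keylist fullsync: no AAE trees required
    {riak_repl, [{fullsync_strategy, keylist}]}.

    %% aae fullsync: anti-entropy must be on, with fast tree builds
    {riak_kv, [{anti_entropy, {on, []}},
               {anti_entropy_build_limit, {100, 1000}},
               {anti_entropy_concurrency, 100}]}.
    {riak_repl, [{fullsync_strategy, aae}]}.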
--- tests/replication_object_reformat.erl | 29 ++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index cb02939b2..baf550c8a 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -15,9 +15,17 @@ {default_bucket_props, [{n_val, 1}]} ] }, + {riak_kv, + [ + %% Specify fast building of AAE trees + {anti_entropy, {on, []}}, + {anti_entropy_build_limit, {100, 1000}}, + {anti_entropy_concurrency, 100} + ] + }, {riak_repl, [ - {fullsync_strategy, keylist}, + {fullsync_strategy, aae}, {fullsync_on_connect, false}, {fullsync_interval, disabled}, {max_fssource_retries, Retries} @@ -25,7 +33,7 @@ ]). confirm() -> - Nodes = deploy_nodes(6, ?CONF(5)), + Nodes = deploy_nodes(6, ?CONF(infinity)), {ANodes, BNodes} = lists:split(3, Nodes), @@ -102,6 +110,21 @@ verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> lager:info("Write keys, assert they are not available yet."), rt:write_to_cluster(AFirst, Start, End, ?TEST_BUCKET), + + lager:info("Verify we can not read the keys on the sink."), rt:read_from_cluster(BFirst, Start, End, ?TEST_BUCKET, ?NUM_KEYS), - rt:validate_completed_fullsync(LeaderA, BFirst, "B", Start, End, ?TEST_BUCKET). + lager:info("Verify we can read the keys on the source."), + rt:read_from_cluster(AFirst, Start, End, ?TEST_BUCKET, 0), + + lager:info("Performing sacrifice."), + perform_sacrifice(AFirst, Start), + + rt:validate_completed_fullsync(LeaderA, BFirst, "B", + Start, End, ?TEST_BUCKET). + +%% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE +%% trees to flush to disk. +perform_sacrifice(Node, Start) -> + ?assertEqual([], repl_util:do_write(Node, Start, 2000, + <<"sacrificial">>, 1)). From af355530f13db66bfe0e477b41864b42b40d4255 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 16 Jan 2014 15:12:32 -0500 Subject: [PATCH 047/139] Add passing AAE-based object format v0 -> v1 replication test. --- tests/replication_object_reformat.erl | 46 +++++++++++---------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index baf550c8a..b380fac03 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -17,7 +17,6 @@ }, {riak_kv, [ - %% Specify fast building of AAE trees {anti_entropy, {on, []}}, {anti_entropy_build_limit, {100, 1000}}, {anti_entropy_concurrency, 100} @@ -33,6 +32,9 @@ ]). confirm() -> + verify_replication(v0, v1, 1, ?NUM_KEYS). + +verify_replication(AVersion, BVersion, Start, End) -> Nodes = deploy_nodes(6, ?CONF(infinity)), {ANodes, BNodes} = lists:split(3, Nodes), @@ -40,6 +42,18 @@ confirm() -> lager:info("ANodes: ~p", [ANodes]), lager:info("BNodes: ~p", [BNodes]), + lager:info("Updating app config to force ~p on source cluster.", + [AVersion]), + [rt:update_app_config(N, [{riak_kv, + [{object_format, AVersion}]}]) + || N <- ANodes], + + lager:info("Updating app config to force ~p on sink cluster.", + [BVersion]), + [rt:update_app_config(N, [{riak_kv, + [{object_format, BVersion}]}]) + || N <- BNodes], + lager:info("Building two clusters."), [repl_util:make_cluster(N) || N <- [ANodes, BNodes]], @@ -71,24 +85,7 @@ confirm() -> lager:info("Enabling fullsync from A to B"), repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes), - - verify_replication({ANodes, v0}, {BNodes, v1}, 1, ?NUM_KEYS). 
- -verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> - AFirst = hd(ANodes), - BFirst = hd(BNodes), - - lager:info("Updating app config to force ~p on source cluster.", - [AVersion]), - [rt:update_app_config(N, [{riak_kv, - [{object_format, AVersion}]}]) - || N <- ANodes], - - lager:info("Updating app config to force ~p on sink cluster.", - [BVersion]), - [rt:update_app_config(N, [{riak_kv, - [{object_format, BVersion}]}]) - || N <- BNodes], + rt:wait_until_ring_converged(BNodes), lager:info("Wait for capability on source cluster."), [rt:wait_until_capability(N, {riak_kv, object_format}, AVersion, v0) @@ -98,13 +95,6 @@ verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> [rt:wait_until_capability(N, {riak_kv, object_format}, BVersion, v0) || N <- BNodes], - lager:info("Get leaders."), - LeaderA = rt:repl_get_leader(AFirst), - LeaderB = rt:repl_get_leader(BFirst), - - lager:info("Finding connection manager ports."), - BPort = rt:repl_get_port(LeaderB), - lager:info("Ensuring connection from cluster A to B"), rt:repl_connect_cluster(LeaderA, BPort, "B"), @@ -121,7 +111,9 @@ verify_replication({ANodes, AVersion}, {BNodes, BVersion}, Start, End) -> perform_sacrifice(AFirst, Start), rt:validate_completed_fullsync(LeaderA, BFirst, "B", - Start, End, ?TEST_BUCKET). + Start, End, ?TEST_BUCKET), + + rt:clean_cluster(Nodes). %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE %% trees to flush to disk. From 766b795944bc9ce776b5bf4af88935e718dd4057 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 16 Jan 2014 15:33:35 -0500 Subject: [PATCH 048/139] Add v1 -> v0 replication test. --- tests/replication_object_reformat.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl index b380fac03..37364755e 100644 --- a/tests/replication_object_reformat.erl +++ b/tests/replication_object_reformat.erl @@ -32,7 +32,8 @@ ]). confirm() -> - verify_replication(v0, v1, 1, ?NUM_KEYS). + verify_replication(v0, v1, 1, ?NUM_KEYS), + verify_replication(v1, v0, 1, ?NUM_KEYS). verify_replication(AVersion, BVersion, Start, End) -> Nodes = deploy_nodes(6, ?CONF(infinity)), From 494cd2deb5c0115d5e261e8a3f5e705b50ab283a Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 16 Jan 2014 16:29:24 -0500 Subject: [PATCH 049/139] General code cleanup. --- src/rt.erl | 89 +------------------------ tests/repl_util.erl | 94 ++++++++++++++++++++++++++- tests/replication_object_reformat.erl | 19 +++--- 3 files changed, 102 insertions(+), 100 deletions(-) diff --git a/src/rt.erl b/src/rt.erl index 763cee3d9..444fbdebd 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -40,7 +40,6 @@ capability/2, capability/3, check_singleton_node/1, - check_fullsync/3, check_ibrowse/0, claimant_according_to/1, clean_cluster/1, @@ -86,12 +85,8 @@ pmap/2, post_result/2, priv_dir/0, - read_from_cluster/5, remove/2, riak/2, - repl_get_leader/1, - repl_get_port/1, - repl_connect_cluster/3, rpc_get_env/2, set_backend/1, set_backend/2, @@ -120,7 +115,6 @@ update_app_config/2, upgrade/2, upgrade/3, - validate_completed_fullsync/6, versions/0, wait_for_cluster_service/2, wait_for_cmd/1, @@ -148,8 +142,7 @@ wait_until_transfers_complete/1, wait_until_unpingable/1, wait_until_bucket_type_status/3, - whats_up/0, - write_to_cluster/4 + whats_up/0 ]). -define(HARNESS, (rt_config:get(rt_harness))). 
@@ -1382,83 +1375,3 @@ wait_for_control(Vsn, Node) when is_atom(Node) -> %% @doc Wait for Riak Control to start on a series of nodes. wait_for_control(VersionedNodes) when is_list(VersionedNodes) -> [wait_for_control(Vsn, Node) || {Vsn, Node} <- VersionedNodes]. - -%% @doc Connect two clusters using a given name. -repl_connect_cluster(Source, Port, Name) -> - lager:info("Connecting ~p to ~p for cluster ~p.", - [Source, Port, Name]), - repl_util:connect_cluster(Source, "127.0.0.1", Port), - ?assertEqual(ok, repl_util:wait_for_connection(Source, Name)). - -%% @doc Given a node, find the port that the cluster manager is -%% listening on. -repl_get_port(Node) -> - {ok, {_IP, Port}} = rpc:call(Node, - application, - get_env, - [riak_core, cluster_mgr]), - Port. - -%% @doc Given a node, find out who the current replication leader in its -%% cluster is. -repl_get_leader(Node) -> - rpc:call(Node, riak_core_cluster_mgr, get_leader, []). - -%% @doc Validate fullsync completed and all keys are available. -validate_completed_fullsync(ReplicationLeader, - DestinationNode, - DestinationCluster, - Start, - End, - Bucket) -> - ok = check_fullsync(ReplicationLeader, DestinationCluster, 0), - lager:info("Verify: Reading ~p keys repl'd from A(~p) to ~p(~p)", - [End - Start, ReplicationLeader, - DestinationCluster, DestinationNode]), - ?assertEqual(0, - repl_util:wait_for_reads(DestinationNode, - Start, - End, - Bucket, - 1)). - -%% @doc Write a series of keys and ensure they are all written. -write_to_cluster(Node, Start, End, Bucket) -> - lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), - ?assertEqual([], - repl_util:do_write(Node, Start, End, Bucket, 1)). - -%% @doc Read from cluster a series of keys, asserting a certain number -%% of errors. -read_from_cluster(Node, Start, End, Bucket, Errors) -> - lager:info("Reading ~p keys from node ~p.", [End - Start, Node]), - Res2 = rt:systest_read(Node, Start, End, Bucket, 1), - ?assertEqual(Errors, length(Res2)). - -%% @doc Assert we can perform one fullsync cycle, and that the number of -%% expected failures is correct. -check_fullsync(Node, Cluster, ExpectedFailures) -> - {Time, _} = timer:tc(repl_util, - start_and_wait_until_fullsync_complete, - [Node, Cluster]), - lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), - - Status = rpc:call(Node, riak_repl_console, status, [quiet]), - - Props = case proplists:get_value(fullsync_coordinator, Status) of - [{_Name, Props0}] -> - Props0; - Multiple -> - {_Name, Props0} = lists:keyfind(Cluster, 1, Multiple), - Props0 - end, - - %% check that the expected number of partitions failed to sync - ?assertEqual(ExpectedFailures, - proplists:get_value(error_exits, Props)), - - %% check that we retried each of them 5 times - ?assert( - proplists:get_value(retry_exits, Props) >= ExpectedFailures * 5), - - ok. diff --git a/tests/repl_util.erl b/tests/repl_util.erl index 3501c0355..9c0f4f16b 100644 --- a/tests/repl_util.erl +++ b/tests/repl_util.erl @@ -29,7 +29,14 @@ num_partitions/1, get_cluster_mgr_port/1, maybe_reconnect_rt/3, - connect_rt/3 + connect_rt/3, + connect_cluster_by_name/3, + get_port/1, + get_leader/1, + write_to_cluster/4, + read_from_cluster/5, + check_fullsync/3, + validate_completed_fullsync/6 ]). -include_lib("eunit/include/eunit.hrl"). 
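The hunk above is the receiving half of this cleanup: the helpers deleted from rt.erl reappear as repl_util exports, several renamed to drop their repl_ prefix. At a call site the migration is mechanical, as the replication_object_reformat diff further below shows, e.g.:

    %% before this patch:
    BPort = rt:repl_get_port(LeaderB),
    %% after this patch:
    BPort = repl_util:get_port(LeaderB),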
@@ -140,7 +147,8 @@ wait_until_no_connection(Node) -> wait_for_reads(Node, Start, End, Bucket, R) -> rt:wait_until(Node, fun(_) -> - rt:systest_read(Node, Start, End, Bucket, R) == [] + Reads = rt:systest_read(Node, Start, End, Bucket, R), + Reads == [] end), Reads = rt:systest_read(Node, Start, End, Bucket, R), lager:info("Reads: ~p", [Reads]), @@ -315,3 +323,85 @@ connect_rt(SourceNode, SinkPort, SinkName) -> repl_util:wait_for_connection(SourceNode, SinkName), repl_util:enable_realtime(SourceNode, SinkName), repl_util:start_realtime(SourceNode, SinkName). + +%% @doc Connect two clusters using a given name. +connect_cluster_by_name(Source, Port, Name) -> + lager:info("Connecting ~p to ~p for cluster ~p.", + [Source, Port, Name]), + repl_util:connect_cluster(Source, "127.0.0.1", Port), + ?assertEqual(ok, repl_util:wait_for_connection(Source, Name)). + +%% @doc Given a node, find the port that the cluster manager is +%% listening on. +get_port(Node) -> + {ok, {_IP, Port}} = rpc:call(Node, + application, + get_env, + [riak_core, cluster_mgr]), + Port. + +%% @doc Given a node, find out who the current replication leader in its +%% cluster is. +get_leader(Node) -> + rpc:call(Node, riak_core_cluster_mgr, get_leader, []). + +%% @doc Validate fullsync completed and all keys are available. +validate_completed_fullsync(ReplicationLeader, + DestinationNode, + DestinationCluster, + Start, + End, + Bucket) -> + ok = check_fullsync(ReplicationLeader, DestinationCluster, 0), + lager:info("Verify: Reading ~p keys repl'd from A(~p) to ~p(~p)", + [End - Start, ReplicationLeader, + DestinationCluster, DestinationNode]), + ?assertEqual(0, + repl_util:wait_for_reads(DestinationNode, + Start, + End, + Bucket, + 1)). + +%% @doc Write a series of keys and ensure they are all written. +write_to_cluster(Node, Start, End, Bucket) -> + lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), + ?assertEqual([], + repl_util:do_write(Node, Start, End, Bucket, 1)). + +%% @doc Read from cluster a series of keys, asserting a certain number +%% of errors. +read_from_cluster(Node, Start, End, Bucket, Errors) -> + lager:info("Reading ~p keys from node ~p.", [End - Start, Node]), + Res2 = rt:systest_read(Node, Start, End, Bucket, 1), + ?assertEqual(Errors, length(Res2)). + +%% @doc Assert we can perform one fullsync cycle, and that the number of +%% expected failures is correct. +check_fullsync(Node, Cluster, ExpectedFailures) -> + {Time, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [Node, Cluster]), + lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), + + Status = rpc:call(Node, riak_repl_console, status, [quiet]), + + Props = case proplists:get_value(fullsync_coordinator, Status) of + [{_Name, Props0}] -> + Props0; + Multiple -> + {_Name, Props0} = lists:keyfind(Cluster, 1, Multiple), + Props0 + end, + + %% check that the expected number of partitions failed to sync + ErrorExits = proplists:get_value(error_exits, Props), + lager:info("Error exits: ~p", [ErrorExits]), + ?assertEqual(ExpectedFailures, ErrorExits), + + %% check that we retried each of them 5 times + RetryExits = proplists:get_value(retry_exits, Props), + lager:info("Retry exits: ~p", [RetryExits]), + ?assert(RetryExits >= ExpectedFailures * 5), + + ok. 
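A worked example of the two assertions at the end of check_fullsync/3, using the hard-coded factor of five retries per failed partition noted in the comments: with ExpectedFailures = 2, error_exits must be exactly 2 and retry_exits must reach at least 2 * 5 = 10 for the check to pass.

    %% hypothetical coordinator status with two failing partitions:
    %% Props = [{error_exits, 2}, {retry_exits, 11}, ...]
    ?assertEqual(2, ErrorExits),
    ?assert(RetryExits >= 2 * 5). %% 11 >= 10, so this passes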
diff --git a/tests/replication_object_reformat.erl b/tests/replication_object_reformat.erl
index 37364755e..18611fcf3 100644
--- a/tests/replication_object_reformat.erl
+++ b/tests/replication_object_reformat.erl
@@ -74,14 +74,14 @@ verify_replication(AVersion, BVersion, Start, End) ->
 rt:wait_until_transfers_complete(BNodes),

 lager:info("Get leaders."),
- LeaderA = rt:repl_get_leader(AFirst),
- LeaderB = rt:repl_get_leader(BFirst),
+ LeaderA = repl_util:get_leader(AFirst),
+ LeaderB = repl_util:get_leader(BFirst),

 lager:info("Finding connection manager ports."),
- BPort = rt:repl_get_port(LeaderB),
+ BPort = repl_util:get_port(LeaderB),

 lager:info("Connecting cluster A to B"),
- rt:repl_connect_cluster(LeaderA, BPort, "B"),
+ repl_util:connect_cluster_by_name(LeaderA, BPort, "B"),

 lager:info("Enabling fullsync from A to B"),
 repl_util:enable_fullsync(LeaderA, "B"),
@@ -97,22 +97,21 @@ verify_replication(AVersion, BVersion, Start, End) ->
 || N <- BNodes],

 lager:info("Ensuring connection from cluster A to B"),
- rt:repl_connect_cluster(LeaderA, BPort, "B"),
+ repl_util:connect_cluster_by_name(LeaderA, BPort, "B"),

 lager:info("Write keys, assert they are not available yet."),
- rt:write_to_cluster(AFirst, Start, End, ?TEST_BUCKET),
+ repl_util:write_to_cluster(AFirst, Start, End, ?TEST_BUCKET),

 lager:info("Verify we can not read the keys on the sink."),
- rt:read_from_cluster(BFirst, Start, End, ?TEST_BUCKET, ?NUM_KEYS),
+ repl_util:read_from_cluster(BFirst, Start, End, ?TEST_BUCKET, ?NUM_KEYS),

 lager:info("Verify we can read the keys on the source."),
- rt:read_from_cluster(AFirst, Start, End, ?TEST_BUCKET, 0),
+ repl_util:read_from_cluster(AFirst, Start, End, ?TEST_BUCKET, 0),

 lager:info("Performing sacrifice."),
 perform_sacrifice(AFirst, Start),

- rt:validate_completed_fullsync(LeaderA, BFirst, "B",
- Start, End, ?TEST_BUCKET),
+ repl_util:validate_completed_fullsync(LeaderA, BFirst, "B", Start, End, ?TEST_BUCKET),

 rt:clean_cluster(Nodes).

From 616baaed14fe4b760d3bb760b55d29897be5d266 Mon Sep 17 00:00:00 2001
From: Micah Warren
Date: Mon, 1 Jul 2013 15:37:13 -0500
Subject: [PATCH 050/139] Fixed mixed version and upgrade tests for older
 versions of riak.

The tests were not meant to handle an upgrade from non-1.3 versions of riak;
they were designed to ensure an upgrade from early 1.3 would go smoothly.
Hence the checks, which skip (without error) when the previous version of
riak is too old for the test to be valid.
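One caveat worth flagging about the guard this patch introduces: the Vsn returned by application:get_key/2 is a plain string, so the Vsn < "1.3.0" comparison in the diff below is lexicographic. That is correct for the 1.2.x/1.3.x versions this test targets, but it would misorder a hypothetical two-digit minor version:

    %% Erlang compares strings character by character:
    1> "1.10.0" < "1.3.0".
    true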
--- tests/rt_cascading.erl | 80 ++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/tests/rt_cascading.erl b/tests/rt_cascading.erl index 27e817e1e..10e8d9e5d 100644 --- a/tests/rt_cascading.erl +++ b/tests/rt_cascading.erl @@ -538,25 +538,39 @@ mixed_version_clusters_test_dep() -> DeployConfs = [{previous, Conf} || _ <- lists:seq(1,6)], Nodes = rt:deploy_nodes(DeployConfs), [N1, N2, N3, N4, N5, N6] = Nodes, - N12 = [N1, N2], - N34 = [N3, N4], - N56 = [N5, N6], - repl_util:make_cluster(N12), - repl_util:make_cluster(N34), - repl_util:make_cluster(N56), - repl_util:name_cluster(N1, "n12"), - repl_util:name_cluster(N3, "n34"), - repl_util:name_cluster(N5, "n56"), - [repl_util:wait_until_leader_converge(Cluster) || Cluster <- [N12, N34, N56]], - connect_rt(N1, get_cluster_mgr_port(N3), "n34"), - connect_rt(N3, get_cluster_mgr_port(N5), "n56"), - connect_rt(N5, get_cluster_mgr_port(N1), "n12"), - Nodes + case rpc:call(N1, application, get_key, [riak_core, vsn]) of + % this is meant to test upgrading from early BNW aka + % Brave New World aka Advanced Repl aka version 3 repl to + % a cascading realtime repl. Other tests handle going from pre + % repl 3 to repl 3. + {ok, Vsn} when Vsn < "1.3.0" -> + {too_old, Nodes}; + _ -> + N12 = [N1, N2], + N34 = [N3, N4], + N56 = [N5, N6], + repl_util:make_cluster(N12), + repl_util:make_cluster(N34), + repl_util:make_cluster(N56), + repl_util:name_cluster(N1, "n12"), + repl_util:name_cluster(N3, "n34"), + repl_util:name_cluster(N5, "n56"), + [repl_util:wait_until_leader_converge(Cluster) || Cluster <- [N12, N34, N56]], + connect_rt(N1, get_cluster_mgr_port(N3), "n34"), + connect_rt(N3, get_cluster_mgr_port(N5), "n56"), + connect_rt(N5, get_cluster_mgr_port(N1), "n12"), + Nodes + end end, - fun(Nodes) -> + fun(MaybeNodes) -> + Nodes = case MaybeNodes of + {too_old, Ns} -> Ns; + _ -> MaybeNodes + end, rt:clean_cluster(Nodes) end, - fun([N1, N2, N3, N4, N5, N6] = Nodes) -> [ + fun({too_old, _Nodes}) -> []; + ([N1, N2, N3, N4, N5, N6] = Nodes) -> [ {"no cascading at first", timeout, timeout(35), [ {timeout, timeout(15), fun() -> @@ -720,19 +734,33 @@ new_to_old_test_dep() -> Conf = conf(), DeployConfs = [{current, Conf}, {previous, Conf}, {current, Conf}], [New1, Old2, New3] = Nodes = rt:deploy_nodes(DeployConfs), - [repl_util:make_cluster([N]) || N <- Nodes], - Names = ["new1", "old2", "new3"], - [repl_util:name_cluster(Node, Name) || {Node, Name} <- lists:zip(Nodes, Names)], - [repl_util:wait_until_is_leader(N) || N <- Nodes], - connect_rt(New1, 10026, "old2"), - connect_rt(Old2, 10036, "new3"), - connect_rt(New3, 10016, "new1"), - Nodes + case rpc:call(Old2, application, get_key, [riak_core, vsn]) of + % this is meant to test upgrading from early BNW aka + % Brave New World aka Advanced Repl aka version 3 repl to + % a cascading realtime repl. Other tests handle going from pre + % repl 3 to repl 3. 
+ {ok, Vsn} when Vsn < "1.3.0" ->
+ {too_old, Nodes};
+ _ ->
+ [repl_util:make_cluster([N]) || N <- Nodes],
+ Names = ["new1", "old2", "new3"],
+ [repl_util:name_cluster(Node, Name) || {Node, Name} <- lists:zip(Nodes, Names)],
+ [repl_util:wait_until_is_leader(N) || N <- Nodes],
+ connect_rt(New1, 10026, "old2"),
+ connect_rt(Old2, 10036, "new3"),
+ connect_rt(New3, 10016, "new1"),
+ Nodes
+ end
 end,
- fun(Nodes) ->
+ fun(MaybeNodes) ->
+ Nodes = case MaybeNodes of
+ {too_old, Ns} -> Ns;
+ _ -> MaybeNodes
+ end,
 rt:clean_cluster(Nodes)
 end,
- fun([New1, Old2, New3]) -> [
+ fun({too_old, _}) -> [];
+ ([New1, Old2, New3]) -> [
 {"From new1 to old2", timeout, timeout(25), fun() ->
 Client = rt:pbc(New1),

From bd2244c2a55d696f6b705a3d081b384786ec7ff7 Mon Sep 17 00:00:00 2001
From: Micah Warren
Date: Fri, 17 Jan 2014 10:22:56 -0600
Subject: [PATCH 051/139] Added wrapper funs for running some tests
 individually.

This is to make it easier to test specific changes to them.

---
 tests/rt_cascading.erl | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tests/rt_cascading.erl b/tests/rt_cascading.erl
index 10e8d9e5d..75ad7a036 100644
--- a/tests/rt_cascading.erl
+++ b/tests/rt_cascading.erl
@@ -16,6 +16,7 @@
 -define(bucket, <<"objects">>).

 -export([confirm/0]).
+-export([new_to_old/0, mixed_version_clusters/0]).

 % cluster_mgr port = 10006 + 10n where n is devN

@@ -509,6 +510,19 @@ circle_and_spurs_test_() ->
 ] end}}.

+mixed_version_clusters() ->
+ case eunit:test(?MODULE:mixed_version_clusters_test_(), [verbose]) of
+ ok ->
+ pass;
+ error ->
+ % at the time this is written, the return value isn't actually
+ % checked, the only way to fail is to crash the process.
+ % I leave the fail here in hopes a future version will actually
+ % do what the documentation says.
+ exit(error),
+ fail
+ end.
+
 mixed_version_clusters_test_() ->
 % +-----+
 % | n12 |

@@ -706,6 +720,19 @@
 Reses)]),
 ] end}}.

+new_to_old() ->
+ case eunit:test(?MODULE:new_to_old_test_(), [verbose]) of
+ ok ->
+ pass;
+ error ->
+ % at the time this is written, the return value isn't actually
+ % checked, the only way to fail is to crash the process.
+ % I leave the fail here in hopes a future version will actually
+ % do what the documentation says.
+ exit(error),
+ fail
+ end.
+
 new_to_old_test_() ->
 % +------+
 % | New1 |

From f1892c389fba6305d95759a36f66076147a12f89 Mon Sep 17 00:00:00 2001
From: Micah Warren
Date: Fri, 17 Jan 2014 15:02:29 -0600
Subject: [PATCH 052/139] Documented the config option added.

---
 tests/rt_cascading.erl | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/tests/rt_cascading.erl b/tests/rt_cascading.erl
index 75ad7a036..aed59cd5a 100644
--- a/tests/rt_cascading.erl
+++ b/tests/rt_cascading.erl
@@ -4,8 +4,24 @@
 %% legacy: 1.2.1
 %%
 %% uses the following configs with given defaults:
-%% default_timeout = 1000 :: timeout(), base timeout value; some tests will
-%% use a larger value (multiple of).
+%%
+%% ## default_timeout = 1000 :: timeout()
+%%
+%% Base timeout value; some tests will use a larger value (multiple of).
+%%
+%% ## run_rt_cascading_1_3_tests = false :: any()
+%%
+%% Some tests (new_to_old and mixed_version_clusters) only make sense to
+%% run if one is testing the version before cascading was introduced and
+%% the version it was added; e.g. current being riak 1.4 and previous being
+%% riak 1.3. If this is set to anything (other than 'false') those tests
+%% are run.
They will not function properly unless the correct versions
+%% for riak are available. The tests check if the versions under test are
+%% too old to be valid, however.
+%%
+%% With this set to default, the tests that depend on this option will
+%% emit a log message saying they are not configured to run.
+%%
 -module(rt_cascading).
 -compile(export_all).

From 44cb01180877199591ea4cf7ea20cacb307099d9 Mon Sep 17 00:00:00 2001
From: Dave Parfitt
Date: Thu, 16 Jan 2014 11:36:08 -0500
Subject: [PATCH 053/139] add shell tests for riak-admin

---
 intercepts/riak_core_console_intercepts.erl | 186 +++++++++++++++
 intercepts/riak_kv_console_intercepts.erl | 144 ++++++++++++
 intercepts/riak_kv_js_manager_intercepts.erl | 19 ++
 tests/riak_admin_console_tests.erl | 226 +++++++++++++++++++
 4 files changed, 575 insertions(+)
 create mode 100644 intercepts/riak_core_console_intercepts.erl
 create mode 100644 intercepts/riak_kv_console_intercepts.erl
 create mode 100644 intercepts/riak_kv_js_manager_intercepts.erl
 create mode 100644 tests/riak_admin_console_tests.erl

diff --git a/intercepts/riak_core_console_intercepts.erl b/intercepts/riak_core_console_intercepts.erl
new file mode 100644
index 000000000..b1e8c93cd
--- /dev/null
+++ b/intercepts/riak_core_console_intercepts.erl
@@ -0,0 +1,186 @@
+-module(riak_core_console_intercepts).
+-compile(export_all).
+-include("intercept.hrl").
+
+%% See tests/riak_admin_console_tests.erl for more info
+
+-define(M, riak_core_console_orig).
+
+
+-define(PASS, io:format("pass", [])).
+-define(FAIL, io:format("fail", [])).
+
+verify_console_stage_leave(Val) ->
+ case Val of
+ [] -> ?PASS;
+ ["dev99@127.0.0.1"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_stage_remove(Val) ->
+ case Val of
+ ["dev99@127.0.0.1"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_stage_replace(Val) ->
+ case Val of
+ ["dev98@127.0.0.1","dev99@127.0.0.1"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_stage_force_replace(Val) ->
+ case Val of
+ ["dev98@127.0.0.1","dev99@127.0.0.1"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_stage_resize_ring(Val) ->
+ case Val of
+ ["abort"] -> ?PASS;
+ ["42"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_print_staged(Val) ->
+ case Val of
+ [] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_commit_staged(Val) ->
+ case Val of
+ [] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_clear_staged(Val) ->
+ case Val of
+ [] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_add_user(Val) ->
+ case Val of
+ ["foo"] -> ?PASS;
+ ["foo", "x1=y1", "x2=y2"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_alter_user(Val) ->
+ case Val of
+ ["foo", "x1=y1"] -> ?PASS;
+ ["foo", "x1=y1", "x2=y2"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_del_user(Val) ->
+ case Val of
+ ["foo"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+verify_console_add_source(Val) ->
+ case Val of
+ ["all","192.168.100.0/22","x","x1=y1"] -> ?PASS;
+ ["all","192.168.100.0/22","y"] -> ?PASS;
+ ["foo,bar","192.168.100.0/22","x","x1=y1"] -> ?PASS;
+ ["foo,bar,baz","192.168.100.0/22","x","x1=y1","x2=y2"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+
+
+verify_console_del_source(Val) ->
+ case Val of
+ ["all","192.168.100.0/22"] -> ?PASS;
+ ["x","192.168.100.0/22"] -> ?PASS;
+ ["x,y,z","192.168.100.0/22"] -> ?PASS;
+ _ -> ?FAIL
+ end.
+ +verify_console_grant(Val) -> + case Val of + ["foo","on","any","my_bucket","to","x"] -> ?PASS; + ["foo,bar","on","any","my_bucket","to","x"] -> ?PASS; + ["foo","on","any","my_bucket","to","x,y,z"] -> ?PASS; + ["foo,bar,baz","on","any","my_bucket","to","y"] -> ?PASS; + ["foo,bar,baz","on","foo","my_bucket","to","y"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_revoke(Val) -> + case Val of + ["foo","on","any","my_bucket","from","x"] -> ?PASS; + ["foo,bar","on","any","my_bucket","from","x"] -> ?PASS; + ["foo","on","any","my_bucket","from","x,y,z"] -> ?PASS; + ["foo,bar,baz","on","any","my_bucket","from","y"] -> ?PASS; + ["foo,bar,baz","on","foo","my_bucket","from","y"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_print_user(Val) -> + case Val of + ["foo"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_print_users(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_print_sources(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_security_enable(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_security_disable(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_security_stats(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_ciphers(Val) -> + case Val of + ["foo"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_transfers(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_member_status(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_ring_status(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_transfer_limit(Val) -> + case Val of + ["1"] -> ?PASS; + ["dev55@127.0.0.1", "1"] -> ?PASS; + _ -> ?FAIL + end. diff --git a/intercepts/riak_kv_console_intercepts.erl b/intercepts/riak_kv_console_intercepts.erl new file mode 100644 index 000000000..1853a357e --- /dev/null +++ b/intercepts/riak_kv_console_intercepts.erl @@ -0,0 +1,144 @@ +-module(riak_kv_console_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +%% See tests/riak_admin_console_tests.erl for more info + +-define(M, riak_kv_console_orig). + + +-define(PASS, io:format("pass", [])). +-define(FAIL, io:format("fail", [])). + + + +verify_console_staged_join(Val) -> + case Val of + ["dev99@127.0.0.1"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_bucket_type_status(Val) -> + case Val of + ["foo"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_bucket_type_activate(Val) -> + case Val of + ["foo"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_bucket_type_create(Val) -> + io:format(user, "XXXX~p~n", [Val]), + case Val of + ["foo","{props:{[]}}"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_bucket_type_update(Val) -> + case Val of + ["foo","{props:{[]}}"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_bucket_type_list(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_join(Val) -> + case Val of + ["dev99@127.0.0.1"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_leave(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_remove(Val) -> + case Val of + ["dev99@127.0.0.1"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_down(Val) -> + case Val of + ["dev98@127.0.0.1"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_status(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_vnode_status(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_ringready(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. 
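To connect this file back to the test module that drives it: each clause above encodes the exact argument list the riak-admin wrapper script is expected to hand to the intercepted function. For example, the staged_join intercept near the top of this file pairs with a command from the cluster tests:

    %% shell side, in riak_admin_console_tests.erl:
    check_admin_cmd(Node, "cluster join dev99@127.0.0.1"),
    %% Erlang side: riak_kv_console:staged_join/1 is intercepted, so
    %% verify_console_staged_join(["dev99@127.0.0.1"]) prints "pass"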
+ +verify_console_repair_2i(Val) -> + case Val of + ["status"] -> ?PASS; + ["kill"] -> ?PASS; + ["--speed","5","foo","bar","baz"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_aae_status(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_cluster_info(Val) -> + case Val of + ["foo","local"] -> ?PASS; + ["foo","local","dev99@127.0.0.1"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_reload_code(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_reip(Val) -> + io:format(user, "XXXX ~p~n", [Val]), + case Val of + ["a", "b"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_reformat_indexes(Val) -> + case Val of + ["--downgrade"] -> ?PASS; + ["5"] -> ?PASS; + ["5", "--downgrade"] -> ?PASS; + ["6", "7"] -> ?PASS; + ["6", "7", "--downgrade"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_reformat_objects(Val) -> + case Val of + ["true"] -> ?PASS; + ["true","1"] -> ?PASS; + _ -> ?FAIL + end. + diff --git a/intercepts/riak_kv_js_manager_intercepts.erl b/intercepts/riak_kv_js_manager_intercepts.erl new file mode 100644 index 000000000..6b8a55437 --- /dev/null +++ b/intercepts/riak_kv_js_manager_intercepts.erl @@ -0,0 +1,19 @@ +-module(riak_kv_js_manager_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +%% See tests/riak_admin_console_tests.erl for more info + +-define(M, riak_kv_js_manager_orig). + + +-define(PASS, io:format("pass", [])). +-define(FAIL, io:format("fail", [])). + +verify_console_reload(Val) -> + io:format(user, "XXXX ~p~n", [Val]), + case Val of + ["foo","bar","baz"] -> ?PASS; + _ -> ?FAIL + end. + diff --git a/tests/riak_admin_console_tests.erl b/tests/riak_admin_console_tests.erl new file mode 100644 index 000000000..61d733924 --- /dev/null +++ b/tests/riak_admin_console_tests.erl @@ -0,0 +1,226 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +-module(riak_admin_console_tests). +-include_lib("eunit/include/eunit.hrl"). + +-export([confirm/0]). + +%% This test passes params to the riak-admin shell script on to intercepts +%% that either return ?PASS or ?FAIL (which print out "pass" or "fail" to +%% the console). If an unexpected input is received in Erlang, ?FAIL is +%% returned. This test should (will?) make sure we don't implement +%% any unportable shell code. For example, `riak-repl cascades foo` +%% didn't work on Ubuntu due to an invalid call to shift. Since this test +%% will be run on giddyup and hence many platforms, we should be able +%% to catch these types of bugs earlier. +%% See also: replication2_console_tests.erl for a more detailed +%% description. 
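The round trip described above comes down to one helper defined at the bottom of this file; its essential shape, condensed from that definition, is:

    check_admin_cmd(Node, Cmd) ->
        %% tokenize the command, run it through riak-admin on Node,
        %% and require that the intercept printed "pass"
        {ok, Out} = rt:admin(Node, string:tokens(Cmd, " ")),
        ?assertEqual("pass", Out).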
+ +%% UNTESTED, as they don't use rpc, or have a non-trivial impl +%% test +%% diag +%% top +%% wait-for-services +%% js-reload +%% reip + +%% riak-admin cluster +cluster_tests(Node) -> + check_admin_cmd(Node, "cluster join dev99@127.0.0.1"), + check_admin_cmd(Node, "cluster leave"), + check_admin_cmd(Node, "cluster leave dev99@127.0.0.1"), + check_admin_cmd(Node, "cluster force-remove dev99@127.0.0.1"), + check_admin_cmd(Node, "cluster replace dev98@127.0.0.1 dev99@127.0.0.1"), + check_admin_cmd(Node, "cluster force-replace dev98@127.0.0.1 dev99@127.0.0.1"), + check_admin_cmd(Node, "cluster resize-ring 42"), + check_admin_cmd(Node, "cluster resize-ring abort"), + check_admin_cmd(Node, "cluster plan"), + check_admin_cmd(Node, "cluster commit"), + check_admin_cmd(Node, "cluster clear"). + +%% riak-admin bucket_type +bucket_tests(Node) -> + check_admin_cmd(Node, "bucket-type status foo"), + check_admin_cmd(Node, "bucket-type activate foo"), + check_admin_cmd(Node, "bucket-type create foo {\"props\":{[]}}"), + check_admin_cmd(Node, "bucket-type update foo {\"props\":{[]}}"), + check_admin_cmd(Node, "bucket-type list"). + + +%% riak-admin security +security_tests(Node) -> + check_admin_cmd(Node, "security add-user foo"), + check_admin_cmd(Node, "security add-user foo x1=y1 x2=y2"), + check_admin_cmd(Node, "security alter-user foo x1=y1"), + check_admin_cmd(Node, "security alter-user foo x1=y1 x2=y2"), + check_admin_cmd(Node, "security del-user foo"), + check_admin_cmd(Node, "security add-source all 192.168.100.0/22 y"), + check_admin_cmd(Node, "security add-source all 192.168.100.0/22 x x1=y1"), + check_admin_cmd(Node, "security add-source foo,bar 192.168.100.0/22 x x1=y1"), + check_admin_cmd(Node, "security add-source foo,bar,baz 192.168.100.0/22 x x1=y1 x2=y2"), + check_admin_cmd(Node, "security del-source all 192.168.100.0/22"), + check_admin_cmd(Node, "security del-source x 192.168.100.0/22"), + check_admin_cmd(Node, "security del-source x,y,z 192.168.100.0/22"), + check_admin_cmd(Node, "security grant foo on any my_bucket to x"), + check_admin_cmd(Node, "security grant foo,bar on any my_bucket to x"), + check_admin_cmd(Node, "security grant foo on any my_bucket to x,y,z"), + check_admin_cmd(Node, "security grant foo,bar,baz on any my_bucket to y"), + check_admin_cmd(Node, "security grant foo,bar,baz on foo my_bucket to y"), + check_admin_cmd(Node, "security revoke foo on any my_bucket from x"), + check_admin_cmd(Node, "security revoke foo,bar on any my_bucket from x"), + check_admin_cmd(Node, "security revoke foo on any my_bucket from x,y,z"), + check_admin_cmd(Node, "security revoke foo,bar,baz on any my_bucket from y"), + check_admin_cmd(Node, "security revoke foo,bar,baz on foo my_bucket from y"), + check_admin_cmd(Node, "security print-users"), + check_admin_cmd(Node, "security print-sources"), + check_admin_cmd(Node, "security enable"), + check_admin_cmd(Node, "security disable"), + check_admin_cmd(Node, "security status"), + check_admin_cmd(Node, "security print-user foo"), + check_admin_cmd(Node, "security ciphers foo"). 
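Note how the comma-separated user and role names above must travel as single shell words; the intercepts assert exactly that. One pairing from this group, with the Erlang side taken from riak_core_console_intercepts.erl:

    %% shell side:
    check_admin_cmd(Node, "security grant foo,bar on any my_bucket to x"),
    %% Erlang side:
    %% verify_console_grant(["foo,bar","on","any","my_bucket","to","x"]) -> ?PASS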
+ +%% "top level" riak-admin COMMANDS +riak_admin_tests(Node) -> + check_admin_cmd(Node, "join -f dev99@127.0.0.1"), + check_admin_cmd(Node, "leave -f"), + check_admin_cmd(Node, "force-remove -f dev99@127.0.0.1"), + check_admin_cmd(Node, "force_remove -f dev99@127.0.0.1"), + check_admin_cmd(Node, "down dev98@127.0.0.1"), + check_admin_cmd(Node, "status"), + check_admin_cmd(Node, "vnode-status"), + check_admin_cmd(Node, "vnode_status"), + check_admin_cmd(Node, "ringready"), + check_admin_cmd(Node, "transfers"), + check_admin_cmd(Node, "member-status"), + check_admin_cmd(Node, "member_status"), + check_admin_cmd(Node, "ring-status"), + check_admin_cmd(Node, "ring_status"), + check_admin_cmd(Node, "aae-status"), + check_admin_cmd(Node, "aae_status"), + check_admin_cmd(Node, "repair_2i status"), + check_admin_cmd(Node, "repair_2i kill"), + check_admin_cmd(Node, "repair_2i --speed 5 foo bar baz"), + check_admin_cmd(Node, "repair-2i status"), + check_admin_cmd(Node, "repair-2i kill"), + check_admin_cmd(Node, "repair-2i --speed 5 foo bar baz"), + check_admin_cmd(Node, "cluster_info foo local"), + check_admin_cmd(Node, "cluster_info foo local dev99@127.0.0.1"), + check_admin_cmd(Node, "erl-reload"), + check_admin_cmd(Node, "erl_reload"), + check_admin_cmd(Node, "transfer-limit 1"), + check_admin_cmd(Node, "transfer-limit dev55@127.0.0.1 1"), + check_admin_cmd(Node, "transfer_limit 1"), + check_admin_cmd(Node, "transfer_limit dev55@127.0.0.1 1"), + check_admin_cmd(Node, "reformat-indexes --downgrade"), + check_admin_cmd(Node, "reformat-indexes 5"), + check_admin_cmd(Node, "reformat-indexes 6 7"), + check_admin_cmd(Node, "reformat-indexes 5 --downgrade"), + check_admin_cmd(Node, "reformat-indexes 6 7 --downgrade"), + check_admin_cmd(Node, "reformat_indexes --downgrade"), + check_admin_cmd(Node, "reformat_indexes 5"), + check_admin_cmd(Node, "reformat_indexes 6 7"), + check_admin_cmd(Node, "reformat_indexes 5 --downgrade"), + check_admin_cmd(Node, "reformat_indexes 6 7 --downgrade"), + check_admin_cmd(Node, "downgrade_objects true"), + check_admin_cmd(Node, "downgrade_objects true 1"), + check_admin_cmd(Node, "downgrade_objects true"), + check_admin_cmd(Node, "downgrade_objects true 1"), + check_admin_cmd(Node, "js-reload foo bar baz"), + ok. 
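Many commands above are deliberately exercised in both their hyphenated and underscored spellings, since both must dispatch to the same Erlang function; that dual routing is precisely the shell-portability surface this test exists to cover:

    %% both spellings are expected to reach the same intercept:
    check_admin_cmd(Node, "transfer-limit 1"),
    check_admin_cmd(Node, "transfer_limit 1"),
    %% verify_console_transfer_limit(["1"]) -> ?PASS either way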
+ +confirm() -> + %% Deploy a node to test against + lager:info("Deploy node to test riak command line"), + [Node] = rt:deploy_nodes(1), + ?assertEqual(ok, rt:wait_until_nodes_ready([Node])), + rt_intercept:add(Node, + {riak_core_console, + [ + {{transfers,1}, verify_console_transfers}, + {{member_status,1}, verify_console_member_status}, + {{ring_status,1}, verify_console_ring_status}, + {{stage_remove,1}, verify_console_stage_remove}, + {{stage_leave,1}, verify_console_stage_leave}, + {{stage_replace, 1}, verify_console_stage_replace}, + {{stage_force_replace, 1}, verify_console_stage_force_replace}, + {{stage_resize_ring, 1}, verify_console_stage_resize_ring}, + {{print_staged, 1}, verify_console_print_staged}, + {{commit_staged, 1}, verify_console_commit_staged}, + {{clear_staged, 1}, verify_console_clear_staged}, + {{transfer_limit, 1}, verify_console_transfer_limit}, + {{add_user, 1}, verify_console_add_user}, + {{alter_user, 1}, verify_console_alter_user}, + {{del_user, 1}, verify_console_del_user}, + {{add_source, 1}, verify_console_add_source}, + {{del_source, 1}, verify_console_del_source}, + {{grant, 1}, verify_console_grant}, + {{revoke, 1}, verify_console_revoke}, + {{print_user,1}, verify_console_print_user}, + {{print_users,1}, verify_console_print_users}, + {{print_sources, 1}, verify_console_print_sources}, + {{security_enable,1}, verify_console_security_enable}, + {{security_disable,1}, verify_console_security_disable}, + {{security_status,1}, verify_console_security_stats}, + {{ciphers,1}, verify_console_ciphers} ]}), + + rt_intercept:add(Node, + {riak_kv_console, + [ + {{join,1}, verify_console_join}, + {{leave,1}, verify_console_leave}, + {{remove,1}, verify_console_remove}, + {{staged_join,1}, verify_console_staged_join}, + {{down,1}, verify_console_down}, + {{status,1}, verify_console_status}, + {{vnode_status,1}, verify_console_vnode_status}, + {{ringready,1}, verify_console_ringready}, + {{aae_status,1}, verify_console_aae_status}, + {{cluster_info, 1}, verify_console_cluster_info}, + {{reload_code, 1}, verify_console_reload_code}, + {{repair_2i, 1}, verify_console_repair_2i}, + {{reformat_indexes, 1}, verify_console_reformat_indexes}, + {{reformat_objects, 1}, verify_console_reformat_objects}, + {{bucket_type_status,1}, verify_console_bucket_type_status}, + {{bucket_type_activate,1}, verify_console_bucket_type_activate}, + {{bucket_type_create,1}, verify_console_bucket_type_create}, + {{bucket_type_update,1}, verify_console_bucket_type_update}, + {{bucket_type_list,1}, verify_console_bucket_type_list} + ]}), + + rt_intercept:add(Node, + {riak_kv_js_manager, + [ + {{reload,1}, verify_console_reload} + ]}), + + rt_intercept:wait_until_loaded(Node), + + riak_admin_tests(Node), + cluster_tests(Node), + bucket_tests(Node), + security_tests(Node), + pass. + +check_admin_cmd(Node, Cmd) -> + S = string:tokens(Cmd, " "), + lager:info("Testing riak-admin ~s on ~s", [Cmd, Node]), + {ok, Out} = rt:admin(Node, S), + ?assertEqual("pass", Out). 
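A small wrinkle for anyone auditing this patch's coverage: the UNTESTED list near the top of riak_admin_console_tests.erl names js-reload and reip, yet riak_admin_tests/1 does drive js-reload through the riak_kv_js_manager reload intercept, and the intercepts file defines verify_console_reip; the comment appears slightly stale relative to the code:

    check_admin_cmd(Node, "js-reload foo bar baz"),
    %% handled by verify_console_reload(["foo","bar","baz"]) -> ?PASS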
+ From 084442c597889df422c9bb5f2ed125cfc879b9c5 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Fri, 17 Jan 2014 18:28:06 -0500 Subject: [PATCH 054/139] copyrights --- intercepts/riak_core_console_intercepts.erl | 19 +++++++++++++++++++ intercepts/riak_kv_console_intercepts.erl | 19 +++++++++++++++++++ intercepts/riak_kv_js_manager_intercepts.erl | 19 +++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/intercepts/riak_core_console_intercepts.erl b/intercepts/riak_core_console_intercepts.erl index b1e8c93cd..bbffc35d2 100644 --- a/intercepts/riak_core_console_intercepts.erl +++ b/intercepts/riak_core_console_intercepts.erl @@ -1,3 +1,22 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +% +%% ------------------------------------------------------------------- -module(riak_core_console_intercepts). -compile(export_all). -include("intercept.hrl"). diff --git a/intercepts/riak_kv_console_intercepts.erl b/intercepts/riak_kv_console_intercepts.erl index 1853a357e..865148a9b 100644 --- a/intercepts/riak_kv_console_intercepts.erl +++ b/intercepts/riak_kv_console_intercepts.erl @@ -1,3 +1,22 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +% +%% ------------------------------------------------------------------- -module(riak_kv_console_intercepts). -compile(export_all). -include("intercept.hrl"). diff --git a/intercepts/riak_kv_js_manager_intercepts.erl b/intercepts/riak_kv_js_manager_intercepts.erl index 6b8a55437..79484541f 100644 --- a/intercepts/riak_kv_js_manager_intercepts.erl +++ b/intercepts/riak_kv_js_manager_intercepts.erl @@ -1,3 +1,22 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2014 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. 
See the License for the +%% specific language governing permissions and limitations +%% under the License. +% +%% ------------------------------------------------------------------- -module(riak_kv_js_manager_intercepts). -compile(export_all). -include("intercept.hrl"). From 34b2d3723c1c9856a9c854fd7d39241ba74ce63f Mon Sep 17 00:00:00 2001 From: Sean Cribbs Date: Sun, 19 Jan 2014 11:47:50 -0600 Subject: [PATCH 055/139] Fix tests that used the wrong name for the riak.conf entry for allow_mult. --- tests/client_python_verify.erl | 2 +- tests/repl_bucket_types.erl | 2 +- tests/replication2.erl | 2 +- tests/replication2_ssl.erl | 2 +- tests/replication_ssl.erl | 2 +- tests/rt_cascading.erl | 2 +- tests/verify_api_timeouts.erl | 52 +++++++++++++++++----------------- tests/verify_build_cluster.erl | 4 +-- tests/verify_dynamic_ring.erl | 2 +- tests/verify_tick_change.erl | 2 +- 10 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tests/client_python_verify.erl b/tests/client_python_verify.erl index a98565fa9..4be848b9b 100644 --- a/tests/client_python_verify.erl +++ b/tests/client_python_verify.erl @@ -14,7 +14,7 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), {ok, TestCommand} = prereqs(), Config = [{riak_kv, [{secondary_index_sort_default, true}]}, {riak_search, [{enabled, true}]}], diff --git a/tests/repl_bucket_types.erl b/tests/repl_bucket_types.erl index a57971099..7e6e1fdaa 100644 --- a/tests/repl_bucket_types.erl +++ b/tests/repl_bucket_types.erl @@ -17,7 +17,7 @@ %% @doc riak_test entry point confirm() -> - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), %% Start up two >1.3.2 clusters and connect them, {LeaderA, LeaderB, ANodes, BNodes} = make_clusters(), diff --git a/tests/replication2.erl b/tests/replication2.erl index d4dbaa0b2..bd6bf7e74 100644 --- a/tests/replication2.erl +++ b/tests/replication2.erl @@ -13,7 +13,7 @@ confirm() -> %% test requires allow_mult=false - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "true"}]), NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), diff --git a/tests/replication2_ssl.erl b/tests/replication2_ssl.erl index 152c9f088..b345291fa 100644 --- a/tests/replication2_ssl.erl +++ b/tests/replication2_ssl.erl @@ -7,7 +7,7 @@ confirm() -> %% test requires allow_mult=false - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "true"}]), NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), diff --git a/tests/replication_ssl.erl b/tests/replication_ssl.erl index 7a9e56067..a7b981460 100644 --- a/tests/replication_ssl.erl +++ b/tests/replication_ssl.erl @@ -6,7 +6,7 @@ confirm() -> %% test requires allow_mult=false - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "true"}]), NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), diff --git a/tests/rt_cascading.erl b/tests/rt_cascading.erl index 1917cb0e6..8c6702ed8 100644 --- a/tests/rt_cascading.erl +++ b/tests/rt_cascading.erl @@ -38,7 +38,7 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, 
[{"buckets.default.allow_mult", "false"}]), case eunit:test(?MODULE, [verbose]) of ok -> diff --git a/tests/verify_api_timeouts.erl b/tests/verify_api_timeouts.erl index 57367eb55..e587619f2 100644 --- a/tests/verify_api_timeouts.erl +++ b/tests/verify_api_timeouts.erl @@ -9,10 +9,10 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), [Node] = rt:build_cluster(1), rt:wait_until_pingable(Node), - + HC = rt:httpc(Node), lager:info("setting up initial data and loading remote code"), rt:httpc_write(HC, <<"foo">>, <<"bar">>, <<"foobarbaz\n">>), @@ -28,42 +28,42 @@ confirm() -> [{{prepare,2}, slow_prepare}]}), rt_intercept:add(Node, {riak_kv_vnode, [{{handle_coverage,4}, slow_handle_coverage}]}), - - + + lager:info("testing HTTP API"), lager:info("testing GET timeout"), {error, Tup1} = rhc:get(HC, <<"foo">>, <<"bar">>, [{timeout, 100}]), ?assertMatch({ok, "503", _, <<"request timed out\n">>}, Tup1), - + lager:info("testing PUT timeout"), {error, Tup2} = rhc:put(HC, riakc_obj:new(<<"foo">>, <<"bar">>, <<"getgetgetgetget\n">>), [{timeout, 100}]), ?assertMatch({ok, "503", _, <<"request timed out\n">>}, Tup2), - + lager:info("testing DELETE timeout"), {error, Tup3} = rhc:delete(HC, <<"foo">>, <<"bar">>, [{timeout, 100}]), ?assertMatch({ok, "503", _, <<"request timed out\n">>}, Tup3), - + lager:info("testing invalid timeout value"), {error, Tup4} = rhc:get(HC, <<"foo">>, <<"bar">>, [{timeout, asdasdasd}]), ?assertMatch({ok, "400", _, - <<"Bad timeout value \"asdasdasd\"\n">>}, + <<"Bad timeout value \"asdasdasd\"\n">>}, Tup4), lager:info("testing GET still works before long timeout"), {ok, O} = rhc:get(HC, <<"foo">>, <<"bar">>, [{timeout, 4000}]), %% either of these are potentially valid. - case riakc_obj:get_value(O) of - <<"foobarbaz\n">> -> + case riakc_obj:get_value(O) of + <<"foobarbaz\n">> -> lager:info("Original Value"), ok; - <<"getgetgetgetget\n">> -> + <<"getgetgetgetget\n">> -> lager:info("New Value"), ok; - V -> ?assertEqual({object_value, <<"getgetgetgetget\n">>}, + V -> ?assertEqual({object_value, <<"getgetgetgetget\n">>}, {object_value, V}) end, @@ -79,34 +79,34 @@ confirm() -> ?assertEqual(BOOM, PGET), lager:info("testing PUT timeout"), - PPUT = riakc_pb_socket:put(PC, + PPUT = riakc_pb_socket:put(PC, riakc_obj:new(<<"foo">>, <<"bar2">>, <<"get2get2get2get2get\n">>), [{timeout, 100}]), ?assertEqual(BOOM, PPUT), - + lager:info("testing DELETE timeout"), - PDEL = riakc_pb_socket:delete(PC, <<"foo">>, <<"bar2">>, + PDEL = riakc_pb_socket:delete(PC, <<"foo">>, <<"bar2">>, [{timeout, 100}]), ?assertEqual(BOOM, PDEL), lager:info("testing invalid timeout value"), - ?assertError(badarg, riakc_pb_socket:get(PC, <<"foo">>, <<"bar2">>, + ?assertError(badarg, riakc_pb_socket:get(PC, <<"foo">>, <<"bar2">>, [{timeout, asdasdasd}])), lager:info("testing GET still works before long timeout"), - {ok, O2} = riakc_pb_socket:get(PC, <<"foo">>, <<"bar2">>, + {ok, O2} = riakc_pb_socket:get(PC, <<"foo">>, <<"bar2">>, [{timeout, 4000}]), %% either of these are potentially valid. 
- case riakc_obj:get_value(O2) of - <<"get2get2get2get2get\n">> -> + case riakc_obj:get_value(O2) of + <<"get2get2get2get2get\n">> -> lager:info("New Value"), ok; - <<"foobarbaz2\n">> -> + <<"foobarbaz2\n">> -> lager:info("Original Value"), ok; - V2 -> ?assertEqual({object_value, <<"get2get2get2get2get\n">>}, + V2 -> ?assertEqual({object_value, <<"get2get2get2get2get\n">>}, {object_value, V2}) end, @@ -143,8 +143,8 @@ confirm() -> lager:info("Checking stream buckets works w/ long timeout"), {ok, ReqId7} = riakc_pb_socket:stream_list_buckets(Pid, Long), wait_for_end(ReqId7), - - + + lager:info("Checking HTTP"), LHC = rt:httpc(Node), lager:info("Checking keys timeout"), @@ -161,10 +161,10 @@ confirm() -> wait_for_end(ReqId4), lager:info("Checking buckets timeout"), - ?assertMatch({error, <<"timeout">>}, + ?assertMatch({error, <<"timeout">>}, rhc:list_buckets(LHC, Short)), lager:info("Checking buckets w/ long timeout"), - ?assertMatch({ok, _}, + ?assertMatch({ok, _}, rhc:list_buckets(LHC, Long)), lager:info("Checking stream buckets timeout"), {ok, ReqId3} = rhc:stream_list_buckets(LHC, Short), @@ -218,7 +218,7 @@ wait_for_end(ReqId) -> end. -put_buckets(Node, Num) -> +put_buckets(Node, Num) -> Pid = rt:pbc(Node), Buckets = [list_to_binary(["", integer_to_list(Ki)]) || Ki <- lists:seq(0, Num - 1)], diff --git a/tests/verify_build_cluster.erl b/tests/verify_build_cluster.erl index 24b0bc736..e934eb434 100644 --- a/tests/verify_build_cluster.erl +++ b/tests/verify_build_cluster.erl @@ -27,12 +27,12 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), %% Deploy a set of new nodes lager:info("Deploying 4 nodes"), %% handoff_concurrency needs to be raised to make the leave operation faster. %% most clusters go up to 10, but this one is one louder, isn't it? 
- [Node1, Node2, Node3, Node4] = Nodes = rt:deploy_nodes(4, [{riak_core, [{handoff_concurrency, 11}]}]), + [Node1, Node2, Node3, Node4] = Nodes = rt:deploy_nodes(4, [{riak_core, [{handoff_concurrency, 11}]}]), %% Ensure each node owns 100% of it's own ring lager:info("Ensure each nodes 100% of it's own ring"), diff --git a/tests/verify_dynamic_ring.erl b/tests/verify_dynamic_ring.erl index 726105bac..8e2b30f79 100644 --- a/tests/verify_dynamic_ring.erl +++ b/tests/verify_dynamic_ring.erl @@ -31,7 +31,7 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), rt:update_app_config(all, [{riak_core, [{ring_creation_size, ?START_SIZE}]}]), [ANode, AnotherNode, YetAnother, ReplacingNode] = AllNodes = rt:deploy_nodes(4), diff --git a/tests/verify_tick_change.erl b/tests/verify_tick_change.erl index c08431dbe..3390fbac8 100644 --- a/tests/verify_tick_change.erl +++ b/tests/verify_tick_change.erl @@ -25,7 +25,7 @@ confirm() -> ClusterSize = 4, - rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), NewConfig = [], Nodes = rt:build_cluster(ClusterSize, NewConfig), ?assertEqual(ok, rt:wait_until_nodes_ready(Nodes)), From 59976f25f0cb0d918a3416e4dc6bb8cecc6bdc83 Mon Sep 17 00:00:00 2001 From: Sean Cribbs Date: Sun, 19 Jan 2014 12:00:21 -0600 Subject: [PATCH 056/139] Fix typo true -> false --- tests/replication2.erl | 2 +- tests/replication2_ssl.erl | 2 +- tests/replication_ssl.erl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/replication2.erl b/tests/replication2.erl index bd6bf7e74..fad65ce1a 100644 --- a/tests/replication2.erl +++ b/tests/replication2.erl @@ -13,7 +13,7 @@ confirm() -> %% test requires allow_mult=false - rt:set_conf(all, [{"buckets.default.allow_mult", "true"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), diff --git a/tests/replication2_ssl.erl b/tests/replication2_ssl.erl index b345291fa..2fc71fb54 100644 --- a/tests/replication2_ssl.erl +++ b/tests/replication2_ssl.erl @@ -7,7 +7,7 @@ confirm() -> %% test requires allow_mult=false - rt:set_conf(all, [{"buckets.default.allow_mult", "true"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), diff --git a/tests/replication_ssl.erl b/tests/replication_ssl.erl index a7b981460..ec49ff5a2 100644 --- a/tests/replication_ssl.erl +++ b/tests/replication_ssl.erl @@ -6,7 +6,7 @@ confirm() -> %% test requires allow_mult=false - rt:set_conf(all, [{"buckets.default.allow_mult", "true"}]), + rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), NumNodes = rt_config:get(num_nodes, 6), ClusterASize = rt_config:get(cluster_a_size, 3), From cf515c14ea8fa4898647ab39fe41a7d25e21a8df Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Tue, 21 Jan 2014 00:17:34 -0500 Subject: [PATCH 057/139] Add missing replication2_connection test from the 1.4 branch. 
--- tests/replication2_connections.erl | 169 +++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 tests/replication2_connections.erl diff --git a/tests/replication2_connections.erl b/tests/replication2_connections.erl new file mode 100644 index 000000000..bcc8a9090 --- /dev/null +++ b/tests/replication2_connections.erl @@ -0,0 +1,169 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% + +-module(replication2_connections). +-behaviour(riak_test). +-export([confirm/0]). +-include_lib("eunit/include/eunit.hrl"). + +-define(HB_TIMEOUT, 2000). + +confirm() -> + NumNodes = rt_config:get(num_nodes, 6), + + lager:info("Deploy ~p nodes", [NumNodes]), + Conf = [ + {riak_repl, + [ + %% turn off fullsync + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + %% override defaults for RT heartbeat so that we + %% can see faults sooner and have a quicker test. + {rt_heartbeat_interval, ?HB_TIMEOUT}, + {rt_heartbeat_timeout, ?HB_TIMEOUT} + ]} + ], + + Nodes = rt:deploy_nodes(NumNodes, Conf), + {ANodes, BNodes} = lists:split(3, Nodes), + + lager:info("ANodes: ~p", [ANodes]), + lager:info("BNodes: ~p", [BNodes]), + + lager:info("Build cluster A"), + repl_util:make_cluster(ANodes), + + lager:info("Build cluster B"), + repl_util:make_cluster(BNodes), + + lager:info("Waiting for leader to converge on cluster A"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), + AFirst = hd(ANodes), + + lager:info("Waiting for leader to converge on cluster B"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), + BFirst = hd(BNodes), + + lager:info("Naming A"), + repl_util:name_cluster(AFirst, "A"), + rt:wait_until_ring_converged(ANodes), + + lager:info("Naming B"), + repl_util:name_cluster(BFirst, "B"), + rt:wait_until_ring_converged(BNodes), + + lager:info("Connecting A to B"), + connect_clusters(AFirst, BFirst), + + lager:info("Enabling realtime replication from A to B."), + repl_util:enable_realtime(AFirst, "B"), + rt:wait_until_ring_converged(ANodes), + repl_util:start_realtime(AFirst, "B"), + rt:wait_until_ring_converged(ANodes), + + lager:info("Connecting B to A"), + connect_clusters(BFirst, AFirst), + + lager:info("Enabling realtime replication from B to A."), + repl_util:enable_realtime(BFirst, "A"), + rt:wait_until_ring_converged(BNodes), + repl_util:start_realtime(BFirst, "A"), + rt:wait_until_ring_converged(BNodes), + + lager:info("Verifying connectivity between clusters."), + [verify_connectivity(Node, "B") || Node <- ANodes], + [verify_connectivity(Node, "A") || Node <- BNodes], + + pass. + +%% @doc Verify connectivity between sources and sink. 
+verify_connectivity(Node, Cluster) -> + print_repl_ring(Node), + wait_for_connections(Node, Cluster), + print_repl_ring(Node), + restart_process(Node, riak_core_connection_manager), + wait_for_connections(Node, Cluster). + +print_repl_ring(Node) -> + {ok, Ring} = rpc:call(Node, + riak_core_ring_manager, + get_my_ring, + []), + Clusters = rpc:call(Node, + riak_repl_ring, + get_clusters, + [Ring]), + lager:info("REPL ring shows clusters as: ~p", [Clusters]). + +%% @doc Wait for connections to be established from this node to the +%% named cluster. +wait_for_connections(Node, Cluster) -> + rt:wait_until(Node, fun(_) -> + lager:info("Attempting to verify connections on ~p.", + [Node]), + try + {ok, Connections} = rpc:call(Node, + riak_core_cluster_mgr, + get_connections, + []), + lager:info("Waiting for sink connections on ~p: ~p.", + [Node, Connections]), + case Connections of + [{{cluster_by_name, Cluster}, _}] -> + true; + _ -> + false + end + catch + _:Error -> + lager:info("Caught error: ~p.", [Error]), + false + end + end). + +%% @doc Restart a given process by name. +restart_process(Node, Name) -> + lager:info("Restarting ~p on ~p.", [Name, Node]), + + %% Find the process. + Pid0 = rpc:call(Node, erlang, whereis, [Name]), + lager:info("Found ~p on node ~p at ~p, killing.", + [Name, Node, Pid0]), + + %% Kill it. + true = rpc:call(Node, erlang, exit, [Pid0, brutal_kill]), + + %% Verify it restarts. + rt:wait_until(Node, fun(_) -> + lager:info("Waiting for ~p to restart...", [Name]), + Pid = rpc:call(Node, erlang, whereis, [Name]), + Pid =/= Pid0 andalso Pid =/= undefined + end), + + lager:info("Process restarted."). + +%% @doc Connect two clusters for replication using their respective +%% leader nodes. +connect_clusters(LeaderA, LeaderB) -> + {ok, {_IP, Port}} = rpc:call(LeaderB, application, get_env, + [riak_core, cluster_mgr]), + repl_util:connect_cluster(LeaderA, "127.0.0.1", Port). From ba45350da38910c20d9ebe1ff77355d4416fc4ac Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Tue, 21 Jan 2014 20:22:17 -0500 Subject: [PATCH 058/139] Fix overlapping key ranges in replication test. Fix overlapping key ranges which cause sibling generation because of puts with no vclock. Add additional checking to assert this is true. 
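To make the failure mode concrete, a minimal sketch (illustrative only, not
part of the patch; the bucket, key, and default protocol buffers port are
assumed) of why vclock-less puts to the same key generate siblings, while
disjoint key ranges avoid the problem:

    %% Two "blind" puts carry no vector clock, so Riak cannot order them
    %% causally and keeps both values as siblings once allow_mult is on.
    {ok, PB} = riakc_pb_socket:start_link("127.0.0.1", 8087),
    ok = riakc_pb_socket:put(PB, riakc_obj:new(<<"b">>, <<"k">>, <<"v1">>)),
    ok = riakc_pb_socket:put(PB, riakc_obj:new(<<"b">>, <<"k">>, <<"v2">>)),

    %% Fetching first preserves the vclock on the returned object, so this
    %% put supersedes the stored value instead of adding a sibling.
    {ok, Fetched} = riakc_pb_socket:get(PB, <<"b">>, <<"k">>),
    ok = riakc_pb_socket:put(PB, riakc_obj:update_value(Fetched, <<"v3">>)).

With non-overlapping ranges each key is written exactly once, so no put can
race an earlier write to the same key.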
--- tests/replication2.erl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/replication2.erl b/tests/replication2.erl index d4dbaa0b2..1e066b3aa 100644 --- a/tests/replication2.erl +++ b/tests/replication2.erl @@ -378,7 +378,6 @@ replication([AFirst|_] = ANodes, [BFirst|_] = BNodes, Connected) -> Res10 = rt:systest_read(BSecond, 1, 100, RealtimeOnly, 2), ?assertEqual(100, length(Res10)), - lager:info("Write 100 more keys into realtime only bucket on ~p", [ASecond]), ?assertEqual([], repl_util:do_write(ASecond, 101, 200, @@ -408,8 +407,7 @@ replication([AFirst|_] = ANodes, [BFirst|_] = BNodes, Connected) -> rt:wait_until_ring_converged(ANodes), lager:info("Writing 100 keys"), - ?assertEqual([], repl_util:do_write(LeaderA4, 800, 900, - TestBucket, 2)), + ?assertEqual([], repl_util:do_write(LeaderA4, 800, 900, TestBucket, 2)), lager:info("Starting realtime"), repl_util:start_realtime(LeaderA4, "B"), @@ -428,9 +426,17 @@ replication([AFirst|_] = ANodes, [BFirst|_] = BNodes, Connected) -> repl_util:stop_realtime(LeaderA4, "B"), rt:wait_until_ring_converged(ANodes), - lager:info("Writing 100 keys"), - ?assertEqual([], repl_util:do_write(Target, 900, 1000, - TestBucket, 2)), + lager:info("Verifying 100 keys are missing from ~p", [Target]), + repl_util:read_from_cluster(Target, 901, 1000, TestBucket, 100), + + lager:info("Writing 100 keys to ~p", [Target]), + ?assertEqual([], repl_util:do_write(Target, 901, 1000, TestBucket, 2)), + + lager:info("Verifying 100 keys are read from ~p", [Target]), + repl_util:read_from_cluster(Target, 901, 1000, TestBucket, 0), + + lager:info("Verifying 100 keys are missing from ~p", [BSecond]), + repl_util:read_from_cluster(BSecond, 901, 1000, TestBucket, 100), io:format("queue status: ~p", [rpc:call(Target, riak_repl2_rtq, status, [])]), @@ -444,9 +450,11 @@ replication([AFirst|_] = ANodes, [BFirst|_] = BNodes, Connected) -> repl_util:start_realtime(LeaderA4, "B"), timer:sleep(3000), + lager:info("Verifying 100 keys are now available on ~p", [BSecond]), + repl_util:read_from_cluster(BSecond, 901, 1000, TestBucket, 0), + lager:info("Reading keys written while repl was stopped"), - ?assertEqual(0, repl_util:wait_for_reads(BSecond, 900, 1000, - TestBucket, 2)), + ?assertEqual(0, repl_util:wait_for_reads(BSecond, 901, 1000, TestBucket, 2)), lager:info("Restarting node ~p", [Target]), From 1ec8bda21c2b41b41c222d5fd316434d68a5acba Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Wed, 22 Jan 2014 12:21:22 -0500 Subject: [PATCH 059/139] Add a simple 'smoke test' tool to run eunit/dialyzer on riak source trees --- .gitignore | 1 + Makefile | 1 + rebar.config.script | 8 ++ src/smoke_test_escript.erl | 214 +++++++++++++++++++++++++++++++++++++ 4 files changed, 224 insertions(+) create mode 100644 rebar.config.script create mode 100755 src/smoke_test_escript.erl diff --git a/.gitignore b/.gitignore index ce1f3b6c9..79d1ae60c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ deps ebin log riak_test +smoke_test .eunit .DS_Store out diff --git a/Makefile b/Makefile index 1528b0c00..726320106 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ PLT = $(HOME)/.riak-test_dialyzer_plt all: deps compile ./rebar skip_deps=true escriptize + SMOKE_TEST=1 ./rebar skip_deps=true escriptize deps: ./rebar get-deps diff --git a/rebar.config.script b/rebar.config.script new file mode 100644 index 000000000..0fcc5e0ff --- /dev/null +++ b/rebar.config.script @@ -0,0 +1,8 @@ +case os:getenv("SMOKE_TEST") of + false -> CONFIG; + [] -> 
CONFIG;
+    _ ->
+        C1 = lists:keystore(escript_emu_args, 1, CONFIG,
+            {escript_emu_args, "%%! -escript main smoke_test_escript +K true +P 10000 -env ERL_MAX_PORTS 10000\n"}),
+        lists:keystore(escript_name, 1, C1, {escript_name, smoke_test})
+end.
diff --git a/src/smoke_test_escript.erl b/src/smoke_test_escript.erl
new file mode 100755
index 000000000..d508b5ac7
--- /dev/null
+++ b/src/smoke_test_escript.erl
@@ -0,0 +1,214 @@
+-module(smoke_test_escript).
+-include_lib("kernel/include/file.hrl").
+
+-export([main/1, get_version/0, worker/3]).
+
+get_version() ->
+    list_to_binary(string:strip(os:cmd("git describe"), right, $\n)).
+
+cli_options() ->
+%% Option Name, Short Code, Long Code, Argument Spec, Help Message
+[
+ {project, $p, "project", string, "specifies which project"},
+ {debug, $v, "debug", undefined, "debug?"},
+ {directory, $d, "directory", string, "source tree directory"},
+ {jobs, $j, "jobs", integer, "jobs?"}
+].
+
+
+main(Args) ->
+    {ok, {Parsed, _Other}} = getopt:parse(cli_options(), Args),
+    application:start(ibrowse),
+    lager:start(),
+    rt_config:load("default", filename:join([os:getenv("HOME"), ".smoke_test.config"])),
+    case lists:keyfind(project, 1, Parsed) of
+        false ->
+            lager:error("Must specify project!"),
+            application:stop(lager),
+            halt(1);
+        {project, Project} ->
+            rt_config:set(rt_project, Project)
+    end,
+    case lists:keyfind(directory, 1, Parsed) of
+        false ->
+            %% run in current working directory
+            ok;
+        {directory, Dir} ->
+            lager:info("Changing working dir to ~s", [Dir]),
+            ok = file:set_cwd(filename:absname(Dir))
+    end,
+    case lists:member(debug, Parsed) of
+        true ->
+            lager:set_loglevel(lager_console_backend, debug);
+        _ ->
+            ok
+    end,
+    rt_config:set(rt_harness, ?MODULE),
+    lager:debug("ParsedArgs ~p", [Parsed]),
+    Suites = giddyup:get_suite(rt_config:get(platform)),
+    Jobs = case lists:keyfind(jobs, 1, Parsed) of
+        false ->
+            1;
+        {jobs, J} ->
+            J
+    end,
+
+    {ok, PWD} = file:get_cwd(),
+    Rebar = filename:join(PWD, "rebar"),
+
+
+    setup_deps(Rebar, PWD, [filename:join([PWD, "deps", F])
+                            || F <- element(2, file:list_dir(filename:join(PWD, "deps"))),
+                               filelib:is_dir(filename:join([PWD, "deps", F]))]),
+
+    case Jobs > 1 of
+        true ->
+            %% partition the suite list by the number of jobs
+            SplitSuites = dict:to_list(element(2, lists:foldl(fun(S, {Counter, Dict}) ->
+                                {Counter + 1, dict:append(Counter rem Jobs, S, Dict)}
+                        end, {0, dict:new()}, Suites))),
+            lager:debug("Split into ~p lists", [length(SplitSuites)]),
+            Workers = [spawn_monitor(?MODULE, worker, [Rebar, PWD, SS]) || {_, SS} <- SplitSuites],
+            wait_for_workers(Workers);
+        _ ->
+            worker(Rebar, PWD, Suites)
+    end. 
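+
+%% Illustrative usage sketch (not from the original patch; the project name
+%% and path below are hypothetical). With the escript built as `smoke_test`
+%% per the rebar.config.script above, a run could look like:
+%%
+%%   ./smoke_test --project riak_ee --directory /path/to/riak_ee --jobs 4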
+ +worker(Rebar, PWD, Suites) -> + lists:foreach(fun({Suite, Config}) -> + lager:info("Suite ~p config ~p", [Suite, Config]), + [Dep, Task] = string:tokens(atom_to_list(Suite), ":"), + FDep = filename:join([PWD, deps, Dep]), + case filelib:is_dir(FDep) of + true -> + case Task of + "eunit" -> + %% set up a symlink so that each dep has deps + P = erlang:open_port({spawn_executable, Rebar}, + [{args, ["eunit", "skip_deps=true"]}, + {cd, FDep}, exit_status, + {line, 1024}, stderr_to_stdout, binary]), + {Res, Log} = accumulate(P, []), + CleanedLog = cleanup_logs(Log), + giddyup:post_result([{test, Suite}, {status, get_status(Res)}, + {log, CleanedLog} | Config]), + Res; + "dialyzer" -> + P = erlang:open_port({spawn_executable, "/usr/bin/make"}, + [{args, ["dialyzer"]}, + {cd, FDep}, exit_status, + {line, 1024}, stderr_to_stdout, binary]), + {Res, Log} = accumulate(P, []), + %% TODO split the logs so that the PLT stuff is elided + CleanedLog = cleanup_logs(Log), + giddyup:post_result([{test, Suite}, {status, get_status(Res)}, + {log, CleanedLog} | Config]), + Res; + _ -> + ok + + end; + false -> + lager:debug("Not a dep: ~p", [FDep]) + end + end, Suites). + +setup_deps(_, _, []) -> ok; +setup_deps(Rebar, PWD, [Dep|Deps]) -> + %% clean up an old deps dir, if present + remove_deps_dir(Dep), + %% symlink ALL the deps in + file:make_symlink(filename:join(PWD, "deps"), filename:join(Dep, "deps")), + lager:debug("ln -sf ~s ~s", [filename:join(PWD, "deps"), + filename:join(Dep, "deps")]), + %% run rebar list deps, to find out which ones to keep + P = erlang:open_port({spawn_executable, Rebar}, + [{args, ["list-deps"]}, + {cd, Dep}, exit_status, + {line, 1024}, stderr_to_stdout, binary]), + {0, Log} = accumulate(P, []), + %% find all the deps, amongst the noise + case re:run(Log, "([a-zA-Z0-9_]+) (?:BRANCH|TAG|REV)", + [global, {capture, all_but_first, list}]) of + {match, Matches} -> + lager:info("Deps for ~p are ~p", [Dep, Matches]), + ok = file:delete(filename:join(Dep, "deps")), + ok = filelib:ensure_dir(filename:join(Dep, "deps")++"/"), + [file:make_symlink(filename:join([PWD, "deps", M]), + filename:join([Dep, "deps", M])) + || M <- Matches]; + nomatch -> + %% remove the symlink + file:delete(filename:join(Dep, "deps")), + lager:info("~p has no deps", [Dep]) + end, + setup_deps(Rebar, PWD, Deps). + +remove_deps_dir(Dep) -> + case filelib:is_dir(filename:join(Dep, "deps")) of + true -> + %% there should ONLY be a deps dir leftover from a previous run, + %% so it should be a directory filled with symlinks + {ok, Files} = file:list_dir(filename:join(Dep, "deps")), + lists:foreach(fun(F) -> + File = filename:join([Dep, "deps", F]), + {ok, FI} = file:read_link_info(File), + case FI#file_info.type of + symlink -> + ok = file:delete(File); + _ -> + ok + end + end, Files), + %% this will fail if the directory is not now empty + ok = file:del_dir(filename:join(Dep, "deps")), + ok; + false -> + ok + end. + +wait_for_workers([]) -> + ok; +wait_for_workers(Workers) -> + receive + {'DOWN', _, _, Pid, normal} -> + lager:info("Worker exited normally"), + wait_for_workers(Workers -- [Pid]); + {'DOWN', _, _, Pid, Reason} -> + lager:info("Worker exited abnormally: ~p", [Reason]), + wait_for_workers(Workers -- [Pid]) + end. + +cleanup_logs(Logs) -> + case unicode:characters_to_binary(Logs, latin1, unicode) of + {error, Bin, Rest} -> + lager:error("Bad binary ~p", [Rest]), + Bin; + {incomplete, Bin, Rest} -> + lager:error("Bad binary ~p", [Rest]), + Bin; + Bin -> + Bin + end. 
+ +maybe_eol(eol) -> + "\n"; +maybe_eol(noeol) -> + "". + +get_status(0) -> + pass; +get_status(_) -> + fail. + +accumulate(P, Acc) -> + receive + {P, {data, {EOL, Data}}} -> + accumulate(P, [[Data,maybe_eol(EOL)]|Acc]); + {P, {exit_status, Status}} -> + lager:debug("Exited with status ~b", [Status]), + {Status, list_to_binary(lists:reverse(Acc))}; + {P, Other} -> + lager:warning("Unexpected return from port: ~p", [Other]), + accumulate(P, Acc) + end. From 69af7f86b2a4a7a7efca964b1db041d5dd7a0b45 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Wed, 22 Jan 2014 12:54:33 -0500 Subject: [PATCH 060/139] Use the riak_test config file, it has everything we need --- src/smoke_test_escript.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/smoke_test_escript.erl b/src/smoke_test_escript.erl index d508b5ac7..d674d0c22 100755 --- a/src/smoke_test_escript.erl +++ b/src/smoke_test_escript.erl @@ -20,7 +20,7 @@ main(Args) -> {ok, {Parsed, _Other}} = getopt:parse(cli_options(), Args), application:start(ibrowse), lager:start(), - rt_config:load("default", filename:join([os:getenv("HOME"), ".smoke_test.config"])), + rt_config:load("default", filename:join([os:getenv("HOME"), ".riak_test.config"])), case lists:keyfind(project, 1, Parsed) of false -> lager:error("Must specify project!"), From 74851c6fc91b98ce375a0e388b1fe03a871caad9 Mon Sep 17 00:00:00 2001 From: Sean Cribbs Date: Thu, 23 Jan 2014 14:15:37 -0600 Subject: [PATCH 061/139] Fix renamed setting for cuttlefish_configuration test. --- tests/cuttlefish_configuration.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cuttlefish_configuration.erl b/tests/cuttlefish_configuration.erl index a9b3dbe01..4447bfca4 100644 --- a/tests/cuttlefish_configuration.erl +++ b/tests/cuttlefish_configuration.erl @@ -9,7 +9,7 @@ confirm() -> CuttlefishConf = [ {"ring_size", "8"}, - {"leveldb.sync", true} + {"leveldb.sync_on_write", "on"} ], [Node] = rt:deploy_nodes(1, {cuttlefish, CuttlefishConf}), @@ -21,4 +21,4 @@ confirm() -> ?assertEqual(true, LevelDBSync), - pass. \ No newline at end of file + pass. From af343ee6f6533d6a089cc5ce73536175ea4c01ba Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Fri, 24 Jan 2014 11:48:17 -0500 Subject: [PATCH 062/139] Fix verify_api_timeouts. Fix verify_api_timeouts by taking siblings into account. --- tests/verify_api_timeouts.erl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/verify_api_timeouts.erl b/tests/verify_api_timeouts.erl index e587619f2..0a6851cfd 100644 --- a/tests/verify_api_timeouts.erl +++ b/tests/verify_api_timeouts.erl @@ -9,7 +9,6 @@ confirm() -> %% test requires allow_mult=false b/c of rt:systest_read - rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), [Node] = rt:build_cluster(1), rt:wait_until_pingable(Node), @@ -56,13 +55,18 @@ confirm() -> {ok, O} = rhc:get(HC, <<"foo">>, <<"bar">>, [{timeout, 4000}]), %% either of these are potentially valid. 
- case riakc_obj:get_value(O) of - <<"foobarbaz\n">> -> + case riakc_obj:get_values(O) of + [<<"foobarbaz\n">>] -> lager:info("Original Value"), ok; - <<"getgetgetgetget\n">> -> + [<<"getgetgetgetget\n">>] -> lager:info("New Value"), ok; + [_A, _B] = L -> + ?assertEqual([<<"foobarbaz\n">>,<<"getgetgetgetget\n">>], + lists:sort(L)), + lager:info("Both Values"), + ok; V -> ?assertEqual({object_value, <<"getgetgetgetget\n">>}, {object_value, V}) end, @@ -99,14 +103,19 @@ confirm() -> [{timeout, 4000}]), %% either of these are potentially valid. - case riakc_obj:get_value(O2) of - <<"get2get2get2get2get\n">> -> + case riakc_obj:get_values(O2) of + [<<"get2get2get2get2get\n">>] -> lager:info("New Value"), ok; - <<"foobarbaz2\n">> -> + [<<"foobarbaz2\n">>] -> lager:info("Original Value"), ok; - V2 -> ?assertEqual({object_value, <<"get2get2get2get2get\n">>}, + [_A2, _B2] = L2 -> + ?assertEqual([<<"foobarbaz2\n">>, <<"get2get2get2get2get\n">>], + lists:sort(L2)), + lager:info("Both Values"), + ok; + V2 -> ?assertEqual({object_value, <<"get2get2get2get2get\n">>}, {object_value, V2}) end, From 3353ca526872fcc613ecae5dedbff2175e00e765 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Fri, 24 Jan 2014 14:23:59 -0500 Subject: [PATCH 063/139] Implement optional maximum test times --- src/riak_test_runner.erl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/riak_test_runner.erl b/src/riak_test_runner.erl index 17860122d..1886795e1 100644 --- a/src/riak_test_runner.erl +++ b/src/riak_test_runner.erl @@ -96,8 +96,20 @@ execute(TestModule, {Mod, Fun}, TestMetaData) -> lager:info("Test Runner `uname -a` : ~s", [UName]), Pid = spawn_link(Mod, Fun, []), + Ref = case rt_config:get(test_timeout, undefined) of + Timeout when is_integer(Timeout) -> + erlang:send_after(Timeout, self(), test_took_too_long); + _ -> + undefined + end, {Status, Reason} = rec_loop(Pid, TestModule, TestMetaData), + case Ref of + undefined -> + ok; + _ -> + erlang:cancel_timer(Ref) + end, riak_test_group_leader:tidy_up(OldGroupLeader), case Status of fail -> @@ -121,6 +133,9 @@ function_name(TestModule) -> rec_loop(Pid, TestModule, TestMetaData) -> receive + test_took_too_long -> + exit(Pid, kill), + {fail, test_timed_out}; metadata -> Pid ! {metadata, TestMetaData}, rec_loop(Pid, TestModule, TestMetaData); From 7ecda007bef0e8d4fa72088d93229db9e5656e87 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Fri, 24 Jan 2014 15:42:34 -0500 Subject: [PATCH 064/139] Update sample config --- riak_test.config.sample | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/riak_test.config.sample b/riak_test.config.sample index 559c6d627..d5d950894 100644 --- a/riak_test.config.sample +++ b/riak_test.config.sample @@ -34,6 +34,10 @@ {rt_max_wait_time, 600000}, {rt_retry_delay, 1000}, + %% How long to let any test run before killing it, undefined means no limit. + %% Valid timeouts are in milliseconds. + {test_timeout, undefined}, + %% The harness specifies how Riak nodes under test will be %% manipulated. There is currently only `rtdev', which uses local %% `devrel's inside a git repository. In the future, we may have From af3cff74d3b30f3effed041c1d190129a212f95e Mon Sep 17 00:00:00 2001 From: "Engel A. 
Sanchez" Date: Fri, 24 Jan 2014 20:30:22 -0500 Subject: [PATCH 065/139] Bring test back to 1.4.7 glory --- src/rt.erl | 51 +++++++++++++++++++++++++++++++++++++++ tests/verify_2i_aae.erl | 53 +++++++++++++++++++++++++++++++++++------ 2 files changed, 97 insertions(+), 7 deletions(-) diff --git a/src/rt.erl b/src/rt.erl index 2910670b4..c6b94fde2 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -125,6 +125,7 @@ wait_until/3, wait_until/2, wait_until/1, + wait_until_aae_trees_built/1, wait_until_all_members/1, wait_until_all_members/2, wait_until_capability/3, @@ -757,6 +758,56 @@ wait_until_nodes_agree_about_ownership(Nodes) -> Results = [ wait_until_owners_according_to(Node, Nodes) || Node <- Nodes ], ?assert(lists:all(fun(X) -> ok =:= X end, Results)). +%% AAE support +wait_until_aae_trees_built(Nodes) -> + lager:info("Wait until AAE builds all partition trees across ~p", [Nodes]), + %% Wait until all nodes report no undefined trees + AllBuiltFun = + fun(_, _AllBuilt = false) -> + false; + (Node, _AllBuilt = true) -> + Info = rpc:call(Node, + riak_kv_entropy_info, + compute_tree_info, + []), + lager:debug("Entropy table on node ~p : ~p", [Node, Info]), + AllHaveBuildTimes = not lists:keymember(undefined, 2, Info), + case AllHaveBuildTimes of + false -> + false; + true -> + lager:debug("Check if really built by locking"), + %% Try to lock each partition. If you get not_built, + %% the manager has not detected the built process has + %% died yet. + %% Notice that the process locking is spawned by the + %% pmap. That's important! as it should die eventually + %% so the test can lock on the tree. + IdxBuilt = + fun(Idx) -> + {ok, TreePid} = rpc:call(Node, riak_kv_vnode, + hashtree_pid, [Idx]), + TreeLocked = + rpc:call(Node, riak_kv_index_hashtree, get_lock, + [TreePid, for_riak_test]), + lager:debug("Partition ~p : ~p", [Idx, TreeLocked]), + TreeLocked == ok + orelse TreeLocked == already_locked + end, + + Partitions = [I || {I, _} <- Info], + + AllBuilt = + lists:all(fun(V) -> V == true end, + rt:pmap(IdxBuilt, Partitions)), + lager:debug("For node ~p all built = ~p", [Node, AllBuilt]), + AllBuilt + end + end, + wait_until(fun() -> + lists:foldl(AllBuiltFun, true, Nodes) + end). + %%%=================================================================== %%% Ring Functions %%%=================================================================== diff --git a/tests/verify_2i_aae.erl b/tests/verify_2i_aae.erl index 446ac02e1..1ea4c0e2d 100644 --- a/tests/verify_2i_aae.erl +++ b/tests/verify_2i_aae.erl @@ -31,22 +31,21 @@ -define(N_VAL, 3). 
confirm() -> - Nodes = [Node1] = rt:build_cluster(1, [{riak_kv, - [{anti_entropy_build_limit, {100, 1000}}, + [{anti_entropy, {off, []}}, + {anti_entropy_build_limit, {100, 500}}, {anti_entropy_concurrency, 100}, - {anti_entropy_tick, 1000}]}]), - rt:wait_until_aae_trees_built(Nodes), + {anti_entropy_tick, 200}]}]), rt_intercept:load_code(Node1), rt_intercept:add(Node1, {riak_object, [{{index_specs, 1}, skippable_index_specs}, {{diff_index_specs, 2}, skippable_diff_index_specs}]}), lager:info("Installed intercepts to corrupt index specs on node ~p", [Node1]), + %%rpc:call(Node1, lager, set_loglevel, [lager_console_backend, debug]), PBC = rt:pbc(Node1), NumItems = ?NUM_ITEMS, - %%NumDelItems = NumItems div 10, NumDel = ?NUM_DELETES, pass = check_lost_objects(Node1, PBC, NumItems, NumDel), pass = check_lost_indexes(Node1, PBC, NumItems), @@ -61,13 +60,25 @@ check_lost_objects(Node1, PBC, NumItems, NumDel) -> Index = {integer_index, "i"}, set_skip_index_specs(Node1, false), lager:info("Putting ~p objects with indexes", [NumItems]), - [put_obj(PBC, Bucket, N, N+1, Index) || N <- lists:seq(1, NumItems), + HalfNumItems = NumItems div 2, + [put_obj(PBC, Bucket, N, N+1, Index) || N <- lists:seq(1, HalfNumItems), Bucket <- ?BUCKETS], + lager:info("Put half the objects, now enable AAE and build tress"), + %% Enable AAE and build trees. + ok = rpc:call(Node1, application, set_env, + [riak_kv, anti_entropy, {on, [debug]}]), + ok = rpc:call(Node1, riak_kv_entropy_manager, enable, []), + rt:wait_until_aae_trees_built([Node1]), + + lager:info("AAE trees built, now put the rest of the data"), + [put_obj(PBC, Bucket, N, N+1, Index) + || N <- lists:seq(HalfNumItems+1, NumItems), Bucket <- ?BUCKETS], %% Verify they are there. ExpectedInitial = [{to_key(N+1), to_key(N)} || N <- lists:seq(1, NumItems)], lager:info("Check objects are there as expected"), [assert_range_query(PBC, Bucket, ExpectedInitial, Index, 1, NumItems+1) || Bucket <- ?BUCKETS], + lager:info("Now mess index spec code and change values"), set_skip_index_specs(Node1, true), [put_obj(PBC, Bucket, N, N, Index) || N <- lists:seq(1, NumItems-NumDel), @@ -75,7 +86,7 @@ check_lost_objects(Node1, PBC, NumItems, NumDel) -> DelRange = lists:seq(NumItems-NumDel+1, NumItems), lager:info("Deleting ~b objects without updating indexes", [NumDel]), [del_obj(PBC, Bucket, N) || N <- DelRange, Bucket <- ?BUCKETS], - DelKeys = [to_key(N) || N <- DelRange], + DelKeys = [to_key(N) || N <- DelRange], [rt:wait_until(fun() -> rt:pbc_really_deleted(PBC, Bucket, DelKeys) end) || Bucket <- ?BUCKETS], %% Verify they are damaged @@ -90,6 +101,33 @@ check_lost_objects(Node1, PBC, NumItems, NumDel) -> || Bucket <- ?BUCKETS], pass. +do_tree_rebuild(Node) -> + lager:info("Let's go through a tree rebuild right here"), + %% Cheat by clearing build times from ETS directly, as the code doesn't + %% ever clear them currently. + ?assertEqual(true, rpc:call(Node, ets, delete_all_objects, [ets_riak_kv_entropy])), + %% Make it so it doesn't go wild rebuilding things when the expiration is + %% tiny. + ?assertEqual(ok, rpc:call(Node, application, set_env, [riak_kv, + anti_entropy_build_limit, + {0, 5000}])), + %% Make any tree expire on tick. + ?assertEqual(ok, rpc:call(Node, application, set_env, [riak_kv, + anti_entropy_expire, + 1])), + %% Wait for a good number of ticks. 
+ timer:sleep(5000),
+ %% Make sure things stop expiring on tick
+ ?assertEqual(ok, rpc:call(Node, application, set_env, [riak_kv,
+ anti_entropy_expire,
+ 7 * 24 * 60 * 60 * 1000])),
+ %% And let the manager start allowing builds again.
+ ?assertEqual(ok, rpc:call(Node, application, set_env, [riak_kv,
+ anti_entropy_build_limit,
+ {100, 1000}])),
+ rt:wait_until_aae_trees_built([Node]),
+ ok.
+
 %% Write objects without a 2i index. Test that running 2i repair will generate
 %% the missing indexes.
 check_lost_indexes(Node1, PBC, NumItems) ->
@@ -101,6 +139,7 @@ check_lost_indexes(Node1, PBC, NumItems) ->
 lager:info("Verify that objects cannot be found via index"),
 [assert_range_query(PBC, Bucket, [], Index, 1, NumItems+1)
 || Bucket <- ?BUCKETS],
+ do_tree_rebuild(Node1),
 run_2i_repair(Node1),
 lager:info("Check that objects can now be found via index"),
 Expected = [{to_key(N+1), to_key(N)} || N <- lists:seq(1, NumItems)],
From 70826fd605e6c1703e079849d40d9028c0d57cdb Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Sat, 25 Jan 2014 02:58:29 -0500
Subject: [PATCH 066/139] Add test to ensure modified objects replicate.

Ensure that objects, when modified, actually replicate correctly to the
sink cluster.
---
 tests/repl_aae_fullsync.erl | 93 +++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index d3492e24e..feaa78860 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -39,6 +39,7 @@
 ]).

 confirm() ->
+ difference_test(),
 simple_test(),
 bidirectional_test(),
 dual_test(),
@@ -299,6 +300,98 @@ bidirectional_test() ->

 pass.

+difference_test() ->
+ %% Deploy 6 nodes.
+ Nodes = deploy_nodes(6, ?CONF(5)),
+
+ %% Break up the 6 nodes into two clusters.
+ {ANodes, BNodes} = lists:split(3, Nodes),
+
+ lager:info("ANodes: ~p", [ANodes]),
+ lager:info("BNodes: ~p", [BNodes]),
+
+ lager:info("Building two clusters."),
+ [repl_util:make_cluster(N) || N <- [ANodes, BNodes]],
+
+ AFirst = hd(ANodes),
+ BFirst = hd(BNodes),
+
+ lager:info("Naming clusters."),
+ repl_util:name_cluster(AFirst, "A"),
+ repl_util:name_cluster(BFirst, "B"),
+
+ lager:info("Waiting for convergence."),
+ rt:wait_until_ring_converged(ANodes),
+ rt:wait_until_ring_converged(BNodes),
+
+ lager:info("Waiting for transfers to complete."),
+ rt:wait_until_transfers_complete(ANodes),
+ rt:wait_until_transfers_complete(BNodes),
+
+ lager:info("Get leaders."),
+ LeaderA = get_leader(AFirst),
+ LeaderB = get_leader(BFirst),
+
+ lager:info("Finding connection manager ports."),
+ BPort = get_port(LeaderB),
+
+ lager:info("Connecting cluster A to B"),
+ connect_cluster(LeaderA, BPort, "B"),
+
+ %% Get PBC connections.
+ APBC = rt:pbc(LeaderA),
+ BPBC = rt:pbc(LeaderB),
+
+ %% Write key.
+ ok = riakc_pb_socket:put(APBC,
+ riakc_obj:new(<<"foo">>, <<"bar">>,
+ <<"baz">>),
+ [{timeout, 4000}]),
+
+ %% Wait for trees to compute.
+ repl_util:wait_until_aae_trees_built(ANodes),
+ repl_util:wait_until_aae_trees_built(BNodes),
+
+ lager:info("Test fullsync from cluster A leader ~p to cluster B",
+ [LeaderA]),
+ repl_util:enable_fullsync(LeaderA, "B"),
+ rt:wait_until_ring_converged(ANodes),
+
+ %% Flush AAE trees to disk.
+ perform_sacrifice(AFirst),
+
+ %% Wait for fullsync.
+ {Time1, _} = timer:tc(repl_util,
+ start_and_wait_until_fullsync_complete,
+ [LeaderA, "B"]),
+ lager:info("Fullsync completed in ~p seconds", [Time1/1000/1000]),
+
+ %% Read key after fullsync. 
+ {ok, O1} = riakc_pb_socket:get(BPBC, <<"foo">>, <<"bar">>,
+ [{timeout, 4000}]),
+ ?assertEqual(<<"baz">>, riakc_obj:get_value(O1)),
+
+ %% Put, generate sibling.
+ ok = riakc_pb_socket:put(APBC,
+ riakc_obj:new(<<"foo">>, <<"bar">>,
+ <<"baz2">>),
+ [{timeout, 4000}]),
+
+ %% Wait for fullsync.
+ {Time2, _} = timer:tc(repl_util,
+ start_and_wait_until_fullsync_complete,
+ [LeaderA, "B"]),
+ lager:info("Fullsync completed in ~p seconds", [Time2/1000/1000]),
+
+ %% Read key after fullsync.
+ {ok, O2} = riakc_pb_socket:get(BPBC, <<"foo">>, <<"bar">>,
+ [{timeout, 4000}]),
+ ?assertEqual([<<"baz">>, <<"baz2">>], lists:sort(riakc_obj:get_values(O2))),
+
+ rt:clean_cluster(Nodes),
+
+ pass.
+
 %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE
 %% trees to flush to disk.
 perform_sacrifice(Node) ->
From 8cbddf3552c38e8c4295f0a57b250277c82336ab Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Sat, 25 Jan 2014 14:20:03 -0500
Subject: [PATCH 067/139] Add test detecting source deadlock.

Add test which ensures that the AAE source worker doesn't deadlock when
waiting for responses from the process which is computing the hashtree
differences.

Unfortunately, this test uses timeouts because, as the code currently
stands, I can't figure out a way to make it any cleaner.
---
 .../riak_kv_index_hashtree_intercepts.erl | 6 ++
 tests/repl_aae_fullsync.erl | 70 +++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/intercepts/riak_kv_index_hashtree_intercepts.erl b/intercepts/riak_kv_index_hashtree_intercepts.erl
index 9ee9578fe..d608fe69f 100644
--- a/intercepts/riak_kv_index_hashtree_intercepts.erl
+++ b/intercepts/riak_kv_index_hashtree_intercepts.erl
@@ -4,6 +4,12 @@

 -define(M, riak_kv_index_hashtree_orig).

+%% @doc Perform a delayed compare, which delays the receipt of a
+%% message.
+delayed_compare(_IndexN, _Remote, _AccFun, _TreePid) ->
+ timer:sleep(1000000),
+ [].
+
 %% @doc When attempting to get the lock on a hashtree, return the
 %% not_built atom which means the tree has not been computed yet.
 not_built(_TreePid, _Type) ->
diff --git a/tests/repl_aae_fullsync.erl b/tests/repl_aae_fullsync.erl
index feaa78860..afd73e7c6 100644
--- a/tests/repl_aae_fullsync.erl
+++ b/tests/repl_aae_fullsync.erl
@@ -40,6 +40,7 @@

 confirm() ->
 difference_test(),
+ deadlock_test(),
 simple_test(),
 bidirectional_test(),
 dual_test(),
@@ -392,6 +393,75 @@ difference_test() ->

 pass.

+deadlock_test() ->
+ %% Deploy 6 nodes.
+ Nodes = deploy_nodes(6, ?CONF(5)),
+
+ %% Break up the 6 nodes into two clusters.
+ {ANodes, BNodes} = lists:split(3, Nodes),
+
+ lager:info("ANodes: ~p", [ANodes]),
+ lager:info("BNodes: ~p", [BNodes]),
+
+ lager:info("Building two clusters."),
+ [repl_util:make_cluster(N) || N <- [ANodes, BNodes]],
+
+ AFirst = hd(ANodes),
+ BFirst = hd(BNodes),
+
+ lager:info("Naming clusters."),
+ repl_util:name_cluster(AFirst, "A"),
+ repl_util:name_cluster(BFirst, "B"),
+
+ lager:info("Waiting for convergence."),
+ rt:wait_until_ring_converged(ANodes),
+ rt:wait_until_ring_converged(BNodes),
+
+ lager:info("Waiting for transfers to complete."),
+ rt:wait_until_transfers_complete(ANodes),
+ rt:wait_until_transfers_complete(BNodes),
+
+ lager:info("Get leaders."),
+ LeaderA = get_leader(AFirst),
+ LeaderB = get_leader(BFirst),
+
+ lager:info("Finding connection manager ports."),
+ BPort = get_port(LeaderB),
+
+ lager:info("Connecting cluster A to B"),
+ connect_cluster(LeaderA, BPort, "B"),
+
+ %% Add intercept for delayed comparison of hashtrees. 
+ Intercept = {riak_kv_index_hashtree, [{{compare, 4}, delayed_compare}]}, + [ok = rt_intercept:add(Target, Intercept) || Target <- ANodes], + + %% Wait for trees to compute. + repl_util:wait_until_aae_trees_built(ANodes), + repl_util:wait_until_aae_trees_built(BNodes), + + lager:info("Test fullsync from cluster A leader ~p to cluster B", + [LeaderA]), + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + + %% Start fullsync. + lager:info("Starting fullsync to cluster B."), + rpc:call(LeaderA, riak_repl_console, fullsync, [["start", "B"]]), + + %% Wait for fullsync to initialize and the AAE repl processes to + %% stall from the suspended intercepts. + %% TODO: What can be done better here? + timer:sleep(25000), + + %% Attempt to get status from fscoordintor. + Result = rpc:call(LeaderA, riak_repl2_fscoordinator, status, [], 500), + lager:info("Status result: ~p", [Result]), + ?assertNotEqual({badrpc, timeout}, Result), + + rt:clean_cluster(Nodes), + + pass. + %% @doc Required for 1.4+ Riak, write sacrificial keys to force AAE %% trees to flush to disk. perform_sacrifice(Node) -> From dcd2135d5de020253d1cfa458df36733b2148a5f Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Sun, 26 Jan 2014 12:13:17 -0500 Subject: [PATCH 068/139] Use correct intercepts. Correct a typo which was causing the module to attempt to use intercepts which do not exist. --- ...ink_intercepts.erl => riak_repl2_rtsink_conn_intercepts.erl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename intercepts/{riak_repl2_rtsink_intercepts.erl => riak_repl2_rtsink_conn_intercepts.erl} (92%) diff --git a/intercepts/riak_repl2_rtsink_intercepts.erl b/intercepts/riak_repl2_rtsink_conn_intercepts.erl similarity index 92% rename from intercepts/riak_repl2_rtsink_intercepts.erl rename to intercepts/riak_repl2_rtsink_conn_intercepts.erl index 4f9016206..85a139739 100644 --- a/intercepts/riak_repl2_rtsink_intercepts.erl +++ b/intercepts/riak_repl2_rtsink_conn_intercepts.erl @@ -1,5 +1,5 @@ %% Intercepts functions for the riak_test in ../tests/repl_rt_heartbeat.erl --module(riak_repl2_rtsink_intercepts). +-module(riak_repl2_rtsink_conn_intercepts). -compile(export_all). -include("intercept.hrl"). From 074c73e8d918f35ab8b05888878bbaf2371c3ddc Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Mon, 27 Jan 2014 09:56:57 -0500 Subject: [PATCH 069/139] Update test to enable security the 'new' way --- tests/pb_cipher_suites.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pb_cipher_suites.erl b/tests/pb_cipher_suites.erl index 3d5ddd050..d62391488 100644 --- a/tests/pb_cipher_suites.erl +++ b/tests/pb_cipher_suites.erl @@ -40,9 +40,6 @@ confirm() -> {keyfile, filename:join([CertDir, "site3.basho.com/key.pem"])}, {cacertfile, filename:join([CertDir, "site3.basho.com/cacerts.pem"])} ]}, - {riak_core, [ - {security, true} - ]}, {riak_search, [ {enabled, true} ]} @@ -50,6 +47,9 @@ confirm() -> Nodes = rt:build_cluster(4, Conf), Node = hd(Nodes), + %% enable security on the cluster + ok = rpc:call(Node, riak_core_console, security_enable, [[]]), + [_, {pb, {"127.0.0.1", Port}}] = rt:connection_info(Node), From b4a07894b72fde66d210b1bfbd70075458dea3a1 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone"
Date: Thu, 16 Jan 2014 21:06:26 -0500
Subject: [PATCH 070/139] WIP test listing keys responds with {error, overload}

---
 tests/overload.erl | 91 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 73 insertions(+), 18 deletions(-)

diff --git a/tests/overload.erl b/tests/overload.erl
index 726320ba3..a7f7ae9c6 100644
--- a/tests/overload.erl
+++ b/tests/overload.erl
@@ -21,8 +21,8 @@
 -compile(export_all).
 -include_lib("eunit/include/eunit.hrl").

--define(NUM_REQUESTS, 1000).
--define(THRESHOLD, 500).
+-define(NUM_REQUESTS, 40).
+-define(THRESHOLD, 20).
 -define(BUCKET, <<"test">>).
 -define(KEY, <<"hotkey">>).

@@ -43,17 +43,21 @@ confirm() ->
 Node =:= Node2]),

 RO = riak_object:new(?BUCKET, ?KEY, <<"test">>),
+
+%% ok = test_no_overload_protection(Nodes, Victim, RO),
+ ok = test_vnode_protection(Nodes, Victim, RO),
+%% ok = test_fsm_protection(Nodes, Victim, RO),
+ pass.
+
+test_no_overload_protection(Nodes, Victim, RO) ->
 lager:info("Testing with no overload protection"),
 {NumProcs, QueueLen} = run_test(Nodes, Victim, RO),
 ?assert(NumProcs >= (2*?NUM_REQUESTS * 0.9)),
 ?assert(QueueLen >= (?NUM_REQUESTS * 0.9)),
-
- ok = test_vnode_protection(Nodes, Victim, RO),
- ok = test_fsm_protection(Nodes, Victim, RO),
- pass.
+ ok. 
+test_overload_list_keys(Nodes) -> + [Node1, Node2] = Nodes, + lager:info("Suspending all vnodes on Node2~n"), + Pid = suspend_all_vnodes(Node2), + lager:info("Sending ~b list_keys requests", [?NUM_REQUESTS]), + Pids = spawn_list_keys(Node1, ?NUM_REQUESTS), + {ok, C} = riak:client_connect(Node1), + Keys = riak_client:list_keys(?BUCKET, C), + io:format("KEYS = ~p~n", [Keys]), + ?assertEqual({error, overload}, Keys), + resume_all_vnodes(Pid), + wait_for_all_vnode_queues_empty(Node2), + kill_pids(Pids). + +wait_for_all_vnode_queues_empty(Node) -> + rt:wait_until(Node, fun(N) -> + vnode_queues_empty(N) + end). + +vnode_queues_empty(Node) -> + rpc:call(Node, ?MODULE, remote_vnode_queues_empty, []). + +remote_vnode_queues_empty() -> + lists:all(fun({_, _, Pid}) -> + {message_queue_len, Len} = + process_info(Pid, message_queue_len), + Len =:= 0 + end, riak_core_vnode_manager:all_vnodes()). + write_once(Node, RO) -> {ok, C} = riak:client_connect(Node), C:put(RO, 3). @@ -144,15 +178,33 @@ read_until_success(C, Count) -> Count end. +spawn_list_keys(Node, Num) -> + [spawn(fun() -> + {ok, C} = riak:client_connect(Node), + riak_client:list_keys(?BUCKET, C) + end) || _ <- lists:seq(1,Num)]. + spawn_reads(Node, Num) -> [spawn(fun() -> {ok, C} = riak:client_connect(Node), riak_client:get(?BUCKET, ?KEY, C) end) || _ <- lists:seq(1,Num)]. -kill_writes(Pids) -> +kill_pids(Pids) -> [exit(Pid, kill) || Pid <- Pids]. +suspend_all_vnodes(Node) -> + rpc:call(Node, ?MODULE, remote_suspend_all_vnodes, []). + +remote_suspend_all_vnodes() -> + spawn(fun() -> + Vnodes = riak_core_vnode_manager:all_vnodes(), + [erlang:suspend_process(Pid, []) || {_, _, Pid} <- Vnodes], + receive resume -> + [erlang:resume_process(Pid) || {_, _, Pid} <- Vnodes] + end + end). + suspend_vnode(Node, Idx) -> Pid = rpc:call(Node, ?MODULE, remote_suspend_vnode, [Idx], infinity), Pid. @@ -180,6 +232,9 @@ remote_suspend_vnode_proxy(Idx) -> end end). +resume_all_vnodes(Pid) -> + Pid ! resume. + resume_vnode(Pid) -> Pid ! resume. From 598863b3f7572fe49f34167cfa37fb4c7646db3c Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 24 Jan 2014 15:13:24 -0500 Subject: [PATCH 071/139] list_keys overload check working --- tests/overload.erl | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/tests/overload.erl b/tests/overload.erl index a7f7ae9c6..b5fa0692f 100644 --- a/tests/overload.erl +++ b/tests/overload.erl @@ -21,8 +21,8 @@ -compile(export_all). -include_lib("eunit/include/eunit.hrl"). --define(NUM_REQUESTS, 40). --define(THRESHOLD, 20). +-define(NUM_REQUESTS, 200). +-define(THRESHOLD, 100). -define(BUCKET, <<"test">>). -define(KEY, <<"hotkey">>). @@ -44,9 +44,9 @@ confirm() -> RO = riak_object:new(?BUCKET, ?KEY, <<"test">>), -%% ok = test_no_overload_protection(Nodes, Victim, RO), + ok = test_no_overload_protection(Nodes, Victim, RO), ok = test_vnode_protection(Nodes, Victim, RO), -%% ok = test_fsm_protection(Nodes, Victim, RO), + ok = test_fsm_protection(Nodes, Victim, RO), pass. test_no_overload_protection(Nodes, Victim, RO) -> @@ -57,7 +57,7 @@ test_no_overload_protection(Nodes, Victim, RO) -> ok. 
test_vnode_protection(Nodes, Victim, RO) -> - [Node1, _Node2] = Nodes, + [Node1, Node2] = Nodes, lager:info("Testing with vnode queue protection enabled"), lager:info("Setting vnode overload threshold to ~b", [?THRESHOLD]), Config2 = [{riak_core, [{vnode_overload_threshold, ?THRESHOLD}]}], @@ -86,12 +86,12 @@ test_vnode_protection(Nodes, Victim, RO) -> test_overload_list_keys(Nodes), -%% lager:info("Suspending vnode proxy for ~b", [Victim]), -%% Pid = suspend_vnode_proxy(Node2, Victim), -%% {NumProcs3, QueueLen3} = run_test(Nodes, Victim, RO), -%% Pid ! resume, -%% ?assert(NumProcs3 >= (2*?NUM_REQUESTS * 0.9)), -%% ?assert(QueueLen3 =< (?THRESHOLD * 1.1)), + lager:info("Suspending vnode proxy for ~b", [Victim]), + Pid = suspend_vnode_proxy(Node2, Victim), + {NumProcs3, QueueLen3} = run_test(Nodes, Victim, RO), + Pid ! resume, + ?assert(NumProcs3 >= (2*?NUM_REQUESTS * 0.9)), + ?assert(QueueLen3 =< (?THRESHOLD * 1.1)), ok. test_fsm_protection(Nodes, Victim, RO) -> @@ -135,18 +135,22 @@ run_test(Nodes, Victim, RO) -> test_overload_list_keys(Nodes) -> [Node1, Node2] = Nodes, - lager:info("Suspending all vnodes on Node2~n"), + lager:info("Suspending all vnodes on Node2"), Pid = suspend_all_vnodes(Node2), + timer:sleep(3000), lager:info("Sending ~b list_keys requests", [?NUM_REQUESTS]), Pids = spawn_list_keys(Node1, ?NUM_REQUESTS), - {ok, C} = riak:client_connect(Node1), - Keys = riak_client:list_keys(?BUCKET, C), - io:format("KEYS = ~p~n", [Keys]), - ?assertEqual({error, overload}, Keys), + timer:sleep(3000), + Keys = rpc:call(Node1, ?MODULE, list_keys, []), + ?assertEqual({error, mailbox_overload}, Keys), resume_all_vnodes(Pid), wait_for_all_vnode_queues_empty(Node2), kill_pids(Pids). +list_keys() -> + {ok, C} = riak:client_connect(node()), + riak_client:list_keys(?BUCKET, C). + wait_for_all_vnode_queues_empty(Node) -> rt:wait_until(Node, fun(N) -> vnode_queues_empty(N) @@ -180,8 +184,7 @@ read_until_success(C, Count) -> spawn_list_keys(Node, Num) -> [spawn(fun() -> - {ok, C} = riak:client_connect(Node), - riak_client:list_keys(?BUCKET, C) + rpc:call(Node, ?MODULE, list_keys, []) end) || _ <- lists:seq(1,Num)]. spawn_reads(Node, Num) -> @@ -199,9 +202,12 @@ suspend_all_vnodes(Node) -> remote_suspend_all_vnodes() -> spawn(fun() -> Vnodes = riak_core_vnode_manager:all_vnodes(), - [erlang:suspend_process(Pid, []) || {_, _, Pid} <- Vnodes], + [erlang:suspend_process(Pid, []) || {riak_kv_vnode, _, Pid} + <- Vnodes], receive resume -> - [erlang:resume_process(Pid) || {_, _, Pid} <- Vnodes] + io:format("Resuming vnodes~n"), + [erlang:resume_process(Pid) || {riak_kv_vnode, _, Pid} + <- Vnodes] end end). From 242493baafcd747762021a06f56eb9839b8eb4c0 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Fri, 24 Jan 2014 21:44:22 -0500 Subject: [PATCH 072/139] modify how coverage queries are checked in overload test --- tests/overload.erl | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/tests/overload.erl b/tests/overload.erl index b5fa0692f..a7208b8bd 100644 --- a/tests/overload.erl +++ b/tests/overload.erl @@ -135,21 +135,38 @@ run_test(Nodes, Victim, RO) -> test_overload_list_keys(Nodes) -> [Node1, Node2] = Nodes, - lager:info("Suspending all vnodes on Node2"), + lager:info("Suspending all kv vnodes on Node2"), Pid = suspend_all_vnodes(Node2), timer:sleep(3000), + lager:info("Sending ~b list_keys requests", [?NUM_REQUESTS]), Pids = spawn_list_keys(Node1, ?NUM_REQUESTS), - timer:sleep(3000), - Keys = rpc:call(Node1, ?MODULE, list_keys, []), - ?assertEqual({error, mailbox_overload}, Keys), + timer:sleep(1000), + + lager:info("Checking Coverage queries for overload"), + + Res = list_keys(Node1), + ?assertEqual({error, mailbox_overload}, Res), + lager:info("list_keys correctly handled overload"), + + Res2 = list_buckets(Node1), + ?assertEqual({error, mailbox_overload}, Res2), + lager:info("list_buckets correctly handled overload"), + + lager:info("Resuming all kv vnodes on Node2"), resume_all_vnodes(Pid), + + lager:info("Waiting for vnode queues to empty"), wait_for_all_vnode_queues_empty(Node2), kill_pids(Pids). -list_keys() -> - {ok, C} = riak:client_connect(node()), - riak_client:list_keys(?BUCKET, C). +list_keys(Node) -> + {ok, C} = riak:client_connect(Node), + riak_client:list_keys(?BUCKET, 30000, C). + +list_buckets(Node) -> + {ok, C} = riak:client_connect(Node), + riak_client:list_buckets(30000, C). wait_for_all_vnode_queues_empty(Node) -> rt:wait_until(Node, fun(N) -> From 5e0ff64a66c61bc2a25be4cd6b3686c5b1d4d04e Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 28 Jan 2014 18:49:57 -0500 Subject: [PATCH 073/139] get coverage query overload tests to repeatedly pass --- tests/overload.erl | 48 ++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/tests/overload.erl b/tests/overload.erl index a7208b8bd..4bb2e9044 100644 --- a/tests/overload.erl +++ b/tests/overload.erl @@ -58,9 +58,17 @@ test_no_overload_protection(Nodes, Victim, RO) -> test_vnode_protection(Nodes, Victim, RO) -> [Node1, Node2] = Nodes, + + %% Setting check_interval to one ensures that process_info is called + %% to check the queue length on each vnode send. + %% This allows us to artificially raise vnode queue lengths with dummy + %% messages instead of having to go through the vnode path for coverage + %% query overload testing. lager:info("Testing with vnode queue protection enabled"), lager:info("Setting vnode overload threshold to ~b", [?THRESHOLD]), - Config2 = [{riak_core, [{vnode_overload_threshold, ?THRESHOLD}]}], + lager:info("Setting vnode check interval to 1"), + Config2 = [{riak_core, [{vnode_overload_threshold, ?THRESHOLD}, + {vnode_check_interval, 1}]}], rt:pmap(fun(Node) -> rt:update_app_config(Node, Config2) end, Nodes), @@ -84,7 +92,7 @@ test_vnode_protection(Nodes, Victim, RO) -> lager:info("Unnecessary dropped requests: ~b", [Dropped]), ?assert(Dropped =< CheckInterval), - test_overload_list_keys(Nodes), + test_cover_queries_overload(Nodes), lager:info("Suspending vnode proxy for ~b", [Victim]), Pid = suspend_vnode_proxy(Node2, Victim), @@ -133,15 +141,10 @@ run_test(Nodes, Victim, RO) -> kill_pids(Reads), {NumProcs2 - NumProcs1, QueueLen}. 
-test_overload_list_keys(Nodes) -> +test_cover_queries_overload(Nodes) -> [Node1, Node2] = Nodes, lager:info("Suspending all kv vnodes on Node2"), - Pid = suspend_all_vnodes(Node2), - timer:sleep(3000), - - lager:info("Sending ~b list_keys requests", [?NUM_REQUESTS]), - Pids = spawn_list_keys(Node1, ?NUM_REQUESTS), - timer:sleep(1000), + Pid = suspend_and_overload_all_kv_vnodes(Node2), lager:info("Checking Coverage queries for overload"), @@ -157,8 +160,7 @@ test_overload_list_keys(Nodes) -> resume_all_vnodes(Pid), lager:info("Waiting for vnode queues to empty"), - wait_for_all_vnode_queues_empty(Node2), - kill_pids(Pids). + wait_for_all_vnode_queues_empty(Node2). list_keys(Node) -> {ok, C} = riak:client_connect(Node), @@ -199,11 +201,6 @@ read_until_success(C, Count) -> Count end. -spawn_list_keys(Node, Num) -> - [spawn(fun() -> - rpc:call(Node, ?MODULE, list_keys, []) - end) || _ <- lists:seq(1,Num)]. - spawn_reads(Node, Num) -> [spawn(fun() -> {ok, C} = riak:client_connect(Node), @@ -213,14 +210,24 @@ spawn_reads(Node, Num) -> kill_pids(Pids) -> [exit(Pid, kill) || Pid <- Pids]. -suspend_all_vnodes(Node) -> - rpc:call(Node, ?MODULE, remote_suspend_all_vnodes, []). +suspend_and_overload_all_kv_vnodes(Node) -> + Pid = rpc:call(Node, ?MODULE, remote_suspend_and_overload, []), + Pid ! {overload, self()}, + receive overloaded -> + Pid + end. -remote_suspend_all_vnodes() -> +remote_suspend_and_overload() -> spawn(fun() -> Vnodes = riak_core_vnode_manager:all_vnodes(), [erlang:suspend_process(Pid, []) || {riak_kv_vnode, _, Pid} <- Vnodes], + receive {overload, From} -> + io:format("Overloading vnodes ~n"), + [?MODULE:overload(Pid) || {riak_kv_vnode, _, Pid} + <- Vnodes], + From ! overloaded + end, receive resume -> io:format("Resuming vnodes~n"), + [erlang:resume_process(Pid) || {riak_kv_vnode, _, Pid} @@ -228,6 +235,9 @@ remote_suspend_all_vnodes() -> end end). +overload(Pid) -> + [Pid ! hola || _ <- lists:seq(1, ?NUM_REQUESTS)]. + suspend_vnode(Node, Idx) -> Pid = rpc:call(Node, ?MODULE, remote_suspend_vnode, [Idx], infinity), Pid. From 766d2ab1cc59b9b9f2a2acd76c13a0daaec21203 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Fri, 31 Jan 2014 11:38:29 -0500 Subject: [PATCH 074/139] Support xref smoke tests --- src/smoke_test_escript.erl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/smoke_test_escript.erl b/src/smoke_test_escript.erl index d674d0c22..76bff333f 100755 --- a/src/smoke_test_escript.erl +++ b/src/smoke_test_escript.erl @@ -104,6 +104,16 @@ worker(Rebar, PWD, Suites) -> giddyup:post_result([{test, Suite}, {status, get_status(Res)}, {log, CleanedLog} | Config]), Res; + "xref" -> + P = erlang:open_port({spawn_executable, Rebar}, + [{args, ["xref", "skip_deps=true"]}, + {cd, FDep}, exit_status, + {line, 1024}, stderr_to_stdout, binary]), + {Res, Log} = accumulate(P, []), + CleanedLog = cleanup_logs(Log), + giddyup:post_result([{test, Suite}, {status, get_status(Res)}, + {log, CleanedLog} | Config]), + Res; _ -> ok From a9100158a32a71e98d6db83c782b4ff1c23df2bf Mon Sep 17 00:00:00 2001 From: Jordan West Date: Fri, 31 Jan 2014 17:04:18 -0800 Subject: [PATCH 075/139] improve cluster_meta_basic cluster_meta_basic has been intermittently failing [1]. This commit includes two improvements, the second of which addresses this intermittent failure. The first change modifies the test to "wait_until_object_count" instead of reading the object count at a given moment and getting a possibly stale, or soon to be updated, value. This alone does not cause the test to pass reliably.
However, it highlights that the underlying race condition is one where the object count will never reach the expected value. The second change modifies the test to avoid the race, which was caused by resolving on two different nodes concurrently, each of which in turn wrote and broadcasted the resolved result. If an interleaving occurs in which both writes succeed before either node is notified of the modification on the other, the failure follows. The test has been changed to perform the write/broadcast on a single node only, ensuring that we eventually converge to the expected value and object count. [1] http://giddyup.basho.com/#/projects/riak_ee/scorecards/73/73-1556-cluster_meta_basic-centos-6-64/35530/artifacts/532185 --- tests/cluster_meta_basic.erl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/cluster_meta_basic.erl b/tests/cluster_meta_basic.erl index 8038da53b..2bdbe4648 100644 --- a/tests/cluster_meta_basic.erl +++ b/tests/cluster_meta_basic.erl @@ -42,6 +42,7 @@ confirm() -> %% 4. bring up stopped nodes and ensure that either lazily queued messages or anti-entropy repair %% propagates key to all nodes in cluster test_writes_after_partial_cluster_failure([N1 | _]=Nodes) -> + lager:info("testing writes after partial cluster failure"), metadata_put(N1, ?PREFIX1, ?KEY1, ?VAL1), wait_until_metadata_value(Nodes, ?PREFIX1, ?KEY1, ?VAL1), print_tree(N1, Nodes), @@ -94,11 +95,13 @@ test_metadata_conflicts([N1, N2 | _]=Nodes) -> %% assert that we no longer have siblings when allow_put=true lager:info("checking object count after resolve on get w/ put"), - wait_until_metadata_value([N1, N2], ?PREFIX1, ?KEY2, + wait_until_metadata_value(N1, ?PREFIX1, ?KEY2, [{resolver, fun list_resolver/2}], lists:usort([?VAL1, ?VAL2])), - ?assertEqual(1, rpc:call(N1, ?MODULE, object_count, [?PREFIX1, ?KEY2])), - ?assertEqual(1, rpc:call(N2, ?MODULE, object_count, [?PREFIX1, ?KEY2])), + wait_until_metadata_value([N1, N2], ?PREFIX1, ?KEY2, + [{resolver, fun list_resolver/2}, {allow_put, false}], + lists:usort([?VAL1, ?VAL2])), + wait_until_object_count([N1, N2], ?PREFIX1, ?KEY2, 1), ok. write_conflicting(N1, N2, Prefix, Key, Val1, Val2) -> @@ -152,6 +155,17 @@ wait_until_metadata_value(Node, Prefix, Key, Opts, Val) -> ?assertEqual(ok, rt:wait_until(F)), ok. +wait_until_object_count(Nodes, Prefix, Key, Count) when is_list(Nodes) -> + [wait_until_object_count(Node, Prefix, Key, Count) || Node <- Nodes]; +wait_until_object_count(Node, Prefix, Key, Count) -> + lager:info("wait until {~p, ~p} has object count ~p on ~p", [Prefix, Key, Count, Node]), + F = fun() -> + Count =:= rpc:call(Node, ?MODULE, object_count, [Prefix, Key]) + end, + ?assertEqual(ok, rt:wait_until(F)), + ok. + + eager_peers(Node, Root) -> {Eagers, _} = rpc:call(Node, riak_core_broadcast, debug_get_peers, [Node, Root]), Eagers. From 865173095c4f6b3bceb0da71762b91e8fc879786 Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Thu, 23 Jan 2014 17:12:25 -0700 Subject: [PATCH 076/139] Add test to ensure that consistent objects are not replicated Replication of consistent objects is not currently supported. Add a test to ensure that fullsync replication filters these objects. No testing is necessary for realtime replication at this time because the postcommit hook mechanism it uses is not invoked in the consistent object code path.
--- tests/repl_consistent_object_filter.erl | 143 ++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 tests/repl_consistent_object_filter.erl diff --git a/tests/repl_consistent_object_filter.erl b/tests/repl_consistent_object_filter.erl new file mode 100644 index 000000000..92fa60a25 --- /dev/null +++ b/tests/repl_consistent_object_filter.erl @@ -0,0 +1,143 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% ------------------------------------------------------------------- +-module(repl_consistent_object_filter). +-export([confirm/0]). +-include_lib("eunit/include/eunit.hrl"). + +%% Test to verify that replication properly filters consistent bucket +%% types. This is intended to be a temporary state of affairs so this +%% test should have a limited life span. +%% +%% Currently this test only exercises fullsync replication. The write +%% path for consistent objects bypasses the riak_kv postcommit hooks +%% that are the mechanism by which realtime replication works. As a +%% result, no attempt is ever made to replicate consistent objects. + +%% @doc riak_test entry point +confirm() -> + rt:set_conf(all, [{"buckets.default.siblings", "off"}]), + rt:set_conf(all, [{"strong_consistency", "on"}]), + + %% Start up two >1.3.2 clusters and connect them, + {LeaderA, LeaderB, ANodes, BNodes} = make_clusters(), + + %% Temporary kludge to handle startup race condition between + %% riak_kv and riak_ensemble + %% @TODO Remove this once riak_ensemble helpers are in place that + %% provide a way for this race to be resolved. + timer:sleep(60000), + + PBA = get_pb_pid(LeaderA), + PBB = get_pb_pid(LeaderB), + + BucketType = <<"consistent_type">>, + + %% Create consistent bucket type on cluster A + rt:create_and_activate_bucket_type(LeaderA, BucketType, [{consistent, true}]), + rt:wait_until_bucket_type_status(BucketType, active, ANodes), + + %% Create consistent bucket type on cluster B + rt:create_and_activate_bucket_type(LeaderB, BucketType, [{consistent, true}]), + rt:wait_until_bucket_type_status(BucketType, active, BNodes), + + connect_clusters(LeaderA, LeaderB), + + %% Create two riak objects and execute consistent put of those + %% objects + Bucket = <<"unclebucket">>, + Key1 = <<"Maizy">>, + Key2 = <<"Miles">>, + Bin1 = <<"Take this quarter, go downtown, and have a rat gnaw that thing off your face! Good day to you, madam.">>, + Bin2 = <<"My Uncle was micro waving our socks and the dog threw up on the couch for an hour.">>, + Obj1 = riakc_obj:new({BucketType, Bucket}, Key1, Bin1), + Obj2 = riakc_obj:new({BucketType, Bucket}, Key2, Bin2), + lager:info("doing 2 consistent puts on A, bucket:~p", [Bucket]), + ok = riakc_pb_socket:put(PBA, Obj1), + ok = riakc_pb_socket:put(PBA, Obj2), + + %% Enable fullsync and wait for it to complete + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + + {Time, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + lager:info("Fullsync completed in ~p seconds", [Time/1000/1000]), + + %% Attempt to read the objects from cluster B to verify they have + %% not been replicated via realtime replication + BReadRes3 = riakc_pb_socket:get(PBB, {BucketType, Bucket}, Key1), + BReadRes4 = riakc_pb_socket:get(PBB, {BucketType, Bucket}, Key2), + + ?assertEqual({error, notfound}, BReadRes3), + ?assertEqual({error, notfound}, BReadRes4), + + riakc_pb_socket:stop(PBA), + riakc_pb_socket:stop(PBB), + pass. 
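The test above only asserts the negative case: consistent objects never appear on cluster B. A hedged sketch of the complementary positive control one could add alongside it, assuming an ordinary (non-consistent) bucket does replicate under the fullsync configuration used here; the bucket and key names are invented:

    %% Not part of the patch: write to a plain bucket on A, fullsync,
    %% and expect the value to turn up on B.
    PlainObj = riakc_obj:new(<<"plain_bucket">>, <<"k1">>, <<"v1">>),
    ok = riakc_pb_socket:put(PBA, PlainObj),
    repl_util:start_and_wait_until_fullsync_complete(LeaderA),
    ?assertMatch({ok, _}, riakc_pb_socket:get(PBB, <<"plain_bucket">>, <<"k1">>)).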
+ +%% @doc Connect two clusters for replication using their respective leader nodes. +connect_clusters(LeaderA, LeaderB) -> + {ok, {_IP, Port}} = rpc:call(LeaderB, application, get_env, + [riak_core, cluster_mgr]), + lager:info("connect cluster A:~p to B on port ~p", [LeaderA, Port]), + repl_util:connect_cluster(LeaderA, "127.0.0.1", Port), + ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")). + +%% @doc Create two clusters of 1 node each and connect them for replication: +%% Cluster "A" -> cluster "B" +make_clusters() -> + NumNodes = rt_config:get(num_nodes, 2), + ClusterASize = rt_config:get(cluster_a_size, 1), + + lager:info("Deploy ~p nodes", [NumNodes]), + Conf = [ + {riak_repl, + [ + %% turn off fullsync + {fullsync_on_connect, false}, + {max_fssource_node, 2}, + {max_fssink_node, 2}, + {max_fssource_cluster, 5}, + {max_fssource_retries, 5} + ]} + ], + + Nodes = rt:deploy_nodes(NumNodes, Conf), + {ANodes, BNodes} = lists:split(ClusterASize, Nodes), + lager:info("ANodes: ~p", [ANodes]), + lager:info("BNodes: ~p", [BNodes]), + + lager:info("Build cluster A"), + repl_util:make_cluster(ANodes), + + lager:info("Build cluster B"), + repl_util:make_cluster(BNodes), + + %% get the leader for the first cluster + lager:info("waiting for leader to converge on cluster A"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), + AFirst = hd(ANodes), + + %% get the leader for the second cluster + lager:info("waiting for leader to converge on cluster B"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), + BFirst = hd(BNodes), + + %% Name the clusters + repl_util:name_cluster(AFirst, "A"), + rt:wait_until_ring_converged(ANodes), + + repl_util:name_cluster(BFirst, "B"), + rt:wait_until_ring_converged(BNodes), + + repl_util:wait_until_leader_converge(ANodes), + repl_util:wait_until_leader_converge(BNodes), + + {AFirst, BFirst, ANodes, BNodes}. + +get_pb_pid(Leader) -> + {ok, [{IP, PortA}] } = rpc:call(Leader, application, get_env, [riak_api, pb]), + {ok, Pid} = riakc_pb_socket:start_link(IP, PortA, []), + Pid. From ca7dd246ce34c1a2dcc0879bd2a0c8ff25353691 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 3 Feb 2014 18:38:03 -0800 Subject: [PATCH 077/139] Commit various local changes --- src/observer.erl | 655 ++++++++++++++++++++++++++++++++++++++++++++++ src/riak_test.erl | 9 + src/rtssh.erl | 154 ++++++++++- 3 files changed, 815 insertions(+), 3 deletions(-) create mode 100644 src/observer.erl diff --git a/src/observer.erl b/src/observer.erl new file mode 100644 index 000000000..7b10e8cb3 --- /dev/null +++ b/src/observer.erl @@ -0,0 +1,655 @@ +-module(observer). +-compile(export_all). + +-record(history, {network, + disk, + rate, + nodes, + lvlref, + collector_sock, + collector_host, + collector_port}). + +-record(watcher, {nodes, + collector, + probes}). + +%% See: https://www.kernel.org/doc/Documentation/iostats.txt +-record(disk, {read, + read_merged, + read_sectors, + read_wait_ms, + write, + write_merged, + write_sectors, + write_wait_ms, + io_pending, + io_wait_ms, + io_wait_weighted_ms}). + +watch(Nodes, Collector) -> + %% io:format("Loading on ~p~n", [Nodes]), + %% load_modules_on_nodes([?MODULE], Nodes), + %% R = rpc:multicall(Nodes, ?MODULE, start, [self(), 1000, Collector, Nodes, collect]), + %% io:format("RPC: ~p~n", [R]), + spawn(?MODULE, watcher, [self(), Nodes, Collector]), + start(self(), 1000, Collector, Nodes, ping), + ok. 
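watch/2 above is the module's entry point: it takes the node list plus a {Host, Port} collector endpoint, which init/5 below turns into a UDP socket. A hypothetical invocation from a riak_test run, with the node names and address purely illustrative:

    %% Stream stats from a devrel cluster to a collector on UDP 8989.
    Nodes = ['dev1@127.0.0.1', 'dev2@127.0.0.1', 'dev3@127.0.0.1'],
    ok = observer:watch(Nodes, {"127.0.0.1", 8989}).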
+ +watcher(Master, Nodes, Collector) -> + monitor(process, Master), + Probes = [{Node, undefined} || Node <- Nodes], + W = #watcher{nodes=Nodes, + collector=Collector, + probes=Probes}, + watcher_loop(W). + +watcher_loop(W=#watcher{probes=Probes}) -> + Missing = [Node || {Node, undefined} <- Probes], + %% io:format("Missing: ~p~n", [Missing]), + W2 = install_probes(Missing, W), + Probes2 = W2#watcher.probes, + receive + {'DOWN', MRef, process, _, _} -> + case lists:keyfind(MRef, 2, Probes2) of + false -> + %% master died, exit + io:format("watcher exiting~n"), + ok; + {Node, MRef} -> + io:format("Probe exit: ~p/~p~n", [Node, MRef]), + Probes3 = lists:keyreplace(Node, 1, Probes2, {Node, undefined}), + W3 = W2#watcher{probes=Probes3}, + ?MODULE:watcher_loop(W3) + end + after 1000 -> + ?MODULE:watcher_loop(W2) + end. + +install_probes(Nodes, W=#watcher{collector=Collector, nodes=AllNodes, probes=Probes}) -> + %% io:format("Loading on ~p~n", [Nodes]), + load_modules_on_nodes([?MODULE], Nodes), + R = rpc:multicall(Nodes, ?MODULE, start, [self(), 1000, Collector, AllNodes, collect]), + %% io:format("R: ~p~n", [R]), + {Pids, Down} = R, + %% io:format("I: ~p/~p~n", [Pids, Down]), + Probes2 = lists:foldl(fun({Node, Pid}, Acc) -> + if is_pid(Pid) -> + lists:keystore(Node, 1, Acc, {Node, monitor(process, Pid)}); + true -> + Acc + end + end, Probes, Pids), + Probes3 = lists:foldl(fun(Node, Acc) -> + lists:keystore(Node, 1, Acc, {Node, undefined}) + end, Probes2, Down), + %% io:format("P3: ~p~n", [Probes3]), + W#watcher{probes=Probes3}. + +start(Master, Rate, Collector, Nodes, Fun) -> + io:format("In start: ~p~n", [node()]), + Pid = spawn(?MODULE, init, [Master, Rate, Collector, Nodes, Fun]), + {node(), Pid}. + +init(Master, Rate, {Host, Port}, Nodes, Fun) -> + io:format("In init: ~p~n", [node()]), + {ok, Sock} = gen_udp:open(Port), + case application:get_env(riak_kv, storage_backend) of + {ok, riak_kv_eleveldb_backend} -> + LRef = get_leveldb_ref(); + _ -> + LRef = undefined + end, + H = #history{network=undefined, + %% disk=undefined, + disk=[], + rate=Rate div 1000, + lvlref=LRef, + nodes=Nodes, + collector_sock=Sock, + collector_host=Host, + collector_port=Port}, + %% case Fun of + %% collect -> + %% vmstat(Master, H); + %% _ -> + %% ok + %% end, + monitor(process, Master), + loop(Fun, Rate, H). + +loop(Fun, Rate, H) -> + %% io:format("loop: ~p~n", [node()]), + NewH = ?MODULE:Fun(H), + receive + {'DOWN', _, process, _, _} -> + io:format("shutting: ~p~n", [node()]), + ok + after Rate -> + ?MODULE:loop(Fun, Rate, NewH) + end. + +ping(H=#history{nodes=Nodes}) -> + TS = timestamp(), + XNodes = lists:zip(lists:seq(1, length(Nodes)), Nodes), + pmap(fun({X,Node}) -> + case net_adm:ping(Node) of + pang -> + notify_down(TS, X, Node, H), + ok; + pong -> + case rpc:call(Node, riak_core_node_watcher, services, [Node]) of + L when is_list(L) -> + case lists:member(riak_kv, L) of + true -> + ok; + false -> + notify_down(TS, X, Node, H) + end; + _ -> + notify_down(TS, X, Node, H) + end; + _ -> + ok + end + end, XNodes), + H. + +notify_down(TS, X, Node, H) -> + %% emit_stat(Stat, TS, Value, H) -> + NodeBin = atom_to_binary(Node, utf8), + Metric = <<"offline_nodes/", NodeBin/binary>>, + emit_stat2(Metric, TS, X, H). 
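Note the shape of the down-node signal emitted above: ping/1 zips each node with its 1-based position in the node list, and notify_down/4 reuses that position as the metric value. A worked example under that reading, with a hypothetical node name:

    %% If 'dev3@127.0.0.1' is the third entry in Nodes and fails the ping
    %% (or is not serving riak_kv), the collector receives the metric
    %% <<"offline_nodes/dev3@127.0.0.1">> with value 3 at the current
    %% timestamp.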
+ +collect(H0) -> + H = try report_leveldb(H0) catch _:_ -> H0 end, + catch report_queues(H), + catch report_processes(H), + H2 = try report_network(H) catch _:_ -> H end, + %% H3 = report_disk2(H2), + %% H3 = report_disk2([{<<"dm-0">>, "dm-0"}, + %% {<<"dm-1">>, "dm-1"}], H2), + H3 = report_disk2([{<<"xvdb">>, "xvdb"}, + {<<"xvdc">>, "xvdc"}, + {<<"raid0">>, "md127"}], H2), + report_vmstat(H2), + report_memory(H2), + %% H3 = try report_disk2(H2) catch _:_ -> H2 end, + catch report_stats(riak_core_stat, [dropped_vnode_requests_total], H3), + catch report_stats(riak_kv_stat, + [node_gets, + node_puts, + + vnode_gets, + vnode_puts, + + node_get_fsm_active, + node_get_fsm_rejected, + node_get_fsm_in_rate, + node_get_fsm_out_rate, + + node_put_fsm_active, + node_put_fsm_rejected, + node_put_fsm_in_rate, + node_put_fsm_out_rate + ], H3), + + catch report_stats(riak_kv_stat, + [riak_kv_stat, + node_get_fsm_time_median, + node_get_fsm_time_95, + node_get_fsm_time_100, + + node_put_fsm_time_median, + node_put_fsm_time_95, + node_put_fsm_time_100 + ], H3, 1000), + + %% catch print_down(Nodes), + H3. + +report_queues(H) -> + Max = lists:max([Len || Pid <- processes(), + {message_queue_len, Len} <- [process_info(Pid, message_queue_len)]]), + TS = timestamp(), + emit_stat(<<"message_queue_max">>, TS, Max, H), + ok. + +%% report_queues(Threshold) -> +%% VNodes = riak_core_vnode_manager:all_vnodes(), +%% VNodes2 = [{Pid, {Mod,Idx}} || {Mod,Idx,Pid} <- VNodes], +%% VNodeMap = dict:from_list(VNodes2), +%% Queues = message_queues(processes(), Threshold, VNodeMap, []), +%% Queues2 = lists:keysort(1, filter(Queues, 2, [])), +%% ok. + +report_processes(H) -> + Procs = erlang:system_info(process_count), + %% Limit = erlang:system_info(process_limit), + %% Ratio = Procs * 100 div Limit, + TS = timestamp(), + emit_stat(<<"erlang_processes">>, TS, Procs, H), + ok. + +%% report_processes(Threshold) -> +%% Procs = erlang:system_info(process_count), +%% Limit = erlang:system_info(process_limit), +%% Ratio = Procs * 100 div Limit, +%% case Ratio > Threshold of +%% true -> +%% {Procs, Ratio}; +%% false -> +%% none +%% end. + +report_network(H=#history{network=LastStats, rate=Rate}) -> + {RX, TX} = get_network(), + case LastStats of + undefined -> + ok; + {LastRX, LastTX} -> + RXRate = net_rate(LastRX, RX) div Rate, + TXRate = net_rate(LastTX, TX) div Rate, + TS = timestamp(), + emit_stat(<<"net_rx">>, TS, RXRate, H), + emit_stat(<<"net_tx">>, TS, TXRate, H) + end, + H#history{network={RX, TX}}. + +report_disk2(Disks, H=#history{disk=DiskStats}) -> + NewStats = + lists:foldl(fun({Name, Dev}, Acc) -> + LastStats = case orddict:find(Dev, DiskStats) of + error -> + undefined; + {ok, LS} -> + LS + end, + Stats = report_disk2(Name, Dev, LastStats, H), + orddict:store(Dev, Stats, Acc) + end, DiskStats, Disks), + H#history{disk=NewStats}. 
+ +report_disk2(Name, Dev, LastStats, H=#history{rate=Rate}) -> + Stats = get_disk2(Dev), + case LastStats of + undefined -> + ok; + _ -> + ReadRate = disk_rate(#disk.read_sectors, LastStats, Stats) div Rate, + WriteRate = disk_rate(#disk.write_sectors, LastStats, Stats) div Rate, + {AwaitR, AwaitW} = disk_await(LastStats, Stats), + Svctime = disk_svctime(LastStats, Stats), + QueueLen = disk_qlength(LastStats, Stats), + Util = disk_util(LastStats, Stats), + TS = timestamp(), + emit_stat(<<"disk_read (", Name/binary, ")">>, TS, ReadRate, H), + emit_stat(<<"disk_write (", Name/binary, ")">>, TS, WriteRate, H), + emit_stat(<<"disk_await_r (", Name/binary, ")">>, TS, AwaitR, H), + emit_stat(<<"disk_await_w (", Name/binary, ")">>, TS, AwaitW, H), + emit_stat(<<"disk_svctime (", Name/binary, ")">>, TS, Svctime, H), + emit_stat(<<"disk_queue_size (", Name/binary, ")">>, TS, QueueLen, H), + emit_stat(<<"disk_utilization (", Name/binary, ")">>, TS, Util, H) + end, + Stats. + +report_disk2(H=#history{disk=LastStats, rate=Rate}) -> + Stats = get_disk2(), + case LastStats of + undefined -> + ok; + _ -> + ReadRate = disk_rate(#disk.read_sectors, LastStats, Stats) div Rate, + WriteRate = disk_rate(#disk.write_sectors, LastStats, Stats) div Rate, + {AwaitR, AwaitW} = disk_await(LastStats, Stats), + Svctime = disk_svctime(LastStats, Stats), + QueueLen = disk_qlength(LastStats, Stats), + Util = disk_util(LastStats, Stats), + TS = timestamp(), + emit_stat(<<"disk_read">>, TS, ReadRate, H), + emit_stat(<<"disk_write">>, TS, WriteRate, H), + emit_stat(<<"disk_await_r">>, TS, AwaitR, H), + emit_stat(<<"disk_await_w">>, TS, AwaitW, H), + emit_stat(<<"disk_svctime">>, TS, Svctime, H), + emit_stat(<<"disk_queue_size">>, TS, QueueLen, H), + emit_stat(<<"disk_utilization">>, TS, Util, H) + end, + H#history{disk=Stats}. + +%% report_disk(H=#history{disk=LastStats, rate=Rate}) -> +%% {Read, Write} = get_disk(), +%% case LastStats of +%% undefined -> +%% ok; +%% {LastRead, LastWrite} -> +%% ReadRate = disk_rate(LastRead, Read) div Rate, +%% WriteRate = disk_rate(LastWrite, Write) div Rate, +%% TS = timestamp(), +%% emit_stat(<<"disk_read">>, TS, ReadRate, H), +%% emit_stat(<<"disk_write">>, TS, WriteRate, H) +%% end, +%% H#history{disk={Read, Write}}. + +report_memory(H) -> + Stats = get_memory(), + Util = memory_util(Stats), + Dirty = memory_dirty(Stats), + Writeback = memory_writeback(Stats), + TS = timestamp(), + emit_stat(<<"memory_utilization">>, TS, Util, H), + emit_stat(<<"memory_page_dirty">>, TS, Dirty, H), + emit_stat(<<"memory_page_writeback">>, TS, Writeback, H), + ok. + +report_leveldb(H=#history{lvlref=undefined}) -> + H; +report_leveldb(H=#history{lvlref=LRef}) -> + try case eleveldb:status(LRef, <<"leveldb.ThrottleGauge">>) of + {ok, Result} -> + Value = list_to_integer(binary_to_list(Result)), + TS = timestamp(), + emit_stat(<<"leveldb_write_throttle">>, TS, Value, H), + H; + _ -> + H + end + catch + _:_ -> + LRef2 = get_leveldb_ref(), + H#history{lvlref=LRef2} + end. + +%% print_down(Nodes) -> +%% Down = [Node || Node <- Nodes, +%% net_adm:ping(Node) =:= pang], +%% case Down of +%% [] -> +%% ok; +%% _ -> +%% io:format("Offline nodes:~n ~p~n", [Down]) +%% end. + +net_rate(Bytes1, Bytes2) -> + (Bytes2 - Bytes1) div 1024. + +disk_rate(I, Stats1, Stats2) -> + disk_rate(element(I, Stats1), element(I, Stats2)). + +disk_rate(Sectors1, Sectors2) -> + %% 512-byte sectors + (Sectors2 - Sectors1) div 2. 
+ +disk_await(S1, S2) -> + NumR = erlang:max(S2#disk.read - S1#disk.read, 1), + NumW = erlang:max(S2#disk.write - S1#disk.write, 1), + AwaitR = (S2#disk.read_wait_ms - S1#disk.read_wait_ms) div NumR, + AwaitW = (S2#disk.write_wait_ms - S1#disk.write_wait_ms) div NumW, + {AwaitR, AwaitW}. + +disk_svctime(S1, S2) -> + NumR = S2#disk.read - S1#disk.read, + NumW = S2#disk.write - S1#disk.write, + NumIO = erlang:max(NumR + NumW, 1), + Wait = S2#disk.io_wait_ms - S1#disk.io_wait_ms, + Wait div NumIO. + +disk_util(S1, S2) -> + Wait = S2#disk.io_wait_ms - S1#disk.io_wait_ms, + Wait * 100 div 1000. %% Really should be div Rate + +disk_qlength(S1, S2) -> + (S2#disk.io_wait_weighted_ms - S1#disk.io_wait_weighted_ms) div 1000. + +filter(L, Pos, Val) -> + [T || T <- L, + element(Pos, T) /= Val]. + +message_queues([], _Threshold, _VNodeMap, Queues) -> + lists:reverse(lists:keysort(1, Queues)); +message_queues([Pid|Pids], Threshold, VNodeMap, Queues) -> + case process_info(Pid, [message_queue_len, registered_name]) of + [{message_queue_len, Len}, + {registered_name, RegName}] when Len > Threshold -> + Entry = {Len, pid_name(Pid, RegName, VNodeMap)}, + message_queues(Pids, Threshold, VNodeMap, [Entry|Queues]); + _ -> + message_queues(Pids, Threshold, VNodeMap, Queues) + end. + +get_network() -> + %% {ok, RX} = file:read_file("/sys/class/net/eth0/statistics/rx_bytes"), + %% {ok, TX} = file:read_file("/sys/class/net/eth0/statistics/tx_bytes"), + {ok, RX} = file:read_file("/sys/class/net/eth1/statistics/rx_bytes"), + {ok, TX} = file:read_file("/sys/class/net/eth1/statistics/tx_bytes"), + {to_integer(RX), to_integer(TX)}. + +get_disk2() -> + {ok, Bin} = file:read_file("/sys/block/md127/stat"), + %% {ok, Bin} = file:read_file("/sys/block/dm-0/stat"), + Stats = parse_disk_stats(Bin), + Stats. + +get_disk2(Dev) -> + {ok, Bin} = file:read_file("/sys/block/" ++ Dev ++ "/stat"), + Stats = parse_disk_stats(Bin), + Stats. + +%% get_disk() -> +%% {ok, Bin} = file:read_file("/sys/block/md127/stat"), +%% Stats = parse_disk_stats(Bin), +%% {Stats#disk.read_sectors, Stats#disk.write_sectors}. + +memory_util(Mem) -> + Stat = fun(Key) -> + list_to_integer(element(2, lists:keyfind(Key, 1, Mem))) + end, + Total = Stat("MemTotal:"), + Free = Stat("MemFree:"), + Buffers = Stat("Buffers:"), + Cached = Stat("Cached:"), + (Total - Free - Buffers - Cached) * 100 div Total. + +memory_dirty(Mem) -> + {_, Dirty} = lists:keyfind("Dirty:", 1, Mem), + list_to_integer(Dirty). + +memory_writeback(Mem) -> + {_, Writeback} = lists:keyfind("Writeback:", 1, Mem), + list_to_integer(Writeback). + +get_memory() -> + S = os:cmd("cat /proc/meminfo"), + [case string:tokens(L," ") of + [Key, Value, _] -> + {Key, Value}; + [Key, Value] -> + {Key, Value}; + _ -> + ignore + end || L <- string:tokens(S, "\n")]. + +parse_disk_stats(Bin) -> + [Line|_] = binary:split(Bin, <<"\n">>), + Fields = string:tokens(binary_to_list(Line), " "), + Fields2 = [list_to_integer(Field) || Field <- Fields], + list_to_tuple([disk|Fields2]). + +to_integer(Bin) -> + [Line|_] = binary:split(Bin, <<"\n">>), + list_to_integer(binary_to_list(Line)). + +pid_name(Pid, [], VNodeMap) -> + case dict:find(Pid, VNodeMap) of + {ok, VNode} -> + VNode; + _ -> + Pid + end; +pid_name(_Pid, RegName, _VNodeMap) -> + RegName. + +report_stats(Mod, Keys, H) -> + report_stats(Mod, Keys, H, 1). 
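To make the sector arithmetic concrete, a synthetic worked example; the numbers are invented, and the field order follows the #disk record (the kernel iostats layout linked where the record is defined):

    %% A /sys/block/<dev>/stat line such as
    %%   "1000 5 2048 30 500 2 4096 80 0 90 110"
    %% parses via parse_disk_stats/1 into
    %%   #disk{read=1000, read_merged=5, read_sectors=2048, read_wait_ms=30,
    %%         write=500, write_merged=2, write_sectors=4096, write_wait_ms=80,
    %%         io_pending=0, io_wait_ms=90, io_wait_weighted_ms=110}
    %% and, with 512-byte sectors, a delta of 2048 read sectors between two
    %% samples is 2048 div 2 = 1024 KiB transferred over the interval.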
+ +report_stats(Mod, Keys, H, Scale) -> + Stats = Mod:get_stats(), + TS = timestamp(), + [case lists:keyfind(Key, 1, Stats) of + false -> + ok; + {_, Value} -> + emit_stat(atom_to_binary(Key, utf8), TS, Value / Scale, H) + end || Key <- Keys], + ok. + +%%%=================================================================== +%%% Utility functions +%%%=================================================================== +pmap(F, L) -> + Parent = self(), + lists:mapfoldl( + fun(X, N) -> + Pid = spawn(fun() -> + Parent ! {pmap, N, F(X)} + end), + {Pid, N+1} + end, 0, L), + L2 = [receive {pmap, N, R} -> {N,R} end || _ <- L], + [R || {_, R} <- lists:keysort(1, L2)]. + +load_modules_on_nodes(Modules, Nodes) -> + [case code:get_object_code(Module) of + {Module, Bin, File} -> + %% rpc:multicall(Nodes, code, purge, [Module]), + rpc:multicall(Nodes, code, load_binary, [Module, File, Bin]); + error -> + error({no_object_code, Module}) + end || Module <- Modules]. + +get_leveldb_ref() -> + VNodes = riak_core_vnode_manager:all_vnodes(riak_kv_vnode), + {_, _, Pid} = hd(VNodes), + State = get_state(Pid), + ModState = element(4, State), + case element(3,ModState) of + riak_kv_eleveldb_backend -> + LvlState = element(4, ModState), + element(2, LvlState); + _ -> + undefined + end. + +get_state(Pid) -> + {status, Pid, _Mod, Status} = sys:get_status(Pid), + Status2 = lists:flatten(Status), + Status3 = [L || {data, L} <- Status2], + Status4 = lists:flatten(Status3), + State = proplists:get_value("StateData", Status4), + State. + +%% print_queues(Nodes) -> +%% pmap(remote(get_queues), Nodes). + +%% remote(F) -> +%% fun(Node) -> +%% rpc:call(Node, ?MODULE, F, []) +%% end. + +timestamp() -> + timestamp(os:timestamp()). + +timestamp({Mega, Secs, Micro}) -> + Mega*1000*1000*1000 + Secs * 1000 + (Micro div 1000). + +emit_stat(Stat, TS, Value, H) -> + NodeBin = atom_to_binary(node(), utf8), + %% Metric = <<NodeBin/binary, "/", Stat/binary>>, + Metric = <<Stat/binary, "/", NodeBin/binary>>, + emit_stat2(Metric, TS, Value, H). + +emit_stat2(Metric, TS, Value, #history{collector_sock=Sock, + collector_host=Host, + collector_port=Port}) -> + if is_integer(Value) -> + Packet = <<"=", TS:64/integer, Value:64/integer, Metric/binary>>, + %% io:format("Sending: ~p~n", [{TS, Value, Metric}]), + gen_udp:send(Sock, Host, Port, Packet); + is_float(Value) -> + %% IValue = erlang:trunc(Value), + %% Packet = <<"=", TS:64/integer, IValue:64/integer, Metric/binary>>, + %% %% io:format("Sending: ~p~n", [{TS, Value, Metric}]), + Packet = <<"#", (term_to_binary({Value, Metric, TS}))/binary>>, + gen_udp:send(Sock, Host, Port, Packet); + true -> + io:format("NT: ~p~n", [Value]) + end, + ok. + + -record(vmstat, {procs_r, + procs_b, + mem_swpd, + mem_free, + mem_buff, + mem_cache, + swap_si, + swap_so, + io_bi, + io_bo, + system_in, + system_cs, + cpu_us, + cpu_sy, + cpu_id, + cpu_wa}). + +report_vmstat(H) -> + Result = os:cmd("vmstat 1 2"), + Lines = string:tokens(Result, "\n"), + Last = hd(lists:reverse(Lines)), + case parse_vmstat(Last) of + undefined -> + ok; + VM = #vmstat{} -> + TS = timestamp(), + emit_stat(<<"cpu_utilization">>, TS, 100 - VM#vmstat.cpu_id, H), + emit_stat(<<"cpu_iowait">>, TS, VM#vmstat.cpu_wa, H), + emit_stat(<<"memory_swap_in">>, TS, VM#vmstat.swap_si, H), + emit_stat(<<"memory_swap_out">>, TS, VM#vmstat.swap_so, H) + end, + ok. + +vmstat(Master, H) -> + spawn(fun() -> + monitor(process, Master), + Port = open_port({spawn, "vmstat 1"}, [{line,4096}, out]), + vmstat_loop(Port, H) + end).
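Both wire formats produced by emit_stat2/4 above are easy to invert on the receiving side. A sketch of a matching collector-side decoder, assuming the collector reads the same UDP datagrams; decode/1 is an illustrative name, not part of this module:

    %% Integer samples arrive as <<"=", TS:64, Value:64, Metric/binary>>;
    %% float samples arrive as "#" followed by term_to_binary({Value, Metric, TS}).
    decode(<<"=", TS:64/integer, Value:64/integer, Metric/binary>>) ->
        {Metric, TS, Value};
    decode(<<"#", Rest/binary>>) ->
        {Value, Metric, TS} = binary_to_term(Rest),
        {Metric, TS, Value}.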
+ +parse_vmstat(Line) -> + Values = string:tokens(Line, " "), + try + Fields = [list_to_integer(Field) || Field <- Values], + list_to_tuple([vmstat|Fields]) + catch + _:_ -> + undefined + end. + +vmstat_loop(Port, H) -> + receive + {'DOWN', _, process, _, _} -> + ok; + {Port, {data, Line}} -> + case parse_vmstat(Line) of + undefined -> + ok; + VM = #vmstat{} -> + TS = timestamp(), + emit_stat(<<"cpu_utilization">>, TS, 100 - VM#vmstat.cpu_id, H), + emit_stat(<<"cpu_iowait">>, TS, VM#vmstat.cpu_wa, H), + emit_stat(<<"memory_swap_in">>, TS, VM#vmstat.swap_si, H), + emit_stat(<<"memory_swap_out">>, TS, VM#vmstat.swap_so, H) + end, + vmstat_loop(Port, H) + end. diff --git a/src/riak_test.erl b/src/riak_test.erl index 71deafe30..6070075c4 100644 --- a/src/riak_test.erl +++ b/src/riak_test.erl @@ -22,6 +22,7 @@ -module(riak_test). -export([main/1]). -export([behaviour_info/1]). +-export([add_deps/1]). %% Define the riak_test behavior behaviour_info(callbacks) -> @@ -46,6 +47,7 @@ cli_options() -> {outdir, $o, "outdir", string, "output directory"}, {backend, $b, "backend", atom, "backend to test [memory | bitcask | eleveldb]"}, {upgrade_version, $u, "upgrade", atom, "which version to upgrade from [ previous | legacy ]"}, + {keep, undefined, "keep", boolean, "do not teardown cluster"}, {report, $r, "report", string, "you're reporting an official test run, provide platform info (e.g. ubuntu-1204-64)\nUse 'config' if you want to pull from ~/.riak_test.config"} ]. @@ -142,6 +144,13 @@ main(Args) -> TestResults = lists:filter(fun results_filter/1, [ run_test(Test, Outdir, TestMetaData, Report, HarnessArgs, length(Tests)) || {Test, TestMetaData} <- Tests]), print_summary(TestResults, Verbose), + Teardown = not proplists:get_value(keep, ParsedArgs, false), + maybe_teardown(Teardown, TestResults), + ok. + +maybe_teardown(false, _TestResults) -> + lager:info("Keeping cluster running as requested"); +maybe_teardown(true, TestResults) -> case {length(TestResults), proplists:get_value(status, hd(TestResults))} of {1, fail} -> so_kill_riak_maybe(); diff --git a/src/rtssh.erl b/src/rtssh.erl index e591fffff..74ffbd8e1 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -11,11 +11,24 @@ get_deps() -> setup_harness(_Test, _Args) -> Path = relpath(root), Hosts = load_hosts(), + Bench = load_bench(), rt:set_config(rt_hostnames, Hosts), %% [io:format("R: ~p~n", [wildcard(Host, "/tmp/*")]) || Host <- Hosts], + case rt:config(rtssh_bench) of + undefined -> + ok; + BenchPath -> + code:add_path(BenchPath ++ "/ebin"), + riak_test:add_deps(BenchPath ++ "/deps") + end, + + sync_bench(Bench), + sync_proxy(Bench), + %% Stop all discoverable nodes, not just nodes we'll be using for this test. stop_all(Hosts), + stop_all_bench(Bench), %% Reset nodes to base state lager:info("Resetting nodes to fresh state"), @@ -72,24 +85,28 @@ deploy_nodes(NodeConfig, Hosts) -> update_app_config(Node, Config) end, lists:zip(Nodes, Configs)), + timer:sleep(500), rt:pmap(fun(Node) -> Host = get_host(Node), + IP = get_ip(Host), Config = [{riak_api, [{pb, fun([{_, Port}]) -> - [{Host, Port}] + [{IP, Port}] end}, {pb_ip, fun(_) -> - Host + IP end}]}, {riak_core, [{http, fun([{_, Port}]) -> - [{Host, Port}] + [{IP, Port}] end}]}], update_app_config(Node, Config) end, Nodes), + timer:sleep(500), rt:pmap(fun(Node) -> update_vm_args(Node, [{"-name", Node}]) end, Nodes), + timer:sleep(500), rt:pmap(fun start/1, Nodes), @@ -149,6 +166,13 @@ load_hosts() -> rt:set_config(rtssh_aliases, Aliases), Hosts. 
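Back in the riak_test.erl hunk above, --keep is a plain getopt boolean, so the whole teardown decision reduces to one proplist lookup. A hypothetical run, with the config and test names invented:

    %% ./riak_test -c rtdev -t verify_build_cluster --keep
    %% getopt yields {keep, true}, so Teardown = not true = false, and
    %% maybe_teardown(false, _) only logs "Keeping cluster running as
    %% requested", leaving the nodes up for post-mortem inspection.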
+load_bench() -> + {HostsIn, _Aliases} = read_hosts_file("bench"), + Hosts = lists:sort(HostsIn), + rt:set_config(rtssh_bench_hosts, Hosts), + io:format("Bench: ~p~n", [Hosts]), + Hosts. + read_hosts_file(File) -> case file:consult(File) of {ok, Terms} -> @@ -167,6 +191,10 @@ read_hosts_file(File) -> get_host(Node) -> orddict:fetch(Node, rt:config(rt_hosts)). +get_ip(Host) -> + {ok, IP} = inet:getaddr(Host, inet), + string:join([integer_to_list(X) || X <- tuple_to_list(IP)], "."). + %%%=================================================================== %%% Remote file operations %%%=================================================================== @@ -392,9 +420,129 @@ stop_all(Host, DevPath) -> end, ok. +sync_bench(Hosts) -> + case rt:config(rtssh_bench) of + undefined -> + ok; + Path -> + Paths = filename:split(Path), + Root = filename:join(lists:sublist(Paths, length(Paths)-1)), + rt:pmap(fun(Host) -> + Cmd = "rsync -tr " ++ Path ++ " " ++ Host ++ ":" ++ Root, + Result = cmd(Cmd), + lager:info("Syncing bench :: ~p :: ~p :: ~p~n", [Host, Cmd, Result]) + end, Hosts) + end. + +sync_proxy(Hosts) -> + case rt:config(rtssh_proxy) of + undefined -> + ok; + Path -> + Paths = filename:split(Path), + Root = filename:join(lists:sublist(Paths, length(Paths)-1)), + rt:pmap(fun(Host) -> + Cmd = "rsync -tr " ++ Path ++ " " ++ Host ++ ":" ++ Root, + Result = cmd(Cmd), + lager:info("Syncing proxy :: ~p :: ~p :: ~p~n", [Host, Cmd, Result]) + end, Hosts) + end. + +stop_all_bench(Hosts) -> + case rt:config(rtssh_bench) of + undefined -> + ok; + Path -> + rt:pmap(fun(Host) -> + Cmd = "cd " ++ Path ++ " && bash ./bb.sh stop", + %% Result = ssh_cmd(Host, Cmd), + %% lager:info("Stopping basho_bench... ~s :: ~s ~~ ~p.", + %% [Host, Cmd, Result]) + {_, Result} = ssh_cmd(Host, Cmd), + [Output | _Tail] = string:tokens(Result, "\n"), + Status = case Output of + "ok" -> "ok"; + _ -> "wasn't running" + end, + lager:info("Stopping basho_bench... ~s :: ~s ~~ ~s.", + [Host, Cmd, Status]) + end, Hosts) + end. + +deploy_bench() -> + deploy_bench(rt:config(rtssh_bench_hosts)). + +deploy_bench(Hosts) -> + case rt:config(rtssh_bench) of + undefined -> + ok; + Path -> + rt:pmap(fun(Host) -> + Cookie = "riak", + This = lists:flatten(io_lib:format("~s", [node()])), + Cmd = + "cd " ++ Path ++ " && bash ./bb.sh" + " -N bench@" ++ Host ++ + " -C " ++ Cookie ++ + " -J " ++ This ++ + " -D", + spawn_ssh_cmd(Host, Cmd), + lager:info("Starting basho_bench... ~s :: ~s", + [Host, Cmd]) + end, Hosts), + [rt:wait_until_pingable(list_to_atom("bench@" ++ Host)) || Host <- Hosts], + timer:sleep(1000), + ok + end. + +deploy_proxy(Seed) -> + deploy_proxy(Seed, rt:config(rtssh_bench_hosts)). + +deploy_proxy(Seed, Hosts) -> + SeedStr = atom_to_list(Seed), + case rt:config(rtssh_proxy) of + undefined -> + ok; + Path -> + rt:pmap(fun(Host) -> + Cmd = "cd " ++ Path ++ " && bash go.sh \"" ++ SeedStr ++ "\"", + spawn_ssh_cmd(Host, Cmd), + lager:info("Starting riak_proxycfg... ~s :: ~s", + [Host, Cmd]) + end, Hosts), + timer:sleep(2000), + ok + end. + teardown() -> stop_all(rt:config(rt_hostnames)). +%%%=================================================================== +%%% Collector stuff +%%%=================================================================== + +collector_group_start(Name) -> + collector_call({group_start, timestamp(), Name}). + +collector_group_end() -> + collector_call({group_end, timestamp()}). + +collector_bench_start(Name, Config, Desc) -> + collector_call({bench_start, timestamp(), Name, Config, Desc}). 
+ +collector_bench_end() -> + collector_call({bench_end, timestamp()}). + +collector_call(Msg) -> + {Node, _, _} = rt:config(rtssh_collector), + gen_server:call({collector, Node}, Msg, 30000). + +timestamp() -> + timestamp(os:timestamp()). + +timestamp({Mega, Secs, Micro}) -> + Mega*1000*1000*1000 + Secs * 1000 + (Micro div 1000). + %%%=================================================================== %%% Utilities %%%=================================================================== From 6974ec4a5616fbb73658c98de9ef584c3832019a Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 3 Feb 2014 19:27:52 -0800 Subject: [PATCH 078/139] Move riak_test to riak_test_escript to match master --- rebar.config | 2 +- src/riak_test.erl | 331 ----------------------------------- src/riak_test_escript.erl | 352 ++++++++++++++++++++++++++++++++++++++ src/rtssh.erl | 2 +- 4 files changed, 354 insertions(+), 333 deletions(-) create mode 100644 src/riak_test_escript.erl diff --git a/rebar.config b/rebar.config index 43e077720..ad94b4a48 100644 --- a/rebar.config +++ b/rebar.config @@ -18,7 +18,7 @@ ]}. {escript_incl_apps, [lager, getopt, riakhttpc, riakc, ibrowse, mochiweb, kvc]}. -{escript_emu_args, "%%! +K true +P 10000 -env ERL_MAX_PORTS 10000\n"}. +{escript_emu_args, "%%! -escript main riak_test_escript +K true +P 10000 -env ERL_MAX_PORTS 10000\n"}. {plugin_dir, "src"}. {plugins, [rebar_riak_test_plugin]}. {riak_test, [ diff --git a/src/riak_test.erl b/src/riak_test.erl index 6070075c4..6f1e57ec5 100644 --- a/src/riak_test.erl +++ b/src/riak_test.erl @@ -18,342 +18,11 @@ %% %% ------------------------------------------------------------------- -%% @private -module(riak_test). --export([main/1]). -export([behaviour_info/1]). --export([add_deps/1]). %% Define the riak_test behavior behaviour_info(callbacks) -> [{confirm,0}]; behaviour_info(_Other) -> undefined. - -add_deps(Path) -> - {ok, Deps} = file:list_dir(Path), - [code:add_path(lists:append([Path, "/", Dep, "/ebin"])) || Dep <- Deps], - ok. - -cli_options() -> -%% Option Name, Short Code, Long Code, Argument Spec, Help Message -[ - {help, $h, "help", undefined, "Print this usage page"}, - {config, $c, "conf", string, "specifies the project configuration"}, - {tests, $t, "tests", string, "specifies which tests to run"}, - {suites, $s, "suites", string, "which suites to run"}, - {dir, $d, "dir", string, "run all tests in the specified directory"}, - {verbose, $v, "verbose", undefined, "verbose output"}, - {outdir, $o, "outdir", string, "output directory"}, - {backend, $b, "backend", atom, "backend to test [memory | bitcask | eleveldb]"}, - {upgrade_version, $u, "upgrade", atom, "which version to upgrade from [ previous | legacy ]"}, - {keep, undefined, "keep", boolean, "do not teardown cluster"}, - {report, $r, "report", string, "you're reporting an official test run, provide platform info (e.g. ubuntu-1204-64)\nUse 'config' if you want to pull from ~/.riak_test.config"} -]. - -print_help() -> - getopt:usage(cli_options(), - escript:script_name()), - halt(0). - -run_help([]) -> true; -run_help(ParsedArgs) -> - lists:member(help, ParsedArgs). 
- -main(Args) -> - register(riak_test, self()), - {ParsedArgs, HarnessArgs} = case getopt:parse(cli_options(), Args) of - {ok, {P, H}} -> {P, H}; - _ -> print_help() - end, - - case run_help(ParsedArgs) of - true -> print_help(); - _ -> ok - end, - - %% ibrowse - application:load(ibrowse), - application:start(ibrowse), - %% Start Lager - application:load(lager), - Config = proplists:get_value(config, ParsedArgs), - - %% Loads application defaults - application:load(riak_test), - - %% Loads from ~/.riak_test.config - rt:load_config(Config), - - %% Ensure existance of scratch_dir - case file:make_dir(rt:config(rt_scratch_dir)) of - ok -> great; - {eexist, _} -> great; - {ErrorType, ErrorReason} -> lager:error("Could not create scratch dir, {~p, ~p}", [ErrorType, ErrorReason]) - end, - - %% Fileoutput - Outdir = proplists:get_value(outdir, ParsedArgs), - ConsoleLagerLevel = case Outdir of - undefined -> rt:config(lager_level, info); - _ -> - filelib:ensure_dir(Outdir), - notice - end, - - application:set_env(lager, handlers, [{lager_console_backend, ConsoleLagerLevel}]), - lager:start(), - - %% Report - Report = case proplists:get_value(report, ParsedArgs, undefined) of - undefined -> undefined; - "config" -> rt:config(platform, undefined); - R -> R - end, - - Verbose = proplists:is_defined(verbose, ParsedArgs), - - Suites = proplists:get_all_values(suites, ParsedArgs), - case Suites of - [] -> ok; - _ -> io:format("Suites are not currently supported.") - end, - - CommandLineTests = parse_command_line_tests(ParsedArgs), - Tests = which_tests_to_run(Report, CommandLineTests), - - case Tests of - [] -> - lager:warning("No tests are scheduled to run"), - init:stop(1); - _ -> keep_on_keepin_on - end, - - io:format("Tests to run: ~p~n", [Tests]), - %% Two hard-coded deps... - add_deps(rt:get_deps()), - add_deps("deps"), - - [add_deps(Dep) || Dep <- rt:config(rt_deps, [])], - ENode = rt:config(rt_nodename, 'riak_test@127.0.0.1'), - Cookie = rt:config(rt_cookie, riak), - [] = os:cmd("epmd -daemon"), - net_kernel:start([ENode]), - erlang:set_cookie(node(), Cookie), - - TestResults = lists:filter(fun results_filter/1, [ run_test(Test, Outdir, TestMetaData, Report, HarnessArgs, length(Tests)) || {Test, TestMetaData} <- Tests]), - print_summary(TestResults, Verbose), - - Teardown = not proplists:get_value(keep, ParsedArgs, false), - maybe_teardown(Teardown, TestResults), - ok. - -maybe_teardown(false, _TestResults) -> - lager:info("Keeping cluster running as requested"); -maybe_teardown(true, TestResults) -> - case {length(TestResults), proplists:get_value(status, hd(TestResults))} of - {1, fail} -> - so_kill_riak_maybe(); - _ -> - lager:info("Multiple tests run or no failure"), - rt:teardown() - end, - ok. 
- -parse_command_line_tests(ParsedArgs) -> - Backends = case proplists:get_all_values(backend, ParsedArgs) of - [] -> [undefined]; - Other -> Other - end, - Upgrades = case proplists:get_all_values(upgrade_version, ParsedArgs) of - [] -> [undefined]; - UpgradeList -> UpgradeList - end, - %% Parse Command Line Tests - {CodePaths, SpecificTests} = - lists:foldl(fun extract_test_names/2, - {[], []}, - proplists:get_all_values(tests, ParsedArgs)), - [code:add_patha(CodePath) || CodePath <- CodePaths, - CodePath /= "."], - Dirs = proplists:get_all_values(dir, ParsedArgs), - DirTests = lists:append([load_tests_in_dir(Dir) || Dir <- Dirs]), - lists:foldl(fun(Test, Tests) -> - [{ - list_to_atom(Test), - [ - {id, -1}, - {platform, <<"local">>}, - {version, rt:get_version()}, - {project, list_to_binary(rt:config(rt_project, "undefined"))} - ] ++ - [ {backend, Backend} || Backend =/= undefined ] ++ - [ {upgrade_version, Upgrade} || Upgrade =/= undefined ]} - || Backend <- Backends, - Upgrade <- Upgrades ] ++ Tests - end, [], lists:usort(DirTests ++ SpecificTests)). - -extract_test_names(Test, {CodePaths, TestNames}) -> - {[filename:dirname(Test) | CodePaths], - [filename:rootname(filename:basename(Test)) | TestNames]}. - -which_tests_to_run(undefined, CommandLineTests) -> - {Tests, NonTests} = - lists:partition(fun is_runnable_test/1, CommandLineTests), - lager:info("These modules are not runnable tests: ~p", - [[NTMod || {NTMod, _} <- NonTests]]), - Tests; -which_tests_to_run(Platform, []) -> giddyup:get_suite(Platform); -which_tests_to_run(Platform, CommandLineTests) -> - Suite = filter_zip_suite(Platform, CommandLineTests), - {Tests, NonTests} = - lists:partition(fun is_runnable_test/1, - lists:foldr(fun filter_merge_tests/2, [], Suite)), - - lager:info("These modules are not runnable tests: ~p", - [[NTMod || {NTMod, _} <- NonTests]]), - Tests. - -filter_zip_suite(Platform, CommandLineTests) -> - [ {SModule, SMeta, CMeta} || {SModule, SMeta} <- giddyup:get_suite(Platform), - {CModule, CMeta} <- CommandLineTests, - SModule =:= CModule]. - -filter_merge_tests({Module, SMeta, CMeta}, Tests) -> - case filter_merge_meta(SMeta, CMeta, [backend, upgrade_version]) of - false -> - Tests; - Meta -> - [{Module, Meta}|Tests] - end. - -filter_merge_meta(SMeta, _CMeta, []) -> - SMeta; -filter_merge_meta(SMeta, CMeta, [Field|Rest]) -> - case {kvc:value(Field, SMeta, undefined), kvc:value(Field, CMeta, undefined)} of - {X, X} -> - filter_merge_meta(SMeta, CMeta, Rest); - {_, undefined} -> - filter_merge_meta(SMeta, CMeta, Rest); - {undefined, X} -> - filter_merge_meta(lists:keystore(Field, 1, SMeta, {Field, X}), CMeta, Rest); - _ -> - false - end. - -%% Check for api compatibility -is_runnable_test({TestModule, _}) -> - code:ensure_loaded(TestModule), - erlang:function_exported(TestModule, confirm, 0). - -run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> - SingleTestResult = riak_test_runner:confirm(Test, Outdir, TestMetaData), - case NumTests of - 1 -> keep_them_up; - _ -> rt:teardown() - end, - case Report of - undefined -> ok; - _ -> - %% Old Code for concatinating log files for upload to giddyup - %% They're too big now, causing problems which will be solved by - %% GiddyUp's new Artifact feature, comming soon from a Cribbs near you. - - %% The point is, this is here in case we need to turn this back on - %% before artifacts are ready. 
And to remind jd that this is the place - %% to write the artifact client - - %% {log, TestLog} = lists:keyfind(log, 1, SingleTestResult), - %% NodeLogs = cat_node_logs(), - %% EncodedNodeLogs = unicode:characters_to_binary(iolist_to_binary(NodeLogs), - %% latin1, utf8), - %% NewLogs = iolist_to_binary([TestLog, EncodedNodeLogs]), - %% ResultWithNodeLogs = lists:keyreplace(log, 1, SingleTestResult, - %% {log, NewLogs}), - %% giddyup:post_result(ResultWithNodeLogs) - giddyup:post_result(SingleTestResult) - end, - SingleTestResult. - -print_summary(TestResults, Verbose) -> - io:format("~nTest Results:~n"), - - Results = [ - [ atom_to_list(proplists:get_value(test, SingleTestResult)) ++ "-" ++ - backend_list(proplists:get_value(backend, SingleTestResult)), - proplists:get_value(status, SingleTestResult), - proplists:get_value(reason, SingleTestResult)] - || SingleTestResult <- TestResults], - Width = test_name_width(Results), - - Print = fun(Test, Status, Reason) -> - case {Status, Verbose} of - {fail, true} -> io:format("~s: ~s ~p~n", [string:left(Test, Width), Status, Reason]); - _ -> io:format("~s: ~s~n", [string:left(Test, Width), Status]) - end - end, - [ Print(Test, Status, Reason) || [Test, Status, Reason] <- Results], - - PassCount = length(lists:filter(fun(X) -> proplists:get_value(status, X) =:= pass end, TestResults)), - FailCount = length(lists:filter(fun(X) -> proplists:get_value(status, X) =:= fail end, TestResults)), - io:format("---------------------------------------------~n"), - io:format("~w Tests Failed~n", [FailCount]), - io:format("~w Tests Passed~n", [PassCount]), - Percentage = case PassCount == 0 andalso FailCount == 0 of - true -> 0; - false -> (PassCount / (PassCount + FailCount)) * 100 - end, - io:format("That's ~w% for those keeping score~n", [Percentage]), - ok. - -test_name_width(Results) -> - lists:max([ length(X) || [X | _T] <- Results ]). - -backend_list(Backend) when is_atom(Backend) -> - atom_to_list(Backend); -backend_list(Backends) when is_list(Backends) -> - FoldFun = fun(X, []) -> - atom_to_list(X); - (X, Acc) -> - Acc ++ "," ++ atom_to_list(X) - end, - lists:foldl(FoldFun, [], Backends). - -results_filter(Result) -> - case proplists:get_value(status, Result) of - not_a_runnable_test -> - false; - _ -> - true - end. - -load_tests_in_dir(Dir) -> - case filelib:is_dir(Dir) of - true -> - code:add_path(Dir), - lists:sort([ string:substr(Filename, 1, length(Filename) - 5) || Filename <- filelib:wildcard("*.beam", Dir)]); - _ -> io:format("~s is not a dir!~n", [Dir]) - end. - -so_kill_riak_maybe() -> - io:format("~n~nSo, we find ourselves in a tricky situation here. ~n"), - io:format("You've run a single test, and it has failed.~n"), - io:format("Would you like to leave Riak running in order to debug?~n"), - Input = io:get_chars("[Y/n] ", 1), - case Input of - "n" -> rt:teardown(); - "N" -> rt:teardown(); - _ -> - io:format("Leaving Riak Up... "), - rt:whats_up() - end. - -%% cat_node_logs() -> -%% Files = rt:get_node_logs(), -%% Output = io_lib:format("================ Printing node logs and crash dumps ================~n~n", []), -%% cat_node_logs(Files, [Output]). -%% -%% cat_node_logs([], Output) -> Output; -%% cat_node_logs([{Filename, Content}|Rest], Output) -> -%% Log = io_lib:format("================ Log: ~s =====================~n~s~n~n", [Filename, Content]), -%% cat_node_logs(Rest, [Output, Log]). 
diff --git a/src/riak_test_escript.erl b/src/riak_test_escript.erl new file mode 100644 index 000000000..ad566838b --- /dev/null +++ b/src/riak_test_escript.erl @@ -0,0 +1,352 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2012 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @private +-module(riak_test_escript). +-export([main/1]). +-export([add_deps/1]). + +add_deps(Path) -> + {ok, Deps} = file:list_dir(Path), + [code:add_path(lists:append([Path, "/", Dep, "/ebin"])) || Dep <- Deps], + ok. + +cli_options() -> +%% Option Name, Short Code, Long Code, Argument Spec, Help Message +[ + {help, $h, "help", undefined, "Print this usage page"}, + {config, $c, "conf", string, "specifies the project configuration"}, + {tests, $t, "tests", string, "specifies which tests to run"}, + {suites, $s, "suites", string, "which suites to run"}, + {dir, $d, "dir", string, "run all tests in the specified directory"}, + {verbose, $v, "verbose", undefined, "verbose output"}, + {outdir, $o, "outdir", string, "output directory"}, + {backend, $b, "backend", atom, "backend to test [memory | bitcask | eleveldb]"}, + {upgrade_version, $u, "upgrade", atom, "which version to upgrade from [ previous | legacy ]"}, + {keep, undefined, "keep", boolean, "do not teardown cluster"}, + {report, $r, "report", string, "you're reporting an official test run, provide platform info (e.g. ubuntu-1204-64)\nUse 'config' if you want to pull from ~/.riak_test.config"} +]. + +print_help() -> + getopt:usage(cli_options(), + escript:script_name()), + halt(0). + +run_help([]) -> true; +run_help(ParsedArgs) -> + lists:member(help, ParsedArgs). 
+ +main(Args) -> + register(riak_test, self()), + {ParsedArgs, HarnessArgs} = case getopt:parse(cli_options(), Args) of + {ok, {P, H}} -> {P, H}; + _ -> print_help() + end, + + case run_help(ParsedArgs) of + true -> print_help(); + _ -> ok + end, + + %% ibrowse + application:load(ibrowse), + application:start(ibrowse), + %% Start Lager + application:load(lager), + Config = proplists:get_value(config, ParsedArgs), + + %% Loads application defaults + application:load(riak_test), + + %% Loads from ~/.riak_test.config + rt:load_config(Config), + + %% Ensure existance of scratch_dir + case file:make_dir(rt:config(rt_scratch_dir)) of + ok -> great; + {eexist, _} -> great; + {ErrorType, ErrorReason} -> lager:error("Could not create scratch dir, {~p, ~p}", [ErrorType, ErrorReason]) + end, + + %% Fileoutput + Outdir = proplists:get_value(outdir, ParsedArgs), + ConsoleLagerLevel = case Outdir of + undefined -> rt:config(lager_level, info); + _ -> + filelib:ensure_dir(Outdir), + notice + end, + + application:set_env(lager, handlers, [{lager_console_backend, ConsoleLagerLevel}]), + lager:start(), + + %% Report + Report = case proplists:get_value(report, ParsedArgs, undefined) of + undefined -> undefined; + "config" -> rt:config(platform, undefined); + R -> R + end, + + Verbose = proplists:is_defined(verbose, ParsedArgs), + + Suites = proplists:get_all_values(suites, ParsedArgs), + case Suites of + [] -> ok; + _ -> io:format("Suites are not currently supported.") + end, + + CommandLineTests = parse_command_line_tests(ParsedArgs), + Tests = which_tests_to_run(Report, CommandLineTests), + + case Tests of + [] -> + lager:warning("No tests are scheduled to run"), + init:stop(1); + _ -> keep_on_keepin_on + end, + + io:format("Tests to run: ~p~n", [Tests]), + %% Two hard-coded deps... + add_deps(rt:get_deps()), + add_deps("deps"), + + [add_deps(Dep) || Dep <- rt:config(rt_deps, [])], + ENode = rt:config(rt_nodename, 'riak_test@127.0.0.1'), + Cookie = rt:config(rt_cookie, riak), + [] = os:cmd("epmd -daemon"), + net_kernel:start([ENode]), + erlang:set_cookie(node(), Cookie), + + TestResults = lists:filter(fun results_filter/1, [ run_test(Test, Outdir, TestMetaData, Report, HarnessArgs, length(Tests)) || {Test, TestMetaData} <- Tests]), + print_summary(TestResults, Verbose), + + Teardown = not proplists:get_value(keep, ParsedArgs, false), + maybe_teardown(Teardown, TestResults), + ok. + +maybe_teardown(false, _TestResults) -> + lager:info("Keeping cluster running as requested"); +maybe_teardown(true, TestResults) -> + case {length(TestResults), proplists:get_value(status, hd(TestResults))} of + {1, fail} -> + so_kill_riak_maybe(); + _ -> + lager:info("Multiple tests run or no failure"), + rt:teardown() + end, + ok. 
+ +parse_command_line_tests(ParsedArgs) -> + Backends = case proplists:get_all_values(backend, ParsedArgs) of + [] -> [undefined]; + Other -> Other + end, + Upgrades = case proplists:get_all_values(upgrade_version, ParsedArgs) of + [] -> [undefined]; + UpgradeList -> UpgradeList + end, + %% Parse Command Line Tests + {CodePaths, SpecificTests} = + lists:foldl(fun extract_test_names/2, + {[], []}, + proplists:get_all_values(tests, ParsedArgs)), + [code:add_patha(CodePath) || CodePath <- CodePaths, + CodePath /= "."], + Dirs = proplists:get_all_values(dir, ParsedArgs), + DirTests = lists:append([load_tests_in_dir(Dir) || Dir <- Dirs]), + lists:foldl(fun(Test, Tests) -> + [{ + list_to_atom(Test), + [ + {id, -1}, + {platform, <<"local">>}, + {version, rt:get_version()}, + {project, list_to_binary(rt:config(rt_project, "undefined"))} + ] ++ + [ {backend, Backend} || Backend =/= undefined ] ++ + [ {upgrade_version, Upgrade} || Upgrade =/= undefined ]} + || Backend <- Backends, + Upgrade <- Upgrades ] ++ Tests + end, [], lists:usort(DirTests ++ SpecificTests)). + +extract_test_names(Test, {CodePaths, TestNames}) -> + {[filename:dirname(Test) | CodePaths], + [filename:rootname(filename:basename(Test)) | TestNames]}. + +which_tests_to_run(undefined, CommandLineTests) -> + {Tests, NonTests} = + lists:partition(fun is_runnable_test/1, CommandLineTests), + lager:info("These modules are not runnable tests: ~p", + [[NTMod || {NTMod, _} <- NonTests]]), + Tests; +which_tests_to_run(Platform, []) -> giddyup:get_suite(Platform); +which_tests_to_run(Platform, CommandLineTests) -> + Suite = filter_zip_suite(Platform, CommandLineTests), + {Tests, NonTests} = + lists:partition(fun is_runnable_test/1, + lists:foldr(fun filter_merge_tests/2, [], Suite)), + + lager:info("These modules are not runnable tests: ~p", + [[NTMod || {NTMod, _} <- NonTests]]), + Tests. + +filter_zip_suite(Platform, CommandLineTests) -> + [ {SModule, SMeta, CMeta} || {SModule, SMeta} <- giddyup:get_suite(Platform), + {CModule, CMeta} <- CommandLineTests, + SModule =:= CModule]. + +filter_merge_tests({Module, SMeta, CMeta}, Tests) -> + case filter_merge_meta(SMeta, CMeta, [backend, upgrade_version]) of + false -> + Tests; + Meta -> + [{Module, Meta}|Tests] + end. + +filter_merge_meta(SMeta, _CMeta, []) -> + SMeta; +filter_merge_meta(SMeta, CMeta, [Field|Rest]) -> + case {kvc:value(Field, SMeta, undefined), kvc:value(Field, CMeta, undefined)} of + {X, X} -> + filter_merge_meta(SMeta, CMeta, Rest); + {_, undefined} -> + filter_merge_meta(SMeta, CMeta, Rest); + {undefined, X} -> + filter_merge_meta(lists:keystore(Field, 1, SMeta, {Field, X}), CMeta, Rest); + _ -> + false + end. + +%% Check for api compatibility +is_runnable_test({TestModule, _}) -> + code:ensure_loaded(TestModule), + erlang:function_exported(TestModule, confirm, 0). + +run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> + SingleTestResult = riak_test_runner:confirm(Test, Outdir, TestMetaData), + case NumTests of + 1 -> keep_them_up; + _ -> rt:teardown() + end, + case Report of + undefined -> ok; + _ -> + %% Old Code for concatinating log files for upload to giddyup + %% They're too big now, causing problems which will be solved by + %% GiddyUp's new Artifact feature, comming soon from a Cribbs near you. + + %% The point is, this is here in case we need to turn this back on + %% before artifacts are ready. 
And to remind jd that this is the place + %% to write the artifact client + + %% {log, TestLog} = lists:keyfind(log, 1, SingleTestResult), + %% NodeLogs = cat_node_logs(), + %% EncodedNodeLogs = unicode:characters_to_binary(iolist_to_binary(NodeLogs), + %% latin1, utf8), + %% NewLogs = iolist_to_binary([TestLog, EncodedNodeLogs]), + %% ResultWithNodeLogs = lists:keyreplace(log, 1, SingleTestResult, + %% {log, NewLogs}), + %% giddyup:post_result(ResultWithNodeLogs) + giddyup:post_result(SingleTestResult) + end, + SingleTestResult. + +print_summary(TestResults, Verbose) -> + io:format("~nTest Results:~n"), + + Results = [ + [ atom_to_list(proplists:get_value(test, SingleTestResult)) ++ "-" ++ + backend_list(proplists:get_value(backend, SingleTestResult)), + proplists:get_value(status, SingleTestResult), + proplists:get_value(reason, SingleTestResult)] + || SingleTestResult <- TestResults], + Width = test_name_width(Results), + + Print = fun(Test, Status, Reason) -> + case {Status, Verbose} of + {fail, true} -> io:format("~s: ~s ~p~n", [string:left(Test, Width), Status, Reason]); + _ -> io:format("~s: ~s~n", [string:left(Test, Width), Status]) + end + end, + [ Print(Test, Status, Reason) || [Test, Status, Reason] <- Results], + + PassCount = length(lists:filter(fun(X) -> proplists:get_value(status, X) =:= pass end, TestResults)), + FailCount = length(lists:filter(fun(X) -> proplists:get_value(status, X) =:= fail end, TestResults)), + io:format("---------------------------------------------~n"), + io:format("~w Tests Failed~n", [FailCount]), + io:format("~w Tests Passed~n", [PassCount]), + Percentage = case PassCount == 0 andalso FailCount == 0 of + true -> 0; + false -> (PassCount / (PassCount + FailCount)) * 100 + end, + io:format("That's ~w% for those keeping score~n", [Percentage]), + ok. + +test_name_width(Results) -> + lists:max([ length(X) || [X | _T] <- Results ]). + +backend_list(Backend) when is_atom(Backend) -> + atom_to_list(Backend); +backend_list(Backends) when is_list(Backends) -> + FoldFun = fun(X, []) -> + atom_to_list(X); + (X, Acc) -> + Acc ++ "," ++ atom_to_list(X) + end, + lists:foldl(FoldFun, [], Backends). + +results_filter(Result) -> + case proplists:get_value(status, Result) of + not_a_runnable_test -> + false; + _ -> + true + end. + +load_tests_in_dir(Dir) -> + case filelib:is_dir(Dir) of + true -> + code:add_path(Dir), + lists:sort([ string:substr(Filename, 1, length(Filename) - 5) || Filename <- filelib:wildcard("*.beam", Dir)]); + _ -> io:format("~s is not a dir!~n", [Dir]) + end. + +so_kill_riak_maybe() -> + io:format("~n~nSo, we find ourselves in a tricky situation here. ~n"), + io:format("You've run a single test, and it has failed.~n"), + io:format("Would you like to leave Riak running in order to debug?~n"), + Input = io:get_chars("[Y/n] ", 1), + case Input of + "n" -> rt:teardown(); + "N" -> rt:teardown(); + _ -> + io:format("Leaving Riak Up... "), + rt:whats_up() + end. + +%% cat_node_logs() -> +%% Files = rt:get_node_logs(), +%% Output = io_lib:format("================ Printing node logs and crash dumps ================~n~n", []), +%% cat_node_logs(Files, [Output]). +%% +%% cat_node_logs([], Output) -> Output; +%% cat_node_logs([{Filename, Content}|Rest], Output) -> +%% Log = io_lib:format("================ Log: ~s =====================~n~s~n~n", [Filename, Content]), +%% cat_node_logs(Rest, [Output, Log]). 
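For illustration, backend_list/1 above folds a list of backend atoms into a comma-separated string for the summary table; expected results (a sketch, not captured output):

    %% backend_list(bitcask)             -> "bitcask"
    %% backend_list([bitcask, eleveldb]) -> "bitcask,eleveldb"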
diff --git a/src/rtssh.erl b/src/rtssh.erl index 74ffbd8e1..82248fb73 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -20,7 +20,7 @@ setup_harness(_Test, _Args) -> ok; BenchPath -> code:add_path(BenchPath ++ "/ebin"), - riak_test:add_deps(BenchPath ++ "/deps") + riak_test_escript:add_deps(BenchPath ++ "/deps") end, sync_bench(Bench), From a93bf29d2d2cf5e1176ca06e26974cbcdf4926bb Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 3 Feb 2014 23:42:11 -0800 Subject: [PATCH 079/139] Add missing config defaults --- src/rtssh.erl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rtssh.erl b/src/rtssh.erl index 7e1e45ccd..e53b253c0 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -15,7 +15,7 @@ setup_harness(_Test, _Args) -> rt_config:set(rt_hostnames, Hosts), %% [io:format("R: ~p~n", [wildcard(Host, "/tmp/*")]) || Host <- Hosts], - case rt_config:get(rtssh_bench) of + case rt_config:get(rtssh_bench, undefined) of undefined -> ok; BenchPath -> @@ -421,7 +421,7 @@ stop_all(Host, DevPath) -> ok. sync_bench(Hosts) -> - case rt_config:get(rtssh_bench) of + case rt_config:get(rtssh_bench, undefined) of undefined -> ok; Path -> @@ -435,7 +435,7 @@ sync_bench(Hosts) -> end. sync_proxy(Hosts) -> - case rt_config:get(rtssh_proxy) of + case rt_config:get(rtssh_proxy, undefined) of undefined -> ok; Path -> @@ -449,7 +449,7 @@ sync_proxy(Hosts) -> end. stop_all_bench(Hosts) -> - case rt_config:get(rtssh_bench) of + case rt_config:get(rtssh_bench, undefined) of undefined -> ok; Path -> @@ -473,7 +473,7 @@ deploy_bench() -> deploy_bench(rt_config:get(rtssh_bench_hosts)). deploy_bench(Hosts) -> - case rt_config:get(rtssh_bench) of + case rt_config:get(rtssh_bench, undefined) of undefined -> ok; Path -> @@ -500,7 +500,7 @@ deploy_proxy(Seed) -> deploy_proxy(Seed, Hosts) -> SeedStr = atom_to_list(Seed), - case rt_config:get(rtssh_proxy) of + case rt_config:get(rtssh_proxy, undefined) of undefined -> ok; Path -> From 42e4c5fcd7674dbe945a398b1fc747972518120f Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Wed, 5 Feb 2014 18:13:06 -0500 Subject: [PATCH 080/139] Various improvements to the smoke tester * Fix the wait_for_workers in 'jobs' mode * Handle a dep dir being a symlink (leftover from an aborted previous run) * Add a new --tasks option so you can pick which tasks you want to run (eunit/dialyzer/xref) --- src/smoke_test_escript.erl | 82 +++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/src/smoke_test_escript.erl b/src/smoke_test_escript.erl index 76bff333f..57eb2abe0 100755 --- a/src/smoke_test_escript.erl +++ b/src/smoke_test_escript.erl @@ -1,7 +1,7 @@ -module(smoke_test_escript). -include_lib("kernel/include/file.hrl"). --export([main/1, get_version/0, worker/3]). +-export([main/1, get_version/0, worker/4]). get_version() -> list_to_binary(string:strip(os:cmd("git describe"), right, $\n)). @@ -9,10 +9,11 @@ get_version() -> cli_options() -> %% Option Name, Short Code, Long Code, Argument Spec, Help Message [ - {project, $p, "project", string, "specifices which project"}, - {debug, $v, "debug", undefined, "debug?"}, - {directory, $d, "directory", string, "source tree directory"}, - {jobs, $j, "jobs", integer, "jobs?"} + {project, $p, "project", string, "specifices which project"}, + {debug, $v, "debug", undefined, "debug?"}, + {directory, $d, "directory", string, "source tree directory"}, + {jobs, $j, "jobs", integer, "jobs?"}, + {tasks, $T, "tasks", string, "What task(s) to run (eunit|dialyzer|xref)"} ]. 
@@ -37,6 +38,13 @@ main(Args) -> lager:info("Changing working dir to ~s", [Dir]), ok = file:set_cwd(filename:absname(Dir)) end, + Tasks = case lists:keyfind(tasks, 1, Parsed) of + false -> + ["xref", "dialyzer", "eunit"]; + {tasks, List} -> + string:tokens(List, ",") + end, + case lists:member(debug, Parsed) of true -> lager:set_loglevel(lager_console_backend, debug); @@ -68,21 +76,21 @@ main(Args) -> {Counter + 1, dict:append(Counter rem Jobs, S, Dict)} end, {0, dict:new()}, Suites))), lager:debug("Split into ~p lists", [length(SplitSuites)]), - Workers = [spawn_monitor(?MODULE, worker, [Rebar, PWD, SS]) || {_, SS} <- SplitSuites], - wait_for_workers(Workers); + Workers = [spawn_monitor(?MODULE, worker, [Rebar, PWD, SS, Tasks]) || {_, SS} <- SplitSuites], + wait_for_workers([P || {P, _} <- Workers]); _ -> - worker(Rebar, PWD, Suites) + worker(Rebar, PWD, Suites, Tasks) end. -worker(Rebar, PWD, Suites) -> +worker(Rebar, PWD, Suites, Tasks) -> lists:foreach(fun({Suite, Config}) -> lager:info("Suite ~p config ~p", [Suite, Config]), [Dep, Task] = string:tokens(atom_to_list(Suite), ":"), FDep = filename:join([PWD, deps, Dep]), case filelib:is_dir(FDep) of true -> - case Task of - "eunit" -> + case {Task, lists:member(Task, Tasks)} of + {"eunit", true} -> %% set up a symlink so that each dep has deps P = erlang:open_port({spawn_executable, Rebar}, [{args, ["eunit", "skip_deps=true"]}, @@ -93,7 +101,7 @@ worker(Rebar, PWD, Suites) -> giddyup:post_result([{test, Suite}, {status, get_status(Res)}, {log, CleanedLog} | Config]), Res; - "dialyzer" -> + {"dialyzer", true} -> P = erlang:open_port({spawn_executable, "/usr/bin/make"}, [{args, ["dialyzer"]}, {cd, FDep}, exit_status, @@ -104,7 +112,7 @@ worker(Rebar, PWD, Suites) -> giddyup:post_result([{test, Suite}, {status, get_status(Res)}, {log, CleanedLog} | Config]), Res; - "xref" -> + {"xref", true} -> P = erlang:open_port({spawn_executable, Rebar}, [{args, ["xref", "skip_deps=true"]}, {cd, FDep}, exit_status, @@ -115,6 +123,7 @@ worker(Rebar, PWD, Suites) -> {log, CleanedLog} | Config]), Res; _ -> + lager:info("Skipping suite ~p", [Suite]), ok end; @@ -155,24 +164,32 @@ setup_deps(Rebar, PWD, [Dep|Deps]) -> setup_deps(Rebar, PWD, Deps). remove_deps_dir(Dep) -> - case filelib:is_dir(filename:join(Dep, "deps")) of + DepDir = filename:join(Dep, "deps"), + case filelib:is_dir(DepDir) of true -> - %% there should ONLY be a deps dir leftover from a previous run, - %% so it should be a directory filled with symlinks - {ok, Files} = file:list_dir(filename:join(Dep, "deps")), - lists:foreach(fun(F) -> - File = filename:join([Dep, "deps", F]), - {ok, FI} = file:read_link_info(File), - case FI#file_info.type of - symlink -> - ok = file:delete(File); - _ -> - ok - end - end, Files), - %% this will fail if the directory is not now empty - ok = file:del_dir(filename:join(Dep, "deps")), - ok; + {ok, DI} = file:read_link_info(DepDir), + case DI#file_info.type of + symlink -> + %% leftover symlink, probably from an aborted run + ok = file:delete(DepDir); + _ -> + %% there should ONLY be a deps dir leftover from a previous run, + %% so it should be a directory filled with symlinks + {ok, Files} = file:list_dir(DepDir), + lists:foreach(fun(F) -> + File = filename:join(DepDir, F), + {ok, FI} = file:read_link_info(File), + case FI#file_info.type of + symlink -> + ok = file:delete(File); + _ -> + ok + end + end, Files), + %% this will fail if the directory is not now empty + ok = file:del_dir(DepDir), + ok + end; false -> ok end. 
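The new --tasks flag takes a comma-separated list which string:tokens/2 splits on commas; string:tokens/2 treats runs of separators as one, so stray commas are harmless. A sketch of the parse (illustrative values):

    %% string:tokens("eunit,xref", ",")  -> ["eunit","xref"]
    %% string:tokens("eunit,,xref", ",") -> ["eunit","xref"]

A plausible invocation, assuming the escript is run from the tree root (the script name and paths here are illustrative; only the flags come from cli_options/0 above):

    ./smoke_test_escript -d /path/to/riak -j 4 -T eunit,xref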
@@ -182,10 +199,11 @@ wait_for_workers([]) -> wait_for_workers(Workers) -> receive {'DOWN', _, _, Pid, normal} -> - lager:info("Worker exited normally"), + lager:info("Worker ~p exited normally, ~p left", [Pid, length(Workers)-1]), wait_for_workers(Workers -- [Pid]); {'DOWN', _, _, Pid, Reason} -> - lager:info("Worker exited abnormally: ~p", [Reason]), + lager:info("Worker ~p exited abnormally: ~p, ~p left", [Pid, Reason, + length(Workers)-1]), wait_for_workers(Workers -- [Pid]) end. From a5d6872670cf53ad5a10fae44f35aed9c3f038a2 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Thu, 6 Feb 2014 23:42:52 -0800 Subject: [PATCH 081/139] Numerous rtssh harness improvements -- Update to work in post-Cuttlefish world -- Support upgrade/downgrades -- Support cluster groups (as used by rtcloud) -- Create missing SNMP directories -- Add utils/rt-cluster escript for cluster provisioning --- src/rt.erl | 32 ++++++- src/rtssh.erl | 226 +++++++++++++++++++++++++++++++++++++++++------ utils/rt-cluster | 157 ++++++++++++++++++++++++++++++++ 3 files changed, 383 insertions(+), 32 deletions(-) create mode 100644 utils/rt-cluster diff --git a/src/rt.erl b/src/rt.erl index 2910670b4..56bd292de 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -37,6 +37,7 @@ build_cluster/1, build_cluster/2, build_cluster/3, + build_clusters/1, capability/2, capability/3, check_singleton_node/1, @@ -52,6 +53,7 @@ create_and_activate_bucket_type/3, deploy_nodes/1, deploy_nodes/2, + deploy_clusters/1, down/2, enable_search_hook/2, expect_in_log/2, @@ -290,6 +292,27 @@ deploy_nodes(Versions, Services) -> version_to_config({_, _}=Config) -> Config; version_to_config(Version) -> {Version, default}. +%%%%%%%%%%%%%%%%%%%% +deploy_clusters(Settings) -> + ClusterConfigs = [case Setting of + Configs when is_list(Configs) -> + Configs; + NumNodes when is_integer(NumNodes) -> + [{current, default} || _ <- lists:seq(1, NumNodes)]; + {NumNodes, InitialConfig} when is_integer(NumNodes) -> + [{current, InitialConfig} || _ <- lists:seq(1,NumNodes)] + end || Setting <- Settings], + ?HARNESS:deploy_clusters(ClusterConfigs). + +build_clusters(Settings) -> + Clusters = deploy_clusters(Settings), + [begin + join_cluster(Nodes), + lager:info("Cluster built: ~p", [Nodes]) + end || Nodes <- Clusters], + Clusters. +%%%%%%%%%%%%%%%%%%%% + %% @doc Start the specified Riak node start(Node) -> ?HARNESS:start(Node). @@ -834,6 +857,11 @@ build_cluster(NumNodes, Versions, InitialConfig) -> deploy_nodes(Versions) end, + join_cluster(Nodes), + lager:info("Cluster built: ~p", [Nodes]), + Nodes. + +join_cluster(Nodes) -> %% Ensure each node owns 100% of it's own ring [?assertEqual([Node], owners_according_to(Node)) || Node <- Nodes], @@ -856,9 +884,7 @@ build_cluster(NumNodes, Versions, InitialConfig) -> %% Ensure each node owns a portion of the ring wait_until_nodes_agree_about_ownership(Nodes), ?assertEqual(ok, wait_until_no_pending_changes(Nodes)), - - lager:info("Cluster built: ~p", [Nodes]), - Nodes. + ok. try_nodes_ready([Node1 | _Nodes], 0, _SleepMs) -> lager:info("Nodes not ready after initial plan/commit, retrying"), diff --git a/src/rtssh.erl b/src/rtssh.erl index e53b253c0..e17c7f8b2 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -6,7 +6,19 @@ get_version() -> unknown. get_deps() -> - "deps". 
+ Path = relpath(current), + case filelib:is_dir(Path) of + true -> + lists:flatten(io_lib:format("~s/dev/dev1/lib", [Path])); + false -> + case rt_config:get(rt_deps, undefined) of + undefined -> + throw("Unable to determine Riak library path"); + _ -> + ok + end, + "" + end. setup_harness(_Test, _Args) -> Path = relpath(root), @@ -66,7 +78,7 @@ deploy_nodes(NodeConfig, Hosts) -> %% NumNodes = length(NodeConfig), %% NodesN = lists:seq(1, NumNodes), %% Nodes = [?DEV(N) || N <- NodesN], - Nodes = [list_to_atom("dev1@" ++ Host) || Host <- Hosts], + Nodes = [list_to_atom("riak@" ++ Host) || Host <- Hosts], HostMap = lists:zip(Nodes, Hosts), %% NodeMap = orddict:from_list(lists:zip(Nodes, NodesN)), @@ -81,6 +93,8 @@ deploy_nodes(NodeConfig, Hosts) -> rt:pmap(fun({_, default}) -> ok; + ({Node, {cuttlefish, Config}}) -> + set_conf(Node, Config); ({Node, Config}) -> update_app_config(Node, Config) end, @@ -90,28 +104,63 @@ deploy_nodes(NodeConfig, Hosts) -> rt:pmap(fun(Node) -> Host = get_host(Node), IP = get_ip(Host), - Config = [{riak_api, [{pb, fun([{_, Port}]) -> - [{IP, Port}] - end}, - {pb_ip, fun(_) -> - IP - end}]}, - {riak_core, [{http, fun([{_, Port}]) -> - [{IP, Port}] - end}]}], + Config = [{riak_api, [{pb, [{IP, 10017}]}, + {pb_ip, IP}, + {http,[{IP, 10018}]}]}, + {riak_core, [{http, [{IP, 10018}]}, + {cluster_mgr,{IP, 10016}}]}], + %% Config = [{riak_api, [{pb, fun([{_, Port}]) -> + %% [{IP, Port}] + %% end}, + %% {pb_ip, fun(_) -> + %% IP + %% end}]}, + %% {riak_core, [{http, fun([{_, Port}]) -> + %% [{IP, Port}] + %% end}]}], update_app_config(Node, Config) end, Nodes), timer:sleep(500), - rt:pmap(fun(Node) -> - update_vm_args(Node, [{"-name", Node}]) - end, Nodes), - timer:sleep(500), + %% rt:pmap(fun(Node) -> + %% update_vm_args(Node, [{"-name", Node}]) + %% end, Nodes), + %% timer:sleep(500), + + create_dirs(Nodes), rt:pmap(fun start/1, Nodes), Nodes. +deploy_clusters(ClusterConfigs) -> + Clusters = rt_config:get(rtssh_clusters, []), + NumConfig = length(ClusterConfigs), + case length(Clusters) < NumConfig of + true -> + erlang:error("Requested more clusters than available"); + false -> + Both = lists:zip(lists:sublist(Clusters, NumConfig), ClusterConfigs), + Deploy = + [begin + NumNodes = length(NodeConfig), + NumHosts = length(Hosts), + case NumNodes > NumHosts of + true -> + erlang:error("Not enough hosts available to deploy nodes", + [NumNodes, NumHosts]); + false -> + Hosts2 = lists:sublist(Hosts, NumNodes), + {Hosts2, NodeConfig} + end + end || {{_,Hosts}, NodeConfig} <- Both], + [deploy_nodes(NodeConfig, Hosts) || {Hosts, NodeConfig} <- Deploy] + end. + +create_dirs(Nodes) -> + [ssh_cmd(Node, "mkdir -p " ++ node_path(Node) ++ "/data/snmp/agent/db") + || Node <- Nodes]. + start(Node) -> run_riak(Node, "start"), ok. @@ -120,6 +169,36 @@ stop(Node) -> run_riak(Node, "stop"), ok. +upgrade(Node, NewVersion) -> + upgrade(Node, NewVersion, same). 
+ +upgrade(Node, NewVersion, Config) -> + Version = node_version(Node), + lager:info("Upgrading ~p : ~p -> ~p", [Node, Version, NewVersion]), + stop(Node), + rt:wait_until_unpingable(Node), + OldPath = node_path(Node, Version), + NewPath = node_path(Node, NewVersion), + + Commands = [ + io_lib:format("cp -p -P -R \"~s/data\" \"~s\"", + [OldPath, NewPath]), + io_lib:format("rm -rf ~s/data/*", + [OldPath]), + io_lib:format("cp -p -P -R \"~s/etc\" \"~s\"", + [OldPath, NewPath]) + ], + [remote_cmd(Node, Cmd) || Cmd <- Commands], + VersionMap = orddict:store(Node, NewVersion, rt_config:get(rt_versions)), + rt_config:set(rt_versions, VersionMap), + case Config of + same -> ok; + _ -> update_app_config(Node, Config) + end, + start(Node), + rt:wait_until_pingable(Node), + ok. + run_riak(Node, Cmd) -> Exec = riakcmd(Node, Cmd), lager:info("Running: ~s :: ~s", [get_host(Node), Exec]), @@ -130,6 +209,11 @@ run_git(Host, Path, Cmd) -> lager:info("Running: ~s :: ~s", [Host, Exec]), ssh_cmd(Host, Exec). +remote_cmd(Node, Cmd) -> + lager:info("Running: ~s :: ~s", [get_host(Node), Cmd]), + {0, Result} = ssh_cmd(Node, Cmd), + {ok, Result}. + admin(Node, Args) -> Cmd = riak_admin_cmd(Node, Args), lager:info("Running: ~s :: ~s", [get_host(Node), Cmd]), @@ -176,6 +260,7 @@ load_bench() -> read_hosts_file(File) -> case file:consult(File) of {ok, Terms} -> + Terms2 = maybe_clusters(Terms), lists:mapfoldl(fun({Alias, Host}, Aliases) -> Aliases2 = orddict:store(Host, Host, Aliases), Aliases3 = orddict:store(Alias, Host, Aliases2), @@ -183,11 +268,26 @@ read_hosts_file(File) -> (Host, Aliases) -> Aliases2 = orddict:store(Host, Host, Aliases), {Host, Aliases2} - end, orddict:new(), Terms); + end, orddict:new(), Terms2); _ -> erlang:error({"Missing or invalid rtssh hosts file", file:get_cwd()}) end. +maybe_clusters(Terms=[L|_]) when is_list(L) -> + Labels = lists:seq(1, length(Terms)), + Hosts = [[case Host of + {H, _} -> + H; + H -> + H + end || Host <- Hosts] || Hosts <- Terms], + Clusters = lists:zip(Labels, Hosts), + rt_config:set(rtssh_clusters, Clusters), + io:format("Clusters: ~p", [Clusters]), + lists:append(Terms); +maybe_clusters(Terms) -> + Terms. + get_host(Node) -> orddict:fetch(Node, rt_config:get(rt_hosts)). @@ -248,6 +348,21 @@ format(Msg, Args) -> update_vm_args(_Node, []) -> ok; update_vm_args(Node, Props) -> + Etc = node_path(Node) ++ "/etc/", + Files = [filename:basename(File) || File <- wildcard(Node, Etc ++ "*")], + VMArgsExists = lists:member("vm.args", Files), + AdvExists = lists:member("advanced.config", Files), + if VMArgsExists -> + do_update_vm_args(Node, Props); + AdvExists -> + update_app_config_file(Node, Etc ++ "advanced.config", + [{vm_args, Props}], undefined); + true -> + update_app_config_file(Node, Etc ++ "advanced.config", + [{vm_args, Props}], []) + end. + +do_update_vm_args(Node, Props) -> %% TODO: Make non-matched options be appended to file VMArgs = node_path(Node) ++ "/etc/vm.args", Bin = remote_read_file(Node, VMArgs), @@ -264,21 +379,24 @@ update_vm_args(Node, Props) -> ok. update_app_config(Node, Config) -> - ConfigFile = node_path(Node) ++ "/etc/app.config", - update_app_config_file(Node, ConfigFile, Config). 
+ Etc = node_path(Node) ++ "/etc/", + Files = [filename:basename(File) || File <- wildcard(Node, Etc ++ "*")], + AppExists = lists:member("app.config", Files), + AdvExists = lists:member("advanced.config", Files), + if AppExists -> + update_app_config_file(Node, Etc ++ "app.config", Config, undefined); + AdvExists -> + update_app_config_file(Node, Etc ++ "advanced.config", Config, undefined); + true -> + update_app_config_file(Node, Etc ++ "advanced.config", Config, []) + end. + %% ConfigFile = node_path(Node) ++ "/etc/app.config", + %% update_app_config_file(Node, ConfigFile, Config). -update_app_config_file(Node, ConfigFile, Config) -> +update_app_config_file(Node, ConfigFile, Config, Current) -> lager:info("rtssh:update_app_config_file(~p, ~s, ~p)", [Node, ConfigFile, Config]), - Bin = remote_read_file(Node, ConfigFile), - BaseConfig = - try - {ok, BC} = consult_string(Bin), - BC - catch - _:_ -> - erlang:error({"Failed to parse app.config for", Node, Bin}) - end, + BaseConfig = current_config(Node, ConfigFile, Current), %% io:format("BaseConfig: ~p~n", [BaseConfig]), MergeA = orddict:from_list(Config), MergeB = orddict:from_list(BaseConfig), @@ -299,12 +417,59 @@ update_app_config_file(Node, ConfigFile, Config) -> ?assertEqual(ok, remote_write_file(Node, ConfigFile, NewConfigOut)), ok. +current_config(Node, ConfigFile, undefined) -> + Bin = remote_read_file(Node, ConfigFile), + try + {ok, BC} = consult_string(Bin), + BC + catch + _:_ -> + erlang:error({"Failed to parse app.config for", Node, Bin}) + end; +current_config(_Node, _ConfigFile, Current) -> + Current. + consult_string(Bin) when is_binary(Bin) -> consult_string(binary_to_list(Bin)); consult_string(Str) -> {ok, Tokens, _} = erl_scan:string(Str), erl_parse:parse_term(Tokens). +-spec set_conf(atom(), [{string(), string()}]) -> ok. +set_conf(all, NameValuePairs) -> + lager:info("rtssh:set_conf(all, ~p)", [NameValuePairs]), + Hosts = rt_config:get(rtssh_hosts), + All = [{Host, DevPath} || Host <- Hosts, + DevPath <- devpaths()], + rt:pmap(fun({Host, DevPath}) -> + AllFiles = all_the_files(Host, DevPath, "etc/riak.conf"), + [append_to_conf_file(Host, File, NameValuePairs) || File <- AllFiles], + ok + end, All), + ok; +set_conf(Node, NameValuePairs) when is_atom(Node) -> + append_to_conf_file(Node, get_riak_conf(Node), NameValuePairs), + ok. + +get_riak_conf(Node) -> + node_path(Node) ++ "/etc/riak.conf". + +append_to_conf_file(Node, File, NameValuePairs) -> + Current = remote_read_file(Node, File), + Settings = [[$\n, to_list(Name), $=, to_list(Val), $\n] || {Name, Val} <- NameValuePairs], + Output = iolist_to_binary([Current, Settings]), + remote_write_file(Node, File, Output). + +all_the_files(Host, DevPath, File) -> + case wildcard(Host, DevPath ++ "/dev/dev*/" ++ File) of + error -> + lager:info("~s is not a directory.", [DevPath]), + []; + Files -> + io:format("~s :: files: ~p~n", [Host, Files]), + Files + end. + %%%=================================================================== %%% Riak devrel path utilities %%%=================================================================== @@ -334,8 +499,11 @@ relpath(_, _) -> throw("Version requested but only one path provided"). node_path(Node) -> + node_path(Node, node_version(Node)). + +node_path(Node, Version) -> N = node_id(Node), - Path = relpath(node_version(Node)), + Path = relpath(Version), lists:flatten(io_lib:format("~s/dev/dev~b", [Path, N])). 
node_id(_Node) -> diff --git a/utils/rt-cluster b/utils/rt-cluster new file mode 100644 index 000000000..488dc0de4 --- /dev/null +++ b/utils/rt-cluster @@ -0,0 +1,157 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -smp enable +K true +P 10000 -env ERL_MAX_PORTS 10000 + +-export([main/1]). + +usage() -> + io:format("Usage: ~s []~n~n" + "Commands:~n" + " build Build Riak cluster(s)~n" + " teardown Teardown running clusters~n", + [escript:script_name()]). + +build_options() -> +%% Option Name, Short Code, Long Code, Argument Spec, Help Message +[ + {help, $h, "help", undefined, "Print this usage page"}, + {config, $c, "conf", string, "project configuration"}, + {version, $v, "version", atom, "Riak version (eg. current, previous legacy)"}, + {nodes, $n, "num", integer, "number of nodes/cluster (required)"}, + {clusters, undefined, "clusters", integer, "number of clusters"}, + {backend, $b, "backend", atom, "backend [memory | bitcask | eleveldb | yessir]"}, + {ring_size, $r, "ring-size", integer, "cluster ring size"}, + {file, $F, "file", string, "use the specified file instead of ~/.riak_test.config"} +]. + +build_defaults() -> + [{nodes, required}, + {version, current}, + {clusters, 1}, + {backend, undefined}, + {ring_size, undefined}]. + +print_help(Cmd, CmdOpts) -> + getopt:usage(CmdOpts, escript:script_name() ++ " " ++ Cmd), + halt(0). + +run_help(ParsedArgs) -> + lists:member(help, ParsedArgs). + +process_options(ParsedArgs, Defaults, Cmd, CmdOpts) -> + Opts = lists:ukeysort(1, ParsedArgs ++ Defaults), + case lists:keyfind(required, 2, Opts) of + false -> + Opts; + _ -> + io:format("Missing required option!~n"), + print_help(Cmd, CmdOpts) + end. + +parse_args(Args, Cmd, CmdOpts, Defaults) -> + {ParsedArgs, _} = case getopt:parse(CmdOpts, Args) of + {ok, {P, H}} -> {P, H}; + _ -> print_help(Cmd, CmdOpts) + end, + case run_help(ParsedArgs) of + true -> print_help(Cmd, CmdOpts); + _ -> ok + end, + Opts = process_options(ParsedArgs, Defaults, Cmd, CmdOpts), + Opts. + +setup() -> + try + true = filelib:is_dir("./ebin"), + true = filelib:is_dir("./deps/getopt/ebin"), + code:add_patha("./ebin"), + riak_test_escript:add_deps("./deps") + catch + _:_ -> + io:format("rt-cluster must be run from top-level of " + "compiled riak_test tree~n"), + halt(1) + end. + +main(Args) -> + setup(), + command(Args). + +command(["build"|Args]) -> + Opts = parse_args(Args, "build", build_options(), build_defaults()), + NumNodes = proplists:get_value(nodes, Opts), + KVConfig = [{storage_backend, get_backend(Opts)}], + CoreConfig = [{ring_creation_size, proplists:get_value(ring_size, Opts)}], + ConfigOpts = [maybe_config(riak_kv, KVConfig), + maybe_config(riak_core, CoreConfig)], + Config = lists:flatten(ConfigOpts), + setup2(Opts), + io:format("Config: ~p~n", [Config]), + Nodes = rt:build_cluster(NumNodes, Config), + [Node1|_] = Nodes, + rpc:call(Node1, riak_core_console, member_status, [[]]), + ok; +command(["teardown"|_Args]) -> + io:format("teardown~n"), + ok; +command(_) -> + usage(). + +get_backend(Opts) -> + case proplists:get_value(backend, Opts) of + bitcask -> + riak_kv_bitcask_backend; + leveldb -> + riak_kv_eleveldb_backend; + memory -> + riak_kv_memory_backend; + yessir -> + riak_kv_yessir_backend; + undefined -> + undefined + end. + +maybe_config(App, Config) -> + MaybeConfig = [Setting || Setting={_, Value} <- Config, + Value =/= undefined], + case MaybeConfig of + [] -> + []; + _ -> + [{App, MaybeConfig}] + end. 
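maybe_config/2 above keeps unset command-line options out of the generated config: settings whose value is undefined are dropped, and an app section with nothing left disappears entirely. Illustrative results (a sketch):

    %% maybe_config(riak_core, [{ring_creation_size, undefined}]) -> []
    %% maybe_config(riak_core, [{ring_creation_size, 64}])
    %%     -> [{riak_core, [{ring_creation_size, 64}]}]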
+ +setup2(Opts) -> + register(riak_test, self()), + + %% ibrowse + application:load(ibrowse), + application:start(ibrowse), + %% Start Lager + application:load(lager), + Config = proplists:get_value(config, Opts), + ConfigFile = proplists:get_value(file, Opts), + + %% Loads application defaults + application:load(riak_test), + + %% Loads from ~/.riak_test.config + rt_config:load(Config, ConfigFile), + + application:set_env(lager, handlers, [{lager_console_backend, + rt_config:get(lager_level, info)}]), + lager:start(), + + %% Two hard-coded deps... + riak_test_escript:add_deps(rt:get_deps()), + riak_test_escript:add_deps("deps"), + + [riak_test_escript:add_deps(Dep) || Dep <- rt_config:get(rt_deps, [])], + ENode = rt_config:get(rt_nodename, 'riak_test@127.0.0.1'), + Cookie = rt_config:get(rt_cookie, riak), + [] = os:cmd("epmd -daemon"), + net_kernel:start([ENode]), + erlang:set_cookie(node(), Cookie), + + rt:setup_harness(undefined, []), + ok. From 6f22e2f1f97fd6701b6af32463a113228352f19a Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Fri, 7 Feb 2014 00:09:52 -0800 Subject: [PATCH 082/139] Add additional delay to remote_read_file to combat race --- src/rtssh.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rtssh.erl b/src/rtssh.erl index e17c7f8b2..64de07461 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -321,6 +321,7 @@ ssh_cmd(Node, Cmd) -> wait_for_cmd(spawn_ssh_cmd(Node, Cmd)). remote_read_file(Node, File) -> + timer:sleep(500), case ssh_cmd(Node, "cat " ++ File) of {0, Text} -> %% io:format("~p/~p: read: ~p~n", [Node, File, Text]), From 89bf056aab486b87397c1194886aa3d9f1b0d7a8 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Fri, 7 Feb 2014 00:10:26 -0800 Subject: [PATCH 083/139] Remove all basho_bench/collector/proxy code (for now) --- src/rtssh.erl | 148 ++------------------------------------------------ 1 file changed, 4 insertions(+), 144 deletions(-) diff --git a/src/rtssh.erl b/src/rtssh.erl index 64de07461..76308bc83 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -23,24 +23,11 @@ get_deps() -> setup_harness(_Test, _Args) -> Path = relpath(root), Hosts = load_hosts(), - Bench = load_bench(), rt_config:set(rt_hostnames, Hosts), %% [io:format("R: ~p~n", [wildcard(Host, "/tmp/*")]) || Host <- Hosts], - case rt_config:get(rtssh_bench, undefined) of - undefined -> - ok; - BenchPath -> - code:add_path(BenchPath ++ "/ebin"), - riak_test_escript:add_deps(BenchPath ++ "/deps") - end, - - sync_bench(Bench), - sync_proxy(Bench), - %% Stop all discoverable nodes, not just nodes we'll be using for this test. stop_all(Hosts), - stop_all_bench(Bench), %% Reset nodes to base state lager:info("Resetting nodes to fresh state"), @@ -250,13 +237,6 @@ load_hosts() -> rt_config:set(rtssh_aliases, Aliases), Hosts. -load_bench() -> - {HostsIn, _Aliases} = read_hosts_file("bench"), - Hosts = lists:sort(HostsIn), - rt_config:set(rtssh_bench_hosts, Hosts), - io:format("Bench: ~p~n", [Hosts]), - Hosts. - read_hosts_file(File) -> case file:consult(File) of {ok, Terms} -> @@ -578,10 +558,10 @@ stop_all(Host, DevPath) -> [begin Cmd = D ++ "/bin/riak stop", {_, Result} = ssh_cmd(Host, Cmd), - [Output | _Tail] = string:tokens(Result, "\n"), - Status = case Output of - "ok" -> "ok"; - _ -> "wasn't running" + Status = case string:tokens(Result, "\n") of + ["ok"|_] -> "ok"; + [_|_] -> "wasn't running"; + [] -> "error" end, lager:info("Stopping Node... ~s :: ~s ~~ ~s.", [Host, Cmd, Status]) @@ -589,129 +569,9 @@ stop_all(Host, DevPath) -> end, ok. 
-sync_bench(Hosts) -> - case rt_config:get(rtssh_bench, undefined) of - undefined -> - ok; - Path -> - Paths = filename:split(Path), - Root = filename:join(lists:sublist(Paths, length(Paths)-1)), - rt:pmap(fun(Host) -> - Cmd = "rsync -tr " ++ Path ++ " " ++ Host ++ ":" ++ Root, - Result = cmd(Cmd), - lager:info("Syncing bench :: ~p :: ~p :: ~p~n", [Host, Cmd, Result]) - end, Hosts) - end. - -sync_proxy(Hosts) -> - case rt_config:get(rtssh_proxy, undefined) of - undefined -> - ok; - Path -> - Paths = filename:split(Path), - Root = filename:join(lists:sublist(Paths, length(Paths)-1)), - rt:pmap(fun(Host) -> - Cmd = "rsync -tr " ++ Path ++ " " ++ Host ++ ":" ++ Root, - Result = cmd(Cmd), - lager:info("Syncing proxy :: ~p :: ~p :: ~p~n", [Host, Cmd, Result]) - end, Hosts) - end. - -stop_all_bench(Hosts) -> - case rt_config:get(rtssh_bench, undefined) of - undefined -> - ok; - Path -> - rt:pmap(fun(Host) -> - Cmd = "cd " ++ Path ++ " && bash ./bb.sh stop", - %% Result = ssh_cmd(Host, Cmd), - %% lager:info("Stopping basho_bench... ~s :: ~s ~~ ~p.", - %% [Host, Cmd, Result]) - {_, Result} = ssh_cmd(Host, Cmd), - [Output | _Tail] = string:tokens(Result, "\n"), - Status = case Output of - "ok" -> "ok"; - _ -> "wasn't running" - end, - lager:info("Stopping basho_bench... ~s :: ~s ~~ ~s.", - [Host, Cmd, Status]) - end, Hosts) - end. - -deploy_bench() -> - deploy_bench(rt_config:get(rtssh_bench_hosts)). - -deploy_bench(Hosts) -> - case rt_config:get(rtssh_bench, undefined) of - undefined -> - ok; - Path -> - rt:pmap(fun(Host) -> - Cookie = "riak", - This = lists:flatten(io_lib:format("~s", [node()])), - Cmd = - "cd " ++ Path ++ " && bash ./bb.sh" - " -N bench@" ++ Host ++ - " -C " ++ Cookie ++ - " -J " ++ This ++ - " -D", - spawn_ssh_cmd(Host, Cmd), - lager:info("Starting basho_bench... ~s :: ~s", - [Host, Cmd]) - end, Hosts), - [rt:wait_until_pingable(list_to_atom("bench@" ++ Host)) || Host <- Hosts], - timer:sleep(1000), - ok - end. - -deploy_proxy(Seed) -> - deploy_proxy(Seed, rt_config:get(rtssh_bench_hosts)). - -deploy_proxy(Seed, Hosts) -> - SeedStr = atom_to_list(Seed), - case rt_config:get(rtssh_proxy, undefined) of - undefined -> - ok; - Path -> - rt:pmap(fun(Host) -> - Cmd = "cd " ++ Path ++ " && bash go.sh \"" ++ SeedStr ++ "\"", - spawn_ssh_cmd(Host, Cmd), - lager:info("Starting riak_proxycfg... ~s :: ~s", - [Host, Cmd]) - end, Hosts), - timer:sleep(2000), - ok - end. - teardown() -> stop_all(rt_config:get(rt_hostnames)). -%%%=================================================================== -%%% Collector stuff -%%%=================================================================== - -collector_group_start(Name) -> - collector_call({group_start, timestamp(), Name}). - -collector_group_end() -> - collector_call({group_end, timestamp()}). - -collector_bench_start(Name, Config, Desc) -> - collector_call({bench_start, timestamp(), Name, Config, Desc}). - -collector_bench_end() -> - collector_call({bench_end, timestamp()}). - -collector_call(Msg) -> - {Node, _, _} = rt_config:get(rtssh_collector), - gen_server:call({collector, Node}, Msg, 30000). - -timestamp() -> - timestamp(os:timestamp()). - -timestamp({Mega, Secs, Micro}) -> - Mega*1000*1000*1000 + Secs * 1000 + (Micro div 1000). 
- %%%=================================================================== %%% Utilities %%%=================================================================== From eef9ab38ffd3e1e7b9f897411b8af965ca7015b9 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Fri, 7 Feb 2014 01:24:55 -0800 Subject: [PATCH 084/139] Make rt-cluster support multiple clusters/repl --- src/rtssh.erl | 1 - utils/rt-cluster | 84 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 77 insertions(+), 8 deletions(-) diff --git a/src/rtssh.erl b/src/rtssh.erl index 76308bc83..5534929f7 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -263,7 +263,6 @@ maybe_clusters(Terms=[L|_]) when is_list(L) -> end || Host <- Hosts] || Hosts <- Terms], Clusters = lists:zip(Labels, Hosts), rt_config:set(rtssh_clusters, Clusters), - io:format("Clusters: ~p", [Clusters]), lists:append(Terms); maybe_clusters(Terms) -> Terms. diff --git a/utils/rt-cluster b/utils/rt-cluster index 488dc0de4..67146725a 100644 --- a/utils/rt-cluster +++ b/utils/rt-cluster @@ -21,6 +21,7 @@ build_options() -> {clusters, undefined, "clusters", integer, "number of clusters"}, {backend, $b, "backend", atom, "backend [memory | bitcask | eleveldb | yessir]"}, {ring_size, $r, "ring-size", integer, "cluster ring size"}, + {repl, undefined, "repl", atom, "connect clusters [primary | bidirect]"}, {file, $F, "file", string, "use the specified file instead of ~/.riak_test.config"} ]. @@ -31,6 +32,14 @@ build_defaults() -> {backend, undefined}, {ring_size, undefined}]. +teardown_options() -> +%% Option Name, Short Code, Long Code, Argument Spec, Help Message +[ + {help, $h, "help", undefined, "Print this usage page"}, + {config, $c, "conf", string, "project configuration"}, + {file, $F, "file", string, "use the specified file instead of ~/.riak_test.config"} +]. + print_help(Cmd, CmdOpts) -> getopt:usage(CmdOpts, escript:script_name() ++ " " ++ Cmd), halt(0). @@ -80,19 +89,30 @@ main(Args) -> command(["build"|Args]) -> Opts = parse_args(Args, "build", build_options(), build_defaults()), NumNodes = proplists:get_value(nodes, Opts), + NumClusters = proplists:get_value(clusters, Opts), KVConfig = [{storage_backend, get_backend(Opts)}], CoreConfig = [{ring_creation_size, proplists:get_value(ring_size, Opts)}], ConfigOpts = [maybe_config(riak_kv, KVConfig), maybe_config(riak_core, CoreConfig)], Config = lists:flatten(ConfigOpts), - setup2(Opts), + setup_rt(Opts), io:format("Config: ~p~n", [Config]), - Nodes = rt:build_cluster(NumNodes, Config), - [Node1|_] = Nodes, - rpc:call(Node1, riak_core_console, member_status, [[]]), + Settings = [{NumNodes, Config} || _ <- lists:seq(1, NumClusters)], + Clusters = rt:build_clusters(Settings), + lists:foldl(fun(Nodes, N) -> + io:format("---~nCluster ~b: ~p~n", [N, Nodes]), + rpc:call(hd(Nodes), riak_core_console, member_status, [[]]), + N+1 + end, 1, Clusters), + Repl = proplists:get_value(repl, Opts), + (length(Clusters) > 1) andalso maybe_connect_repl(Repl, Clusters), + info("Finished building clusters"), + info(""), ok; -command(["teardown"|_Args]) -> - io:format("teardown~n"), +command(["teardown"|Args]) -> + Opts = parse_args(Args, "teardown", teardown_options(), []), + setup_rt(Opts), + rt:teardown(), ok; command(_) -> usage(). @@ -121,7 +141,7 @@ maybe_config(App, Config) -> [{App, MaybeConfig}] end. -setup2(Opts) -> +setup_rt(Opts) -> register(riak_test, self()), %% ibrowse @@ -155,3 +175,53 @@ setup2(Opts) -> rt:setup_harness(undefined, []), ok. 
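Putting the new flags together, a plausible end-to-end invocation (values are illustrative; the flags are those defined in build_options/0, and the repl modes are handled by the maybe_connect_repl/2 helpers added just below):

    ./utils/rt-cluster build -n 2 --clusters 2 --repl bidirect -b bitcask

This builds two 2-node clusters and, since more than one cluster exists, wires them together bidirectionally; ./utils/rt-cluster teardown stops everything again.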
+ +maybe_connect_repl(undefined, _) -> + ok; +maybe_connect_repl(primary, Clusters) -> + info("Connecting cluster1 (source) to other clusters (sink)"), + NamedClusters = name_clusters(Clusters), + [Primary|Others] = NamedClusters, + [connect_clusters(Primary, Other) || Other <- Others], + ok; +maybe_connect_repl(bidirect, Clusters) -> + info("Connecting all clusters bidirectionally"), + NamedClusters = name_clusters(Clusters), + [connect_clusters(A, B) || A <- NamedClusters, + B <- NamedClusters, + A =/= B], + ok; +maybe_connect_repl(Other, _) -> + info("Unknown --repl option: ~p~n", [Other]), + ok. + +connect_clusters({A, Source}, {B, Sink}) -> + NodeA = hd(Source), + NodeB = hd(Sink), + Leader = rpc:call(NodeA, riak_core_cluster_mgr, get_leader, []), + {ok, {IP, Port}} = rpc:call(NodeB, application, get_env, + [riak_core, cluster_mgr]), + info("connecting ~p to ~p at ~p:~p", [A, B, IP, Port]), + repl_util:connect_cluster(Leader, IP, Port), + ok = repl_util:wait_for_connection(Leader, B), + info("....connected"), + ok. + +name_clusters(Clusters) -> + info("Setting cluster names~n"), + {NamedClusters, _} = + lists:mapfoldl(fun(Nodes, N) -> + Name = "cluster" ++ integer_to_list(N), + repl_util:name_cluster(hd(Nodes), Name), + {{Name, Nodes}, N+1} + end, 1, Clusters), + [begin + rt:wait_until_ring_converged(Nodes), + ok = repl_util:wait_until_leader_converge(Nodes) + end || Nodes <- Clusters], + NamedClusters. + +info(Msg) -> + lager:log(info, self(), Msg). +info(Format, Args) -> + lager:log(info, self(), Format, Args). From 000b50fba0fcd96f3a73996dadbfee53c2a8a5c7 Mon Sep 17 00:00:00 2001 From: lordnull Date: Mon, 10 Feb 2014 10:36:08 -0600 Subject: [PATCH 085/139] Removed test for mutator capability. --- tests/rolling_capabilities.erl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/rolling_capabilities.erl b/tests/rolling_capabilities.erl index d65553bbb..1ddb6cd30 100644 --- a/tests/rolling_capabilities.erl +++ b/tests/rolling_capabilities.erl @@ -34,8 +34,7 @@ confirm() -> {riak_kv, mapred_2i_pipe, true}, {riak_kv, mapred_system, pipe}, {riak_kv, vnode_vclocks, true}, - {riak_kv, anti_entropy, enabled_v1}, - {riak_kv, mutators, true}], + {riak_kv, anti_entropy, enabled_v1}], ExpectedOld = case OldVsn of legacy -> [{riak_core, vnode_routing, proxy}, @@ -44,16 +43,14 @@ confirm() -> {riak_kv, listkeys_backpressure, true}, {riak_kv, mapred_2i_pipe, true}, {riak_kv, mapred_system, pipe}, - {riak_kv, vnode_vclocks, true}, - {riak_kv, mutators, false}]; + {riak_kv, vnode_vclocks, true]; previous -> [{riak_core, vnode_routing, proxy}, {riak_core, staged_joins, true}, {riak_kv, legacy_keylisting, false}, {riak_kv, listkeys_backpressure, true}, {riak_kv, mapred_2i_pipe, true}, {riak_kv, mapred_system, pipe}, - {riak_kv, vnode_vclocks, true}, - {riak_kv, mutators, false}]; + {riak_kv, vnode_vclocks, true}]; _ -> [] end, From 4479abd542c26717c86e32b3f9b36f5f2e5d6364 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 10 Feb 2014 12:26:44 -0500 Subject: [PATCH 086/139] removed full_objects test, it's been removed from 2.0 --- intercepts/riak_repl_console_intercepts.erl | 8 -------- tests/replication2_console_tests.erl | 5 ----- 2 files changed, 13 deletions(-) diff --git a/intercepts/riak_repl_console_intercepts.erl b/intercepts/riak_repl_console_intercepts.erl index 25547114b..736d7efc4 100644 --- a/intercepts/riak_repl_console_intercepts.erl +++ b/intercepts/riak_repl_console_intercepts.erl @@ -108,14 +108,6 @@ verify_modes(Val) -> _ -> ?FAIL end. 
-verify_full_objects(Val) -> - case Val of - [] -> ?PASS; - ["always"] -> ?PASS; - ["99"] -> ?PASS; - _ -> ?FAIL - end. - verify_add_block_provider_redirect(Val) -> case Val of ["a","b"] -> ?PASS; diff --git a/tests/replication2_console_tests.erl b/tests/replication2_console_tests.erl index 3791e7719..868096c1b 100644 --- a/tests/replication2_console_tests.erl +++ b/tests/replication2_console_tests.erl @@ -66,7 +66,6 @@ confirm() -> {{del_nat_map,1}, verify_del_nat_map}, {{show_nat_map,1}, verify_show_nat_map}, {{realtime,1}, verify_realtime}, - {{full_objects,1}, verify_full_objects}, {{add_block_provider_redirect,1}, verify_add_block_provider_redirect}, {{show_block_provider_redirect,1}, verify_show_block_provider_redirect}, {{delete_block_provider_redirect,1}, verify_delete_block_provider_redirect}, @@ -110,10 +109,6 @@ confirm() -> check_cmd(Node, "nat-map add 1.2.3.4:4321 192.168.1.1"), check_cmd(Node, "nat-map del 1.2.3.4:4321 192.168.1.1"), - check_cmd(Node, "full_objects"), - check_cmd(Node, "full_objects always"), - check_cmd(Node, "full_objects 99"), - check_cmd(Node, "add-block-provider-redirect a b"), check_cmd(Node, "show-block-provider-redirect a"), check_cmd(Node, "delete-block-provider-redirect a"), From 246ed41a3ca4d9929049444cbfc4dc38a08adccc Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Mon, 10 Feb 2014 14:03:00 -0500 Subject: [PATCH 087/139] Fix typo --- tests/rolling_capabilities.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/rolling_capabilities.erl b/tests/rolling_capabilities.erl index 1ddb6cd30..5b05788b4 100644 --- a/tests/rolling_capabilities.erl +++ b/tests/rolling_capabilities.erl @@ -43,7 +43,7 @@ confirm() -> {riak_kv, listkeys_backpressure, true}, {riak_kv, mapred_2i_pipe, true}, {riak_kv, mapred_system, pipe}, - {riak_kv, vnode_vclocks, true]; + {riak_kv, vnode_vclocks, true}]; previous -> [{riak_core, vnode_routing, proxy}, {riak_core, staged_joins, true}, {riak_kv, legacy_keylisting, false}, From d0acc34568ca2eab62298d41e131a5e2a50e738c Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Mon, 10 Feb 2014 16:24:09 -0500 Subject: [PATCH 088/139] Don't use an elliptic curve cipher in the test, some OpenSSL flavors don't support it This should fix the test on CentOS 6 using openssl 1.0.1e-fips. 
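For reference, these cipher strings are OpenSSL-style colon-separated suite lists, so the change below amounts to swapping the first suite in the list for one that needs no elliptic-curve support; schematically (a sketch, not code from the patch):

    %% string:tokens("AES256-SHA256:RC4-SHA", ":") -> ["AES256-SHA256","RC4-SHA"]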
--- tests/pb_cipher_suites.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pb_cipher_suites.erl b/tests/pb_cipher_suites.erl index d62391488..fc08df4cd 100644 --- a/tests/pb_cipher_suites.erl +++ b/tests/pb_cipher_suites.erl @@ -62,12 +62,12 @@ confirm() -> ok = rpc:call(Node, riak_core_console, add_source, [["user", "127.0.0.1/32", "password"]]), - CipherList = "ECDHE-RSA-AES128-SHA256:RC4-SHA", + CipherList = "AES256-SHA256:RC4-SHA", %% set a simple default cipher list, one good one a and one shitty one rpc:call(Node, riak_core_security, set_ciphers, [CipherList]), - [ECDHE, RC4] = ParsedCiphers = [begin + [AES, RC4] = ParsedCiphers = [begin %% this includes the pseudo random function, which apparently %% we don't want {A, B, C, _D} = ssl_cipher:suite_definition(E), @@ -78,7 +78,7 @@ confirm() -> lager:info("Check that the server's preference for ECDHE-RSA-AES128-SHA256" "is honored"), - ?assertEqual({ok, {'tlsv1.2', ECDHE}}, + ?assertEqual({ok, {'tlsv1.2', AES}}, pb_connection_info(Port, [{credentials, "user", "password"}, {cacertfile, From 8dbaccf86de726cc3127f6c0080d68ccbb13242e Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 10 Feb 2014 23:43:29 -0800 Subject: [PATCH 089/139] Better defend against plan/commit race --- src/rt.erl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/rt.erl b/src/rt.erl index 56bd292de..c24b7a3b3 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -382,6 +382,7 @@ plan_and_commit(Node) -> {error, ring_not_ready} -> lager:info("plan: ring not ready"), timer:sleep(100), + maybe_wait_for_changes(Node), plan_and_commit(Node); {ok, _, _} -> do_commit(Node) @@ -392,15 +393,32 @@ do_commit(Node) -> {error, plan_changed} -> lager:info("commit: plan changed"), timer:sleep(100), + maybe_wait_for_changes(Node), plan_and_commit(Node); {error, ring_not_ready} -> lager:info("commit: ring not ready"), timer:sleep(100), + maybe_wait_for_changes(Node), do_commit(Node); + {error,nothing_planned} -> + %% Assume plan actually committed somehow + ok; ok -> ok end. +maybe_wait_for_changes(Node) -> + Ring = get_ring(Node), + Changes = riak_core_ring:pending_changes(Ring), + Joining = riak_core_ring:members(Ring, [joining]), + if Changes =:= [] -> + ok; + Joining =/= [] -> + ok; + true -> + ok = wait_until_no_pending_changes([Node]) + end. + %% @doc Have the `Node' leave the cluster leave(Node) -> R = rpc:call(Node, riak_core, leave, []), From 37232a3a61aa63bade3e56864705b1e5763f95aa Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Tue, 11 Feb 2014 00:08:16 -0800 Subject: [PATCH 090/139] Fix eleveldb typo in rt-cluster --- utils/rt-cluster | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 utils/rt-cluster diff --git a/utils/rt-cluster b/utils/rt-cluster old mode 100644 new mode 100755 index 67146725a..d0b26588c --- a/utils/rt-cluster +++ b/utils/rt-cluster @@ -19,7 +19,7 @@ build_options() -> {version, $v, "version", atom, "Riak version (eg. 
current, previous legacy)"}, {nodes, $n, "num", integer, "number of nodes/cluster (required)"}, {clusters, undefined, "clusters", integer, "number of clusters"}, - {backend, $b, "backend", atom, "backend [memory | bitcask | eleveldb | yessir]"}, + {backend, $b, "backend", atom, "backend [memory | bitcask | leveldb | yessir]"}, {ring_size, $r, "ring-size", integer, "cluster ring size"}, {repl, undefined, "repl", atom, "connect clusters [primary | bidirect]"}, {file, $F, "file", string, "use the specified file instead of ~/.riak_test.config"} From a1062111fcbcc4de9374b2d6bb34c830b606900a Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Wed, 12 Feb 2014 19:19:08 -0500 Subject: [PATCH 091/139] Bring pb_security test up to date with riak-core security changes --- tests/pb_security.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pb_security.erl b/tests/pb_security.erl index a8b83f9c8..3a4878220 100644 --- a/tests/pb_security.erl +++ b/tests/pb_security.erl @@ -704,11 +704,11 @@ group_test(Node, Port, CertDir) -> lager:info("Creating a new group"), %% create a new group - ok = rpc:call(Node, riak_core_console, add_user, [["group"]]), + ok = rpc:call(Node, riak_core_console, add_group, [["group"]]), lager:info("Creating a user in the group"), %% create a new user in that group - ok = rpc:call(Node, riak_core_console, add_user, [["myuser", "roles=group"]]), + ok = rpc:call(Node, riak_core_console, add_user, [["myuser", "groups=group"]]), lager:info("Granting get/put/delete on a bucket type to the group, checking those requests work"), From ee27329edb94a42b0271d6f381a6f7a8b8efa97a Mon Sep 17 00:00:00 2001 From: Jared Morrow Date: Thu, 13 Feb 2014 11:16:27 -0700 Subject: [PATCH 092/139] Bump lager dep to 2.0.3 --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index e324a8b75..d8b64739b 100644 --- a/rebar.config +++ b/rebar.config @@ -8,7 +8,7 @@ warnings_as_errors, {parse_transform, lager_transform}]}. 
{deps, [ - {lager, ".*", {git, "git://github.com/basho/lager", {tag, "2.0.0"}}}, + {lager, ".*", {git, "git://github.com/basho/lager", {tag, "2.0.3"}}}, {getopt, ".*", {git, "git://github.com/jcomellas/getopt", {tag, "v0.4"}}}, {meck, ".*", {git, "git://github.com/eproxus/meck"}}, {mapred_verify, ".*", {git, "git://github.com/basho/mapred_verify", {branch, "master"}}}, From 8b54af245ea47aa605b4c5a4cd11c3b6bc74177c Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Tue, 28 Jan 2014 09:18:06 -0700 Subject: [PATCH 093/139] Refactor repl_bucket_types test to also test fullsync --- tests/repl_bucket_types.erl | 187 +++++++++++++++++++++++++++--------- 1 file changed, 142 insertions(+), 45 deletions(-) diff --git a/tests/repl_bucket_types.erl b/tests/repl_bucket_types.erl index 7e6e1fdaa..73f44fda2 100644 --- a/tests/repl_bucket_types.erl +++ b/tests/repl_bucket_types.erl @@ -19,26 +19,35 @@ confirm() -> rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), %% Start up two >1.3.2 clusters and connect them, - {LeaderA, LeaderB, ANodes, BNodes} = make_clusters(), - - rpc:multicall([LeaderA, LeaderB], app_helper, set_env, [riak_repl, true]), + {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes = make_clusters(), PBA = get_pb_pid(LeaderA), PBB = get_pb_pid(LeaderB), - DefinedType = <<"working_type">>, - rt:create_and_activate_bucket_type(LeaderA, DefinedType, [{n_val, 3}]), + {DefinedType, UndefType} = Types = {<<"working_type">>, <<"undefined_type">>}, + + rt:create_and_activate_bucket_type(LeaderA, DefinedType, [{n_val, 3}, {allow_mult, false}]), rt:wait_until_bucket_type_status(DefinedType, active, ANodes), - rt:create_and_activate_bucket_type(LeaderB, DefinedType, [{n_val, 3}]), + rt:create_and_activate_bucket_type(LeaderB, DefinedType, [{n_val, 3}, {allow_mult, false}]), rt:wait_until_bucket_type_status(DefinedType, active, BNodes), - UndefType = <<"undefined_type">>, - rt:create_and_activate_bucket_type(LeaderA, UndefType, [{n_val, 3}]), + rt:create_and_activate_bucket_type(LeaderA, UndefType, [{n_val, 3}, {allow_mult, false}]), rt:wait_until_bucket_type_status(UndefType, active, ANodes), connect_clusters(LeaderA, LeaderB), + realtime_test(ClusterNodes, Types, PBA, PBB), + fullsync_test(ClusterNodes, Types, PBA, PBB), + + riakc_pb_socket:stop(PBA), + riakc_pb_socket:stop(PBB), + pass. 
+ +realtime_test(ClusterNodes, BucketTypes, PBA, PBB) -> + {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes, + {DefinedType, UndefType} = BucketTypes, + %% Enable RT replication from cluster "A" to cluster "B" lager:info("Enabling realtime between ~p and ~p", [LeaderA, LeaderB]), enable_rt(LeaderA, ANodes), @@ -49,7 +58,7 @@ confirm() -> DefaultObj = riakc_obj:new(Bucket, Key, Bin), lager:info("doing untyped put on A, bucket:~p", [Bucket]), riakc_pb_socket:put(PBA, DefaultObj, [{w,3}]), - + UntypedWait = make_pbget_fun(PBB, Bucket, Key, Bin), ?assertEqual(ok, rt:wait_until(UntypedWait)), @@ -72,31 +81,106 @@ confirm() -> lager:info("waiting for undefined type pb get on B, should get error <<\"no_type\">>"), - case riakc_pb_socket:get(PBB, UndefBucketTyped, UndefKeyTyped) of - {error, E} -> - lager:info("Got error:~p from get on cluster B", [E]), - ?assertEqual(<<"no_type">>, E), - false; - {ok, Res} -> - lager:info("Got result from get on B"), - ?assertEqual(<<"data data data">>, riakc_obj:get_value(Res)), - false - end, + ErrorResult = riakc_pb_socket:get(PBB, UndefBucketTyped, UndefKeyTyped), + ?assertEqual({error, <<"no_type">>}, ErrorResult), DefaultProps = get_current_bucket_props(BNodes, DefinedType), - update_props(DefinedType, [{n_val, 1}], DefaultProps, LeaderB, BNodes), + ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, DefaultProps)), + + UpdatedProps = update_props(DefinedType, + [{n_val, 1}], + LeaderB, + BNodes), + ?assertEqual({n_val, 1}, lists:keyfind(n_val, 1, UpdatedProps)), UnequalObjBin = <<"unequal props val">>, UnequalPropsObj = riakc_obj:new(BucketTyped, KeyTyped, UnequalObjBin), - lager:info("doing put of typed bucket on A where bucket properties (n_val 3 versus n_val 1) are not equal on B"), + lager:info("doing put of typed bucket on A where bucket properties " + "(n_val 3 versus n_val 1) are not equal on B"), riakc_pb_socket:put(PBA, UnequalPropsObj, [{w,3}]), lager:info("checking to ensure the bucket contents were not updated."), ensure_bucket_not_updated(PBB, BucketTyped, KeyTyped, Bin), + disable_rt(LeaderA, ANodes), - riakc_pb_socket:stop(PBA), - riakc_pb_socket:stop(PBB), - pass. + UpdatedProps2 = update_props(DefinedType, + [{n_val, 3}], + LeaderB, + BNodes), + ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, UpdatedProps2)). 
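The n_val assertions above rely on lists:keyfind/3 returning the whole matching tuple, or false when nothing matches, which is why they compare against {n_val, N} rather than a bare number:

    %% lists:keyfind(n_val, 1, [{allow_mult, false}, {n_val, 3}]) -> {n_val, 3}
    %% lists:keyfind(n_val, 1, [])                                -> false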
+
+fullsync_test(ClusterNodes, BucketTypes, PBA, PBB) ->
+    {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes,
+    {DefinedType, UndefType} = BucketTypes,
+
+    %% Enable fullsync replication from cluster "A" to cluster "B"
+    lager:info("Enabling fullsync between ~p and ~p", [LeaderA, LeaderB]),
+    enable_fullsync(LeaderA, ANodes),
+
+    Bin = <<"data data data">>,
+    Key = <<"key">>,
+    Bucket = <<"fullsync-kicked">>,
+    DefaultObj = riakc_obj:new(Bucket, Key, Bin),
+    lager:info("doing untyped put on A, bucket:~p", [Bucket]),
+    riakc_pb_socket:put(PBA, DefaultObj, [{w,3}]),
+
+    BucketTyped = {DefinedType, <<"fullsync-typekicked">>},
+    KeyTyped = <<"keytyped">>,
+    ObjTyped = riakc_obj:new(BucketTyped, KeyTyped, Bin),
+
+    lager:info("doing typed put on A, bucket:~p", [BucketTyped]),
+    riakc_pb_socket:put(PBA, ObjTyped, [{w,3}]),
+
+    UndefBucketTyped = {UndefType, <<"fullsync-badtype">>},
+    UndefKeyTyped = <<"badkeytyped">>,
+    UndefObjTyped = riakc_obj:new(UndefBucketTyped, UndefKeyTyped, Bin),
+
+    lager:info("doing typed put on A where type is not "
+               "defined on B, bucket:~p",
+               [UndefBucketTyped]),
+
+    riakc_pb_socket:put(PBA, UndefObjTyped, [{w,3}]),
+
+    {SyncTime1, _} = timer:tc(repl_util,
+                              start_and_wait_until_fullsync_complete,
+                              [LeaderA]),
+
+    lager:info("Fullsync completed in ~p seconds", [SyncTime1/1000/1000]),
+
+    ReadResult1 = riakc_pb_socket:get(PBB, Bucket, Key),
+    ReadResult2 = riakc_pb_socket:get(PBB, BucketTyped, KeyTyped),
+    ReadResult3 = riakc_pb_socket:get(PBB, UndefBucketTyped, UndefKeyTyped),
+
+    ?assertMatch({ok, _}, ReadResult1),
+    ?assertMatch({ok, _}, ReadResult2),
+    ?assertMatch({error, _}, ReadResult3),
+
+    {ok, ReadObj1} = ReadResult1,
+    {ok, ReadObj2} = ReadResult2,
+
+    ?assertEqual(Bin, riakc_obj:get_value(ReadObj1)),
+    ?assertEqual(Bin, riakc_obj:get_value(ReadObj2)),
+    ?assertEqual({error, <<"no_type">>}, ReadResult3),
+
+    DefaultProps = get_current_bucket_props(BNodes, DefinedType),
+    ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, DefaultProps)),
+
+    UpdatedProps = update_props(DefinedType, [{n_val, 1}], LeaderB, BNodes),
+    ?assertEqual({n_val, 1}, lists:keyfind(n_val, 1, UpdatedProps)),
+
+    UnequalObjBin = <<"unequal props val">>,
+    UnequalPropsObj = riakc_obj:new(BucketTyped, KeyTyped, UnequalObjBin),
+    lager:info("doing put of typed bucket on A where bucket properties (n_val 3 versus n_val 1) are not equal on B"),
+    riakc_pb_socket:put(PBA, UnequalPropsObj, [{w,3}]),
+
+    {SyncTime2, _} = timer:tc(repl_util,
+                              start_and_wait_until_fullsync_complete,
+                              [LeaderA]),
+
+    lager:info("Fullsync completed in ~p seconds", [SyncTime2/1000/1000]),
+
+    lager:info("checking to ensure the bucket contents were not updated."),
+    ensure_bucket_not_updated(PBB, BucketTyped, KeyTyped, Bin).
 
 %% @doc Turn on Realtime replication on the cluster lead by LeaderA.
 %% The clusters must already have been named and connected.
 enable_rt(LeaderA, ANodes) ->
@@ -107,6 +191,20 @@ enable_rt(LeaderA, ANodes) ->
     repl_util:start_realtime(LeaderA, "B"),
     rt:wait_until_ring_converged(ANodes).
 
+%% @doc Turn off Realtime replication on the cluster lead by LeaderA.
+disable_rt(LeaderA, ANodes) ->
+    repl_util:disable_realtime(LeaderA, "B"),
+    rt:wait_until_ring_converged(ANodes),
+
+    repl_util:stop_realtime(LeaderA, "B"),
+    rt:wait_until_ring_converged(ANodes).
+
+%% @doc Turn on fullsync replication on the cluster lead by LeaderA.
+%% The clusters must already have been named and connected.
+enable_fullsync(LeaderA, ANodes) ->
+    repl_util:enable_fullsync(LeaderA, "B"),
+    rt:wait_until_ring_converged(ANodes).
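timer:tc/3, used above to time each fullsync, returns {ElapsedMicroseconds, Result}, hence the two divisions by 1000 when logging seconds; schematically:

    %% {Micros, _Result} = timer:tc(repl_util,
    %%                              start_and_wait_until_fullsync_complete,
    %%                              [LeaderA]),
    %% Seconds = Micros / 1000 / 1000.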
+ %% @doc Connect two clusters for replication using their respective leader nodes. connect_clusters(LeaderA, LeaderB) -> {ok, {_IP, Port}} = rpc:call(LeaderB, application, get_env, @@ -128,9 +226,12 @@ make_clusters() -> %% turn off fullsync {fullsync_on_connect, false}, {fullsync_interval, disabled}, + {max_fssource_cluster, 20}, + {max_fssource_node, 20}, + {max_fssink_node, 20}, {rtq_max_bytes, 1048576} ]} - ], + ], Nodes = rt:deploy_nodes(NumNodes, Conf), {ANodes, BNodes} = lists:split(ClusterASize, Nodes), @@ -160,21 +261,18 @@ make_clusters() -> repl_util:name_cluster(BFirst, "B"), rt:wait_until_ring_converged(BNodes), - %% Connect for replication - %% connect_clusters(AFirst, BFirst), - {AFirst, BFirst, ANodes, BNodes}. make_pbget_fun(Pid, Bucket, Key, Bin) -> fun() -> - case riakc_pb_socket:get(Pid, Bucket, Key) of - {ok, O6} -> - ?assertEqual(Bin, riakc_obj:get_value(O6)), - true; - _ -> - false - end - end. + case riakc_pb_socket:get(Pid, Bucket, Key) of + {ok, O6} -> + ?assertEqual(Bin, riakc_obj:get_value(O6)), + true; + _ -> + false + end + end. ensure_bucket_not_updated(Pid, Bucket, Key, Bin) -> Results = [ value_unchanged(Pid, Bucket, Key, Bin) || _I <- lists:seq(1, ?ENSURE_READ_ITERATIONS)], @@ -196,22 +294,21 @@ get_pb_pid(Leader) -> {ok, Pid} = riakc_pb_socket:start_link(IP, PortA, []), Pid. -update_props(Type, Updates, DefaultProps, Node, Nodes) -> - lager:info("Setting bucket properties ~p for bucket type ~p on node ~p", +update_props(Type, Updates, Node, Nodes) -> + lager:info("Setting bucket properties ~p for bucket type ~p on node ~p", [Updates, Type, Node]), - rpc:call(Node, riak_core_bucket_type, update, [Type, Updates]), + rpc:call(Node, riak_core_bucket_type, update, [Type, Updates]), rt:wait_until_ring_converged(Nodes), - UpdatedProps = get_current_bucket_props(Nodes, Type), - ?assertNotEqual(DefaultProps, UpdatedProps). - -%% fetch bucket properties via rpc + get_current_bucket_props(Nodes, Type). + +%% fetch bucket properties via rpc %% from a node or a list of nodes (one node is chosen at random) -get_current_bucket_props(Nodes, Type) when is_list(Nodes) -> +get_current_bucket_props(Nodes, Type) when is_list(Nodes) -> Node = lists:nth(length(Nodes), Nodes), get_current_bucket_props(Node, Type); get_current_bucket_props(Node, Type) when is_atom(Node) -> - rpc:call(Node, + rpc:call(Node, riak_core_bucket_type, get, [Type]). 
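get_current_bucket_props/2 above is a thin rpc wrapper; the underlying call and the rough shape of its result look like this (the property values are illustrative, and the exact return of riak_core_bucket_type:get/1, a property list or undefined for an unknown type, is an assumption here):

    %% rpc:call(Node, riak_core_bucket_type, get, [<<"working_type">>])
    %%     -> [{n_val,3}, {allow_mult,false}, ...]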
From ad9af013f4e63eadd31370c9f262198ec3cf1156 Mon Sep 17 00:00:00 2001
From: Jon Anderson
Date: Tue, 4 Feb 2014 10:42:10 -0500
Subject: [PATCH 094/139] added backward compatibility test for fullsync

---
 tests/repl_bucket_types.erl | 156 +++++++++++++++++++++++++++++++++++-
 tests/repl_util.erl | 10 +++
 2 files changed, 165 insertions(+), 1 deletion(-)

diff --git a/tests/repl_bucket_types.erl b/tests/repl_bucket_types.erl
index 73f44fda2..328daaf14 100644
--- a/tests/repl_bucket_types.erl
+++ b/tests/repl_bucket_types.erl
@@ -42,6 +42,24 @@ confirm() ->

     riakc_pb_socket:stop(PBA),
     riakc_pb_socket:stop(PBB),
+
+    rt:clean_cluster(ANodes ++ BNodes),
+
+    {MixedLeaderA, MixedLeaderB, MixedANodes, _MixedBNodes} = MixedClusterNodes = make_mixed_clusters(),
+
+    rt:create_and_activate_bucket_type(MixedLeaderA, DefinedType, [{n_val, 3}, {allow_mult, false}]),
+    rt:wait_until_bucket_type_status(DefinedType, active, MixedANodes),
+
+    DPBA = get_pb_pid(MixedLeaderA),
+    DPBB = get_pb_pid(MixedLeaderB),
+
+    connect_clusters(LeaderA, LeaderB),
+
+    realtime_mixed_version_test(MixedClusterNodes, Types, DPBA, DPBB),
+    fullsync_mixed_version_test(MixedClusterNodes, Types, DPBA, DPBB),
+
+    riakc_pb_socket:stop(DPBA),
+    riakc_pb_socket:stop(DPBB),
     pass.

 realtime_test(ClusterNodes, BucketTypes, PBA, PBB) ->
@@ -107,7 +125,38 @@ realtime_test(ClusterNodes, BucketTypes, PBA, PBB) ->
                                  [{n_val, 3}],
                                  LeaderB,
                                  BNodes),
-    ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, UpdatedProps2)).
+    ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, UpdatedProps2)),
+    ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, UpdatedProps2)),
+    disable_rt(LeaderA, ANodes).
+
+realtime_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) ->
+    {LeaderA, LeaderB, ANodes, _BNodes} = ClusterNodes,
+    {DefinedType, _UndefType} = BucketTypes,
+
+    %% Enable RT replication from cluster "A" to cluster "B"
+    lager:info("Enabling realtime between ~p and ~p", [LeaderA, LeaderB]),
+    enable_rt(LeaderA, ANodes),
+
+    Bin = <<"data data data">>,
+    Key = <<"key">>,
+    Bucket = <<"kicked">>,
+    DefaultObj = riakc_obj:new(Bucket, Key, Bin),
+    lager:info("doing untyped put on A, bucket:~p", [Bucket]),
+    riakc_pb_socket:put(PBA, DefaultObj, [{w,3}]),
+
+    %% make sure we rt replicate a "default" type bucket
+    UntypedWait = make_pbget_fun(PBB, Bucket, Key, Bin),
+    ?assertEqual(ok, rt:wait_until(UntypedWait)),
+
+    DowngradedBucketTyped = {DefinedType, <<"typekicked">>},
+    KeyTyped = <<"keytyped">>,
+    ObjTyped = riakc_obj:new(DowngradedBucketTyped, KeyTyped, Bin),
+
+    lager:info("doing typed put on A with downgraded B, bucket:~p", [DowngradedBucketTyped]),
+    riakc_pb_socket:put(PBA, ObjTyped, [{w,3}]),
+
+    lager:info("checking to ensure the bucket contents were not sent to previous version B."),
+    ensure_bucket_not_sent(PBB, DowngradedBucketTyped, KeyTyped).

 fullsync_test(ClusterNodes, BucketTypes, PBA, PBB) ->
@@ -182,6 +231,40 @@ fullsync_test(ClusterNodes, BucketTypes, PBA, PBB) ->

     lager:info("checking to ensure the bucket contents were not updated."),
     ensure_bucket_not_updated(PBB, BucketTyped, KeyTyped, Bin).
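
Note: proving that an object was *not* replicated is inherently racy, so ensure_bucket_not_sent/3 (defined further down in this patch) polls the sink repeatedly instead of reading once. A sketch of that pattern, with iteration count and spacing as illustrative values:

    %% Illustrative negative check: the key must stay absent across several
    %% spaced reads, not merely at one instant.
    ensure_absent(Pid, Bucket, Key) ->
        lists:foreach(fun(_) ->
                              ?assertMatch({error, notfound},
                                           riakc_pb_socket:get(Pid, Bucket, Key)),
                              timer:sleep(500)
                      end, lists:seq(1, 5)).
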
+fullsync_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> + {LeaderA, LeaderB, ANodes, _BNodes} = ClusterNodes, + {DefinedType, _UndefType} = BucketTypes, + + %% Enable RT replication from cluster "A" to cluster "B" + lager:info("Enabling fullsync between ~p and ~p", [LeaderA, LeaderB]), + enable_fullsync(LeaderA, ANodes), + + Bin = <<"data data data">>, + Key = <<"key">>, + Bucket = <<"fullsync-kicked">>, + DefaultObj = riakc_obj:new(Bucket, Key, Bin), + lager:info("doing untyped put on A, bucket:~p", [Bucket]), + riakc_pb_socket:put(PBA, DefaultObj, [{w,3}]), + + BucketTyped = {DefinedType, <<"fullsync-typekicked">>}, + KeyTyped = <<"keytyped">>, + ObjTyped = riakc_obj:new(BucketTyped, KeyTyped, Bin), + + lager:info("doing typed put on A, bucket:~p", [BucketTyped]), + riakc_pb_socket:put(PBA, ObjTyped, [{w,3}]), + + {SyncTime1, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [LeaderA]), + + lager:info("Fullsync completed in ~p seconds", [SyncTime1/1000/1000]), + + ReadResult1 = riakc_pb_socket:get(PBB, Bucket, Key), + ReadResult2 = riakc_pb_socket:get(PBB, BucketTyped, KeyTyped), + + ?assertMatch({ok, _}, ReadResult1), + ?assertMatch({error, _}, ReadResult2). + %% @doc Turn on Realtime replication on the cluster lead by LeaderA. %% The clusters must already have been named and connected. enable_rt(LeaderA, ANodes) -> @@ -205,6 +288,13 @@ enable_fullsync(LeaderA, ANodes) -> repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes). + +%% @doc Turn ff fullsync replication on the cluster lead by LeaderA. +%% The clusters must already have been named and connected. +%disable_fullsync(LeaderA, ANodes) -> +% repl_util:disable_fullsync(LeaderA, "B"), +% rt:wait_until_ring_converged(ANodes). + %% @doc Connect two clusters for replication using their respective leader nodes. connect_clusters(LeaderA, LeaderB) -> {ok, {_IP, Port}} = rpc:call(LeaderB, application, get_env, @@ -263,6 +353,56 @@ make_clusters() -> {AFirst, BFirst, ANodes, BNodes}. +%% @doc Create two clusters of 1 node each and connect them for replication: +%% Cluster "A" -> cluster "B" +make_mixed_clusters() -> + NumNodes = rt_config:get(num_nodes, 2), + ClusterASize = rt_config:get(cluster_a_size, 1), + + lager:info("Deploy ~p mixed version nodes", [NumNodes]), + Conf = [ + {riak_repl, + [ + %% turn off fullsync + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + {max_fssource_cluster, 20}, + {max_fssource_node, 20}, + {max_fssink_node, 20}, + {rtq_max_bytes, 1048576} + ]} + ], + MixedConf = [{current, Conf}, {previous, Conf}], + Nodes = rt:deploy_nodes(MixedConf), + {ANodes, BNodes} = lists:split(ClusterASize, Nodes), + lager:info("ANodes: ~p", [ANodes]), + lager:info("BNodes: ~p", [BNodes]), + + lager:info("Build cluster A"), + repl_util:make_cluster(ANodes), + + lager:info("Build cluster B"), + repl_util:make_cluster(BNodes), + + %% get the leader for the first cluster + lager:info("waiting for leader to converge on cluster A"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), + AFirst = hd(ANodes), + + %% get the leader for the second cluster + lager:info("waiting for leader to converge on cluster B"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), + BFirst = hd(BNodes), + + %% Name the clusters + repl_util:name_cluster(AFirst, "A"), + rt:wait_until_ring_converged(ANodes), + + repl_util:name_cluster(BFirst, "B"), + rt:wait_until_ring_converged(BNodes), + + {AFirst, BFirst, ANodes, BNodes}. 
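
Note: rt:deploy_nodes/1 appears to start one node per {Version, Conf} entry, so the two-entry MixedConf above yields a one-node current cluster and a one-node previous cluster. Under that assumption, a larger mixed deployment would look like:

    %% Assumption: one node per {Version, Conf} entry; Conf is the same
    %% riak_repl proplist used above.
    MixedConf = [{current, Conf}, {current, Conf},      %% cluster A: current
                 {previous, Conf}, {previous, Conf}],   %% cluster B: previous
    Nodes = rt:deploy_nodes(MixedConf),
    {ANodes, BNodes} = lists:split(2, Nodes),
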
+ make_pbget_fun(Pid, Bucket, Key, Bin) -> fun() -> case riakc_pb_socket:get(Pid, Bucket, Key) of @@ -274,6 +414,10 @@ make_pbget_fun(Pid, Bucket, Key, Bin) -> end end. +ensure_bucket_not_sent(Pid, Bucket, Key) -> + Results = [ assert_bucket_not_found(Pid, Bucket, Key) || _I <- lists:seq(1, ?ENSURE_READ_ITERATIONS)], + ?assertEqual(false, lists:member(false, Results)). + ensure_bucket_not_updated(Pid, Bucket, Key, Bin) -> Results = [ value_unchanged(Pid, Bucket, Key, Bin) || _I <- lists:seq(1, ?ENSURE_READ_ITERATIONS)], ?assertEqual(false, lists:member(false, Results)). @@ -289,6 +433,16 @@ value_unchanged(Pid, Bucket, Key, Bin) -> end, timer:sleep(?ENSURE_READ_INTERVAL). + +assert_bucket_not_found(Pid, Bucket, Key) -> + case riakc_pb_socket:get(Pid, Bucket, Key) of + {error, notfound} -> + true; + {ok, Res} -> + lager:error("Found bucket:~p and key:~p on sink when we should not have", [Res, Key]), + false + end. + get_pb_pid(Leader) -> {ok, [{IP, PortA}] } = rpc:call(Leader, application, get_env, [riak_api, pb]), {ok, Pid} = riakc_pb_socket:start_link(IP, PortA, []), diff --git a/tests/repl_util.erl b/tests/repl_util.erl index 9c0f4f16b..e06e21827 100644 --- a/tests/repl_util.erl +++ b/tests/repl_util.erl @@ -24,6 +24,8 @@ enable_fullsync/2, start_realtime/2, stop_realtime/2, + stop_fullsync/2, + disable_fullsync/2, do_write/5, get_fs_coord_status_item/3, num_partitions/1, @@ -249,6 +251,14 @@ enable_fullsync(Node, Cluster) -> Res = rpc:call(Node, riak_repl_console, fullsync, [["enable", Cluster]]), ?assertEqual(ok, Res). +disable_fullsync(Node, Cluster) -> + Res = rpc:call(Node, riak_repl_console, fullsync, [["disable", Cluster]]), + ?assertEqual(ok, Res). + +stop_fullsync(Node, Cluster) -> + Res = rpc:call(Node, riak_repl_console, fullsync, [["stop", Cluster]]), + ?assertEqual(ok, Res). + start_realtime(Node, Cluster) -> Res = rpc:call(Node, riak_repl_console, realtime, [["start", Cluster]]), ?assertEqual(ok, Res). From 024d725d3c884422b0691c402717cb844be449d1 Mon Sep 17 00:00:00 2001 From: Jon Anderson Date: Tue, 11 Feb 2014 08:45:18 -0500 Subject: [PATCH 095/139] removed commented function enable_fullsync, and comments --- tests/repl_bucket_types.erl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/repl_bucket_types.erl b/tests/repl_bucket_types.erl index 328daaf14..55c0d55fc 100644 --- a/tests/repl_bucket_types.erl +++ b/tests/repl_bucket_types.erl @@ -288,13 +288,6 @@ enable_fullsync(LeaderA, ANodes) -> repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes). - -%% @doc Turn ff fullsync replication on the cluster lead by LeaderA. -%% The clusters must already have been named and connected. -%disable_fullsync(LeaderA, ANodes) -> -% repl_util:disable_fullsync(LeaderA, "B"), -% rt:wait_until_ring_converged(ANodes). - %% @doc Connect two clusters for replication using their respective leader nodes. connect_clusters(LeaderA, LeaderB) -> {ok, {_IP, Port}} = rpc:call(LeaderB, application, get_env, From 27605419673ce1558f9463bd42cc423c1d06c25d Mon Sep 17 00:00:00 2001 From: Jon Anderson Date: Wed, 12 Feb 2014 07:33:52 -0500 Subject: [PATCH 096/139] various fixes; changed mixed-version fullsync check --- tests/repl_bucket_types.erl | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/repl_bucket_types.erl b/tests/repl_bucket_types.erl index 55c0d55fc..37afb20de 100644 --- a/tests/repl_bucket_types.erl +++ b/tests/repl_bucket_types.erl @@ -6,6 +6,7 @@ -module(repl_bucket_types). -behaviour(riak_test). 
-export([confirm/0]). +-compile(export_all). -include_lib("eunit/include/eunit.hrl"). -define(ENSURE_READ_ITERATIONS, 5). @@ -18,13 +19,13 @@ confirm() -> rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), - %% Start up two >1.3.2 clusters and connect them, + {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes = make_clusters(), PBA = get_pb_pid(LeaderA), PBB = get_pb_pid(LeaderB), - {DefinedType, UndefType} = Types = {<<"working_type">>, <<"undefined_type">>}, + {DefinedType, UndefType} = Types = {<<"working_type">>, <<"undefined_type">>}, rt:create_and_activate_bucket_type(LeaderA, DefinedType, [{n_val, 3}, {allow_mult, false}]), rt:wait_until_bucket_type_status(DefinedType, active, ANodes), @@ -53,7 +54,7 @@ confirm() -> DPBA = get_pb_pid(MixedLeaderA), DPBB = get_pb_pid(MixedLeaderB), - connect_clusters(LeaderA, LeaderB), + connect_clusters(MixedLeaderA, MixedLeaderB), realtime_mixed_version_test(MixedClusterNodes, Types, DPBA, DPBB), fullsync_mixed_version_test(MixedClusterNodes, Types, DPBA, DPBB), @@ -239,16 +240,17 @@ fullsync_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> lager:info("Enabling fullsync between ~p and ~p", [LeaderA, LeaderB]), enable_fullsync(LeaderA, ANodes), - Bin = <<"data data data">>, + Bin = <<"good data">>, Key = <<"key">>, Bucket = <<"fullsync-kicked">>, DefaultObj = riakc_obj:new(Bucket, Key, Bin), lager:info("doing untyped put on A, bucket:~p", [Bucket]), riakc_pb_socket:put(PBA, DefaultObj, [{w,3}]), - BucketTyped = {DefinedType, <<"fullsync-typekicked">>}, + BucketTyped = {DefinedType, Bucket}, KeyTyped = <<"keytyped">>, - ObjTyped = riakc_obj:new(BucketTyped, KeyTyped, Bin), + BadBin = <<"overwritten">>, + ObjTyped = riakc_obj:new(BucketTyped, KeyTyped, BadBin), lager:info("doing typed put on A, bucket:~p", [BucketTyped]), riakc_pb_socket:put(PBA, ObjTyped, [{w,3}]), @@ -260,10 +262,15 @@ fullsync_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> lager:info("Fullsync completed in ~p seconds", [SyncTime1/1000/1000]), ReadResult1 = riakc_pb_socket:get(PBB, Bucket, Key), - ReadResult2 = riakc_pb_socket:get(PBB, BucketTyped, KeyTyped), - ?assertMatch({ok, _}, ReadResult1), - ?assertMatch({error, _}, ReadResult2). + + %% The following check appears to be the best we can do. If a 2.x source + %% sends a typed bucket to the 1.x sink, the put will occur. + %% The bucket is undefined to the interfaces, but some parts of it + %% appear to be written to the sink node. Since we cannot check using pb, + %% here we at least make sure we haven't written over an existing default + %% bucket with data from a typed bucket of the same name. + ensure_bucket_not_updated(PBB, Bucket, Key, Bin). %% @doc Turn on Realtime replication on the cluster lead by LeaderA. %% The clusters must already have been named and connected. From bfe561eb00c957396acfaea104641954cb73da6f Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 13 Feb 2014 15:51:49 -0500 Subject: [PATCH 097/139] use pb client for list_keys to ensure interface success --- tests/overload.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/overload.erl b/tests/overload.erl index 4bb2e9044..de990bf90 100644 --- a/tests/overload.erl +++ b/tests/overload.erl @@ -149,7 +149,7 @@ test_cover_queries_overload(Nodes) -> lager:info("Checking Coverage queries for overload"), Res = list_keys(Node1), - ?assertEqual({error, mailbox_overload}, Res), + ?assertEqual({error, <<"mailbox_overload">>}, Res), lager:info("list_keys correctly handled overload"), Res2 = list_buckets(Node1), @@ -163,8 +163,8 @@ test_cover_queries_overload(Nodes) -> wait_for_all_vnode_queues_empty(Node2). list_keys(Node) -> - {ok, C} = riak:client_connect(Node), - riak_client:list_keys(?BUCKET, 30000, C). + Pid = rt:pbc(Node), + riakc_pb_socket:list_keys(Pid, ?BUCKET, 30000). list_buckets(Node) -> {ok, C} = riak:client_connect(Node), @@ -195,7 +195,7 @@ read_until_success(Node) -> read_until_success(C, Count) -> case C:get(?BUCKET, ?KEY) of - {error, overload} -> + {error, mailbox_overload} -> read_until_success(C, Count+1); _ -> Count From cf9888ffa31cb2b3783be5fcdf8a1683bc21f330 Mon Sep 17 00:00:00 2001 From: "Engel A. Sanchez" Date: Mon, 10 Feb 2014 16:57:39 -0500 Subject: [PATCH 098/139] Add AAE test This test verifies that AAE repairs replicas of values without passive read repairs. This includes missing replicas and replicas with divergent values. It will also repair entire KV partitions lost, and if configured for trees to rebuild, it will recover from AAE data loss and corruption. This version differs from the original 1.4 test only in the handling of siblings. It does get before put for modifications and merges values by choosing the longest one, as modifications in this test append bits. --- src/rt.erl | 24 ++++ tests/verify_aae.erl | 289 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 313 insertions(+) create mode 100644 tests/verify_aae.erl diff --git a/src/rt.erl b/src/rt.erl index 2910670b4..47f465667 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -57,6 +57,7 @@ expect_in_log/2, get_deps/0, get_node_logs/0, + get_replica/5, get_ring/1, get_version/0, heal/1, @@ -965,6 +966,29 @@ systest_read(Node, Start, End, Bucket, R, CommonValBin) end, lists:foldl(F, [], lists:seq(Start, End)). +% @doc Reads a single replica of a value. This issues a get command directly +% to the vnode handling the Nth primary partition of the object's preflist. +get_replica(Node, Bucket, Key, I, N) -> + BKey = {Bucket, Key}, + Chash = rpc:call(Node, riak_core_util, chash_key, [BKey]), + Pl = rpc:call(Node, riak_core_apl, get_primary_apl, [Chash, N, riak_kv]), + {{Partition, PNode}, primary} = lists:nth(I, Pl), + Ref = Reqid = make_ref(), + Sender = {raw, Ref, self()}, + rpc:call(PNode, riak_kv_vnode, get, + [{Partition, PNode}, BKey, Ref, Sender]), + receive + {Ref, {r, Result, _, Reqid}} -> + Result; + {Ref, Reply} -> + Reply + after + 60000 -> + lager:error("Replica ~p get for ~p/~p timed out", + [I, Bucket, Key]), + ?assert(false) + end. 
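
Note: get_replica/5 bypasses the normal quorum read path and asks a single vnode directly, which is what lets the AAE test below inspect each replica in isolation. Illustrative use (bucket and key assumed):

    %% Read the 2nd of 3 primary replicas straight from its vnode; the reply
    %% is {ok, RObj} or {error, notfound} from that one partition.
    case rt:get_replica(Node, <<"bucket">>, <<"key">>, 2, 3) of
        {ok, RObj}        -> riak_object:get_value(RObj);
        {error, notfound} -> missing
    end
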
+ %%%=================================================================== %%% PBC & HTTPC Functions %%%=================================================================== diff --git a/tests/verify_aae.erl b/tests/verify_aae.erl new file mode 100644 index 000000000..1a08a4f4f --- /dev/null +++ b/tests/verify_aae.erl @@ -0,0 +1,289 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% @doc Verification of Active Anti Entropy. +%% The basic guarantee of AAE is this: Even without the read repairs that will +%% happen when data is accessed, inconsistencies between the replicas of a +%% KV object will be repaired eventually. The test tries hard not to +%% explicitly check for when the AAE trees are built or when exchanges are run +%% in an effort to remain decoupled from the implementation. Instead, it +%% simply configures AAE to build/rebuild and run exchanges between the data +%% partitions. It then performs direct vnode reads on all replicas and verify +%% they eventually match. +%% +%% Data recovery after the following scenarios is tested: +%% +%% - Data for a partition completely disappears. +%% - Less than N replicas are written +%% - Less than N replicas are updated +%% +%% Also, a sanity check is done to make sure AAE repairs go away eventually +%% if there is no activity. That was an actual early AAE bug. + +-module(verify_aae). +-export([confirm/0, verify_aae/1, test_single_partition_loss/3]). +-include_lib("eunit/include/eunit.hrl"). + +% I would hope this would come from the testing framework some day +% to use the test in small and large scenarios. +-define(DEFAULT_RING_SIZE, 8). +-define(CFG, + [{riak_kv, + [ + % Speedy AAE configuration + {anti_entropy, {on, []}}, + {anti_entropy_build_limit, {100, 1000}}, + {anti_entropy_concurrency, 100}, + {anti_entropy_expire, 24 * 60 * 60 * 1000}, % Not for now! + {anti_entropy_tick, 500} + ]}, + {riak_core, + [ + {ring_creation_size, ?DEFAULT_RING_SIZE} + ]}] + ). +-define(NUM_NODES, 1). +-define(NUM_KEYS, 1000). +-define(BUCKET, <<"test_bucket">>). +-define(N_VAL, 3). + +confirm() -> + Nodes = rt:build_cluster(?NUM_NODES, ?CFG), + verify_aae(Nodes), + pass. 
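
Note: the ?CFG block trades realism for speed. Our reading of the knobs: anti_entropy_build_limit is {MaxBuilds, PerMilliseconds}, so {100, 1000} allows up to 100 tree builds per second, and the 500 ms tick schedules exchanges almost continuously, while the 24 h expiry keeps rebuilds out of the picture until the test enables them. The same settings can be tightened on a live cluster via the mechanism start_tree_rebuilds/1 uses below:

    %% Illustrative runtime tweak: make AAE exchanges tick every 500 ms.
    rpc:multicall(Nodes, application, set_env,
                  [riak_kv, anti_entropy_tick, 500]).
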
+
+verify_aae(Nodes) ->
+    Node1 = hd(Nodes),
+    % First, recovery without tree rebuilds
+
+    % Test recovery from too few replicas written
+    KV1 = test_data(1, 1000),
+    test_less_than_n_writes(Node1, KV1),
+
+    % Test recovery when replicas are different
+    KV2 = [{K, <<V/binary, "a">>} || {K, V} <- KV1],
+    test_less_than_n_mods(Node1, KV2),
+
+    lager:info("Run similar tests now with tree rebuilds enabled"),
+    start_tree_rebuilds(Nodes),
+
+    % Test recovery from too few replicas written
+    KV3 = test_data(1001, 2000),
+    test_less_than_n_writes(Node1, KV3),
+
+    % Test recovery when replicas are different
+    KV4 = [{K, <<V/binary, "a">>} || {K, V} <- KV3],
+    test_less_than_n_mods(Node1, KV4),
+
+    lager:info("Writing 1000 objects"),
+    KV5 = test_data(2001, 3000),
+    write_data(Node1, KV5),
+
+    % Test recovery from single partition loss.
+    {PNuke, NNuke} = choose_partition_to_nuke(Node1, ?BUCKET, KV5),
+    test_single_partition_loss(NNuke, PNuke, KV5),
+
+    % Test recovery from losing AAE data
+    test_aae_partition_loss(NNuke, PNuke, KV5),
+
+    % Test recovery from losing both AAE and KV data
+    test_total_partition_loss(NNuke, PNuke, KV5),
+
+    % Make sure AAE repairs die down.
+    wait_until_no_aae_repairs(Nodes),
+
+    lager:info("Finished verifying AAE magic"),
+    ok.
+
+start_tree_rebuilds(Nodes) ->
+    rpc:multicall(Nodes, application, set_env, [riak_kv, anti_entropy_expire,
+                                                15 * 1000]).
+
+acc_preflists(Pl, PlCounts) ->
+    lists:foldl(fun(Idx, D) ->
+                        dict:update(Idx, fun(V) -> V+1 end, 0, D)
+                end, PlCounts, Pl).
+
+choose_partition_to_nuke(Node, Bucket, KVs) ->
+    Preflists = [get_preflist(Node, Bucket, K) || {K, _} <- KVs],
+    PCounts = lists:foldl(fun acc_preflists/2, dict:new(), Preflists),
+    CPs = [{C, P} || {P, C} <- dict:to_list(PCounts)],
+    {_, MaxP} = lists:max(CPs),
+    MaxP.
+
+get_preflist(Node, B, K) ->
+    DocIdx = rpc:call(Node, riak_core_util, chash_key, [{B, K}]),
+    PlTagged = rpc:call(Node, riak_core_apl, get_primary_apl, [DocIdx, ?N_VAL, riak_kv]),
+    Pl = [E || {E, primary} <- PlTagged],
+    Pl.
+
+to_key(N) ->
+    list_to_binary(io_lib:format("K~4..0B", [N])).
+
+test_data(Start, End) ->
+    Keys = [to_key(N) || N <- lists:seq(Start, End)],
+    [{K, K} || K <- Keys].
+
+write_data(Node, KVs) ->
+    write_data(Node, KVs, []).
+
+write_data(Node, KVs, Opts) ->
+    PB = rt:pbc(Node),
+    [begin
+         O = riakc_obj:new(?BUCKET, K, V),
+         riakc_pb_socket:put(PB, O, Opts)
+     end || {K, V} <- KVs],
+    riakc_pb_socket:stop(PB),
+    ok.
+
+% @doc Verifies that the data is eventually restored to the expected set.
+verify_data(Node, KeyValues) ->
+    lager:info("Verify all replicas are eventually correct"),
+    PB = rt:pbc(Node),
+    CheckFun =
+        fun() ->
+                Matches = [verify_replicas(Node, ?BUCKET, K, V, ?N_VAL)
+                           || {K, V} <- KeyValues],
+                CountTrues = fun(true, G) -> G+1; (false, G) -> G end,
+                NumGood = lists:foldl(CountTrues, 0, Matches),
+                Num = length(KeyValues),
+                case Num == NumGood of
+                    true -> true;
+                    false ->
+                        lager:info("Data not yet correct: ~p mismatches",
+                                   [Num-NumGood]),
+                        false
+                end
+        end,
+    MaxTime = rt_config:get(rt_max_wait_time),
+    Delay = 2000, % every two seconds until max time.
+    Retry = MaxTime div Delay,
+    case rt:wait_until(CheckFun, Retry, Delay) of
+        ok ->
+            lager:info("Data is now correct. Yay!");
+        fail ->
+            lager:error("AAE failed to fix data"),
+            ?assertEqual(aae_fixed_data, aae_failed_to_fix_data)
+    end,
+    riakc_pb_socket:stop(PB),
+    ok.
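
Note: the "~4..0B" directive in to_key/1 is a base-10 integer with field width 4 and pad character $0, so the generated keys sort lexicographically in numeric order:

    1> list_to_binary(io_lib:format("K~4..0B", [7])).
    <<"K0007">>
    2> list_to_binary(io_lib:format("K~4..0B", [2014])).
    <<"K2014">>
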
+ +verify_replicas(Node, B, K, V, N) -> + Replies = [rt:get_replica(Node, B, K, I, N) + || I <- lists:seq(1,N)], + Vals = [riak_object:get_value(O) || {ok, O} <- Replies], + Expected = [V || _ <- lists:seq(1, N)], + Vals == Expected. + +test_single_partition_loss(Node, Partition, KeyValues) + when is_atom(Node), is_integer(Partition) -> + lager:info("Verify recovery from the loss of partition ~p", [Partition]), + wipe_out_partition(Node, Partition), + restart_vnode(Node, riak_kv, Partition), + verify_data(Node, KeyValues). + +test_aae_partition_loss(Node, Partition, KeyValues) + when is_atom(Node), is_integer(Partition) -> + lager:info("Verify recovery from the loss of AAE data for partition ~p", [Partition]), + wipe_out_aae_data(Node, Partition), + restart_vnode(Node, riak_kv, Partition), + verify_data(Node, KeyValues). + +test_total_partition_loss(Node, Partition, KeyValues) + when is_atom(Node), is_integer(Partition) -> + lager:info("Verify recovery from the loss of AAE and KV data for partition ~p", [Partition]), + wipe_out_partition(Node, Partition), + wipe_out_aae_data(Node, Partition), + restart_vnode(Node, riak_kv, Partition), + verify_data(Node, KeyValues). + +test_less_than_n_writes(Node, KeyValues) -> + lager:info("Writing ~p objects with N=1, AAE should ensure they end up" + " with ~p replicas", [length(KeyValues), ?N_VAL]), + write_data(Node, KeyValues, [{n_val, 1}]), + verify_data(Node, KeyValues). + +test_less_than_n_mods(Node, KeyValues) -> + lager:info("Modifying only one replica for ~p objects. AAE should ensure" + " all replicas end up modified", [length(KeyValues)]), + write_data(Node, KeyValues, [{n_val, 1}]), + verify_data(Node, KeyValues). + +wipe_out_partition(Node, Partition) -> + lager:info("Wiping out partition ~p in node ~p", [Partition, Node]), + rt:clean_data_dir(Node, dir_for_partition(Partition)), + ok. + +wipe_out_aae_data(Node, Partition) -> + lager:info("Wiping out AAE data for partition ~p in node ~p", [Partition, Node]), + rt:clean_data_dir(Node, "anti_entropy/"++integer_to_list(Partition)), + ok. + +base_dir_for_backend(undefined) -> + base_dir_for_backend(bitcask); +base_dir_for_backend(bitcask) -> + "bitcask"; +base_dir_for_backend(eleveldb) -> + "leveldb". + +restart_vnode(Node, Service, Partition) -> + VNodeName = list_to_atom(atom_to_list(Service) ++ "_vnode"), + {ok, Pid} = rpc:call(Node, riak_core_vnode_manager, get_vnode_pid, + [Partition, VNodeName]), + ?assert(rpc:call(Node, erlang, exit, [Pid, kill_for_test])), + Mon = monitor(process, Pid), + receive + {'DOWN', Mon, _, _, _} -> + ok + after + rt_config:get(rt_max_wait_time) -> + lager:error("VNode for partition ~p did not die, the bastard", + [Partition]), + ?assertEqual(vnode_killed, {failed_to_kill_vnode, Partition}) + end, + {ok, NewPid} = rpc:call(Node, riak_core_vnode_manager, get_vnode_pid, + [Partition, VNodeName]), + lager:info("Vnode for partition ~p restarted as ~p", + [Partition, NewPid]). + +dir_for_partition(Partition) -> + TestMetaData = riak_test_runner:metadata(), + KVBackend = proplists:get_value(backend, TestMetaData), + BaseDir = base_dir_for_backend(KVBackend), + filename:join([BaseDir, integer_to_list(Partition)]). + +% @doc True if the AAE stats report zero data repairs for last exchange +% across the board. +wait_until_no_aae_repairs(Nodes) -> + lager:info("Verifying AAE repairs go away without activity"), + rt:wait_until(fun() -> no_aae_repairs(Nodes) end). 
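
Note: restart_vnode/3 above kills the vnode pid before creating the monitor. That ordering is safe: monitoring an already-dead process delivers a 'DOWN' message immediately, with reason noproc, as a shell session shows:

    1> Pid = spawn(fun() -> ok end), timer:sleep(100), Ref = monitor(process, Pid).
    2> receive {'DOWN', Ref, process, _, Reason} -> Reason after 1000 -> timeout end.
    noproc
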
+ +no_aae_repairs(Nodes) when is_list(Nodes) -> + MaxCount = max_aae_repairs(Nodes), + lager:info("Max AAE repair count across the board is ~p", [MaxCount]), + MaxCount == 0. + +max_aae_repairs(Nodes) when is_list(Nodes) -> + MaxCount = lists:max([max_aae_repairs(Node) || Node <- Nodes]), + MaxCount; +max_aae_repairs(Node) when is_atom(Node) -> + Info = rpc:call(Node, riak_kv_entropy_info, compute_exchange_info, []), + LastCounts = [Last || {_, _, _, {Last, _, _, _}} <- Info], + MaxCount = lists:max(LastCounts), + MaxCount. From 3330116b61b3ed4a8b9d0a7ce81a7a7270ca7f81 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 13 Feb 2014 18:08:04 -0500 Subject: [PATCH 099/139] Ensure nodes are started and pingable. --- src/rtssh.erl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/rtssh.erl b/src/rtssh.erl index 5534929f7..2fa39c2e8 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -118,6 +118,19 @@ deploy_nodes(NodeConfig, Hosts) -> rt:pmap(fun start/1, Nodes), + %% Ensure nodes started + [ok = rt:wait_until_pingable(N) || N <- Nodes], + + %% %% Enable debug logging + %% [rpc:call(N, lager, set_loglevel, [lager_console_backend, debug]) || N <- Nodes], + + %% We have to make sure that riak_core_ring_manager is running before we can go on. + [ok = rt:wait_until_registered(N, riak_core_ring_manager) || N <- Nodes], + + %% Ensure nodes are singleton clusters + [ok = rt:check_singleton_node(N) || {N, Version} <- VersionMap, + Version /= "0.14.2"], + Nodes. deploy_clusters(ClusterConfigs) -> From 66dc9908f67600f4c969b27cc269d5509f31bb88 Mon Sep 17 00:00:00 2001 From: "Engel A. Sanchez" Date: Fri, 14 Feb 2014 17:30:11 -0500 Subject: [PATCH 100/139] Missed the get b4 put and siblings merge intended --- tests/verify_aae.erl | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/verify_aae.erl b/tests/verify_aae.erl index 1a08a4f4f..1d5fa2dda 100644 --- a/tests/verify_aae.erl +++ b/tests/verify_aae.erl @@ -146,8 +146,14 @@ write_data(Node, KVs) -> write_data(Node, KVs, Opts) -> PB = rt:pbc(Node), [begin - O = riakc_obj:new(?BUCKET, K, V), - riakc_pb_socket:put(PB, O, Opts) + O = + case riakc_pb_socket:get(PB, ?BUCKET, K) of + {ok, Prev} -> + riakc_obj:update_value(Prev, V); + _ -> + riakc_obj:new(?BUCKET, K, V) + end, + ?assertMatch(ok, riakc_pb_socket:put(PB, O, Opts)) end || {K, V} <- KVs], riakc_pb_socket:stop(PB), ok. @@ -184,10 +190,19 @@ verify_data(Node, KeyValues) -> riakc_pb_socket:stop(PB), ok. +merge_values(O) -> + Vals = riak_object:get_values(O), + lists:foldl(fun(NV, V) -> + case size(NV) > size(V) of + true -> NV; + _ -> V + end + end, <<>>, Vals). + verify_replicas(Node, B, K, V, N) -> Replies = [rt:get_replica(Node, B, K, I, N) || I <- lists:seq(1,N)], - Vals = [riak_object:get_value(O) || {ok, O} <- Replies], + Vals = [merge_values(O) || {ok, O} <- Replies], Expected = [V || _ <- lists:seq(1, N)], Vals == Expected. From e4b9cb7634d37e78d7ef3fe38dd6cf00088303bc Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 19 Feb 2014 14:15:51 -0500 Subject: [PATCH 101/139] Use a real message to overload vnodes during coverage tests in overload.erl --- tests/overload.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/overload.erl b/tests/overload.erl index de990bf90..7dcd5ef1d 100644 --- a/tests/overload.erl +++ b/tests/overload.erl @@ -236,7 +236,9 @@ remote_suspend_and_overload() -> end). overload(Pid) -> - [Pid ! hola || _ <- lists:seq(1, ?NUM_REQUESTS)]. 
+ %% The actual message doesn't matter. This one just has the least side + % effects. + [Pid ! {set_concurrency_limit, some_lock, 1} || _ <- lists:seq(1, ?NUM_REQUESTS)]. suspend_vnode(Node, Idx) -> Pid = rpc:call(Node, ?MODULE, remote_suspend_vnode, [Idx], infinity), From e009ad9141ef7842a840ea35a3356217231e10a7 Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Wed, 19 Feb 2014 14:09:36 -0700 Subject: [PATCH 102/139] Refactor repl_bucket_types test Refactor the repl_bucket_types test to elminate a race condition in cluster setup and reduce code duplication. --- tests/repl_bucket_types.erl | 206 ++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 113 deletions(-) diff --git a/tests/repl_bucket_types.erl b/tests/repl_bucket_types.erl index 37afb20de..42d6d22ca 100644 --- a/tests/repl_bucket_types.erl +++ b/tests/repl_bucket_types.erl @@ -15,55 +15,67 @@ %% Replication Bucket Types test %% -%% @doc riak_test entry point -confirm() -> - +setup(Type) -> rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]), - {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes = make_clusters(), + {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes = make_clusters(Type), - PBA = get_pb_pid(LeaderA), - PBB = get_pb_pid(LeaderB), + PBA = rt:pbc(LeaderA), + PBB = rt:pbc(LeaderB), {DefinedType, UndefType} = Types = {<<"working_type">>, <<"undefined_type">>}, - rt:create_and_activate_bucket_type(LeaderA, DefinedType, [{n_val, 3}, {allow_mult, false}]), + rt:create_and_activate_bucket_type(LeaderA, + DefinedType, + [{n_val, 3}, {allow_mult, false}]), rt:wait_until_bucket_type_status(DefinedType, active, ANodes), - rt:create_and_activate_bucket_type(LeaderB, DefinedType, [{n_val, 3}, {allow_mult, false}]), - rt:wait_until_bucket_type_status(DefinedType, active, BNodes), + case Type of + current -> + rt:create_and_activate_bucket_type(LeaderB, + DefinedType, + [{n_val, 3}, {allow_mult, false}]), + rt:wait_until_bucket_type_status(DefinedType, active, BNodes); + mixed -> + ok + end, - rt:create_and_activate_bucket_type(LeaderA, UndefType, [{n_val, 3}, {allow_mult, false}]), + rt:create_and_activate_bucket_type(LeaderA, + UndefType, + [{n_val, 3}, {allow_mult, false}]), rt:wait_until_bucket_type_status(UndefType, active, ANodes), connect_clusters(LeaderA, LeaderB), + {ClusterNodes, Types, PBA, PBB}. - realtime_test(ClusterNodes, Types, PBA, PBB), - fullsync_test(ClusterNodes, Types, PBA, PBB), - +cleanup({ClusterNodes, _Types, PBA, PBB}, CleanCluster) -> riakc_pb_socket:stop(PBA), riakc_pb_socket:stop(PBB), + {_, _, ANodes, BNodes} = ClusterNodes, + case CleanCluster of + true -> + rt:clean_cluster(ANodes ++ BNodes); + false -> + ok + end. 
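
Note: the setup/1 and cleanup/2 pair factors cluster bring-up out of the individual tests, but offers no crash safety; if an assertion throws mid-test, cleanup/2 never runs. A hedged sketch (not part of this patch) of wrapping the pair so teardown always happens:

    run_with_cleanup(Type, TestFuns) ->
        SetupData = setup(Type),
        try
            [Test(SetupData) || Test <- TestFuns]
        after
            cleanup(SetupData, true)
        end.
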
- rt:clean_cluster(ANodes ++ BNodes), - - {MixedLeaderA, MixedLeaderB, MixedANodes, _MixedBNodes} = MixedClusterNodes = make_mixed_clusters(), - - rt:create_and_activate_bucket_type(MixedLeaderA, DefinedType, [{n_val, 3}, {allow_mult, false}]), - rt:wait_until_bucket_type_status(DefinedType, active, MixedANodes), - - DPBA = get_pb_pid(MixedLeaderA), - DPBB = get_pb_pid(MixedLeaderB), - - connect_clusters(MixedLeaderA, MixedLeaderB), - - realtime_mixed_version_test(MixedClusterNodes, Types, DPBA, DPBB), - fullsync_mixed_version_test(MixedClusterNodes, Types, DPBA, DPBB), - - riakc_pb_socket:stop(DPBA), - riakc_pb_socket:stop(DPBB), +%% @doc riak_test entry point +confirm() -> + %% Test two clusters of the current version + SetupData = setup(current), + realtime_test(SetupData), + fullsync_test(SetupData), + cleanup(SetupData, true), + + %% Test a cluster of the current version replicating to a cluster + %% of the previous version + MixedSetupData = setup(mixed), + realtime_mixed_version_test(MixedSetupData), + fullsync_mixed_version_test(MixedSetupData), + cleanup(MixedSetupData, false), pass. -realtime_test(ClusterNodes, BucketTypes, PBA, PBB) -> +realtime_test({ClusterNodes, BucketTypes, PBA, PBB}) -> {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes, {DefinedType, UndefType} = BucketTypes, @@ -130,7 +142,7 @@ realtime_test(ClusterNodes, BucketTypes, PBA, PBB) -> ?assertEqual({n_val, 3}, lists:keyfind(n_val, 1, UpdatedProps2)), disable_rt(LeaderA, ANodes). -realtime_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> +realtime_mixed_version_test({ClusterNodes, BucketTypes, PBA, PBB}) -> {LeaderA, LeaderB, ANodes, _BNodes} = ClusterNodes, {DefinedType, _UndefType} = BucketTypes, @@ -159,7 +171,7 @@ realtime_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> lager:info("checking to ensure the bucket contents were not sent to previous version B."), ensure_bucket_not_sent(PBB, DowngradedBucketTyped, KeyTyped). -fullsync_test(ClusterNodes, BucketTypes, PBA, PBB) -> +fullsync_test({ClusterNodes, BucketTypes, PBA, PBB}) -> {LeaderA, LeaderB, ANodes, BNodes} = ClusterNodes, {DefinedType, UndefType} = BucketTypes, @@ -232,7 +244,7 @@ fullsync_test(ClusterNodes, BucketTypes, PBA, PBB) -> lager:info("checking to ensure the bucket contents were not updated."), ensure_bucket_not_updated(PBB, BucketTyped, KeyTyped, Bin). -fullsync_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> +fullsync_mixed_version_test({ClusterNodes, BucketTypes, PBA, PBB}) -> {LeaderA, LeaderB, ANodes, _BNodes} = ClusterNodes, {DefinedType, _UndefType} = BucketTypes, @@ -263,15 +275,15 @@ fullsync_mixed_version_test(ClusterNodes, BucketTypes, PBA, PBB) -> ReadResult1 = riakc_pb_socket:get(PBB, Bucket, Key), ?assertMatch({ok, _}, ReadResult1), - + %% The following check appears to be the best we can do. If a 2.x source %% sends a typed bucket to the 1.x sink, the put will occur. - %% The bucket is undefined to the interfaces, but some parts of it + %% The bucket is undefined to the interfaces, but some parts of it %% appear to be written to the sink node. Since we cannot check using pb, - %% here we at least make sure we haven't written over an existing default + %% here we at least make sure we haven't written over an existing default %% bucket with data from a typed bucket of the same name. ensure_bucket_not_updated(PBB, Bucket, Key, Bin). - + %% @doc Turn on Realtime replication on the cluster lead by LeaderA. %% The clusters must already have been named and connected. 
enable_rt(LeaderA, ANodes) -> @@ -295,35 +307,48 @@ enable_fullsync(LeaderA, ANodes) -> repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes). +%% @doc Connect two clusters using a given name. +connect_cluster(Source, Port, Name) -> + lager:info("Connecting ~p to ~p for cluster ~p.", + [Source, Port, Name]), + repl_util:connect_cluster(Source, "127.0.0.1", Port), + ?assertEqual(ok, repl_util:wait_for_connection(Source, Name)). + %% @doc Connect two clusters for replication using their respective leader nodes. connect_clusters(LeaderA, LeaderB) -> - {ok, {_IP, Port}} = rpc:call(LeaderB, application, get_env, - [riak_core, cluster_mgr]), + Port = repl_util:get_port(LeaderB), lager:info("connect cluster A:~p to B on port ~p", [LeaderA, Port]), repl_util:connect_cluster(LeaderA, "127.0.0.1", Port), ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")). +cluster_conf() -> + [ + {riak_repl, + [ + %% turn off fullsync + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + {max_fssource_cluster, 20}, + {max_fssource_node, 20}, + {max_fssink_node, 20}, + {rtq_max_bytes, 1048576} + ]} + ]. + +deploy_nodes(NumNodes, current) -> + rt:deploy_nodes(NumNodes, cluster_conf()); +deploy_nodes(_, mixed) -> + Conf = cluster_conf(), + rt:deploy_nodes([{current, Conf}, {previous, Conf}]). + %% @doc Create two clusters of 1 node each and connect them for replication: %% Cluster "A" -> cluster "B" -make_clusters() -> +make_clusters(Type) -> NumNodes = rt_config:get(num_nodes, 2), ClusterASize = rt_config:get(cluster_a_size, 1), lager:info("Deploy ~p nodes", [NumNodes]), - Conf = [ - {riak_repl, - [ - %% turn off fullsync - {fullsync_on_connect, false}, - {fullsync_interval, disabled}, - {max_fssource_cluster, 20}, - {max_fssource_node, 20}, - {max_fssink_node, 20}, - {rtq_max_bytes, 1048576} - ]} - ], - - Nodes = rt:deploy_nodes(NumNodes, Conf), + Nodes = deploy_nodes(NumNodes, Type), {ANodes, BNodes} = lists:split(ClusterASize, Nodes), lager:info("ANodes: ~p", [ANodes]), lager:info("BNodes: ~p", [BNodes]), @@ -334,74 +359,34 @@ make_clusters() -> lager:info("Build cluster B"), repl_util:make_cluster(BNodes), - %% get the leader for the first cluster - lager:info("waiting for leader to converge on cluster A"), - ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), AFirst = hd(ANodes), - - %% get the leader for the second cluster - lager:info("waiting for leader to converge on cluster B"), - ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), BFirst = hd(BNodes), %% Name the clusters repl_util:name_cluster(AFirst, "A"), - rt:wait_until_ring_converged(ANodes), - repl_util:name_cluster(BFirst, "B"), - rt:wait_until_ring_converged(BNodes), - - {AFirst, BFirst, ANodes, BNodes}. 
- -%% @doc Create two clusters of 1 node each and connect them for replication: -%% Cluster "A" -> cluster "B" -make_mixed_clusters() -> - NumNodes = rt_config:get(num_nodes, 2), - ClusterASize = rt_config:get(cluster_a_size, 1), - lager:info("Deploy ~p mixed version nodes", [NumNodes]), - Conf = [ - {riak_repl, - [ - %% turn off fullsync - {fullsync_on_connect, false}, - {fullsync_interval, disabled}, - {max_fssource_cluster, 20}, - {max_fssource_node, 20}, - {max_fssink_node, 20}, - {rtq_max_bytes, 1048576} - ]} - ], - MixedConf = [{current, Conf}, {previous, Conf}], - Nodes = rt:deploy_nodes(MixedConf), - {ANodes, BNodes} = lists:split(ClusterASize, Nodes), - lager:info("ANodes: ~p", [ANodes]), - lager:info("BNodes: ~p", [BNodes]), - - lager:info("Build cluster A"), - repl_util:make_cluster(ANodes), + lager:info("Waiting for convergence."), + rt:wait_until_ring_converged(ANodes), + rt:wait_until_ring_converged(BNodes), - lager:info("Build cluster B"), - repl_util:make_cluster(BNodes), + lager:info("Waiting for transfers to complete."), + rt:wait_until_transfers_complete(ANodes), + rt:wait_until_transfers_complete(BNodes), %% get the leader for the first cluster lager:info("waiting for leader to converge on cluster A"), ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), - AFirst = hd(ANodes), %% get the leader for the second cluster lager:info("waiting for leader to converge on cluster B"), ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), - BFirst = hd(BNodes), - %% Name the clusters - repl_util:name_cluster(AFirst, "A"), - rt:wait_until_ring_converged(ANodes), - - repl_util:name_cluster(BFirst, "B"), - rt:wait_until_ring_converged(BNodes), + ALeader = repl_util:get_leader(hd(ANodes)), + BLeader = repl_util:get_leader(hd(BNodes)), - {AFirst, BFirst, ANodes, BNodes}. + lager:info("ALeader: ~p BLeader: ~p", [ALeader, BLeader]), + {ALeader, BLeader, ANodes, BNodes}. make_pbget_fun(Pid, Bucket, Key, Bin) -> fun() -> @@ -437,17 +422,12 @@ value_unchanged(Pid, Bucket, Key, Bin) -> assert_bucket_not_found(Pid, Bucket, Key) -> case riakc_pb_socket:get(Pid, Bucket, Key) of {error, notfound} -> - true; - {ok, Res} -> + true; + {ok, Res} -> lager:error("Found bucket:~p and key:~p on sink when we should not have", [Res, Key]), - false + false end. -get_pb_pid(Leader) -> - {ok, [{IP, PortA}] } = rpc:call(Leader, application, get_env, [riak_api, pb]), - {ok, Pid} = riakc_pb_socket:start_link(IP, PortA, []), - Pid. - update_props(Type, Updates, Node, Nodes) -> lager:info("Setting bucket properties ~p for bucket type ~p on node ~p", [Updates, Type, Node]), From ecc5dfb25c035d05d1df90a2583001254e5b3a37 Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Wed, 19 Feb 2014 14:10:32 -0700 Subject: [PATCH 103/139] Fix problem with repl_util:wait_until_leader_converge function The wait_until_leader_converge function could incorrectly return success if all of the results from the get_leader rpc calls were either undefined or all returned a badrpc tuple. In either case the particular result ends up as the sole unique value in a list and the success condition is verifying that the list is of length 1 regardless of the value of the member of the list. Change the function to filter the list of results for values that indicate failure prior to the success condition checking. 
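
The shell makes the old failure mode concrete: with every rpc call returning undefined, the unique-value count is still 1, so the broken check declared convergence anyway.

    1> length(lists:usort([undefined, undefined, undefined])).
    1
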
--- tests/repl_util.erl | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tests/repl_util.erl b/tests/repl_util.erl index e06e21827..7b37bfce3 100644 --- a/tests/repl_util.erl +++ b/tests/repl_util.erl @@ -106,18 +106,27 @@ wait_until_new_leader(Node, OldLeader) -> wait_until_leader_converge([Node|_] = Nodes) -> rt:wait_until(Node, fun(_) -> - length(lists:usort([begin - case rpc:call(N, riak_core_cluster_mgr, get_leader, []) of - undefined -> - false; - L -> - %lager:info("Leader for ~p is ~p", - %[N,L]), - L - end - end || N <- Nodes])) == 1 + LeaderResults = + [rpc:call(N, riak_core_cluster_mgr, get_leader, []) || + N <- Nodes], + UniqueLeaders = lists:usort( + lists:filter(leader_result_filter_fun(), + LeaderResults)), + length(UniqueLeaders) == 1 end). +leader_result_filter_fun() -> + fun(L) -> + case L of + undefined -> + false; + {badrpc, _} -> + false; + _ -> + true + end + end. + wait_until_connection(Node) -> rt:wait_until(Node, fun(_) -> From 8e6b043d8425a4b8fcd1b36a7c13d02cb0a19f5e Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 19 Feb 2014 21:47:44 +0000 Subject: [PATCH 104/139] Add functions to directly manipulate advanced.config. Even if most of the system is running under cuttlefish, replication may not be, and may require configuration placed directly in the advanced.config file. Add a method to the rtssh harness to provide direct manipulation of that file. --- src/rt.erl | 10 ++++++++++ src/rtssh.erl | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/rt.erl b/src/rt.erl index c24b7a3b3..9f4879d8a 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -94,6 +94,7 @@ set_backend/1, set_backend/2, set_conf/2, + set_advanced_conf/2, setup_harness/2, setup_log_capture/1, slow_upgrade/3, @@ -193,6 +194,15 @@ set_conf(Node, NameValuePairs) -> ?HARNESS:set_conf(Node, NameValuePairs), start(Node). +-spec set_advanced_conf(atom(), [{string(), string()}]) -> ok. +set_advanced_conf(all, NameValuePairs) -> + ?HARNESS:set_advanced_conf(all, NameValuePairs); +set_advanced_conf(Node, NameValuePairs) -> + stop(Node), + ?assertEqual(ok, rt:wait_until_unpingable(Node)), + ?HARNESS:set_advanced_conf(Node, NameValuePairs), + start(Node). + %% @doc Rewrite the given node's app.config file, overriding the varialbes %% in the existing app.config with those in `Config'. update_app_config(all, Config) -> diff --git a/src/rtssh.erl b/src/rtssh.erl index 2fa39c2e8..fe8f5a7e1 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -444,9 +444,27 @@ set_conf(Node, NameValuePairs) when is_atom(Node) -> append_to_conf_file(Node, get_riak_conf(Node), NameValuePairs), ok. +set_advanced_conf(all, NameValuePairs) -> + lager:debug("rtssh:set_advanced_conf(all, ~p)", [NameValuePairs]), + Hosts = rt_config:get(rtssh_hosts), + All = [{Host, DevPath} || Host <- Hosts, + DevPath <- devpaths()], + rt:pmap(fun({Host, DevPath}) -> + AllFiles = all_the_files(Host, DevPath, "etc/advanced.config"), + [update_app_config_file(Host, File, NameValuePairs, undefined) || File <- AllFiles], + ok + end, All), + ok; +set_advanced_conf(Node, NameValuePairs) when is_atom(Node) -> + append_to_conf_file(Node, get_advanced_riak_conf(Node), NameValuePairs), + ok. + get_riak_conf(Node) -> node_path(Node) ++ "/etc/riak.conf". +get_advanced_riak_conf(Node) -> + node_path(Node) ++ "/etc/advanced.config". 
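
Note: advanced.config holds raw Erlang terms, unlike riak.conf's flat name = value lines, which is why repl settings that cuttlefish cannot express are pushed there. A minimal example of the file's shape (settings illustrative):

    %% advanced.config: a dotted list of {App, Proplist} tuples.
    [
     {riak_repl, [
                  {fullsync_on_connect, false},
                  {fullsync_interval, disabled}
                 ]}
    ].
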
+ append_to_conf_file(Node, File, NameValuePairs) -> Current = remote_read_file(Node, File), Settings = [[$\n, to_list(Name), $=, to_list(Val), $\n] || {Name, Val} <- NameValuePairs], From 1347ac91c16492b2985428698fc1fd658a1cc565 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 19 Feb 2014 22:10:32 +0000 Subject: [PATCH 105/139] Remove harness specific code. Remove some harness specific code which isn't even really that valuable at this point. --- tests/repl_util.erl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/repl_util.erl b/tests/repl_util.erl index 9c0f4f16b..8dac2bfc5 100644 --- a/tests/repl_util.erl +++ b/tests/repl_util.erl @@ -174,10 +174,9 @@ start_and_wait_until_fullsync_complete(Node, Cluster) -> _ -> Count0 + 1 end, - lager:info("waiting for fullsync count to be ~p", [Count]), + lager:info("Waiting for fullsync count to be ~p", [Count]), - lager:info("Starting fullsync on ~p (~p)", [Node, - rtdev:node_version(rtdev:node_id(Node))]), + lager:info("Starting fullsync on: ~p", [Node]), Args = case Cluster of undefined -> ["start"]; From 44d4424e41948f4d6823875cd9f14a5d19939fe6 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Wed, 19 Feb 2014 22:11:02 +0000 Subject: [PATCH 106/139] WIP: Add first pass replication test for EC2. --- tests/repl_bench.erl | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 tests/repl_bench.erl diff --git a/tests/repl_bench.erl b/tests/repl_bench.erl new file mode 100644 index 000000000..874ec7926 --- /dev/null +++ b/tests/repl_bench.erl @@ -0,0 +1,83 @@ +-module(repl_bench). +-export([confirm/0]). +-include_lib("eunit/include/eunit.hrl"). + +-define(TEST_BUCKET, <<"repl_bench">>). +-define(CONF, [ + {riak_core, + [ + {ring_creation_size, 8}, + {default_bucket_props, [{n_val, 1}]} + ] + }, + {riak_kv, + [ + {anti_entropy, {on, []}}, + {anti_entropy_build_limit, {100, 1000}}, + {anti_entropy_concurrency, 100} + ] + }, + {riak_repl, + [ + {fullsync_strategy, aae}, + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + {max_fssource_retries, infinity} + ]} + ]). + +confirm() -> + rt:set_advanced_conf(all, ?CONF), + [ANodes, BNodes] = rt:build_clusters([3, 3]), + + AFirst = hd(ANodes), + BFirst = hd(BNodes), + + repl_util:name_cluster(AFirst, "A"), + repl_util:name_cluster(BFirst, "B"), + + rt:wait_until_ring_converged(ANodes), + rt:wait_until_ring_converged(BNodes), + + lager:info("waiting for leader to converge on cluster A"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), + + lager:info("waiting for leader to converge on cluster B"), + ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), + + LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), + + {ok, {IP, Port}} = rpc:call(BFirst, application, get_env, + [riak_core, cluster_mgr]), + + lager:info("connect cluster A:~p to B on port ~p", [LeaderA, Port]), + repl_util:connect_cluster(LeaderA, IP, Port), + ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), + + repl_util:enable_fullsync(LeaderA, "B"), + rt:wait_until_ring_converged(ANodes), + + lager:info("Wait for cluster connection A:~p -> B:~p:~p", [LeaderA, BFirst, Port]), + ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), + + %% Perform fullsync of an empty cluster. + {EmptyTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + + %% Write 10000 keys and perform fullsync. 
+ write_to_cluster(AFirst, 0, 10000), + {AllTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + + %% Rewrite first 1000 keys and perform fullsync. + write_to_cluster(AFirst, 0, 1000), + {DiffTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + + lager:info("***********************************************************************"), + lager:info("Empty fullsync completed in: ~p", [EmptyTime]), + lager:info("All fullsync completed in: ~p", [AllTime]), + lager:info("Diff fullsync completed in: ~p", [DiffTime]), + + pass. + +write_to_cluster(Node, Start, End) -> + lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), + ?assertEqual([], repl_util:do_write(Node, Start, End, ?TEST_BUCKET, 1)). From aca2606fbd8cf4318d3ce9f4fe09efec624e2c4f Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Wed, 19 Feb 2014 16:34:47 -0700 Subject: [PATCH 107/139] Change heartbeat timeout to seconds in repl_rt_heartbeat test The heartbeat timeout enforcement was recently updated to be specified in seconds to match the documentation for that option. The repl_rt_heartbeat test has since been failing since it still specified the timeout in milliseconds. This change makes the test use seconds for the heartbeat timeout gets the test passing again. --- tests/repl_rt_heartbeat.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/repl_rt_heartbeat.erl b/tests/repl_rt_heartbeat.erl index 7d6edd39f..94bb203c6 100644 --- a/tests/repl_rt_heartbeat.erl +++ b/tests/repl_rt_heartbeat.erl @@ -9,8 +9,8 @@ -include_lib("eunit/include/eunit.hrl"). -define(RPC_TIMEOUT, 5000). --define(HB_TIMEOUT, 2000). --define(HB_INTERVAL, 1000). +-define(HB_TIMEOUT, 2). +-define(HB_INTERVAL, 1). %% Replication Realtime Heartbeat test %% Valid for EE version 1.3.2 and up @@ -66,7 +66,7 @@ confirm() -> %% sleep longer than the HB timeout interval to force re-connection; %% and give it time to restart the RT connection. Wait an extra 2 seconds. - timer:sleep(?HB_TIMEOUT + 2000), + timer:sleep(timer:seconds(?HB_TIMEOUT) + 2000), %% Verify that RT connection has restarted by noting that it's Pid has changed RTConnPid2 = get_rt_conn_pid(LeaderA), @@ -80,7 +80,7 @@ confirm() -> %% Wait one second longer than the timeout rt:log_to_nodes([LeaderA], "Resuming HB"), resume_heartbeat_messages(LeaderA), - timer:sleep(?HB_TIMEOUT + 1000), + timer:sleep(timer:seconds(?HB_TIMEOUT) + 1000), %% Verify that heartbeats are being acknowledged by the sink (B) back to source (A) rt:log_to_nodes([LeaderA], "Verify resumed HB"), From b5da7e429180a8cddd74fc3031fa48d17a982b70 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 20 Feb 2014 03:54:38 +0000 Subject: [PATCH 108/139] Further the fullsync test. --- tests/{repl_bench.erl => repl_fs_bench.erl} | 43 +++++++++++++-------- 1 file changed, 27 insertions(+), 16 deletions(-) rename tests/{repl_bench.erl => repl_fs_bench.erl} (66%) diff --git a/tests/repl_bench.erl b/tests/repl_fs_bench.erl similarity index 66% rename from tests/repl_bench.erl rename to tests/repl_fs_bench.erl index 874ec7926..87dcfc454 100644 --- a/tests/repl_bench.erl +++ b/tests/repl_fs_bench.erl @@ -1,9 +1,12 @@ --module(repl_bench). +-module(repl_fs_bench). -export([confirm/0]). -include_lib("eunit/include/eunit.hrl"). +-define(DIFF_NUM_KEYS, 1000). +-define(FULL_NUM_KEYS, 10000). -define(TEST_BUCKET, <<"repl_bench">>). 
--define(CONF, [ + +-define(CONF(Strategy), [ {riak_core, [ {ring_creation_size, 8}, @@ -19,7 +22,7 @@ }, {riak_repl, [ - {fullsync_strategy, aae}, + {fullsync_strategy, Strategy}, {fullsync_on_connect, false}, {fullsync_interval, disabled}, {max_fssource_retries, infinity} @@ -27,7 +30,17 @@ ]). confirm() -> - rt:set_advanced_conf(all, ?CONF), + {None, Full, Diff} = fullsync_test(aae), + + lager:info("Results:"), + lager:info("Empty fullsync completed in: ~pms", [None / 1000]), + lager:info("All fullsync completed in: ~pms", [Full / 1000]), + lager:info("Diff fullsync completed in: ~pms", [Diff / 1000]), + + pass. + +fullsync_test(Strategy) -> + rt:set_advanced_conf(all, ?CONF(Strategy)), [ANodes, BNodes] = rt:build_clusters([3, 3]), AFirst = hd(ANodes), @@ -61,22 +74,20 @@ confirm() -> ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), %% Perform fullsync of an empty cluster. - {EmptyTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), + {NoneTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), - %% Write 10000 keys and perform fullsync. - write_to_cluster(AFirst, 0, 10000), - {AllTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + %% Write keys and perform fullsync. + write_to_cluster(AFirst, 0, ?FULL_NUM_KEYS), + repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), + {FullTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), - %% Rewrite first 1000 keys and perform fullsync. - write_to_cluster(AFirst, 0, 1000), + %% Rewrite first 10% keys and perform fullsync. + write_to_cluster(AFirst, 0, ?DIFF_NUM_KEYS), + repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), {DiffTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), - lager:info("***********************************************************************"), - lager:info("Empty fullsync completed in: ~p", [EmptyTime]), - lager:info("All fullsync completed in: ~p", [AllTime]), - lager:info("Diff fullsync completed in: ~p", [DiffTime]), - - pass. + {NoneTime, FullTime, DiffTime}. write_to_cluster(Node, Start, End) -> lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), From 92880a35cabefc3028382d7927cb6ff088161d11 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 20 Feb 2014 14:58:03 -0500 Subject: [PATCH 109/139] Limit concurrency; disable allow_mult. Limit the concurrency to one partition at a time, and ensure we do not generate siblings when rewriting the smaller portion of the keyspace. --- tests/repl_fs_bench.erl | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl index 87dcfc454..40627937d 100644 --- a/tests/repl_fs_bench.erl +++ b/tests/repl_fs_bench.erl @@ -10,7 +10,7 @@ {riak_core, [ {ring_creation_size, 8}, - {default_bucket_props, [{n_val, 1}]} + {default_bucket_props, [{n_val, 1}, {allow_mult, false}]} ] }, {riak_kv, @@ -25,17 +25,21 @@ {fullsync_strategy, Strategy}, {fullsync_on_connect, false}, {fullsync_interval, disabled}, - {max_fssource_retries, infinity} + {max_fssource_retries, infinity}, + {max_fssource_cluster, 1}, + {max_fssource_node, 1}, + {max_fssink_node, 1} ]} ]). 
confirm() -> - {None, Full, Diff} = fullsync_test(aae), + {Empty, Full, Diff, None} = fullsync_test(aae), lager:info("Results:"), - lager:info("Empty fullsync completed in: ~pms", [None / 1000]), + lager:info("Empty fullsync completed in: ~pms", [Empty / 1000]), lager:info("All fullsync completed in: ~pms", [Full / 1000]), lager:info("Diff fullsync completed in: ~pms", [Diff / 1000]), + lager:info("None fullsync completed in: ~pms", [None / 1000]), pass. @@ -63,32 +67,47 @@ fullsync_test(Strategy) -> {ok, {IP, Port}} = rpc:call(BFirst, application, get_env, [riak_core, cluster_mgr]), - lager:info("connect cluster A:~p to B on port ~p", [LeaderA, Port]), + lager:info("connect cluster A:~p to B on port ~p", + [LeaderA, Port]), repl_util:connect_cluster(LeaderA, IP, Port), ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes), - lager:info("Wait for cluster connection A:~p -> B:~p:~p", [LeaderA, BFirst, Port]), + lager:info("Wait for cluster connection A:~p -> B:~p:~p", + [LeaderA, BFirst, Port]), ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), %% Perform fullsync of an empty cluster. repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), - {NoneTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + {EmptyTime, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [LeaderA]), %% Write keys and perform fullsync. write_to_cluster(AFirst, 0, ?FULL_NUM_KEYS), repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), - {FullTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + {FullTime, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [LeaderA]), %% Rewrite first 10% keys and perform fullsync. write_to_cluster(AFirst, 0, ?DIFF_NUM_KEYS), repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), - {DiffTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), + {DiffTime, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [LeaderA]), - {NoneTime, FullTime, DiffTime}. + %% Write no keys, and perform the fullsync. + repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), + {NoneTime, _} = timer:tc(repl_util, + start_and_wait_until_fullsync_complete, + [LeaderA]), + + {EmptyTime, FullTime, DiffTime, NoneTime}. write_to_cluster(Node, Start, End) -> lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), - ?assertEqual([], repl_util:do_write(Node, Start, End, ?TEST_BUCKET, 1)). + ?assertEqual([], + repl_util:do_write(Node, Start, End, ?TEST_BUCKET, 1)). From 377fe57ae986bde38586e89c2190776fffca36ba Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Thu, 20 Feb 2014 14:59:07 -0500 Subject: [PATCH 110/139] Add functions for advanced.config manipulation. Add functions to assist in manipulation of the advanced.config file directly, which is required for repl which can not be configured with Cuttlefish, when enabled. --- src/rtdev.erl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/rtdev.erl b/src/rtdev.erl index f5bb157b7..b8fe18c86 100644 --- a/src/rtdev.erl +++ b/src/rtdev.erl @@ -167,11 +167,27 @@ set_conf(DevPath, NameValuePairs) -> [append_to_conf_file(RiakConf, NameValuePairs) || RiakConf <- all_the_files(DevPath, "etc/riak.conf")], ok. 
+set_advanced_conf(all, NameValuePairs) ->
+    lager:info("rtdev:set_advanced_conf(all, ~p)", [NameValuePairs]),
+    [ set_advanced_conf(DevPath, NameValuePairs) || DevPath <- devpaths()],
+    ok;
+set_advanced_conf(Node, NameValuePairs) when is_atom(Node) ->
+    append_to_conf_file(get_advanced_riak_conf(Node), NameValuePairs),
+    ok;
+set_advanced_conf(DevPath, NameValuePairs) ->
+    [update_app_config_file(RiakConf, NameValuePairs) || RiakConf <- all_the_files(DevPath, "etc/advanced.config")],
+    ok.
+
 get_riak_conf(Node) ->
     N = node_id(Node),
     Path = relpath(node_version(N)),
     io_lib:format("~s/dev/dev~b/etc/riak.conf", [Path, N]).
 
+get_advanced_riak_conf(Node) ->
+    N = node_id(Node),
+    Path = relpath(node_version(N)),
+    io_lib:format("~s/dev/dev~b/etc/advanced.config", [Path, N]).
+
 append_to_conf_file(File, NameValuePairs) ->
     Settings = lists:flatten(
                  [io_lib:format("~n~s = ~s~n", [Name, Value]) || {Name, Value} <- NameValuePairs]),

From 106401bad572c4a29fbb04c4bed4a2c093930739 Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Thu, 20 Feb 2014 15:21:13 -0500
Subject: [PATCH 111/139] Run both keylist and aae.

Run, test, and time both the aae and keylist strategy of fullsync
replication.
---
 tests/repl_fs_bench.erl | 43 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl
index 40627937d..7fff371ff 100644
--- a/tests/repl_fs_bench.erl
+++ b/tests/repl_fs_bench.erl
@@ -6,6 +6,8 @@
 -define(FULL_NUM_KEYS, 10000).
 -define(TEST_BUCKET, <<"repl_bench">>).
 
+-define(HARNESS, (rt_config:get(rt_harness))).
+
 -define(CONF(Strategy), [
     {riak_core,
         [
@@ -32,20 +34,44 @@
     ]}
 ]).
 
+-import(rt, [deploy_nodes/2]).
+
 confirm() ->
-    {Empty, Full, Diff, None} = fullsync_test(aae),
+    {AAEEmpty, AAEFull, AAEDiff, AAENone} = fullsync_test(aae),
+
+    {KeylistEmpty, KeylistFull, KeylistDiff, KeylistNone} = fullsync_test(keylist),
+
+    lager:info("Results for aae:"),
+    lager:info("Empty fullsync completed in: ~pms", [AAEEmpty / 1000]),
+    lager:info("All fullsync completed in: ~pms", [AAEFull / 1000]),
+    lager:info("Diff fullsync completed in: ~pms", [AAEDiff / 1000]),
+    lager:info("None fullsync completed in: ~pms", [AAENone / 1000]),
 
-    lager:info("Results:"),
-    lager:info("Empty fullsync completed in: ~pms", [Empty / 1000]),
-    lager:info("All fullsync completed in: ~pms", [Full / 1000]),
-    lager:info("Diff fullsync completed in: ~pms", [Diff / 1000]),
-    lager:info("None fullsync completed in: ~pms", [None / 1000]),
+    lager:info("Results for keylist:"),
+    lager:info("Empty fullsync completed in: ~pms", [KeylistEmpty / 1000]),
+    lager:info("All fullsync completed in: ~pms", [KeylistFull / 1000]),
+    lager:info("Diff fullsync completed in: ~pms", [KeylistDiff / 1000]),
+    lager:info("None fullsync completed in: ~pms", [KeylistNone / 1000]),
 
     pass.
 
 fullsync_test(Strategy) ->
     rt:set_advanced_conf(all, ?CONF(Strategy)),
-    [ANodes, BNodes] = rt:build_clusters([3, 3]),
+
+    {ANodes, BNodes} = case ?HARNESS of
+        rtssh ->
+            [A, B] = rt:build_clusters([3, 3]),
+            {A, B};
+        rtdev ->
+            Nodes = deploy_nodes(6, ?CONF(Strategy)),
+
+            %% Break up the 6 nodes into two clusters.
+ {A, B} = lists:split(3, Nodes), + + lager:info("Building two clusters."), + [repl_util:make_cluster(N) || N <- [A, B]], + {A, B} + end, AFirst = hd(ANodes), BFirst = hd(BNodes), @@ -105,6 +131,9 @@ fullsync_test(Strategy) -> start_and_wait_until_fullsync_complete, [LeaderA]), + rt:clean_cluster(ANodes), + rt:clean_cluster(BNodes), + {EmptyTime, FullTime, DiffTime, NoneTime}. write_to_cluster(Node, Start, End) -> From af1691530e2a3732daecc246184a164add51d513 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Thu, 20 Feb 2014 15:24:14 -0500 Subject: [PATCH 112/139] Enable siblings for pb_security test --- tests/pb_security.erl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/pb_security.erl b/tests/pb_security.erl index 3a4878220..dd743bde7 100644 --- a/tests/pb_security.erl +++ b/tests/pb_security.erl @@ -49,6 +49,9 @@ confirm() -> lager:info("Deploy some nodes"), PrivDir = rt:priv_dir(), Conf = [ + {riak_core, [ + {default_bucket_props, [{allow_mult, true}]} + ]}, {riak_api, [ {certfile, filename:join([CertDir,"site3.basho.com/cert.pem"])}, {keyfile, filename:join([CertDir, "site3.basho.com/key.pem"])}, From a6c983229e0804af8366d3a018c6c6e8bfe17ac0 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 24 Feb 2014 10:26:55 -0500 Subject: [PATCH 113/139] Ignore tags file. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 79d1ae60c..7a4d69e65 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,4 @@ doc/ !doc/overview.edoc *.jar coverage - +tags From 1a3367bcb5cdabdf7ad1a53ff97874e573b3f5f4 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 24 Feb 2014 10:27:09 -0500 Subject: [PATCH 114/139] Assert AAE fullsync replication is faster. --- tests/repl_fs_bench.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl index 7fff371ff..d33573b05 100644 --- a/tests/repl_fs_bench.erl +++ b/tests/repl_fs_bench.erl @@ -53,6 +53,11 @@ confirm() -> lager:info("Diff fullsync completed in: ~pms", [KeylistDiff / 1000]), lager:info("None fullsync completed in: ~pms", [KeylistNone / 1000]), + ?assert(AAEEmpty < KeylistEmpty), + ?assert(AAEFull < KeylistFull), + ?assert(AAEDiff < KeylistDiff), + ?assert(AAENone < KeylistNone), + pass. fullsync_test(Strategy) -> From 4757c349d59d430a867ced4cd05523d9e96686a9 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 24 Feb 2014 11:56:32 -0500 Subject: [PATCH 115/139] Add compatible API with rtssh for deploying clusters. --- src/rtdev.erl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/rtdev.erl b/src/rtdev.erl index b8fe18c86..4e7ed59a0 100644 --- a/src/rtdev.erl +++ b/src/rtdev.erl @@ -346,6 +346,23 @@ add_default_node_config(Nodes) -> throw({invalid_config, {rt_default_config, BadValue}}) end. +deploy_clusters(ClusterConfigs) -> + NumNodes = rt_config:get(num_nodes, 6), + RequestedNodes = lists:flatten(ClusterConfigs), + + case length(RequestedNodes) > NumNodes of + true -> + erlang:error("Requested more nodes than available"); + false -> + Nodes = deploy_nodes(RequestedNodes), + {DeployedClusters, _} = lists:foldl( + fun(Cluster, {Clusters, RemNodes}) -> + {A, B} = lists:split(length(Cluster), RemNodes), + {Clusters ++ [A], B} + end, {[], Nodes}, ClusterConfigs), + DeployedClusters + end. 
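
A hedged usage sketch for deploy_clusters/1 above (illustrative only; it assumes the rtdev convention that each node spec is a {Version, Config} pair, which is what deploy_nodes/1 consumes):

    %% Two three-node clusters come back as two disjoint node lists.
    example_deploy(Conf) ->
        Specs = [lists:duplicate(3, {current, Conf}),
                 lists:duplicate(3, {current, Conf})],
        [ANodes, BNodes] = deploy_clusters(Specs),
        {ANodes, BNodes}.
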
+ deploy_nodes(NodeConfig) -> Path = relpath(root), lager:info("Riak path: ~p", [Path]), From c08ce430f31150157808f492cb3bdd8fb147195d Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 24 Feb 2014 11:56:54 -0500 Subject: [PATCH 116/139] Use normalized API. --- tests/repl_fs_bench.erl | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl index d33573b05..7dbc8d4ab 100644 --- a/tests/repl_fs_bench.erl +++ b/tests/repl_fs_bench.erl @@ -63,20 +63,7 @@ confirm() -> fullsync_test(Strategy) -> rt:set_advanced_conf(all, ?CONF(Strategy)), - {ANodes, BNodes} = case ?HARNESS of - rtssh -> - [A, B] = rt:build_clusters([3, 3]), - {A, B}; - rtdev -> - Nodes = deploy_nodes(6, ?CONF(Strategy)), - - %% Break up the 6 nodes into three clustes. - {A, B} = lists:split(3, Nodes), - - lager:info("Building two clusters."), - [repl_util:make_cluster(N) || N <- [A, B]], - {A, B} - end, + [ANodes, BNodes] = rt:build_clusters([3, 3]), AFirst = hd(ANodes), BFirst = hd(BNodes), From e2b412f4551f1ff8afd49745653ece97c49cfce8 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 24 Feb 2014 18:46:23 +0000 Subject: [PATCH 117/139] Ensure we update version and node maps. In the event we deploy more than one cluster, make sure we merge the values in, rather than replace. --- src/rtssh.erl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rtssh.erl b/src/rtssh.erl index fe8f5a7e1..09380101c 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -73,8 +73,12 @@ deploy_nodes(NodeConfig, Hosts) -> {Versions, Configs} = lists:unzip(NodeConfig), VersionMap = lists:zip(Nodes, Versions), - rt_config:set(rt_hosts, HostMap), - rt_config:set(rt_versions, VersionMap), + rt_config:set(rt_hosts, + orddict:from_list( + orddict:to_list(rt_config:get(rt_hosts, orddict:new())) ++ HostMap)), + rt_config:set(rt_versions, + orddict:from_list( + orddict:to_list(rt_config:get(rt_versions, orddict:new())) ++ VersionMap)), %% io:format("~p~n", [Nodes]), From 00a67b022ec4ff105b5b635ee580e463e4245adf Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Mon, 24 Feb 2014 19:13:00 +0000 Subject: [PATCH 118/139] Add missing clean_data_dir function --- src/rtssh.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rtssh.erl b/src/rtssh.erl index 09380101c..0b43e805d 100644 --- a/src/rtssh.erl +++ b/src/rtssh.erl @@ -165,6 +165,10 @@ create_dirs(Nodes) -> [ssh_cmd(Node, "mkdir -p " ++ node_path(Node) ++ "/data/snmp/agent/db") || Node <- Nodes]. +clean_data_dir(Nodes, SubDir) when is_list(Nodes) -> + [ssh_cmd(Node, "rm -rf " ++ node_path(Node) ++ "/data/" ++ SubDir) + || Node <- Nodes]. + start(Node) -> run_riak(Node, "start"), ok. @@ -314,6 +318,7 @@ spawn_ssh_cmd(Host, Cmd, Opts) -> spawn_cmd(SSHCmd, Opts). ssh_cmd(Node, Cmd) -> + lager:info("Running: ~s :: ~s", [Node, Cmd]), wait_for_cmd(spawn_ssh_cmd(Node, Cmd)). remote_read_file(Node, File) -> From ccc31dd9f78932a4934317e75b754b9155d1f946 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Tue, 25 Feb 2014 10:41:24 -0500 Subject: [PATCH 119/139] Remove observer. --- src/observer.erl | 655 ----------------------------------------------- 1 file changed, 655 deletions(-) delete mode 100644 src/observer.erl diff --git a/src/observer.erl b/src/observer.erl deleted file mode 100644 index 7b10e8cb3..000000000 --- a/src/observer.erl +++ /dev/null @@ -1,655 +0,0 @@ --module(observer). --compile(export_all). 
- --record(history, {network, - disk, - rate, - nodes, - lvlref, - collector_sock, - collector_host, - collector_port}). - --record(watcher, {nodes, - collector, - probes}). - -%% See: https://www.kernel.org/doc/Documentation/iostats.txt --record(disk, {read, - read_merged, - read_sectors, - read_wait_ms, - write, - write_merged, - write_sectors, - write_wait_ms, - io_pending, - io_wait_ms, - io_wait_weighted_ms}). - -watch(Nodes, Collector) -> - %% io:format("Loading on ~p~n", [Nodes]), - %% load_modules_on_nodes([?MODULE], Nodes), - %% R = rpc:multicall(Nodes, ?MODULE, start, [self(), 1000, Collector, Nodes, collect]), - %% io:format("RPC: ~p~n", [R]), - spawn(?MODULE, watcher, [self(), Nodes, Collector]), - start(self(), 1000, Collector, Nodes, ping), - ok. - -watcher(Master, Nodes, Collector) -> - monitor(process, Master), - Probes = [{Node, undefined} || Node <- Nodes], - W = #watcher{nodes=Nodes, - collector=Collector, - probes=Probes}, - watcher_loop(W). - -watcher_loop(W=#watcher{probes=Probes}) -> - Missing = [Node || {Node, undefined} <- Probes], - %% io:format("Missing: ~p~n", [Missing]), - W2 = install_probes(Missing, W), - Probes2 = W2#watcher.probes, - receive - {'DOWN', MRef, process, _, _} -> - case lists:keyfind(MRef, 2, Probes2) of - false -> - %% master died, exit - io:format("watcher exiting~n"), - ok; - {Node, MRef} -> - io:format("Probe exit: ~p/~p~n", [Node, MRef]), - Probes3 = lists:keyreplace(Node, 1, Probes2, {Node, undefined}), - W3 = W2#watcher{probes=Probes3}, - ?MODULE:watcher_loop(W3) - end - after 1000 -> - ?MODULE:watcher_loop(W2) - end. - -install_probes(Nodes, W=#watcher{collector=Collector, nodes=AllNodes, probes=Probes}) -> - %% io:format("Loading on ~p~n", [Nodes]), - load_modules_on_nodes([?MODULE], Nodes), - R = rpc:multicall(Nodes, ?MODULE, start, [self(), 1000, Collector, AllNodes, collect]), - %% io:format("R: ~p~n", [R]), - {Pids, Down} = R, - %% io:format("I: ~p/~p~n", [Pids, Down]), - Probes2 = lists:foldl(fun({Node, Pid}, Acc) -> - if is_pid(Pid) -> - lists:keystore(Node, 1, Acc, {Node, monitor(process, Pid)}); - true -> - Acc - end - end, Probes, Pids), - Probes3 = lists:foldl(fun(Node, Acc) -> - lists:keystore(Node, 1, Acc, {Node, undefined}) - end, Probes2, Down), - %% io:format("P3: ~p~n", [Probes3]), - W#watcher{probes=Probes3}. - -start(Master, Rate, Collector, Nodes, Fun) -> - io:format("In start: ~p~n", [node()]), - Pid = spawn(?MODULE, init, [Master, Rate, Collector, Nodes, Fun]), - {node(), Pid}. - -init(Master, Rate, {Host, Port}, Nodes, Fun) -> - io:format("In init: ~p~n", [node()]), - {ok, Sock} = gen_udp:open(Port), - case application:get_env(riak_kv, storage_backend) of - {ok, riak_kv_eleveldb_backend} -> - LRef = get_leveldb_ref(); - _ -> - LRef = undefined - end, - H = #history{network=undefined, - %% disk=undefined, - disk=[], - rate=Rate div 1000, - lvlref=LRef, - nodes=Nodes, - collector_sock=Sock, - collector_host=Host, - collector_port=Port}, - %% case Fun of - %% collect -> - %% vmstat(Master, H); - %% _ -> - %% ok - %% end, - monitor(process, Master), - loop(Fun, Rate, H). - -loop(Fun, Rate, H) -> - %% io:format("loop: ~p~n", [node()]), - NewH = ?MODULE:Fun(H), - receive - {'DOWN', _, process, _, _} -> - io:format("shutting: ~p~n", [node()]), - ok - after Rate -> - ?MODULE:loop(Fun, Rate, NewH) - end. 
- -ping(H=#history{nodes=Nodes}) -> - TS = timestamp(), - XNodes = lists:zip(lists:seq(1, length(Nodes)), Nodes), - pmap(fun({X,Node}) -> - case net_adm:ping(Node) of - pang -> - notify_down(TS, X, Node, H), - ok; - pong -> - case rpc:call(Node, riak_core_node_watcher, services, [Node]) of - L when is_list(L) -> - case lists:member(riak_kv, L) of - true -> - ok; - false -> - notify_down(TS, X, Node, H) - end; - _ -> - notify_down(TS, X, Node, H) - end; - _ -> - ok - end - end, XNodes), - H. - -notify_down(TS, X, Node, H) -> - %% emit_stat(Stat, TS, Value, H) -> - NodeBin = atom_to_binary(Node, utf8), - Metric = <<"offline_nodes/", NodeBin/binary>>, - emit_stat2(Metric, TS, X, H). - -collect(H0) -> - H = try report_leveldb(H0) catch _:_ -> H0 end, - catch report_queues(H), - catch report_processes(H), - H2 = try report_network(H) catch _:_ -> H end, - %% H3 = report_disk2(H2), - %% H3 = report_disk2([{<<"dm-0">>, "dm-0"}, - %% {<<"dm-1">>, "dm-1"}], H2), - H3 = report_disk2([{<<"xvdb">>, "xvdb"}, - {<<"xvdc">>, "xvdc"}, - {<<"raid0">>, "md127"}], H2), - report_vmstat(H2), - report_memory(H2), - %% H3 = try report_disk2(H2) catch _:_ -> H2 end, - catch report_stats(riak_core_stat, [dropped_vnode_requests_total], H3), - catch report_stats(riak_kv_stat, - [node_gets, - node_puts, - - vnode_gets, - vnode_puts, - - node_get_fsm_active, - node_get_fsm_rejected, - node_get_fsm_in_rate, - node_get_fsm_out_rate, - - node_put_fsm_active, - node_put_fsm_rejected, - node_put_fsm_in_rate, - node_put_fsm_out_rate - ], H3), - - catch report_stats(riak_kv_stat, - [riak_kv_stat, - node_get_fsm_time_median, - node_get_fsm_time_95, - node_get_fsm_time_100, - - node_put_fsm_time_median, - node_put_fsm_time_95, - node_put_fsm_time_100 - ], H3, 1000), - - %% catch print_down(Nodes), - H3. - -report_queues(H) -> - Max = lists:max([Len || Pid <- processes(), - {message_queue_len, Len} <- [process_info(Pid, message_queue_len)]]), - TS = timestamp(), - emit_stat(<<"message_queue_max">>, TS, Max, H), - ok. - -%% report_queues(Threshold) -> -%% VNodes = riak_core_vnode_manager:all_vnodes(), -%% VNodes2 = [{Pid, {Mod,Idx}} || {Mod,Idx,Pid} <- VNodes], -%% VNodeMap = dict:from_list(VNodes2), -%% Queues = message_queues(processes(), Threshold, VNodeMap, []), -%% Queues2 = lists:keysort(1, filter(Queues, 2, [])), -%% ok. - -report_processes(H) -> - Procs = erlang:system_info(process_count), - %% Limit = erlang:system_info(process_limit), - %% Ratio = Procs * 100 div Limit, - TS = timestamp(), - emit_stat(<<"erlang_processes">>, TS, Procs, H), - ok. - -%% report_processes(Threshold) -> -%% Procs = erlang:system_info(process_count), -%% Limit = erlang:system_info(process_limit), -%% Ratio = Procs * 100 div Limit, -%% case Ratio > Threshold of -%% true -> -%% {Procs, Ratio}; -%% false -> -%% none -%% end. - -report_network(H=#history{network=LastStats, rate=Rate}) -> - {RX, TX} = get_network(), - case LastStats of - undefined -> - ok; - {LastRX, LastTX} -> - RXRate = net_rate(LastRX, RX) div Rate, - TXRate = net_rate(LastTX, TX) div Rate, - TS = timestamp(), - emit_stat(<<"net_rx">>, TS, RXRate, H), - emit_stat(<<"net_tx">>, TS, TXRate, H) - end, - H#history{network={RX, TX}}. - -report_disk2(Disks, H=#history{disk=DiskStats}) -> - NewStats = - lists:foldl(fun({Name, Dev}, Acc) -> - LastStats = case orddict:find(Dev, DiskStats) of - error -> - undefined; - {ok, LS} -> - LS - end, - Stats = report_disk2(Name, Dev, LastStats, H), - orddict:store(Dev, Stats, Acc) - end, DiskStats, Disks), - H#history{disk=NewStats}. 
- -report_disk2(Name, Dev, LastStats, H=#history{rate=Rate}) -> - Stats = get_disk2(Dev), - case LastStats of - undefined -> - ok; - _ -> - ReadRate = disk_rate(#disk.read_sectors, LastStats, Stats) div Rate, - WriteRate = disk_rate(#disk.write_sectors, LastStats, Stats) div Rate, - {AwaitR, AwaitW} = disk_await(LastStats, Stats), - Svctime = disk_svctime(LastStats, Stats), - QueueLen = disk_qlength(LastStats, Stats), - Util = disk_util(LastStats, Stats), - TS = timestamp(), - emit_stat(<<"disk_read (", Name/binary, ")">>, TS, ReadRate, H), - emit_stat(<<"disk_write (", Name/binary, ")">>, TS, WriteRate, H), - emit_stat(<<"disk_await_r (", Name/binary, ")">>, TS, AwaitR, H), - emit_stat(<<"disk_await_w (", Name/binary, ")">>, TS, AwaitW, H), - emit_stat(<<"disk_svctime (", Name/binary, ")">>, TS, Svctime, H), - emit_stat(<<"disk_queue_size (", Name/binary, ")">>, TS, QueueLen, H), - emit_stat(<<"disk_utilization (", Name/binary, ")">>, TS, Util, H) - end, - Stats. - -report_disk2(H=#history{disk=LastStats, rate=Rate}) -> - Stats = get_disk2(), - case LastStats of - undefined -> - ok; - _ -> - ReadRate = disk_rate(#disk.read_sectors, LastStats, Stats) div Rate, - WriteRate = disk_rate(#disk.write_sectors, LastStats, Stats) div Rate, - {AwaitR, AwaitW} = disk_await(LastStats, Stats), - Svctime = disk_svctime(LastStats, Stats), - QueueLen = disk_qlength(LastStats, Stats), - Util = disk_util(LastStats, Stats), - TS = timestamp(), - emit_stat(<<"disk_read">>, TS, ReadRate, H), - emit_stat(<<"disk_write">>, TS, WriteRate, H), - emit_stat(<<"disk_await_r">>, TS, AwaitR, H), - emit_stat(<<"disk_await_w">>, TS, AwaitW, H), - emit_stat(<<"disk_svctime">>, TS, Svctime, H), - emit_stat(<<"disk_queue_size">>, TS, QueueLen, H), - emit_stat(<<"disk_utilization">>, TS, Util, H) - end, - H#history{disk=Stats}. - -%% report_disk(H=#history{disk=LastStats, rate=Rate}) -> -%% {Read, Write} = get_disk(), -%% case LastStats of -%% undefined -> -%% ok; -%% {LastRead, LastWrite} -> -%% ReadRate = disk_rate(LastRead, Read) div Rate, -%% WriteRate = disk_rate(LastWrite, Write) div Rate, -%% TS = timestamp(), -%% emit_stat(<<"disk_read">>, TS, ReadRate, H), -%% emit_stat(<<"disk_write">>, TS, WriteRate, H) -%% end, -%% H#history{disk={Read, Write}}. - -report_memory(H) -> - Stats = get_memory(), - Util = memory_util(Stats), - Dirty = memory_dirty(Stats), - Writeback = memory_writeback(Stats), - TS = timestamp(), - emit_stat(<<"memory_utilization">>, TS, Util, H), - emit_stat(<<"memory_page_dirty">>, TS, Dirty, H), - emit_stat(<<"memory_page_writeback">>, TS, Writeback, H), - ok. - -report_leveldb(H=#history{lvlref=undefined}) -> - H; -report_leveldb(H=#history{lvlref=LRef}) -> - try case eleveldb:status(LRef, <<"leveldb.ThrottleGauge">>) of - {ok, Result} -> - Value = list_to_integer(binary_to_list(Result)), - TS = timestamp(), - emit_stat(<<"leveldb_write_throttle">>, TS, Value, H), - H; - _ -> - H - end - catch - _:_ -> - LRef2 = get_leveldb_ref(), - H#history{lvlref=LRef2} - end. - -%% print_down(Nodes) -> -%% Down = [Node || Node <- Nodes, -%% net_adm:ping(Node) =:= pang], -%% case Down of -%% [] -> -%% ok; -%% _ -> -%% io:format("Offline nodes:~n ~p~n", [Down]) -%% end. - -net_rate(Bytes1, Bytes2) -> - (Bytes2 - Bytes1) div 1024. - -disk_rate(I, Stats1, Stats2) -> - disk_rate(element(I, Stats1), element(I, Stats2)). - -disk_rate(Sectors1, Sectors2) -> - %% 512-byte sectors - (Sectors2 - Sectors1) div 2. 
- -disk_await(S1, S2) -> - NumR = erlang:max(S2#disk.read - S1#disk.read, 1), - NumW = erlang:max(S2#disk.write - S1#disk.write, 1), - AwaitR = (S2#disk.read_wait_ms - S1#disk.read_wait_ms) div NumR, - AwaitW = (S2#disk.write_wait_ms - S1#disk.write_wait_ms) div NumW, - {AwaitR, AwaitW}. - -disk_svctime(S1, S2) -> - NumR = S2#disk.read - S1#disk.read, - NumW = S2#disk.write - S1#disk.write, - NumIO = erlang:max(NumR + NumW, 1), - Wait = S2#disk.io_wait_ms - S1#disk.io_wait_ms, - Wait div NumIO. - -disk_util(S1, S2) -> - Wait = S2#disk.io_wait_ms - S1#disk.io_wait_ms, - Wait * 100 div 1000. %% Really should be div Rate - -disk_qlength(S1, S2) -> - (S2#disk.io_wait_weighted_ms - S1#disk.io_wait_weighted_ms) div 1000. - -filter(L, Pos, Val) -> - [T || T <- L, - element(Pos, T) /= Val]. - -message_queues([], _Threshold, _VNodeMap, Queues) -> - lists:reverse(lists:keysort(1, Queues)); -message_queues([Pid|Pids], Threshold, VNodeMap, Queues) -> - case process_info(Pid, [message_queue_len, registered_name]) of - [{message_queue_len, Len}, - {registered_name, RegName}] when Len > Threshold -> - Entry = {Len, pid_name(Pid, RegName, VNodeMap)}, - message_queues(Pids, Threshold, VNodeMap, [Entry|Queues]); - _ -> - message_queues(Pids, Threshold, VNodeMap, Queues) - end. - -get_network() -> - %% {ok, RX} = file:read_file("/sys/class/net/eth0/statistics/rx_bytes"), - %% {ok, TX} = file:read_file("/sys/class/net/eth0/statistics/tx_bytes"), - {ok, RX} = file:read_file("/sys/class/net/eth1/statistics/rx_bytes"), - {ok, TX} = file:read_file("/sys/class/net/eth1/statistics/tx_bytes"), - {to_integer(RX), to_integer(TX)}. - -get_disk2() -> - {ok, Bin} = file:read_file("/sys/block/md127/stat"), - %% {ok, Bin} = file:read_file("/sys/block/dm-0/stat"), - Stats = parse_disk_stats(Bin), - Stats. - -get_disk2(Dev) -> - {ok, Bin} = file:read_file("/sys/block/" ++ Dev ++ "/stat"), - Stats = parse_disk_stats(Bin), - Stats. - -%% get_disk() -> -%% {ok, Bin} = file:read_file("/sys/block/md127/stat"), -%% Stats = parse_disk_stats(Bin), -%% {Stats#disk.read_sectors, Stats#disk.write_sectors}. - -memory_util(Mem) -> - Stat = fun(Key) -> - list_to_integer(element(2, lists:keyfind(Key, 1, Mem))) - end, - Total = Stat("MemTotal:"), - Free = Stat("MemFree:"), - Buffers = Stat("Buffers:"), - Cached = Stat("Cached:"), - (Total - Free - Buffers - Cached) * 100 div Total. - -memory_dirty(Mem) -> - {_, Dirty} = lists:keyfind("Dirty:", 1, Mem), - list_to_integer(Dirty). - -memory_writeback(Mem) -> - {_, Writeback} = lists:keyfind("Writeback:", 1, Mem), - list_to_integer(Writeback). - -get_memory() -> - S = os:cmd("cat /proc/meminfo"), - [case string:tokens(L," ") of - [Key, Value, _] -> - {Key, Value}; - [Key, Value] -> - {Key, Value}; - _ -> - ignore - end || L <- string:tokens(S, "\n")]. - -parse_disk_stats(Bin) -> - [Line|_] = binary:split(Bin, <<"\n">>), - Fields = string:tokens(binary_to_list(Line), " "), - Fields2 = [list_to_integer(Field) || Field <- Fields], - list_to_tuple([disk|Fields2]). - -to_integer(Bin) -> - [Line|_] = binary:split(Bin, <<"\n">>), - list_to_integer(binary_to_list(Line)). - -pid_name(Pid, [], VNodeMap) -> - case dict:find(Pid, VNodeMap) of - {ok, VNode} -> - VNode; - _ -> - Pid - end; -pid_name(_Pid, RegName, _VNodeMap) -> - RegName. - -report_stats(Mod, Keys, H) -> - report_stats(Mod, Keys, H, 1). 
- -report_stats(Mod, Keys, H, Scale) -> - Stats = Mod:get_stats(), - TS = timestamp(), - [case lists:keyfind(Key, 1, Stats) of - false -> - ok; - {_, Value} -> - emit_stat(atom_to_binary(Key, utf8), TS, Value / Scale, H) - end || Key <- Keys], - ok. - -%%%=================================================================== -%%% Utility functions -%%%=================================================================== -pmap(F, L) -> - Parent = self(), - lists:mapfoldl( - fun(X, N) -> - Pid = spawn(fun() -> - Parent ! {pmap, N, F(X)} - end), - {Pid, N+1} - end, 0, L), - L2 = [receive {pmap, N, R} -> {N,R} end || _ <- L], - [R || {_, R} <- lists:keysort(1, L2)]. - -load_modules_on_nodes(Modules, Nodes) -> - [case code:get_object_code(Module) of - {Module, Bin, File} -> - %% rpc:multicall(Nodes, code, purge, [Module]), - rpc:multicall(Nodes, code, load_binary, [Module, File, Bin]); - error -> - error({no_object_code, Module}) - end || Module <- Modules]. - -get_leveldb_ref() -> - VNodes = riak_core_vnode_manager:all_vnodes(riak_kv_vnode), - {_, _, Pid} = hd(VNodes), - State = get_state(Pid), - ModState = element(4, State), - case element(3,ModState) of - riak_kv_eleveldb_backend -> - LvlState = element(4, ModState), - element(2, LvlState); - _ -> - undefined - end. - -get_state(Pid) -> - {status, Pid, _Mod, Status} = sys:get_status(Pid), - Status2 = lists:flatten(Status), - Status3 = [L || {data, L} <- Status2], - Status4 = lists:flatten(Status3), - State = proplists:get_value("StateData", Status4), - State. - -%% print_queues(Nodes) -> -%% pmap(remote(get_queues), Nodes). - -%% remote(F) -> -%% fun(Node) -> -%% rpc:call(Node, ?MODULE, F, []) -%% end. - -timestamp() -> - timestamp(os:timestamp()). - -timestamp({Mega, Secs, Micro}) -> - Mega*1000*1000*1000 + Secs * 1000 + (Micro div 1000). - -emit_stat(Stat, TS, Value, H) -> - NodeBin = atom_to_binary(node(), utf8), - %% Metric = <>, - Metric = <>, - emit_stat2(Metric, TS, Value, H). - -emit_stat2(Metric, TS, Value, #history{collector_sock=Sock, - collector_host=Host, - collector_port=Port}) -> - if is_integer(Value) -> - Packet = <<"=", TS:64/integer, Value:64/integer, Metric/binary>>, - %% io:format("Sending: ~p~n", [{TS, Value, Metric}]), - gen_udp:send(Sock, Host, Port, Packet); - is_float(Value) -> - %% IValue = erlang:trunc(Value), - %% Packet = <<"=", TS:64/integer, IValue:64/integer, Metric/binary>>, - %% %% io:format("Sending: ~p~n", [{TS, Value, Metric}]), - Packet = <<"#", (term_to_binary({Value, Metric, TS}))/binary>>, - gen_udp:send(Sock, Host, Port, Packet); - true -> - io:format("NT: ~p~n", [Value]) - end, - ok. - - --record(vmstat, {procs_r, - procs_b, - mem_swpd, - mem_free, - mem_buff, - mem_cache, - swap_si, - swap_so, - io_bi, - io_bo, - system_in, - system_cs, - cpu_us, - cpu_sy, - cpu_id, - cpu_wa}). - -report_vmstat(H) -> - Result = os:cmd("vmstat 1 2"), - Lines = string:tokens(Result, "\n"), - Last = hd(lists:reverse(Lines)), - case parse_vmstat(Last) of - undefined -> - ok; - VM = #vmstat{} -> - TS = timestamp(), - emit_stat(<<"cpu_utilization">>, TS, 100 - VM#vmstat.cpu_id, H), - emit_stat(<<"cpu_iowait">>, TS, VM#vmstat.cpu_wa, H), - emit_stat(<<"memory_swap_in">>, TS, VM#vmstat.swap_si, H), - emit_stat(<<"memory_swap_out">>, TS, VM#vmstat.swap_so, H) - end, - ok. - -vmstat(Master, H) -> - spawn(fun() -> - monitor(process, Master), - Port = open_port({spawn, "vmstat 1"}, [{line,4096}, out]), - vmstat_loop(Port, H) - end). 
-
-parse_vmstat(Line) ->
-    Values = string:tokens(Line, " "),
-    try
-        Fields = [list_to_integer(Field) || Field <- Values],
-        list_to_tuple([vmstat|Fields])
-    catch
-        _:_ ->
-            undefined
-    end.
-
-vmstat_loop(Port, H) ->
-    receive
-        {'DOWN', _, process, _, _} ->
-            ok;
-        {Port, {data, Line}} ->
-            case parse_vmstat(Line) of
-                undefined ->
-                    ok;
-                VM = #vmstat{} ->
-                    TS = timestamp(),
-                    emit_stat(<<"cpu_utilization">>, TS, 100 - VM#vmstat.cpu_id, H),
-                    emit_stat(<<"cpu_iowait">>, TS, VM#vmstat.cpu_wa, H),
-                    emit_stat(<<"memory_swap_in">>, TS, VM#vmstat.swap_si, H),
-                    emit_stat(<<"memory_swap_out">>, TS, VM#vmstat.swap_so, H)
-            end,
-            vmstat_loop(Port, H)
-    end.

From 89d5ad9e35c18c965bb104405edacd2e76ddfb41 Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Wed, 26 Feb 2014 13:43:46 -0500
Subject: [PATCH 120/139] Ignore riak installations.

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 7a4d69e65..77d316592 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ doc/
 *.jar
 coverage
 tags
+riak-*

From b7f303bf5bba4cdd1453b7877919c6161f049528 Mon Sep 17 00:00:00 2001
From: Christopher Meiklejohn
Date: Wed, 26 Feb 2014 13:43:54 -0500
Subject: [PATCH 121/139] Remove assertions based on performance.

---
 tests/repl_fs_bench.erl | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl
index 7dbc8d4ab..0523a9ba0 100644
--- a/tests/repl_fs_bench.erl
+++ b/tests/repl_fs_bench.erl
@@ -53,11 +53,6 @@ confirm() ->
     lager:info("Diff fullsync completed in: ~pms", [KeylistDiff / 1000]),
     lager:info("None fullsync completed in: ~pms", [KeylistNone / 1000]),
 
-    ?assert(AAEEmpty < KeylistEmpty),
-    ?assert(AAEFull < KeylistFull),
-    ?assert(AAEDiff < KeylistDiff),
-    ?assert(AAENone < KeylistNone),
-
     pass.
 
 fullsync_test(Strategy) ->

From 711d687c916a6cfd3a8782362e2292aee7c05057 Mon Sep 17 00:00:00 2001
From: Kelly McLaughlin
Date: Tue, 25 Feb 2014 17:17:29 -0700
Subject: [PATCH 122/139] Add test for realtime queue data buildup on sink
 clusters

---
 tests/rt_cascading_rtq.erl | 185 +++++++++++++++++++++++++++++++++++++
 1 file changed, 185 insertions(+)
 create mode 100644 tests/rt_cascading_rtq.erl

diff --git a/tests/rt_cascading_rtq.erl b/tests/rt_cascading_rtq.erl
new file mode 100644
index 000000000..a407bbadb
--- /dev/null
+++ b/tests/rt_cascading_rtq.erl
@@ -0,0 +1,185 @@
+-module(rt_cascading_rtq).
+-compile(export_all).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(TEST_BUCKET, <<"rt-cascading-rtq-systest-a">>).
+
+setup() ->
+    rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]),
+
+    {SourceLeader, SinkLeaderA, SinkLeaderB, _, _, _} = ClusterNodes = make_clusters(),
+
+    connect_clusters(SourceLeader, SinkLeaderA, "SinkA"),
+    connect_clusters(SourceLeader, SinkLeaderB, "SinkB"),
+    ClusterNodes.
+
+confirm() ->
+    SetupData = setup(),
+    rtq_data_buildup_test(SetupData),
+    pass.
+
+%% This test case is designed to ensure that there is no realtime
+%% queue buildup on sink nodes that do not serve as source nodes for
+%% any other clusters. It constructs a simple topology with a single
+%% source cluster replicating to two sinks. 
The topology for this test
+%% is as follows:
+%%     +--------+
+%%     | Source |
+%%     +--------+
+%%       ^    ^
+%%      /      \
+%%     V        V
+%% +-------+ +-------+
+%% | SinkA | | SinkB |
+%% +-------+ +-------+
+rtq_data_buildup_test(ClusterNodes) ->
+    {SourceLeader, SinkLeaderA, SinkLeaderB, SourceNodes, _SinkANodes, _SinkBNodes} = ClusterNodes,
+
+    %% Enable RT replication from the source cluster to "SinkA"
+    lager:info("Enabling realtime between ~p and ~p", [SourceLeader, SinkLeaderA]),
+    enable_rt(SourceLeader, SourceNodes, "SinkA"),
+    %% Enable RT replication from the source cluster to "SinkB"
+    lager:info("Enabling realtime between ~p and ~p", [SourceLeader, SinkLeaderB]),
+    enable_rt(SourceLeader, SourceNodes, "SinkB"),
+
+    %% Get the baseline byte count for the rtq for each sink cluster
+    SinkAInitialQueueSize = rtq_bytes(SinkLeaderA),
+    SinkBInitialQueueSize = rtq_bytes(SinkLeaderB),
+
+    %% Write keys to the source cluster
+    KeyCount = 1001,
+    write_to_cluster(SourceLeader, 1, KeyCount),
+    read_from_cluster(SinkLeaderA, 1, KeyCount, 0),
+    read_from_cluster(SinkLeaderB, 1, KeyCount, 0),
+
+    %% Verify the rt queue is still at the initial size for both sink clusters
+    ?assertEqual(SinkAInitialQueueSize, rtq_bytes(SinkLeaderA)),
+    ?assertEqual(SinkBInitialQueueSize, rtq_bytes(SinkLeaderB)).
+
+rtq_bytes(Node) ->
+    RtqStatus = rpc:call(Node, riak_repl2_rtq, status, []),
+    proplists:get_value(bytes, RtqStatus).
+
+make_clusters() ->
+    NodeCount = rt_config:get(num_nodes, 6),
+    lager:info("Deploy ~p nodes", [NodeCount]),
+    Nodes = deploy_nodes(NodeCount, true),
+
+    {SourceNodes, SinkNodes} = lists:split(2, Nodes),
+    {SinkANodes, SinkBNodes} = lists:split(2, SinkNodes),
+    lager:info("SinkANodes: ~p", [SinkANodes]),
+    lager:info("SinkBNodes: ~p", [SinkBNodes]),
+
+    lager:info("Build source cluster"),
+    repl_util:make_cluster(SourceNodes),
+
+    lager:info("Build sink cluster A"),
+    repl_util:make_cluster(SinkANodes),
+
+    lager:info("Build sink cluster B"),
+    repl_util:make_cluster(SinkBNodes),
+
+    SourceFirst = hd(SourceNodes),
+    AFirst = hd(SinkANodes),
+    BFirst = hd(SinkBNodes),
+
+    %% Name the clusters
+    repl_util:name_cluster(SourceFirst, "Source"),
+    repl_util:name_cluster(AFirst, "SinkA"),
+    repl_util:name_cluster(BFirst, "SinkB"),
+
+    lager:info("Waiting for convergence."),
+    rt:wait_until_ring_converged(SourceNodes),
+    rt:wait_until_ring_converged(SinkANodes),
+    rt:wait_until_ring_converged(SinkBNodes),
+
+    lager:info("Waiting for transfers to complete."),
+    rt:wait_until_transfers_complete(SourceNodes),
+    rt:wait_until_transfers_complete(SinkANodes),
+    rt:wait_until_transfers_complete(SinkBNodes),
+
+    %% get the leader for the source cluster
+    lager:info("waiting for leader to converge on the source cluster"),
+    ?assertEqual(ok, repl_util:wait_until_leader_converge(SourceNodes)),
+
+    %% get the leader for the first sink cluster
+    lager:info("waiting for leader to converge on sink cluster A"),
+    ?assertEqual(ok, repl_util:wait_until_leader_converge(SinkANodes)),
+
+    %% get the leader for the second sink cluster
+    lager:info("waiting for leader to converge on sink cluster B"),
+    ?assertEqual(ok, repl_util:wait_until_leader_converge(SinkBNodes)),
+
+    SourceLeader = repl_util:get_leader(SourceFirst),
+    ALeader = repl_util:get_leader(AFirst),
+    BLeader = repl_util:get_leader(BFirst),
+
+    %% Uncomment the following 2 lines to verify that pre-2.0 versions
+    %% of Riak behave as expected if cascading writes are disabled for
+    %% the sink clusters. 
+ %% disable_cascading(ALeader, SinkANodes), + %% disable_cascading(BLeader, SinkBNodes), + + lager:info("Source Leader: ~p SinkALeader: ~p SinkBLeader: ~p", [SourceLeader, ALeader, BLeader]), + {SourceLeader, ALeader, BLeader, SourceNodes, SinkANodes, SinkBNodes}. + +%% @doc Connect two clusters using a given name. +connect_cluster(Source, Port, Name) -> + lager:info("Connecting ~p to ~p for cluster ~p.", + [Source, Port, Name]), + repl_util:connect_cluster(Source, "127.0.0.1", Port), + ?assertEqual(ok, repl_util:wait_for_connection(Source, Name)). + +%% @doc Connect two clusters for replication using their respective leader nodes. +connect_clusters(SourceLeader, SinkLeader, SinkName) -> + SinkPort = repl_util:get_port(SinkLeader), + lager:info("connect source cluster to ~p on port ~p", [SinkName, SinkPort]), + repl_util:connect_cluster(SourceLeader, "127.0.0.1", SinkPort), + ?assertEqual(ok, repl_util:wait_for_connection(SourceLeader, SinkName)). + +cluster_conf(_CascadingWrites) -> + [ + {riak_repl, + [ + %% turn off fullsync + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + {max_fssource_cluster, 20}, + {max_fssource_node, 20}, + {max_fssink_node, 20}, + {rtq_max_bytes, 1048576} + ]} + ]. + +deploy_nodes(NumNodes, true) -> + rt:deploy_nodes(NumNodes, cluster_conf(always)); +deploy_nodes(NumNodes, false) -> + rt:deploy_nodes(NumNodes, cluster_conf(never)). + +%% @doc Turn on Realtime replication on the cluster lead by LeaderA. +%% The clusters must already have been named and connected. +enable_rt(SourceLeader, SourceNodes, SinkName) -> + repl_util:enable_realtime(SourceLeader, SinkName), + rt:wait_until_ring_converged(SourceNodes), + + repl_util:start_realtime(SourceLeader, SinkName), + rt:wait_until_ring_converged(SourceNodes). + +%% @doc Turn off Realtime replication on the cluster lead by LeaderA. +disable_cascading(Leader, Nodes) -> + rpc:call(Leader, riak_repl_console, realtime_cascades, [["never"]]), + rt:wait_until_ring_converged(Nodes). + +%% @doc Write a series of keys and ensure they are all written. +write_to_cluster(Node, Start, End) -> + lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), + ?assertEqual([], + repl_util:do_write(Node, Start, End, ?TEST_BUCKET, 1)). + +%% @doc Read from cluster a series of keys, asserting a certain number +%% of errors. +read_from_cluster(Node, Start, End, Errors) -> + lager:info("Reading ~p keys from node ~p.", [End - Start, Node]), + Res2 = rt:systest_read(Node, Start, End, ?TEST_BUCKET, 1), + ?assertEqual(Errors, length(Res2)). From bfb35d59cd5951f1bb129efdb08f0d799d6904a2 Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Wed, 26 Feb 2014 12:16:28 -0700 Subject: [PATCH 123/139] Rename rt_cascading_rtq -> repl_rt_cascading_rtq --- tests/{rt_cascading_rtq.erl => repl_rt_cascading_rtq.erl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests/{rt_cascading_rtq.erl => repl_rt_cascading_rtq.erl} (99%) diff --git a/tests/rt_cascading_rtq.erl b/tests/repl_rt_cascading_rtq.erl similarity index 99% rename from tests/rt_cascading_rtq.erl rename to tests/repl_rt_cascading_rtq.erl index a407bbadb..f8edad8d6 100644 --- a/tests/rt_cascading_rtq.erl +++ b/tests/repl_rt_cascading_rtq.erl @@ -1,4 +1,4 @@ --module(rt_cascading_rtq). +-module(repl_rt_cascading_rtq). -compile(export_all). -include_lib("eunit/include/eunit.hrl"). 
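
The next two patches wire latency into fullsync via intercepts. Condensed into one illustrative helper (hypothetical wrapper name; the rt_intercept calls are exactly the ones those patches add to repl_fs_bench):

    %% Load the intercept module on the node, then reroute
    %% riak_repl_aae_source:get_reply/1 to the 10ms-delay variant
    %% defined in riak_repl_aae_source_intercepts.
    add_aae_latency(Node) ->
        rt_intercept:load_code(Node),
        rt_intercept:add(Node,
                         {riak_repl_aae_source,
                          [{{get_reply, 1}, delayed_get_reply}]}).
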
From e0951a960988bc3fe042fd157f31c834d04aa203 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Fri, 28 Feb 2014 13:04:21 -0800 Subject: [PATCH 124/139] Add benchmarking with latency support. --- intercepts/riak_repl2_fssource_intercepts.erl | 17 ++++ .../riak_repl_aae_source_intercepts.erl | 21 +++++ tests/repl_fs_bench.erl | 90 ++++++++++++------- 3 files changed, 94 insertions(+), 34 deletions(-) create mode 100644 intercepts/riak_repl2_fssource_intercepts.erl create mode 100644 intercepts/riak_repl_aae_source_intercepts.erl diff --git a/intercepts/riak_repl2_fssource_intercepts.erl b/intercepts/riak_repl2_fssource_intercepts.erl new file mode 100644 index 000000000..cf37ff1d9 --- /dev/null +++ b/intercepts/riak_repl2_fssource_intercepts.erl @@ -0,0 +1,17 @@ +-module(riak_repl2_fssource_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +-define(M, riak_repl2_fssource_orig). + +slow_handle_info(Msg, State) -> + io:format("slow_handle_info~n"), + ?I_INFO("slow_handle_info~n"), + timer:sleep(10), + ?M:handle_info_orig(Msg, State). + +really_slow_handle_info(Msg, State) -> + io:format("really_slow_handle_info~n"), + ?I_INFO("really_slow_handle_info~n"), + timer:sleep(100), + ?M:handle_info_orig(Msg, State). diff --git a/intercepts/riak_repl_aae_source_intercepts.erl b/intercepts/riak_repl_aae_source_intercepts.erl new file mode 100644 index 000000000..e12203ee3 --- /dev/null +++ b/intercepts/riak_repl_aae_source_intercepts.erl @@ -0,0 +1,21 @@ +-module(riak_repl_aae_source_intercepts). +-compile(export_all). +-include("intercept.hrl"). + +-define(M, riak_repl_aae_source_orig). + +%% @doc Introduce 10ms of latency in receiving message off of the +%% socket. +delayed_get_reply(State) -> + io:format("delayed~n"), + ?I_INFO("delayed~n"), + timer:sleep(10), + ?M:get_reply_orig(State). + +%% @doc Introduce 100ms of latency in receiving message off of the +%% socket. +really_delayed_get_reply(State) -> + io:format("really delayed~n"), + ?I_INFO("really delayed~n"), + timer:sleep(100), + ?M:get_reply_orig(State). diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl index 0523a9ba0..1d7a4169d 100644 --- a/tests/repl_fs_bench.erl +++ b/tests/repl_fs_bench.erl @@ -1,9 +1,11 @@ -module(repl_fs_bench). + -export([confirm/0]). + -include_lib("eunit/include/eunit.hrl"). --define(DIFF_NUM_KEYS, 1000). --define(FULL_NUM_KEYS, 10000). +-define(DIFF_NUM_KEYS, 10). +-define(FULL_NUM_KEYS, 100). -define(TEST_BUCKET, <<"repl_bench">>). -define(HARNESS, (rt_config:get(rt_harness))). @@ -34,28 +36,30 @@ ]} ]). --import(rt, [deploy_nodes/2]). 
-
 confirm() ->
-    {AAEEmpty, AAEFull, AAEDiff, AAENone} = fullsync_test(aae),
+    {E1, F1, D1, N1} = fullsync_test(keylist, 0),
+    {E2, F2, D2, N2} = fullsync_test(keylist, 10),
+    {E3, F3, D3, N3} = fullsync_test(keylist, 100),
 
-    {KeylistEmpty, KeylistFull, KeylistDiff, KeylistNone} = fullsync_test(keylist),
+    {E4, F4, D4, N4} = fullsync_test(aae, 0),
+    {E5, F5, D5, N5} = fullsync_test(aae, 10),
+    {E6, F6, D6, N6} = fullsync_test(aae, 100),
 
-    lager:info("Results for aae:"),
-    lager:info("Empty fullsync completed in: ~pms", [AAEEmpty / 1000]),
-    lager:info("All fullsync completed in: ~pms", [AAEFull / 1000]),
-    lager:info("Diff fullsync completed in: ~pms", [AAEDiff / 1000]),
-    lager:info("None fullsync completed in: ~pms", [AAENone / 1000]),
+    lager:info("Keylist Empty: ~pms ~pms ~pms", [E1 / 1000, E2 / 1000, E3 / 1000]),
+    lager:info("Keylist Full: ~pms ~pms ~pms", [F1 / 1000, F2 / 1000, F3 / 1000]),
+    lager:info("Keylist Diff: ~pms ~pms ~pms", [D1 / 1000, D2 / 1000, D3 / 1000]),
+    lager:info("Keylist None: ~pms ~pms ~pms", [N1 / 1000, N2 / 1000, N3 / 1000]),
 
-    lager:info("Results for keylist:"),
-    lager:info("Empty fullsync completed in: ~pms", [KeylistEmpty / 1000]),
-    lager:info("All fullsync completed in: ~pms", [KeylistFull / 1000]),
-    lager:info("Diff fullsync completed in: ~pms", [KeylistDiff / 1000]),
-    lager:info("None fullsync completed in: ~pms", [KeylistNone / 1000]),
+    lager:info("AAE Empty: ~pms ~pms ~pms", [E4 / 1000, E5 / 1000, E6 / 1000]),
+    lager:info("AAE Full: ~pms ~pms ~pms", [F4 / 1000, F5 / 1000, F6 / 1000]),
+    lager:info("AAE Diff: ~pms ~pms ~pms", [D4 / 1000, D5 / 1000, D6 / 1000]),
+    lager:info("AAE None: ~pms ~pms ~pms", [N4 / 1000, N5 / 1000, N6 / 1000]),
 
     pass.
 
-fullsync_test(Strategy) ->
+%% @doc Perform a fullsync, with the given latency injected via an
+%% intercept, and return the elapsed time for each fullsync phase. 
+fullsync_test(Strategy, Latency) -> rt:set_advanced_conf(all, ?CONF(Strategy)), [ANodes, BNodes] = rt:build_clusters([3, 3]), @@ -63,33 +67,56 @@ fullsync_test(Strategy) -> AFirst = hd(ANodes), BFirst = hd(BNodes), + case {Strategy, Latency} of + {aae, 10} -> + [rt_intercept:add(Node, + {riak_repl_aae_source, + [{{get_reply, 1}, delayed_get_reply}]}) + || Node <- ANodes], + ok; + {keylist, 10} -> + [rt_intercept:add(Node, + {riak_repl2_fssource, + [{{handle_info, 2}, slow_handle_info}]}) + || Node <- ANodes], + ok; + {aae, 100} -> + [rt_intercept:add(Node, + {riak_repl_aae_source, + [{{get_reply, 1}, really_delayed_get_reply}]}) + || Node <- ANodes], + ok; + {keylist, 100} -> + [rt_intercept:add(Node, + {riak_repl2_fssource, + [{{handle_info, 2}, really_slow_handle_info}]}) + || Node <- ANodes], + ok; + _ -> + ok + end, + repl_util:name_cluster(AFirst, "A"), repl_util:name_cluster(BFirst, "B"), rt:wait_until_ring_converged(ANodes), rt:wait_until_ring_converged(BNodes), - lager:info("waiting for leader to converge on cluster A"), ?assertEqual(ok, repl_util:wait_until_leader_converge(ANodes)), - - lager:info("waiting for leader to converge on cluster B"), ?assertEqual(ok, repl_util:wait_until_leader_converge(BNodes)), - LeaderA = rpc:call(AFirst, riak_core_cluster_mgr, get_leader, []), + LeaderA = rpc:call(AFirst, + riak_core_cluster_mgr, get_leader, []), - {ok, {IP, Port}} = rpc:call(BFirst, application, get_env, - [riak_core, cluster_mgr]), + {ok, {IP, Port}} = rpc:call(BFirst, + application, get_env, [riak_core, cluster_mgr]), - lager:info("connect cluster A:~p to B on port ~p", - [LeaderA, Port]), repl_util:connect_cluster(LeaderA, IP, Port), ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), repl_util:enable_fullsync(LeaderA, "B"), rt:wait_until_ring_converged(ANodes), - lager:info("Wait for cluster connection A:~p -> B:~p:~p", - [LeaderA, BFirst, Port]), ?assertEqual(ok, repl_util:wait_for_connection(LeaderA, "B")), %% Perform fullsync of an empty cluster. @@ -99,14 +126,14 @@ fullsync_test(Strategy) -> [LeaderA]), %% Write keys and perform fullsync. - write_to_cluster(AFirst, 0, ?FULL_NUM_KEYS), + repl_util:write_to_cluster(AFirst, 0, ?FULL_NUM_KEYS, ?TEST_BUCKET), repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), {FullTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, [LeaderA]), %% Rewrite first 10% keys and perform fullsync. - write_to_cluster(AFirst, 0, ?DIFF_NUM_KEYS), + repl_util:write_to_cluster(AFirst, 0, ?FULL_NUM_KEYS, ?TEST_BUCKET), repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), {DiffTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, @@ -122,8 +149,3 @@ fullsync_test(Strategy) -> rt:clean_cluster(BNodes), {EmptyTime, FullTime, DiffTime, NoneTime}. - -write_to_cluster(Node, Start, End) -> - lager:info("Writing ~p keys to node ~p.", [End - Start, Node]), - ?assertEqual([], - repl_util:do_write(Node, Start, End, ?TEST_BUCKET, 1)). From e57df6270d06de711a6b4f48017e7954c9f80242 Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Tue, 4 Mar 2014 13:21:22 -0800 Subject: [PATCH 125/139] Ensure we load the intercept module remotely. 
--- tests/repl_fs_bench.erl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl
index 1d7a4169d..fa4004174 100644
--- a/tests/repl_fs_bench.erl
+++ b/tests/repl_fs_bench.erl
@@ -67,6 +67,8 @@ fullsync_test(Strategy, Latency) ->
     AFirst = hd(ANodes),
     BFirst = hd(BNodes),
 
+    [rt_intercept:load_code(Node) || Node <- ANodes],
+
     case {Strategy, Latency} of
         {aae, 10} ->
             [rt_intercept:add(Node,

From bf9525fbf64c0b305c404d3786c5d2170bfe6cda Mon Sep 17 00:00:00 2001
From: lordnull
Date: Tue, 4 Mar 2014 15:28:34 -0600
Subject: [PATCH 126/139] Added test to ensure fssources don't block repl
 status.

---
 tests/repl_fs_stat_caching.erl | 110 +++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 tests/repl_fs_stat_caching.erl

diff --git a/tests/repl_fs_stat_caching.erl b/tests/repl_fs_stat_caching.erl
new file mode 100644
index 000000000..3dc174a1b
--- /dev/null
+++ b/tests/repl_fs_stat_caching.erl
@@ -0,0 +1,110 @@
+%% @doc Tests to ensure a stalling or blocking fssource process does not
+%% cause the status call to time out. Useful only on 2.0 and up: on 2.0
+%% it verifies the fix, and on later versions it acts as a regression
+%% test.
+-module(repl_fs_stat_caching).
+-behavior(riak_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-define(TEST_BUCKET, <<"repl_fs_stat_caching">>).
+
+-export([confirm/0]).
+
+confirm() ->
+    {{SrcLead, SrcCluster}, {SinkLead, _SinkCluster}} = setup(),
+    SinkPort = repl_util:get_cluster_mgr_port(SinkLead),
+    repl_util:connect_cluster(SrcLead, "127.0.0.1", SinkPort),
+
+    lager:info("Loading source cluster"),
+    [] = repl_util:do_write(SrcLead, 1, 1000, ?TEST_BUCKET, 1),
+
+    repl_util:enable_fullsync(SrcLead, "sink"),
+    rpc:call(SrcLead, riak_repl_console, fullsync, [["start", "sink"]]),
+
+    % and now, the actual test.
+    % find a random fssource, suspend it, and then ensure we can get a
+    % status.
+    {ok, Suspended} = suspend_an_fs_source(SrcCluster),
+    lager:info("Suspended: ~p", [Suspended]),
+    {ok, Status} = rt:riak_repl(SrcLead, "status"),
+    FailLine = "RPC to '" ++ atom_to_list(SrcLead) ++ "' failed: timeout\n",
+    ?assertNotEqual(FailLine, Status),
+
+    true = rpc:block_call(node(Suspended), erlang, resume_process, [Suspended]),
+
+    ?assert(true).
+
+setup() ->
+    rt:set_conf(all, [{"buckets.default.allow_mult", "false"}]),
+    NodeCount = rt_config:get(num_nodes, 6),
+
+    lager:info("Deploy ~p nodes", [NodeCount]),
+    Nodes = rt:deploy_nodes(NodeCount, cluster_conf()),
+    SplitSize = NodeCount div 2,
+    {SourceNodes, SinkNodes} = lists:split(SplitSize, Nodes),
+
+    lager:info("making cluster Source from ~p", [SourceNodes]),
+    repl_util:make_cluster(SourceNodes),
+
+    lager:info("making cluster Sink from ~p", [SinkNodes]),
+    repl_util:make_cluster(SinkNodes),
+
+    SrcHead = hd(SourceNodes),
+    SinkHead = hd(SinkNodes),
+    repl_util:name_cluster(SrcHead, "source"),
+    repl_util:name_cluster(SinkHead, "sink"),
+
+    rt:wait_until_ring_converged(SourceNodes),
+    rt:wait_until_ring_converged(SinkNodes),
+
+    rt:wait_until_transfers_complete(SourceNodes),
+    rt:wait_until_transfers_complete(SinkNodes),
+
+    ok = repl_util:wait_until_leader_converge(SourceNodes),
+    ok = repl_util:wait_until_leader_converge(SinkNodes),
+
+    SourceLead = repl_util:get_leader(SrcHead),
+    SinkLead = repl_util:get_leader(SinkHead),
+
+    {{SourceLead, SourceNodes}, {SinkLead, SinkNodes}}. 
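
The heart of confirm/0 above is the status check made while an fssource is suspended. Distilled into a standalone form (hypothetical helper name; the calls are the ones used in the test itself):

    %% Fails if `riak-repl status` against Node comes back as the CLI's
    %% RPC-timeout line instead of real status output.
    assert_status_responsive(Node) ->
        {ok, Status} = rt:riak_repl(Node, "status"),
        FailLine = "RPC to '" ++ atom_to_list(Node) ++ "' failed: timeout\n",
        ?assertNotEqual(FailLine, Status).
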
+ +cluster_conf() -> + [ + {riak_repl, [ + {fullsync_on_connect, false}, + {fullsync_interval, disabled}, + {max_fssource_cluster, 3}, + {max_fssource_node, 1}, + {max_fssink_node, 20}, + {rtq_max_bytes, 1048576} + ]} + ]. + +suspend_an_fs_source([]) -> + {error, no_nodes}; + +suspend_an_fs_source(Nodes) -> + suspend_an_fs_source(Nodes, 10000). + +suspend_an_fs_source([_Node | _Tail], 0) -> + {error, tries_ran_out}; + +suspend_an_fs_source([Node | Tail], TriesLeft) -> + Pids = rpc:call(Node, riak_repl2_fssource_sup, enabled, []), + case maybe_suspend_an_fs_source(Node, Pids) of + false -> + suspend_an_fs_source(Tail ++ [Node], TriesLeft - 1); + Pid -> + {ok, Pid} + end. + +maybe_suspend_an_fs_source(_Node, []) -> + false; + +maybe_suspend_an_fs_source(Node, [{_Remote, Pid} | Tail]) -> + case rpc:block_call(Node, erlang, suspend_process, [Pid]) of + false -> + maybe_suspend_an_fs_source(Node, Tail); + true -> + Pid + end. + From 804e28fbe230c89058ec0c36877db09d7bb9bfd2 Mon Sep 17 00:00:00 2001 From: "John R. Daily" Date: Thu, 6 Mar 2014 19:10:27 -0500 Subject: [PATCH 127/139] Update tests to reflect new commands, plus the fact that riak_core_console is being called twice from one riak-admin command in some cases --- intercepts/riak_core_console_intercepts.erl | 37 +++++++++++++++++++++ tests/riak_admin_console_tests.erl | 34 +++++++++++++++---- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/intercepts/riak_core_console_intercepts.erl b/intercepts/riak_core_console_intercepts.erl index bbffc35d2..cd3ed3bec 100644 --- a/intercepts/riak_core_console_intercepts.erl +++ b/intercepts/riak_core_console_intercepts.erl @@ -99,6 +99,25 @@ verify_console_del_user(Val) -> _ -> ?FAIL end. +verify_console_add_group(Val) -> + case Val of + ["group"] -> ?PASS; + ["group", "x1=y1", "x2=y2"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_alter_group(Val) -> + case Val of + ["group", "x1=y1", "x2=y2"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_del_group(Val) -> + case Val of + ["group"] -> ?PASS; + _ -> ?FAIL + end. + verify_console_add_source(Val) -> case Val of ["all","192.168.100.0/22","x","x1=y1"] -> ?PASS; @@ -149,6 +168,24 @@ verify_console_print_users(Val) -> _ -> ?FAIL end. +verify_console_print_group(Val) -> + case Val of + ["group"] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_print_groups(Val) -> + case Val of + [] -> ?PASS; + _ -> ?FAIL + end. + +verify_console_print_grants(Val) -> + case Val of + ["foo"] -> ?PASS; + _ -> ?FAIL + end. + verify_console_print_sources(Val) -> case Val of [] -> ?PASS; diff --git a/tests/riak_admin_console_tests.erl b/tests/riak_admin_console_tests.erl index 61d733924..8084b8424 100644 --- a/tests/riak_admin_console_tests.erl +++ b/tests/riak_admin_console_tests.erl @@ -31,7 +31,7 @@ %% will be run on giddyup and hence many platforms, we should be able %% to catch these types of bugs earlier. %% See also: replication2_console_tests.erl for a more detailed -%% description. +%% description. 
%% UNTESTED, as they don't use rpc, or have a non-trivial impl %% test @@ -66,11 +66,15 @@ bucket_tests(Node) -> %% riak-admin security security_tests(Node) -> - check_admin_cmd(Node, "security add-user foo"), - check_admin_cmd(Node, "security add-user foo x1=y1 x2=y2"), - check_admin_cmd(Node, "security alter-user foo x1=y1"), - check_admin_cmd(Node, "security alter-user foo x1=y1 x2=y2"), + check_admin_cmd_2x(Node, "security add-user foo"), + check_admin_cmd_2x(Node, "security add-user foo x1=y1 x2=y2"), + check_admin_cmd_2x(Node, "security add-group group"), + check_admin_cmd_2x(Node, "security add-group group x1=y1 x2=y2"), + check_admin_cmd_2x(Node, "security alter-user foo x1=y1"), + check_admin_cmd_2x(Node, "security alter-user foo x1=y1 x2=y2"), + check_admin_cmd_2x(Node, "security alter-group group x1=y1 x2=y2"), check_admin_cmd(Node, "security del-user foo"), + check_admin_cmd(Node, "security del-group group"), check_admin_cmd(Node, "security add-source all 192.168.100.0/22 y"), check_admin_cmd(Node, "security add-source all 192.168.100.0/22 x x1=y1"), check_admin_cmd(Node, "security add-source foo,bar 192.168.100.0/22 x x1=y1"), @@ -90,10 +94,12 @@ security_tests(Node) -> check_admin_cmd(Node, "security revoke foo,bar,baz on foo my_bucket from y"), check_admin_cmd(Node, "security print-users"), check_admin_cmd(Node, "security print-sources"), - check_admin_cmd(Node, "security enable"), - check_admin_cmd(Node, "security disable"), + check_admin_cmd_2x(Node, "security enable"), + check_admin_cmd_2x(Node, "security disable"), check_admin_cmd(Node, "security status"), check_admin_cmd(Node, "security print-user foo"), + check_admin_cmd(Node, "security print-group group"), + check_admin_cmd(Node, "security print-grants foo"), check_admin_cmd(Node, "security ciphers foo"). %% "top level" riak-admin COMMANDS @@ -168,12 +174,18 @@ confirm() -> {{add_user, 1}, verify_console_add_user}, {{alter_user, 1}, verify_console_alter_user}, {{del_user, 1}, verify_console_del_user}, + {{add_group, 1}, verify_console_add_group}, + {{alter_group, 1}, verify_console_alter_group}, + {{del_group, 1}, verify_console_del_group}, {{add_source, 1}, verify_console_add_source}, {{del_source, 1}, verify_console_del_source}, {{grant, 1}, verify_console_grant}, {{revoke, 1}, verify_console_revoke}, {{print_user,1}, verify_console_print_user}, {{print_users,1}, verify_console_print_users}, + {{print_group,1}, verify_console_print_group}, + {{print_groups,1}, verify_console_print_groups}, + {{print_grants,1}, verify_console_print_grants}, {{print_sources, 1}, verify_console_print_sources}, {{security_enable,1}, verify_console_security_enable}, {{security_disable,1}, verify_console_security_disable}, @@ -224,3 +236,11 @@ check_admin_cmd(Node, Cmd) -> {ok, Out} = rt:admin(Node, S), ?assertEqual("pass", Out). +%% Recently we've started calling riak_core_console twice from the +%% same riak-admin invocation; this will result in "passpass" as a +%% return instead of a simple "pass" +check_admin_cmd_2x(Node, Cmd) -> + S = string:tokens(Cmd, " "), + lager:info("Testing riak-admin ~s on ~s", [Cmd, Node]), + {ok, Out} = rt:admin(Node, S), + ?assertEqual("passpass", Out). From 5de495debd4aa322dab99123becd65d4a0a55bce Mon Sep 17 00:00:00 2001 From: Christopher Meiklejohn Date: Sat, 8 Mar 2014 17:46:55 -0800 Subject: [PATCH 128/139] Fix differential typo. 
--- tests/repl_fs_bench.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/repl_fs_bench.erl b/tests/repl_fs_bench.erl index fa4004174..505ed2a1a 100644 --- a/tests/repl_fs_bench.erl +++ b/tests/repl_fs_bench.erl @@ -135,7 +135,7 @@ fullsync_test(Strategy, Latency) -> [LeaderA]), %% Rewrite first 10% keys and perform fullsync. - repl_util:write_to_cluster(AFirst, 0, ?FULL_NUM_KEYS, ?TEST_BUCKET), + repl_util:write_to_cluster(AFirst, 0, ?DIFF_NUM_KEYS, ?TEST_BUCKET), repl_util:wait_until_aae_trees_built(ANodes ++ BNodes), {DiffTime, _} = timer:tc(repl_util, start_and_wait_until_fullsync_complete, From 2ec86af3216adbb9451b1e244499c340cd72da6b Mon Sep 17 00:00:00 2001 From: Russell Brown Date: Fri, 14 Mar 2014 18:05:49 +0000 Subject: [PATCH 129/139] Changes to fix a broken CRDT map (see riak_dt#82) break this test Well, that's not true. They break riak_kv's context operations on Maps. This change works around that breakage by turning the context off for the operations in this test. It is a temporary thing, when the context fix work has been done, we'll be changing back. --- tests/verify_dt_converge.erl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/verify_dt_converge.erl b/tests/verify_dt_converge.erl index 570353c27..5ac297c75 100644 --- a/tests/verify_dt_converge.erl +++ b/tests/verify_dt_converge.erl @@ -40,6 +40,8 @@ %% Type, Bucket, Client, Mod +-define(MODIFY_OPTS, [create, {include_context, false}]). + confirm() -> Config = [ {riak_kv, [{handoff_concurrency, 100}]}, {riak_core, [ {ring_creation_size, 16}, @@ -162,14 +164,14 @@ update_1({BType, counter}, Bucket, Client, CMod) -> fun(C) -> riakc_counter:increment(5, C) end, - {BType, Bucket}, ?KEY, [create]); + {BType, Bucket}, ?KEY, ?MODIFY_OPTS); update_1({BType, set}, Bucket, Client, CMod) -> lager:info("update_1: Updating set"), CMod:modify_type(Client, fun(S) -> riakc_set:add_element(<<"Riak">>, S) end, - {BType, Bucket}, ?KEY, [create]); + {BType, Bucket}, ?KEY, ?MODIFY_OPTS); update_1({BType, map}, Bucket, Client, CMod) -> lager:info("update_1: Updating map"), CMod:modify_type(Client, @@ -186,7 +188,7 @@ update_1({BType, map}, Bucket, Client, CMod) -> riakc_counter:increment(10, C) end, M1) end, - {BType, Bucket}, ?KEY, [create]). + {BType, Bucket}, ?KEY, ?MODIFY_OPTS). check_1({BType, counter}, Bucket, Client, CMod) -> lager:info("check_1: Checking counter value is correct"), @@ -205,7 +207,7 @@ update_2a({BType, counter}, Bucket, Client, CMod) -> fun(C) -> riakc_counter:decrement(10, C) end, - {BType, Bucket}, ?KEY, [create]); + {BType, Bucket}, ?KEY, ?MODIFY_OPTS); update_2a({BType, set}, Bucket, Client, CMod) -> CMod:modify_type(Client, fun(S) -> @@ -213,7 +215,7 @@ update_2a({BType, set}, Bucket, Client, CMod) -> <<"Voldemort">>, riakc_set:add_element(<<"Cassandra">>, S)) end, - {BType, Bucket}, ?KEY, [create]); + {BType, Bucket}, ?KEY, ?MODIFY_OPTS); update_2a({BType, map}, Bucket, Client, CMod) -> CMod:modify_type(Client, fun(M) -> @@ -224,7 +226,7 @@ update_2a({BType, map}, Bucket, Client, CMod) -> end, M), riakc_map:add({<<"verified">>, flag}, M1) end, - {BType, Bucket}, ?KEY, [create]). + {BType, Bucket}, ?KEY, ?MODIFY_OPTS). 
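
The ?MODIFY_OPTS change is easiest to read with the option list spelled out inline. An illustrative, standalone equivalent of the calls above (hypothetical function name; riakc_pb_socket:modify_type/5 is one of the client calls the CMod indirection can resolve to):

    %% Same update shape as the clauses above, but with the causal
    %% context explicitly disabled via the option the macro adds.
    add_element_no_context(Client, Bucket, Key) ->
        riakc_pb_socket:modify_type(
          Client,
          fun(S) -> riakc_set:add_element(<<"Riak">>, S) end,
          Bucket, Key, [create, {include_context, false}]).
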
check_2b({BType, counter}, Bucket, Client, CMod) -> lager:info("check_2b: Checking counter value is unchanged"), @@ -243,13 +245,13 @@ update_3b({BType, counter}, Bucket, Client, CMod) -> fun(C) -> riakc_counter:increment(2, C) end, - {BType, Bucket}, ?KEY, [create]); + {BType, Bucket}, ?KEY, ?MODIFY_OPTS); update_3b({BType, set}, Bucket, Client, CMod) -> CMod:modify_type(Client, fun(S) -> riakc_set:add_element(<<"Couchbase">>, S) end, - {BType, Bucket}, ?KEY, [create]); + {BType, Bucket}, ?KEY, ?MODIFY_OPTS); update_3b({BType, map},Bucket,Client,CMod) -> CMod:modify_type(Client, fun(M) -> @@ -266,7 +268,7 @@ update_3b({BType, map},Bucket,Client,CMod) -> end, M1) end, - {BType, Bucket}, ?KEY, [create]). + {BType, Bucket}, ?KEY, ?MODIFY_OPTS). check_3a({BType, counter}, Bucket, Client, CMod) -> lager:info("check_3a: Checking counter value is unchanged"), From 77d20d7cd08044988116eae7893bc99a8b9eee74 Mon Sep 17 00:00:00 2001 From: Russell Brown Date: Wed, 19 Mar 2014 11:30:38 +0000 Subject: [PATCH 130/139] Revert "no context" work around for broken riak_dt --- tests/verify_dt_converge.erl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/verify_dt_converge.erl b/tests/verify_dt_converge.erl index 5ac297c75..553213d2b 100644 --- a/tests/verify_dt_converge.erl +++ b/tests/verify_dt_converge.erl @@ -24,7 +24,9 @@ -module(verify_dt_converge). -behavior(riak_test). +-compile([export_all]). -export([confirm/0]). + -include_lib("eunit/include/eunit.hrl"). -define(CTYPE, <<"counters">>). @@ -40,7 +42,7 @@ %% Type, Bucket, Client, Mod --define(MODIFY_OPTS, [create, {include_context, false}]). +-define(MODIFY_OPTS, [create]). confirm() -> Config = [ {riak_kv, [{handoff_concurrency, 100}]}, From 8c5ca5a3a49a0b57fdaf779c949c1e4e9e032878 Mon Sep 17 00:00:00 2001 From: Russell Brown Date: Wed, 19 Mar 2014 15:57:20 +0000 Subject: [PATCH 131/139] Add test for context operations on riak datatypes --- tests/verify_dt_context.erl | 225 +++++++++++++++++++++++++++++++++++ tests/verify_dt_converge.erl | 1 + 2 files changed, 226 insertions(+) create mode 100644 tests/verify_dt_context.erl diff --git a/tests/verify_dt_context.erl b/tests/verify_dt_context.erl new file mode 100644 index 000000000..f0a6fb75b --- /dev/null +++ b/tests/verify_dt_context.erl @@ -0,0 +1,225 @@ +%% ------------------------------------------------------------------- +%% +%% Copyright (c) 2013 Basho Technologies, Inc. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%%% @copyright (C) 2013, Basho Technologies +%%% @doc +%%% riak_test for riak_dt CRDT context operations +%%% @end + +-module(verify_dt_context). +-behavior(riak_test). +-compile([export_all]). +-export([confirm/0]). + +-include_lib("eunit/include/eunit.hrl"). + +-define(STYPE, <<"sets">>). +-define(MTYPE, <<"maps">>). +-define(TYPES, [{?STYPE, set}, + {?MTYPE, map}]). + +-define(BUCKET, <<"pbtest">>). +-define(KEY, <<"ctx">>). 
+ +-define(MODIFY_OPTS, [create]). + +confirm() -> + Config = [ {riak_kv, [{handoff_concurrency, 100}]}, + {riak_core, [ {ring_creation_size, 16}, + {vnode_management_timer, 1000} ]}], + + [N1, N2]=Nodes = rt:build_cluster(2, Config), + + create_bucket_types(Nodes, ?TYPES), + + [P1, P2] = PBClients = create_pb_clients(Nodes), + + S = make_set([a, b]), + + ok = store_set(P1, S), + + S2 = make_set([x, y, z]), + + M = make_map([{<<"set1">>, S}, {<<"set2">>, S2}]), + + ok = store_map(P2, M), + + + verify_dt_converge:check_value(P1, riakc_pb_socket, + {?STYPE, ?BUCKET}, ?KEY, riakc_set, + [<<"a">>, <<"b">>]), + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?MTYPE, ?BUCKET}, ?KEY, riakc_map, + [{{<<"set1">>, set}, [<<"a">>, <<"b">>]}, + {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}]), + + lager:info("Partition cluster in two."), + + PartInfo = rt:partition([N1], [N2]), + + lager:info("Modify data on side 1"), + %% Modify one side + S1_1 = make_set([c, d, e]), + ok= store_set(P1, S1_1), + + S3 = make_set([r, s]), + + M_1 = make_map([{<<"set1">>, S1_1}, {<<"set3">>, S3}]), + ok = store_map(P1, M_1), + + verify_dt_converge:check_value(P1, riakc_pb_socket, + {?STYPE, ?BUCKET}, ?KEY, riakc_set, + [<<"a">>, <<"b">>, <<"c">>, <<"d">>, <<"e">>]), + + verify_dt_converge:check_value(P1, riakc_pb_socket, + {?MTYPE, ?BUCKET}, ?KEY, riakc_map, + [{{<<"set1">>, set}, [<<"a">>, <<"b">>, <<"c">>, <<"d">>, <<"e">>]}, + {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}, + {{<<"set3">>, set}, [<<"r">>, <<"s">>]}]), + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?STYPE, ?BUCKET}, ?KEY, riakc_set, + [<<"a">>, <<"b">>]), + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?MTYPE, ?BUCKET}, ?KEY, riakc_map, + [{{<<"set1">>, set}, [<<"a">>, <<"b">>]}, + {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}]), + + %% get the modified sides values + + S1_2 = fetch(P1, ?STYPE), + M_2 = fetch(P1, ?MTYPE), + + %% operate on them and send to the partitioned side + S1_3 = riakc_set:del_element(<<"d">>, S1_2), + M_3 = riakc_map:update({<<"set1">>, set}, fun(Set1) -> + riakc_set:del_element(<<"e">>, Set1) end, + riakc_map:erase({<<"set3">>, set}, M_2)), + + %% we've removed elements that aren't to be found on P2, and a + %% field that's never been seen on P2 + + %% update the unmodified side + ok = store_map(P2, M_3), + ok = store_set(P2, S1_3), + + %% the value should not have changed, as these removes should be deferred + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?STYPE, ?BUCKET}, ?KEY, riakc_set, + [<<"a">>, <<"b">>]), + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?MTYPE, ?BUCKET}, ?KEY, riakc_map, + [{{<<"set1">>, set}, [<<"a">>, <<"b">>]}, + {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}]), + + %% Check both sides + %% heal + lager:info("Heal and check merged values"), + ok = rt:heal(PartInfo), + ok = rt:wait_for_cluster_service(Nodes, riak_kv), + + %% verify all nodes agree + + verify_dt_converge:check_value(P1, riakc_pb_socket, + {?STYPE, ?BUCKET}, ?KEY, riakc_set, + [<<"a">>, <<"b">>, <<"c">>, <<"e">>]), + + verify_dt_converge:check_value(P1, riakc_pb_socket, + {?MTYPE, ?BUCKET}, ?KEY, riakc_map, + [{{<<"set1">>, set}, [<<"a">>, <<"b">>, <<"c">>, <<"d">>]}, + {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}]), + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?STYPE, ?BUCKET}, ?KEY, riakc_set, + [<<"a">>, <<"b">>, <<"c">>, <<"e">>]), + + verify_dt_converge:check_value(P2, riakc_pb_socket, + {?MTYPE, ?BUCKET}, ?KEY, riakc_map, + [{{<<"set1">>, set}, 
[<<"a">>, <<"b">>, <<"c">>, <<"d">>]}, + {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}]), + + + [riakc_pb_socket:stop(C) || C <- PBClients], + + pass. + +fetch(Client, BType) -> + {ok, DT} = riakc_pb_socket:fetch_type(Client, {BType, ?BUCKET}, ?KEY), + DT. + + +make_set(Elems) -> + lists:foldl(fun(E, Set) -> + riakc_set:add_element(atom_to_binary(E, latin1), Set) + end, + riakc_set:new(), + Elems). + +make_map(Fields) -> + lists:foldl(fun({F, V}, Map) -> + riakc_map:update({F, set}, fun(_) -> + V end, + Map) + end, + riakc_map:new(), + Fields). + +store_set(Client, Set) -> + riakc_pb_socket:update_type(Client, {?STYPE, ?BUCKET}, ?KEY, riakc_set:to_op(Set)). + +store_map(Client, Map) -> + riakc_pb_socket:update_type(Client, {?MTYPE, ?BUCKET}, ?KEY, riakc_map:to_op(Map)). + +create_pb_clients(Nodes) -> + [begin + C = rt:pbc(N), + riakc_pb_socket:set_options(C, [queue_if_disconnected]), + C + end || N <- Nodes]. + +create_bucket_types([N1|_]=Nodes, Types) -> + lager:info("Creating bucket types with datatypes: ~p", [Types]), + [ rpc:call(N1, riak_core_bucket_type, create, + [Name, [{datatype, Type}, {allow_mult, true}]]) || + {Name, Type} <- Types ], + [rt:wait_until(N1, bucket_type_ready_fun(Name)) || {Name, _Type} <- Types], + [ rt:wait_until(N, bucket_type_matches_fun(Types)) || N <- Nodes]. + +bucket_type_ready_fun(Name) -> + fun(Node) -> + Res = rpc:call(Node, riak_core_bucket_type, activate, [Name]), + lager:info("is ~p ready ~p?", [Name, Res]), + Res == ok + end. + +bucket_type_matches_fun(Types) -> + fun(Node) -> + lists:all(fun({Name, Type}) -> + Props = rpc:call(Node, riak_core_bucket_type, get, + [Name]), + Props /= undefined andalso + proplists:get_value(allow_mult, Props, false) + andalso + proplists:get_value(datatype, Props) == Type + end, Types) + end. diff --git a/tests/verify_dt_converge.erl b/tests/verify_dt_converge.erl index 553213d2b..d4829d52c 100644 --- a/tests/verify_dt_converge.erl +++ b/tests/verify_dt_converge.erl @@ -319,6 +319,7 @@ check_value(Client, CMod, Bucket, Key, DTMod, Expected, Options) -> try Result = CMod:fetch_type(Client, Bucket, Key, Options), + lager:info("Expected ~p~n got ~p~n", [Expected, Result]), ?assertMatch({ok, _}, Result), {ok, C} = Result, ?assertEqual(true, DTMod:is_type(C)), From ae475d39116a09ebae77ea1a379f85230fb6726e Mon Sep 17 00:00:00 2001 From: rzezeski Date: Wed, 19 Mar 2014 19:50:31 +0000 Subject: [PATCH 132/139] Check git exit code and fix pipe cleanup Recently Scott was running into an issue running `verify_handoff` where his old data was not being properly reset when running `setup_harness`. I noticed we were using `os:cmd` which doesn't check the exit code of the command. I modified `run_git` to use `cmd` as well as verify the exit code is 0. This allowed Scott to catch the real issue which turned out to be a bad path in his config. While making this modification I noticed a bug in the pipe cleanup code. The `file:del_dir` call is actually returning `{error, eexist}` because there is a `bin` directory under each pipe dir which had not yet been deleted. Rather than spend time writing a recursive delete in Erlang I changed the code to use `cmd` and to confirm an exit of 0. I modified `stop_all`, which is used by `setup_harness`, to also use `cmd` and check exit codes. Finally I make sure that `spawn_cmd` flattens the list before passing it along as `open_port` wants a string not an iolist. 
--- src/rtdev.erl | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/rtdev.erl b/src/rtdev.erl index f5bb157b7..49d4f8d09 100644 --- a/src/rtdev.erl +++ b/src/rtdev.erl @@ -52,7 +52,8 @@ riak_admin_cmd(Path, N, Args) -> run_git(Path, Cmd) -> lager:info("Running: ~s", [gitcmd(Path, Cmd)]), - os:cmd(gitcmd(Path, Cmd)). + {0, Out} = cmd(gitcmd(Path, Cmd)), + Out. run_riak(N, Path, Cmd) -> lager:info("Running: ~s", [riakcmd(Path, N, Cmd)]), @@ -89,8 +90,8 @@ setup_harness(_Test, _Args) -> %% Reset nodes to base state lager:info("Resetting nodes to fresh state"), - run_git(Path, "reset HEAD --hard"), - run_git(Path, "clean -fd"), + _ = run_git(Path, "reset HEAD --hard"), + _ = run_git(Path, "clean -fd"), lager:info("Cleaning up lingering pipe directories"), rt:pmap(fun(Dir) -> @@ -100,11 +101,7 @@ setup_harness(_Test, _Args) -> %% the extra slashes will be pruned by filename:join, but this %% ensures that there will be at least one between "/tmp" and Dir PipeDir = filename:join(["/tmp//" ++ Dir, "dev"]), - %% when using filelib:wildcard/2, there must be a wildchar char - %% before the first '/'. - Files = filelib:wildcard("dev?/*.{r,w}", PipeDir), - [ file:delete(filename:join(PipeDir, File)) || File <- Files], - file:del_dir(PipeDir) + {0, _} = cmd("rm -rf " ++ PipeDir) end, devpaths()), ok. @@ -392,12 +389,14 @@ stop_all(DevPath) -> "\n"), try _ = list_to_integer(MaybePid), - os:cmd("kill -9 "++MaybePid) + {0, Out} = cmd("kill -9 "++MaybePid), + Out catch _:_ -> ok end, Cmd = C ++ "/bin/riak stop", - [Output | _Tail] = string:tokens(os:cmd(Cmd), "\n"), + {_, StopOut} = cmd(Cmd), + [Output | _Tail] = string:tokens(StopOut, "\n"), Status = case Output of "ok" -> "ok"; _ -> "wasn't running" @@ -544,7 +543,7 @@ node_version(N) -> spawn_cmd(Cmd) -> spawn_cmd(Cmd, []). spawn_cmd(Cmd, Opts) -> - Port = open_port({spawn, Cmd}, [stream, in, exit_status] ++ Opts), + Port = open_port({spawn, lists:flatten(Cmd)}, [stream, in, exit_status] ++ Opts), Port. wait_for_cmd(Port) -> From dfac1cfd5ee8936fb537539594bd819fa8d3090b Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Thu, 20 Mar 2014 15:45:58 -0400 Subject: [PATCH 133/139] Collect per-test coverage for both riak_test and eunit tests for giddyup To let us see the *combined* coverage of our unit and integration tests, modify riak_test and the smoke_test runner to capture coverage data per-test and post it as a giddyup artifact. To maintain the current riak_test behaviour where the *combined* coverage is reported on at the end of a run, each test writes its own .coverdata file and cover is reset; once all tests have run, the coverdata files are all loaded and the total coverage is reported. 
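On the riak_test side this boils down to three stock cover calls per test; condensed into a sketch (illustrative names only, the real logic is spread across rt_cover.erl and riak_test_escript.erl in the hunks below):

    %% Condensed sketch of the per-test coverage cycle.
    run_one(TestFun, CoverDir, TestName) ->
        cover:reset(),                            % start this test from zero
        Result = TestFun(),
        File = filename:join(CoverDir, TestName ++ ".coverdata"),
        ok = cover:export(File),                  % one .coverdata per test
        {Result, File}.

    %% After the whole run, merge every file back in and report totals.
    combined_coverage(Files) ->
        [ok = cover:import(F) || F <- Files],
        [cover:analyse(M, coverage, module) || M <- cover:imported_modules()].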
--- src/riak_test_escript.erl | 8 +++++++- src/rt_cover.erl | 25 +++++++++++++++++++++++++ src/smoke_test_escript.erl | 13 +++++++++++-- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/riak_test_escript.erl b/src/riak_test_escript.erl index 574a74965..9dbd87daa 100644 --- a/src/riak_test_escript.erl +++ b/src/riak_test_escript.erl @@ -172,6 +172,7 @@ main(Args) -> rt_cover:maybe_start(), TestResults = lists:filter(fun results_filter/1, [ run_test(Test, Outdir, TestMetaData, Report, HarnessArgs, length(Tests)) || {Test, TestMetaData} <- Tests]), + [rt_cover:maybe_import_coverage(proplists:get_value(coverdata, R)) || R <- TestResults], Coverage = rt_cover:maybe_write_coverage(all, CoverDir), case {length(TestResults), proplists:get_value(status, hd(TestResults))} of @@ -274,11 +275,14 @@ is_runnable_test({TestModule, _}) -> erlang:function_exported(Mod, Fun, 0). run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> + rt_cover:maybe_reset(), SingleTestResult = riak_test_runner:confirm(Test, Outdir, TestMetaData), + CoverDir = rt_config:get(cover_output, "coverage"), case NumTests of 1 -> keep_them_up; _ -> rt:teardown() end, + CoverageFile = rt_cover:maybe_export_coverage(Test, CoverDir), case Report of undefined -> ok; _ -> @@ -289,11 +293,13 @@ run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> %% Now push up the artifacts, starting with the test log giddyup:post_artifact(Base, {"riak_test.log", L}), [ giddyup:post_artifact(Base, File) || File <- rt:get_node_logs() ], + [giddyup:post_artifact(Base, {filename:basename(CoverageFile) ++ ".gz", + zlib:gzip(element(2,file:read_file(CoverageFile)))}) || CoverageFile /= cover_disabled ], ResultPlusGiddyUp = TestResult ++ [{giddyup_url, list_to_binary(Base)}], [ rt:post_result(ResultPlusGiddyUp, WebHook) || WebHook <- get_webhooks() ] end end, - SingleTestResult. + [{coverdata, CoverageFile} | SingleTestResult]. get_webhooks() -> Hooks = lists:foldl(fun(E, Acc) -> [parse_webhook(E) | Acc] end, diff --git a/src/rt_cover.erl b/src/rt_cover.erl index ed9b268a3..8d636ddfb 100644 --- a/src/rt_cover.erl +++ b/src/rt_cover.erl @@ -30,7 +30,11 @@ start/0, maybe_start_on_node/2, maybe_write_coverage/2, + maybe_export_coverage/2, + maybe_import_coverage/1, + maybe_stop/0, stop/0, + maybe_reset/0, maybe_stop_on_node/1, maybe_stop_on_nodes/0, stop_on_nodes/0, @@ -186,6 +190,21 @@ stop_on_nodes(Nodes) -> maybe_write_coverage(CoverMods, Dir) -> if_coverage(fun() -> write_coverage(CoverMods, Dir) end). +maybe_export_coverage(TestModule, Dir) -> + if_coverage(fun() -> + prepare_output_dir(Dir), + Filename = filename:join(Dir, + atom_to_list(TestModule) + ++ ".coverdata"), + ok = cover:export(Filename), + Filename + end). + +maybe_import_coverage(cover_disabled) -> + ok; +maybe_import_coverage(File) -> + if_coverage(fun() -> cover:import(File) end). + prepare_output_dir(Dir) -> %% NOTE: This is not a recursive make dir, only top level will be created. case file:make_dir(Dir) of @@ -363,6 +382,12 @@ write_module_coverage(CoverMod, CoverDir) -> end end. +maybe_stop() -> + if_coverage(fun maybe_stop/0). + stop() -> lager:info("Stopping cover"), cover:stop(). + +maybe_reset() -> + if_coverage(fun() -> cover:reset() end). 
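As an aside, the artifact upload added to riak_test_escript.erl above is fairly dense; unpacked, the same logic reads roughly as below (a sketch with the file read made explicit instead of the inline element/2 call):

    %% Equivalent, unpacked form of the gzip-and-post one-liner above.
    post_coverdata(_Base, cover_disabled) ->
        ok;                                       % coverage was off for this run
    post_coverdata(Base, CoverageFile) ->
        {ok, Bin} = file:read_file(CoverageFile),
        Name = filename:basename(CoverageFile) ++ ".gz",
        giddyup:post_artifact(Base, {Name, zlib:gzip(Bin)}).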
diff --git a/src/smoke_test_escript.erl b/src/smoke_test_escript.erl index 57eb2abe0..9b3fd9adb 100755 --- a/src/smoke_test_escript.erl +++ b/src/smoke_test_escript.erl @@ -91,6 +91,9 @@ worker(Rebar, PWD, Suites, Tasks) -> true -> case {Task, lists:member(Task, Tasks)} of {"eunit", true} -> + %% make rebar spit out the coverdata + file:write_file(filename:join(FDep, "rebar.config"), + "\n{cover_export_enabled, true}.", [append]), %% set up a symlink so that each dep has deps P = erlang:open_port({spawn_executable, Rebar}, [{args, ["eunit", "skip_deps=true"]}, @@ -98,8 +101,14 @@ worker(Rebar, PWD, Suites, Tasks) -> {line, 1024}, stderr_to_stdout, binary]), {Res, Log} = accumulate(P, []), CleanedLog = cleanup_logs(Log), - giddyup:post_result([{test, Suite}, {status, get_status(Res)}, - {log, CleanedLog} | Config]), + {ok, Base} = giddyup:post_result([{test, Suite}, {status, get_status(Res)}, + {log, CleanedLog} | Config]), + CoverFile = filename:join(FDep, ".eunit/eunit.coverdata"), + case filelib:is_regular(CoverFile) of + true -> + giddyup:post_artifact(Base, {"eunit.coverdata.gz", zlib:gzip(element(2, file:read_file(CoverFile)))}); + _ -> ok + end, Res; {"dialyzer", true} -> P = erlang:open_port({spawn_executable, "/usr/bin/make"}, From 44e0a6f7fff4d5bf956e174f2d6d6f91ad6b7f9c Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Fri, 21 Mar 2014 13:11:06 -0400 Subject: [PATCH 134/139] Address review concerns * Change group leader for cover_server while generating reports, so the 'includes data from imported files' message can be suppressed. * Append a phash of the test metadata to the coverdata filename to keep them unique. * Removed unused maybe_stop function. --- src/riak_test_escript.erl | 2 +- src/rt_cover.erl | 35 ++++++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/riak_test_escript.erl b/src/riak_test_escript.erl index 9dbd87daa..0727ef123 100644 --- a/src/riak_test_escript.erl +++ b/src/riak_test_escript.erl @@ -282,7 +282,7 @@ run_test(Test, Outdir, TestMetaData, Report, _HarnessArgs, NumTests) -> 1 -> keep_them_up; _ -> rt:teardown() end, - CoverageFile = rt_cover:maybe_export_coverage(Test, CoverDir), + CoverageFile = rt_cover:maybe_export_coverage(Test, CoverDir, erlang:phash2(TestMetaData)), case Report of undefined -> ok; _ -> diff --git a/src/rt_cover.erl b/src/rt_cover.erl index 8d636ddfb..2ffeb3a6a 100644 --- a/src/rt_cover.erl +++ b/src/rt_cover.erl @@ -30,9 +30,8 @@ start/0, maybe_start_on_node/2, maybe_write_coverage/2, - maybe_export_coverage/2, + maybe_export_coverage/3, maybe_import_coverage/1, - maybe_stop/0, stop/0, maybe_reset/0, maybe_stop_on_node/1, @@ -190,11 +189,12 @@ stop_on_nodes(Nodes) -> maybe_write_coverage(CoverMods, Dir) -> if_coverage(fun() -> write_coverage(CoverMods, Dir) end). 
-maybe_export_coverage(TestModule, Dir) -> +maybe_export_coverage(TestModule, Dir, Phash) -> if_coverage(fun() -> prepare_output_dir(Dir), Filename = filename:join(Dir, atom_to_list(TestModule) + ++ "-" ++ integer_to_list(Phash) ++ ".coverdata"), ok = cover:export(Filename), Filename @@ -293,6 +293,28 @@ process_module(Mod, OutDir) -> write_coverage(all, Dir) -> write_coverage(rt_config:get(cover_modules, []), Dir); write_coverage(CoverModules, CoverDir) -> + % temporarily reassign the group leader, to suppress annoying io:format output + {group_leader, GL} = erlang:process_info(whereis(cover_server), group_leader), + %% tiny recursive fun that pretends to be a group leader$ + F = fun() -> + YComb = fun(Fun) -> + receive + {io_request, From, ReplyAs, {put_chars, _Enc, _Msg}} -> + From ! {io_reply, ReplyAs, ok}, + Fun(Fun); + {io_request, From, ReplyAs, {put_chars, _Enc, _Mod, _Func, _Args}} -> + From ! {io_reply, ReplyAs, ok}, + Fun(Fun); + _Other -> + io:format(user, "Other Msg ~p", [_Other]), + Fun(Fun) + end + end, + YComb(YComb) + end, + Pid = spawn(F), + lager:info("changing cover group leader to ~p", [Pid]), + erlang:group_leader(Pid, whereis(cover_server)), % First write a file per module prepare_output_dir(CoverDir), ModCovList0 = rt:pmap(fun(Mod) -> process_module(Mod, CoverDir) end, @@ -307,7 +329,9 @@ write_coverage(CoverModules, CoverDir) -> % Now write main file with links to module files. IdxFile = filename:join([CoverDir, "index.html"]), write_index_file(TotalCov, IdxFile), - + erlang:group_leader(GL, whereis(cover_server)), + lager:info("changing cover group leader back to ~p", [GL]), + exit(Pid, kill), TotalCov. write_index_file({TotalPerc, AppCovList}, File) -> @@ -382,9 +406,6 @@ write_module_coverage(CoverMod, CoverDir) -> end end. -maybe_stop() -> - if_coverage(fun maybe_stop/0). - stop() -> lager:info("Stopping cover"), cover:stop(). From 54ca285306d724114f0a08adea96fda679dc88d1 Mon Sep 17 00:00:00 2001 From: Andrew Thompson Date: Fri, 21 Mar 2014 17:34:53 -0400 Subject: [PATCH 135/139] Silence is golden --- src/rt_cover.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rt_cover.erl b/src/rt_cover.erl index 2ffeb3a6a..0add36a58 100644 --- a/src/rt_cover.erl +++ b/src/rt_cover.erl @@ -313,7 +313,6 @@ write_coverage(CoverModules, CoverDir) -> YComb(YComb) end, Pid = spawn(F), - lager:info("changing cover group leader to ~p", [Pid]), erlang:group_leader(Pid, whereis(cover_server)), % First write a file per module prepare_output_dir(CoverDir), @@ -330,7 +329,6 @@ write_coverage(CoverModules, CoverDir) -> IdxFile = filename:join([CoverDir, "index.html"]), write_index_file(TotalCov, IdxFile), erlang:group_leader(GL, whereis(cover_server)), - lager:info("changing cover group leader back to ~p", [GL]), exit(Pid, kill), TotalCov. 
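One detail worth noting from the review fixes above: the coverdata filename now carries erlang:phash2(TestMetaData), so the same test module run against, say, different backends or versions no longer overwrites its own data. Reduced to a sketch (hypothetical helper name; rt_cover.erl inlines this in maybe_export_coverage/3):

    %% erlang:phash2/1 maps any term to a stable non-negative integer,
    %% so distinct metadata yields distinct coverdata filenames.
    coverdata_filename(TestModule, TestMetaData) ->
        atom_to_list(TestModule) ++ "-"
            ++ integer_to_list(erlang:phash2(TestMetaData))
            ++ ".coverdata".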
From 6fe9940c0281d765d1a12cf54b4c15becf6faf6a Mon Sep 17 00:00:00 2001 From: Russell Brown Date: Mon, 24 Mar 2014 14:10:37 +0000 Subject: [PATCH 136/139] Use built in rt code for bucket type creation --- tests/verify_dt_context.erl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/verify_dt_context.erl b/tests/verify_dt_context.erl index f0a6fb75b..acdd74e27 100644 --- a/tests/verify_dt_context.erl +++ b/tests/verify_dt_context.erl @@ -103,7 +103,7 @@ confirm() -> [{{<<"set1">>, set}, [<<"a">>, <<"b">>]}, {{<<"set2">>, set}, [ <<"x">>, <<"y">>, <<"z">>]}]), - %% get the modified sides values + %% get the modified side's values S1_2 = fetch(P1, ?STYPE), M_2 = fetch(P1, ?MTYPE), @@ -197,13 +197,10 @@ create_pb_clients(Nodes) -> C end || N <- Nodes]. -create_bucket_types([N1|_]=Nodes, Types) -> +create_bucket_types([N1|_], Types) -> lager:info("Creating bucket types with datatypes: ~p", [Types]), - [ rpc:call(N1, riak_core_bucket_type, create, - [Name, [{datatype, Type}, {allow_mult, true}]]) || - {Name, Type} <- Types ], - [rt:wait_until(N1, bucket_type_ready_fun(Name)) || {Name, _Type} <- Types], - [ rt:wait_until(N, bucket_type_matches_fun(Types)) || N <- Nodes]. + [rt:create_and_activate_bucket_type(N1, Name, [{datatype, Type}, {allow_mult, true}]) + || {Name, Type} <- Types ]. bucket_type_ready_fun(Name) -> fun(Node) -> From 6d4b2ace5f9681ef916435c3bf1d738954bbd7cf Mon Sep 17 00:00:00 2001 From: Russell Brown Date: Wed, 26 Mar 2014 17:26:20 +0000 Subject: [PATCH 137/139] Set allow_mult to default to true for clusters in test --- tests/verify_counter_repl.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/verify_counter_repl.erl b/tests/verify_counter_repl.erl index dcf735e15..5b6a1db49 100644 --- a/tests/verify_counter_repl.erl +++ b/tests/verify_counter_repl.erl @@ -61,7 +61,8 @@ confirm() -> make_clusters() -> Conf = [{riak_repl, [{fullsync_on_connect, false}, - {fullsync_interval, disabled}]}], + {fullsync_interval, disabled}]}, + {riak_core, [{default_bucket_props, [{allow_mult, true}]}]}], Nodes = rt:deploy_nodes(6, Conf), {ClusterA, ClusterB} = lists:split(3, Nodes), A = make_cluster(ClusterA, "A"), @@ -70,7 +71,6 @@ make_clusters() -> make_cluster(Nodes, Name) -> repl_util:make_cluster(Nodes), - verify_counter_converge:set_allow_mult_true(Nodes), repl_util:name_cluster(hd(Nodes), Name), repl_util:wait_until_leader_converge(Nodes), Clients = [ rt:httpc(Node) || Node <- Nodes ], From b106abb87f2597011cd093d0bd8d4c6d70197588 Mon Sep 17 00:00:00 2001 From: Kelly McLaughlin Date: Thu, 27 Mar 2014 14:07:05 -0600 Subject: [PATCH 138/139] Address some replication test failures due to cluster race conditions Change some of the helper functions in the repl_util module to handle errors more sensibly so that cluster setup race conditions do not cause unnecessary test failures. 
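Each helper gets the same defensive shape: treat {badrpc, _} from rpc:call/4 as "not ready yet" rather than feeding it to proplists:get_value/2, so rt:wait_until keeps polling while the cluster settles. Distilled (hypothetical helper name; repl_util inlines this in each waiter, as the hunks below show):

    %% The badrpc-tolerant polling pattern, reduced to its core.
    wait_until_status(Node, Check) ->
        rt:wait_until(Node,
                      fun(_) ->
                              case rpc:call(Node, riak_repl_console,
                                            status, [quiet]) of
                                  {badrpc, _} -> false;    % unreachable: retry
                                  Status -> Check(Status)  % inspect the proplist
                              end
                      end).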
--- tests/repl_util.erl | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/tests/repl_util.erl b/tests/repl_util.erl index 7b37bfce3..c0fd6a59e 100644 --- a/tests/repl_util.erl +++ b/tests/repl_util.erl @@ -109,10 +109,10 @@ wait_until_leader_converge([Node|_] = Nodes) -> LeaderResults = [rpc:call(N, riak_core_cluster_mgr, get_leader, []) || N <- Nodes], - UniqueLeaders = lists:usort( - lists:filter(leader_result_filter_fun(), - LeaderResults)), - length(UniqueLeaders) == 1 + {Leaders, Errors} = + lists:partition(leader_result_filter_fun(), LeaderResults), + UniqueLeaders = lists:usort(Leaders), + Errors == [] andalso length(UniqueLeaders) == 1 end). leader_result_filter_fun() -> @@ -131,15 +131,20 @@ wait_until_connection(Node) -> rt:wait_until(Node, fun(_) -> Status = rpc:call(Node, riak_repl_console, status, [quiet]), - case proplists:get_value(fullsync_coordinator, Status) of - [] -> + case Status of + {badrpc, _} -> false; - [_C] -> - true; - Conns -> - lager:warning("multiple connections detected: ~p", - [Conns]), - true + _ -> + case proplists:get_value(fullsync_coordinator, Status) of + [] -> + false; + [_C] -> + true; + Conns -> + lager:warning("multiple connections detected: ~p", + [Conns]), + true + end end end). %% 40 seconds is enough for repl @@ -147,11 +152,16 @@ wait_until_no_connection(Node) -> rt:wait_until(Node, fun(_) -> Status = rpc:call(Node, riak_repl_console, status, [quiet]), - case proplists:get_value(connected_clusters, Status) of - [] -> - true; + case Status of + {badrpc, _} -> + false; _ -> - false + case proplists:get_value(connected_clusters, Status) of + [] -> + true; + _ -> + false + end end end). %% 40 seconds is enough for repl From 5087b8ef2bfd50141a863d6461ef726a8487f213 Mon Sep 17 00:00:00 2001 From: Joseph Blomstedt Date: Mon, 31 Mar 2014 14:43:10 -0700 Subject: [PATCH 139/139] Delete two spurious comment lines from rt.erl --- src/rt.erl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rt.erl b/src/rt.erl index 9f4879d8a..345d53f4d 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -302,7 +302,6 @@ deploy_nodes(Versions, Services) -> version_to_config({_, _}=Config) -> Config; version_to_config(Version) -> {Version, default}. -%%%%%%%%%%%%%%%%%%%% deploy_clusters(Settings) -> ClusterConfigs = [case Setting of Configs when is_list(Configs) -> @@ -321,7 +320,6 @@ build_clusters(Settings) -> lager:info("Cluster built: ~p", [Nodes]) end || Nodes <- Clusters], Clusters. -%%%%%%%%%%%%%%%%%%%% %% @doc Start the specified Riak node start(Node) ->