From eb8a6bfe81bc481bea1d36611f69c940ec223e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= Date: Fri, 26 Jul 2024 12:31:45 -0700 Subject: [PATCH] Fix dbg for remote nodes dbg uses the new trace sessions and stores them in the main dbg server on the node that's doing the tracing. However, trace sessions are reference-counted NIF resources - this means that when the last reference is GC'ed on the node that owns the resource, the resouce is destroyed. In case of dbg, this meant that as soon as the first GC happened on the relay process, the session would be destroyed and tracing would silently stop. Furthermore, any subsequent requests to amend trace patterns, etc would fail for the remote nodes with badarg in trace module's functions. This fixes the issue by making the relay process hold onto the session reference for as long as the process is active. This solves the issue. The amended test fails before applying this change and succeeds after. --- lib/runtime_tools/src/dbg.erl | 37 +++++++++++++++------------- lib/runtime_tools/test/dbg_SUITE.erl | 9 ++++--- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/lib/runtime_tools/src/dbg.erl b/lib/runtime_tools/src/dbg.erl index 2d9735ec5ecd..7b671494be7f 100644 --- a/lib/runtime_tools/src/dbg.erl +++ b/lib/runtime_tools/src/dbg.erl @@ -2346,30 +2346,33 @@ relay(SessionName, Node,To) when Node /= node() -> -doc false. do_relay(SessionName, Parent,RelP) -> process_flag(trap_exit, true), - case RelP of - {Type,Data} -> - {ok,Tracer} = remote_tracer(Type,Data), - Parent ! {started, Tracer, trace:session_create(SessionName, Tracer, [])}, - ok; - Pid when is_pid(Pid) -> - Parent ! {started, self(), trace:session_create(SessionName, self(), [])}, - ok - end, - do_relay_1(RelP). - -do_relay_1(RelP) -> - %% In the case of a port tracer, this process exists only so that - %% dbg know that the node is alive... should maybe use monitor instead? + Tracer = + case RelP of + {Type, Data} -> + {ok, Started} = remote_tracer(Type,Data), + Started; + Pid when is_pid(Pid) -> + self() + end, + Session = trace:session_create(SessionName, Tracer, []), + Parent ! {started, Tracer, Session}, + do_relay_1(RelP, Session). + +do_relay_1(RelP, Session) -> + %% This process exists to relay messages and keep trace session + %% alive - session is ref-counted locally, meaning just the main + %% node keeping a reference to the session won't prevent the session + %% from being garbage collected. receive {'EXIT', _P, _} -> exit(normal); TraceInfo when is_pid(RelP) -> % Here is the normal case for trace i/o - RelP ! TraceInfo, - do_relay_1(RelP); + RelP ! TraceInfo, + do_relay_1(RelP, Session); Other -> Modifier = modifier(user), io:format(user,"** relay got garbage: ~"++Modifier++"p~n", [Other]), - do_relay_1(RelP) + do_relay_1(RelP, Session) end. -doc false. diff --git a/lib/runtime_tools/test/dbg_SUITE.erl b/lib/runtime_tools/test/dbg_SUITE.erl index 2994329f694b..b4f526b5dc1c 100644 --- a/lib/runtime_tools/test/dbg_SUITE.erl +++ b/lib/runtime_tools/test/dbg_SUITE.erl @@ -409,17 +409,18 @@ distributed(Config) when is_list(Config) -> {value, {matched, Node, 1}} = lists:keysearch(Node, 2, Z), dbg:cn(Node), dbg:tp(dbg,ln,[]), - ok = rpc:block_call(Node, dbg, ltp, []), - ok = rpc:block_call(Node, dbg, ln, []), ok = dbg:ln(), + [ok = rpc:block_call(Node, dbg, ltp, []) || _ <- lists:seq(1,100)], + ok = rpc:block_call(Node, dbg, ln, []), S = self(), {TraceSend, TraceCall} = lists:partition(fun ({trace,RP,send,_,_}) when RP =:= RexPid -> true; (_) -> false end, flush()), [_|_] = TraceSend, - [{trace,Pid,call,{dbg,ltp,[]}}, - {trace,S,call,{dbg,ln,[]}}] = TraceCall, + [{trace,S,call,{dbg,ln,[]}} | RemoteCall] = TraceCall, + 100 = length(RemoteCall), + [{trace,Pid,call,{dbg,ltp,[]}}] = lists:uniq(RemoteCall), Node = node(Pid), %% stop()