From b905135b48b64e034c1ea83cc9e759654b3aa2b3 Mon Sep 17 00:00:00 2001 From: Frantisek Tobias Date: Mon, 19 Aug 2024 17:16:38 +0200 Subject: [PATCH] modules/stats: add answer.stale --- NEWS | 1 + daemon/lua/kres-gen-33.lua | 1 + lib/resolve.h | 1 + manager/knot_resolver_manager/statistics.py | 6 ++++++ modules/serve_stale/serve_stale.lua | 7 +++++-- modules/stats/README.rst | 2 ++ modules/stats/stats.c | 8 +++++++- 7 files changed, 23 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index a95403263..11c4bc453 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,7 @@ Improvements - answer NOTIMPL for meta-types and non-IN RR classes (!1589) - views: improve interaction with old-style policies (!1576) +- stats: add stale answer counter 'answer.stale' (!1591) Bugfixes -------- diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua index 6be16bc4a..40a03453b 100644 --- a/daemon/lua/kres-gen-33.lua +++ b/daemon/lua/kres-gen-33.lua @@ -248,6 +248,7 @@ struct kr_request { ranked_rr_array_t add_selected; _Bool answ_validated; _Bool auth_validated; + _Bool stale_accounted; uint8_t rank; struct kr_rplan rplan; trace_log_f trace_log; diff --git a/lib/resolve.h b/lib/resolve.h index 443fef29c..cbc20877e 100644 --- a/lib/resolve.h +++ b/lib/resolve.h @@ -260,6 +260,7 @@ struct kr_request { ranked_rr_array_t add_selected; bool answ_validated; /**< internal to validator; beware of caching, etc. */ bool auth_validated; /**< see answ_validated ^^ ; TODO */ + bool stale_accounted; /** Overall rank for the request. * diff --git a/manager/knot_resolver_manager/statistics.py b/manager/knot_resolver_manager/statistics.py index 4a0eb783a..ae9d98119 100644 --- a/manager/knot_resolver_manager/statistics.py +++ b/manager/knot_resolver_manager/statistics.py @@ -119,6 +119,12 @@ def _parse_resolver_metrics(instance_id: "KresID", metrics: Any) -> Generator[Me label=("instance_id", sid), value=metrics["answer"]["cached"], ) + yield _counter( + "resolver_answer_stale", + "number of queries that utilized stale data", + label=("instance_id", sid), + value=metrics["answer"]["stale"], + ) yield _counter( "resolver_answer_rcode_noerror", "number of NOERROR answers", diff --git a/modules/serve_stale/serve_stale.lua b/modules/serve_stale/serve_stale.lua index faf07fbe0..c1528e804 100644 --- a/modules/serve_stale/serve_stale.lua +++ b/modules/serve_stale/serve_stale.lua @@ -8,9 +8,10 @@ local ffi = require('ffi') M.timeout = 3*sec M.callback = ffi.cast("kr_stale_cb", - function (ttl) --, name, type, qry) + function (ttl, _, _, qry) --log_debug(ffi.C.SRVSTALE, ' => called back with TTL: ' .. tostring(ttl)) if ttl + 3600 * 24 > 0 then -- at most one day stale + qry.request.stale_accounted = true return 1 else return -1 @@ -27,7 +28,9 @@ M.layer = { local now = ffi.C.kr_now() local deadline = qry.creation_time_mono + M.timeout if now > deadline or qry.flags.NO_NS_FOUND then - log_debug(ffi.C.LOG_GRP_SRVSTALE, ' => no reachable NS, using stale data') + log_qry(qry, ffi.C.LOG_GRP_SRVSTALE, + ' => no reachable NS, using stale data "%s"', + kres.dname2str(qry:name())) qry.stale_cb = M.callback -- TODO: probably start the same request that doesn't stale-serve, -- but first we need some detection of non-interactive / internal requests. diff --git a/modules/stats/README.rst b/modules/stats/README.rst index 1def925c4..e9258274f 100644 --- a/modules/stats/README.rst +++ b/modules/stats/README.rst @@ -55,6 +55,8 @@ Built-in counters keep track of number of queries and answers matching specific +-----------------+----------------------------------+ | answer.cached | queries answered from cache | +-----------------+----------------------------------+ +| answer.stale | queries that utilized stale data | ++-----------------+----------------------------------+ +-----------------+----------------------------------+ | **Answers categorized by RCODE** | diff --git a/modules/stats/stats.c b/modules/stats/stats.c index deed9c949..596847d7c 100644 --- a/modules/stats/stats.c +++ b/modules/stats/stats.c @@ -37,12 +37,17 @@ #define UPSTREAMS_COUNT 512 /* Size of recent upstreams */ #endif -/** @cond internal Fixed-size map of predefined metrics. */ +/** @cond internal Fixed-size map of predefined metrics. + * + * When changing the list, don't forget _parse_resolver_metrics() + * in ../../manager/knot_resolver_manager/statistics.py + */ #define CONST_METRICS(X) \ X(answer,total) X(answer,noerror) X(answer,nodata) X(answer,nxdomain) X(answer,servfail) \ X(answer,cached) X(answer,1ms) X(answer,10ms) X(answer,50ms) X(answer,100ms) \ X(answer,250ms) X(answer,500ms) X(answer,1000ms) X(answer,1500ms) X(answer,slow) \ X(answer,sum_ms) \ + X(answer,stale) \ X(answer,aa) X(answer,tc) X(answer,rd) X(answer,ra) X(answer, ad) X(answer,cd) \ X(answer,edns0) X(answer,do) \ X(query,edns) X(query,dnssec) \ @@ -303,6 +308,7 @@ static int collect(kr_layer_t *ctx) DEPRECATED use new names metric_answer_edns0 and metric_answer_do */ + stat_const_add(data, metric_answer_stale, param->stale_accounted); stat_const_add(data, metric_query_edns, knot_pkt_has_edns(param->answer)); stat_const_add(data, metric_query_dnssec, knot_pkt_has_dnssec(param->answer));