Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(prometheus) add wasmx metrics #13681

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .requirements
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ ATC_ROUTER=ffd11db657115769bf94f0c4f915f98300bc26b6 # 1.6.2
SNAPPY=23b3286820105438c5dbb9bc22f1bb85c5812c8a # 1.2.0

KONG_MANAGER=nightly
NGX_WASM_MODULE=96b4e27e10c63b07ed40ea88a91c22f23981db35
NGX_WASM_MODULE=9b1b2c760f73827fc08ade3a936a89fa5473f8fa
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an FYI I juuuust merged #13765, so you can drop the deps changes and rebase to pick them up from master.

WASMER=3.1.1
WASMTIME=23.0.2
WASMTIME=25.0.1
V8=12.0.267.17

NGX_BROTLI=a71f9312c2deb28875acc7bacfdd5695a111aa53 # master branch of Oct 9, 2023
Expand Down
8 changes: 4 additions & 4 deletions build/openresty/wasmx/wasmx_repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ wasm_runtimes = {
},
"wasmtime": {
"linux": {
"x86_64": "c2fe82f4d707711523e57c2fc8f67d8fc0311fd3cf15050f811f88b30c254980",
"aarch64": "4593a131018a99df3aa16b41b1c63838cbbba9a36771c444a39761b25be73469",
"x86_64": "5c4c490bbc8ddee6311653dd5c361933202b69e12eaddfe6b3aed371c97b6b4a",
"aarch64": "a189e01ef73a5c3c0bfcbc1a26dcc31f5b1904fcbdf344f761cfb19e8ecfd501",
},
"macos": {
"x86_64": "2939cdf4eca5ce79c7e179c338c46700deb88bc7906da206a272143c3da0ca5b",
"aarch64": "cafff668144d15fdee57645918d06330aa05126b6a28b92b836eb69987842cd9",
"x86_64": "6d81ab0775ec900285ee1140555ba09a953669324d9317a8bb1fe0572684dbfb",
"aarch64": "61b15351c136aad75735eadf42f6101acb42480d6419efef4dbdd81ddb4dd180",
},
},
}
Expand Down
4 changes: 4 additions & 0 deletions changelog/unreleased/kong/prometheus-wasmx-metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
message: |
**Prometheus**: Added support for Proxy-Wasm metrics.
type: feature
scope: Plugin
1 change: 1 addition & 0 deletions kong-3.9.0-0.rockspec
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ build = {
["kong.plugins.prometheus.prometheus"] = "kong/plugins/prometheus/prometheus.lua",
["kong.plugins.prometheus.serve"] = "kong/plugins/prometheus/serve.lua",
["kong.plugins.prometheus.schema"] = "kong/plugins/prometheus/schema.lua",
["kong.plugins.prometheus.wasmx"] = "kong/plugins/prometheus/wasmx.lua",

["kong.plugins.session.handler"] = "kong/plugins/session/handler.lua",
["kong.plugins.session.schema"] = "kong/plugins/session/schema.lua",
Expand Down
8 changes: 6 additions & 2 deletions kong/plugins/prometheus/exporter.lua
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
local balancer = require "kong.runloop.balancer"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: we should probably try to be consistent with the require syntax, in this file it seems we are using round brackets around quotes all the time i.e. here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I noticed the inconsistencies in this module but I opted for not addressing them to keep the PR simple. I think it might be a good idea to address those inconsistencies and style issues collectively in a separate PR -- which would be easier to review.

local yield = require("kong.tools.yield").yield
local wasm = require "kong.plugins.prometheus.wasmx"


local kong = kong
local ngx = ngx
local get_phase = ngx.get_phase
local lower = string.lower
local ngx_timer_pending_count = ngx.timer.pending_count
local ngx_timer_running_count = ngx.timer.running_count
local balancer = require("kong.runloop.balancer")
local yield = require("kong.tools.yield").yield
local get_all_upstreams = balancer.get_all_upstreams
if not balancer.get_all_upstreams then -- API changed since after Kong 2.5
get_all_upstreams = require("kong.runloop.balancer.upstreams").get_all_upstreams
Expand Down Expand Up @@ -517,6 +520,7 @@ local function metric_data(write_fn)
-- notify the function if prometheus plugin is enabled,
-- so that it can avoid exporting unnecessary metrics if not
prometheus:metric_data(write_fn, not IS_PROMETHEUS_ENABLED)
wasm.metric_data()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be conditional to something, i.e. kong.configuration.wasm being enabled? Or, alternatively, should the kong.plugins.prometheus.wasmx module's functions be made noop when wasm is disabled?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function metric_data() in kong.plugins.prometheus.wasmx returns early if support for wasm isn't enabled in the gateway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I left the check for wasm support within metric_data() to keep the changes in existing code to a minimum.

end

local function collect()
Expand Down
206 changes: 206 additions & 0 deletions kong/plugins/prometheus/wasmx.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
local buffer = require "string.buffer"
local wasm = require "kong.runloop.wasm"
local wasmx_shm


local fmt = string.format
local str_find = string.find
local str_match = string.match
local str_sub = string.sub
local table_insert = table.insert
local table_sort = table.sort
local buf_new = buffer.new
local ngx_say = ngx.say


local _M = {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first impression I had from seeing this file was that this looks pretty tightly coupled to the implementation details of ngx_wasm_module's metrics storage (well, and also to Prometheus, I suppose). Is it not feasible to put this code into resty.wasmx.* with a more simplified API that Kong gateway can use to register/inspect/collect metric [meta]data?



local FLUSH_EVERY = 100
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be configurable or is there a reason for always using 100 in particular?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100 was chosen rather arbitrarily. I haven't tested different values to find the optimal one.

The reasoning was rather simplistic. By default ngx_wasm_module dedicates 5m of shared memory to metrics storage; which is enough to store ~ 30k counters/gauges.

Flushing every 100 metrics implies that we write to the HTTP response 3k times -- which sounded like a good balance between IO serialization and memory consumption.

I think exposing FLUSH_EVERY as configurable parameter without giving users context and numbers to guide their decision would be more burdensome than helpful.



--- Iterator step used together with `sorted_pairs`.
-- Reads the key stored at the current position of `ctx.sorted_keys`, advances
-- the position kept in `ctx.i`, and returns the value `ctx.t` holds for that
-- key. Once the position runs past the last key the lookup yields nil, which
-- ends a generic `for` loop.
-- @param ctx table with fields `t`, `sorted_keys` and `i`
-- @return the value for the current key, or nil when the keys are exhausted
local function sorted_iter(ctx)
  local pos = ctx.i
  local current_key = ctx.sorted_keys[pos]
  local value = ctx.t[current_key]

  ctx.i = pos + 1

  return value
end


--- Prepares an iteration over the values of `t` in sorted key order.
-- Collects every key of `t`, sorts the keys with `table_sort` (default
-- ordering), and returns `sorted_iter` together with a context table holding
-- `t`, the sorted keys and the starting position 1. Used in a generic `for`,
-- the loop variable receives only the value (not the key), and iteration
-- stops at the first nil value.
-- NOTE(review): the default sort comparison requires mutually comparable
-- keys (e.g. all strings) -- confirm for every caller.
-- @param t table to iterate
-- @return iterator function, iteration context
local function sorted_pairs(t)
local sorted_keys = {}

for k, _ in pairs(t) do
table_insert(sorted_keys, k)
end

table_sort(sorted_keys)

return sorted_iter, { t = t, sorted_keys = sorted_keys, i = 1 }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the standard signature of an iterator function should take the index as the second parameter, after the context. Is there a reason for having i inside the context table here? (It works regardless, just wondering if it's just a style choice).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point.

It wasn't a deliberate choice, no. I think I misinterpreted that section of the book and oversaw the part describing the index of the iteration as a control variable and not part of the invariant state of the loop -- which is logical.

Thanks for mentioning it. I'll change the code accordingly.

end


--- Splits a Proxy-Wasm metric key into a metric name and its labels.
-- The key is read as `pw.<filter_name>.<rest>`: the first 3 bytes ("pw.")
-- are skipped and the filter name is the text up to the next dot. The
-- filter's `config.pw_metrics.label_patterns` list (looked up through
-- `wasm.filters_by_name`) supplies `{ label = ..., pattern = ... }` entries.
-- Each pattern is matched against the whole key and is expected to produce
-- two captures: the full `key=value` fragment and the value alone.
-- For every matching pattern a `{ label, value }` pair is appended to the
-- result, and the metric name becomes the part of the key that precedes the
-- earliest matched fragment. When nothing matches, the name is the whole key.
-- Example (presumed shape, confirm against real filters): with the pattern
-- "(_s_id=([0-9a-z%-]+))" and label "service", a key such as
-- "pw.my_filter.requests_s_id=abc" gives the name "pw.my_filter.requests"
-- and the labels { { "service", "abc" } }.
-- NOTE(review): `second_dot_pos` is nil when the key has no dot after "pw.",
-- and `wasm.filters_by_name[filter_name]` is nil for an unknown filter; both
-- cases raise an error here -- confirm keys/filters are always well formed.
-- NOTE(review): `str_find(key, label_k)` treats `label_k` as a Lua pattern,
-- not plain text -- confirm label keys never contain magic characters.
-- @param key string metric key starting with "pw."
-- @return name (string), labels (array of { label, value } pairs)
local function parse_pw_key(key)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm struggling a bit with this function, seems like key is a string that starts with pw. with sections that are separated by dots, would it be possible to add an example of what is expected here as a code comment?

At first glance it feels like it could be simplified a bit either using regex or splitting by dots - maybe, but I have the feeling this was purposely written to maximise performance over readability?

local name = key
local labels = {}
local header_size = 3 -- pw.
local first_label = #key

local second_dot_pos, _ = str_find(key, "%.", header_size + 1)
local filter_name = str_sub(key, header_size + 1, second_dot_pos - 1)

local filter_config = wasm.filters_by_name[filter_name].config or {}
local patterns = filter_config.pw_metrics
and filter_config.pw_metrics.label_patterns or {}
Comment on lines +52 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel iffy about intermingling the filter's (http) configuration with the filter's metric information.

Would it be possible to use filter metadata (the ${filter}.meta.json file) for this? For instance, currently we're using filter metadata to allow developers to provide JSON schema for filters with JSON configurations. Metric data seems to me like a comparable use case:

{
  "config_schema": {
    "type": "object",
    "properties": { ... }
  },
  "metrics": {
    "label_patterns": [
      { "label": "service", "pattern": "(_s_id=([0-9a-z%-]+))" },
      { "label": "route",   "pattern": "(_r_id=([0-9a-z%-]+))" }
    ]
  }
}

I still need to spend some more time grokking the new metrics feature, but lemme know what you think in the meantime.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is possible. I totally oversaw the metadata option. Thanks for bringing that up.
In fact, the filter metadata is a more appropriate place for storing something that will be common to all filter instances.


for _, pair in ipairs(patterns) do
local label_kv, label_v = str_match(key, pair.pattern)
if label_kv then
local label_k = str_sub(label_kv, 0, str_find(label_kv, "="))
local label_k_start, _ = str_find(key, label_k)

first_label = (label_k_start < first_label) and label_k_start or first_label
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this more of a first_label_start_index than a first_label?


table_insert(labels, { pair.label, label_v })
end
end

if first_label ~= #key then
name = str_sub(key, 0, first_label - 1)
end

return name, labels
end


--- Turns a raw metric key into a metric name and a list of labels.
-- Keys longer than the "pw." prefix that start with it are delegated to
-- `parse_pw_key`; every other key is used as the name with no labels.
-- In both cases each dot in the resulting name is replaced by an underscore.
-- @param key string raw metric key
-- @return name (string), labels (table)
local function parse_key(key)
  -- TODO: parse wa. (WasmX metrics) and lua. (metrics defined in Lua land)
  local pw_prefix = "pw."
  local prefix_len = #pw_prefix

  local name, labels

  if #key > prefix_len and key:sub(1, prefix_len) == pw_prefix then
    name, labels = parse_pw_key(key)

  else
    name, labels = key, {}
  end

  -- the parentheses drop gsub's second result (the substitution count)
  return (name:gsub("%.", "_")), labels
end


-- Characters that need escaping inside a Prometheus label value, mapped to
-- their escaped form in the text exposition format.
local LABEL_VALUE_ESCAPES = {
  ["\\"] = "\\\\",
  ['"'] = '\\"',
  ["\n"] = "\\n",
}


--- Serializes label pairs into a Prometheus label set, e.g. `{a="1",b="2"}`.
-- Backslashes, double quotes and line feeds in label values are escaped so
-- that a value extracted from a metric name cannot break the exposition
-- output. Values without those characters are emitted unchanged.
-- An empty `labels` list yields "{}".
-- @param labels array of `{ name, value }` pairs
-- @return string the serialized label set, including the surrounding braces
local function serialize_labels(labels)
  local buf = buf_new()

  for _, pair in ipairs(labels) do
    -- single-target assignment keeps only gsub's first result
    local value = tostring(pair[2]):gsub('[\\"\n]', LABEL_VALUE_ESCAPES)

    buf:put(fmt(',%s="%s"', pair[1], value))
  end

  buf:get(1) -- discard the leading comma written before the first pair

  return "{" .. buf:get() .. "}"
end


--- Appends the text representation of one aggregated metric to `buf`.
-- Writes the `# HELP` / `# TYPE` header, then one sample line per label set.
-- For histograms it writes one cumulative `le` bucket line per bin of every
-- label set, followed by all `_sum` lines and then all `_count` lines.
-- A bin whose upper bound equals 4294967295 is reported as `le="+Inf"`.
-- The output always ends with a newline. Nothing is returned.
-- @param m table with `name`, `type` and `labels`, where `labels` is an
--   array of `{ label_pairs, metric }` entries
-- @param buf string buffer that receives the output
local function serialize_metric(m, buf)
  local name = m.name

  buf:put(fmt("# HELP %s\n# TYPE %s %s", name, name, m.type))

  if m.type ~= "histogram" then
    for _, entry in ipairs(m.labels) do
      local label_pairs, sample = entry[1], entry[2]
      local label_str = ""

      if #label_pairs > 0 then
        label_str = serialize_labels(label_pairs)
      end

      buf:put(fmt("\n%s%s %s", name, label_str, sample.value))
    end

    buf:put("\n")

    return
  end

  -- _sum and _count lines are emitted after all bucket lines
  local sum_lines = buf_new()
  local count_lines = buf_new()

  for _, entry in ipairs(m.labels) do
    local label_pairs, hist = entry[1], entry[2]
    local label_str = ""
    local bucket_prefix = "{"

    if #label_pairs > 0 then
      label_str = serialize_labels(label_pairs)
      -- reopen the label set: drop the closing brace so `le` can be appended
      bucket_prefix = label_str:sub(1, #label_str - 1) .. ","
    end

    local running_count = 0

    for _, bin in ipairs(hist.value) do
      local upper = bin.ub

      if upper == 4294967295 or not upper then
        upper = "+Inf"
      end

      running_count = running_count + bin.count

      local bucket_labels = bucket_prefix .. fmt('le="%s"', upper) .. "}"

      buf:put(fmt("\n%s%s %s", name, bucket_labels, running_count))
    end

    -- same arithmetic as accumulating into a zero-initialised total
    local total = 0 + hist.sum

    sum_lines:put(fmt("\n%s_sum%s %s", name, label_str, total))
    count_lines:put(fmt("\n%s_count%s %s", name, label_str, running_count))
  end

  buf:put(sum_lines:get())
  buf:put(count_lines:get())
  buf:put("\n")
end


--- Writes all WasmX metrics to the current response in Prometheus text form.
-- Lazily requires `resty.wasmx.shm` (inside pcall) and returns without
-- producing output when the module cannot be loaded. Otherwise it:
--   1. locks the metrics shm, copies every key together with its metric
--      (via `get_by_name` with `prefix = false`), then unlocks;
--   2. parses each key with `parse_key` and groups the metrics by the
--      resulting name, recording the metric type of the first entry seen;
--   3. serializes the groups in sorted name order into a string buffer,
--      emitting the buffer with `ngx_say` every FLUSH_EVERY metrics and once
--      more after the loop.
-- NOTE(review): `serialize_metric` writes into `buf` itself and returns
-- nothing, so the surrounding `buf:put(...)` call receives no arguments and
-- looks redundant -- confirm.
-- NOTE(review): the shm stays locked if `iterate_keys`/`get_by_name` raise
-- between lock() and unlock() -- confirm they cannot error.
-- NOTE(review): the final `ngx_say` runs even when the buffer is empty.
_M.metric_data = function()
local i = 0
local metrics = {}
local parsed = {}
local buf = buf_new()

-- delayed require of the WasmX module, to ensure it is loaded
-- after ngx_wasm_module.so is loaded.
if not wasmx_shm then
local ok, _wasmx_shm = pcall(require, "resty.wasmx.shm")
if ok then
wasmx_shm = _wasmx_shm
end
end

if not wasmx_shm then
return
end

wasmx_shm.metrics:lock()

for key in wasmx_shm.metrics:iterate_keys() do
table_insert(metrics, { key, wasmx_shm.metrics:get_by_name(key, { prefix = false })})
end

wasmx_shm.metrics:unlock()

-- in WasmX the different labels of a metric are stored as separate metrics;
thibaultcha marked this conversation as resolved.
Show resolved Hide resolved
-- aggregate those separate metrics into a single one.
for _, pair in ipairs(metrics) do
local key = pair[1]
local m = pair[2]
local name, labels = parse_key(key)

parsed[name] = parsed[name] or { name = name, type = m.type, labels = {} }

table_insert(parsed[name].labels, { labels, m })
end

for metric_by_label in sorted_pairs(parsed) do
buf:put(serialize_metric(metric_by_label, buf))

i = i + 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: looks like i is only used here and declared outside, perhaps we could have sorted_iter return index and value and use the index here instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point!


if i % FLUSH_EVERY == 0 then
ngx_say(buf:get())
end
end

ngx_say(buf:get())
end


return _M
16 changes: 16 additions & 0 deletions kong/runloop/wasm.lua
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,8 @@ local function rebuild_state(db, version, old_state)

for _, filter in ipairs(chain.filters) do
if filter.enabled then
_M.filters_by_name[filter.name].config = cjson_decode(filter.config) or filter.config
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may exist more than one filter by the same name (and likely with differing configuration).


Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Proxy-wasm spec for metrics doesn't provide a way for filter developers to define a metric associated with a set of labels.

The function proxy_define_metric expects only a metric type and a string representing the metric name.

As a result, filter developers include whatever label associated with a metric as part of its name, e.g. a_metric_label1="label_value"_label2="label_value".

There isn't however a consensus over how to represent labels in metric names.

The functionality of parsing labels proposed and implemented in the function parse_pw_key below, reflects the one available in Envoy and used by Coraza WAF Proxy-wasm filter.

It expects filter developers to provide a set of regex patterns matching labels in a metric's name, as part of the filter configuration:

    label_patterns = {
      { label = "service", pattern = "(_s_id=([0-9a-z%-]+))" },
      { label = "route", pattern = "(_r_id=([0-9a-z%-]+))" },
    }

Then, during serialization, those patterns are matched against metric names to extract potential labels.

As such, we need to have access to the filter configuration when serializing Proxy-Wasm metrics.

-- Serialize all JSON configurations up front
--
-- NOTE: there is a subtle difference between a raw, non-JSON filter
Expand Down Expand Up @@ -778,6 +780,13 @@ local function register_property_handlers()
return ok, value, const
end)

-- Property getter for `kong.route_name`: yields the `name` field of
-- `ctx.route`. The property is reported as found (and as constant) only
-- when that name is not nil.
properties.add_getter("kong.route_name", function(_, _, ctx)
  local route = ctx.route
  local route_name = route and route.name
  local found = route_name ~= nil

  -- the third result (constness) mirrors the first one
  return found, route_name, found
end)

properties.add_getter("kong.service.response.status", function(kong)
return true, kong.service.response.get_status(), false
end)
Expand All @@ -789,6 +798,13 @@ local function register_property_handlers()
return ok, value, const
end)

-- Property getter for `kong.service_name`: yields the `name` field of
-- `ctx.service`. The property is reported as found (and as constant) only
-- when that name is not nil.
properties.add_getter("kong.service_name", function(_, _, ctx)
  local service = ctx.service
  local service_name = service and service.name
  local found = service_name ~= nil

  -- the third result (constness) mirrors the first one
  return found, service_name, found
end)

properties.add_getter("kong.version", function(kong)
return true, kong.version, true
end)
Expand Down
43 changes: 43 additions & 0 deletions spec/02-integration/20-wasm/04-proxy-wasm_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ describe("proxy-wasm filters (#wasm) (#" .. strategy .. ")", function()
mock_service = assert(bp.services:insert {
host = helpers.mock_upstream_host,
port = helpers.mock_upstream_port,
name = "mock_service",
})

local mock_upstream = assert(bp.upstreams:insert {
Expand All @@ -50,12 +51,14 @@ describe("proxy-wasm filters (#wasm) (#" .. strategy .. ")", function()
})

r_single = assert(bp.routes:insert {
name = "r_single",
paths = { "/single" },
strip_path = true,
service = mock_service,
})

local r_double = assert(bp.routes:insert {
name = "r_double",
paths = { "/double" },
strip_path = true,
service = mock_service,
Expand Down Expand Up @@ -687,6 +690,26 @@ describe("proxy-wasm filters (#wasm) (#" .. strategy .. ")", function()
assert.logfile().has.no.line("[crit]", true, 0)
end)

-- Requests the `route_name` property through the test filter and expects the
-- response body to equal the name of the route matched by "/single".
it("read kong.route_name", function()
  local proxy = helpers.proxy_client()
  finally(function()
    proxy:close()
  end)

  local request = {
    method = "GET",
    path = "/single/status/201",
    headers = {
      [HEADER_NAME_TEST] = "get_kong_property",
      [HEADER_NAME_INPUT] = "route_name",
      [HEADER_NAME_DISPATCH_ECHO] = "on",
    },
  }

  local response = assert(proxy:send(request))
  local body = assert.res_status(200, response)

  assert.equal(r_single.name, body)

  -- the request must not have produced error-level log entries
  assert.logfile().has.no.line("[error]", true, 0)
  assert.logfile().has.no.line("[crit]", true, 0)
end)

it("read kong.service_id", function()
local client = helpers.proxy_client()
finally(function() client:close() end)
Expand All @@ -707,6 +730,26 @@ describe("proxy-wasm filters (#wasm) (#" .. strategy .. ")", function()
assert.logfile().has.no.line("[crit]", true, 0)
end)

-- Requests the `service_name` property through the test filter and expects
-- the response body to equal the name of the mock service.
it("read kong.service_name", function()
  local proxy = helpers.proxy_client()
  finally(function()
    proxy:close()
  end)

  local request = {
    method = "GET",
    path = "/single/status/201",
    headers = {
      [HEADER_NAME_TEST] = "get_kong_property",
      [HEADER_NAME_INPUT] = "service_name",
      [HEADER_NAME_DISPATCH_ECHO] = "on",
    },
  }

  local response = assert(proxy:send(request))
  local body = assert.res_status(200, response)

  assert.equal(mock_service.name, body)

  -- the request must not have produced error-level log entries
  assert.logfile().has.no.line("[error]", true, 0)
  assert.logfile().has.no.line("[crit]", true, 0)
end)

it("read kong.ctx.shared[<attr>]", function()
local client = helpers.proxy_client()
finally(function() client:close() end)
Expand Down
Loading
Loading