From 2168dc22eb9fb36fd36dd8383a3c8158598d51ed Mon Sep 17 00:00:00 2001 From: Martin Chodur Date: Sun, 1 Dec 2024 23:39:26 +0100 Subject: [PATCH] fix: expressionUsesOnlyAllowedLabelsForMetricRegexp logic (#98) Signed-off-by: Martin Chodur --- CHANGELOG.md | 3 + docs/validations.md | 8 +- pkg/validator/config.go | 2 +- pkg/validator/promql_expression.go | 2 +- pkg/validator/promql_expression_helpers.go | 87 +++++++++++++++---- .../promql_expression_helpers_test.go | 29 +++++-- pkg/validator/validator_test.go | 6 +- 7 files changed, 101 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36f0a07..ab5075f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [3.6.1] - 2024-12-01 +- Fixed: FIxed the `expressionUsesOnlyAllowedLabelsForMetricRegexp` validator that might return false positives when used on more complex expressions with vector matching and functions using labels. + ## [3.6.0] - 2024-11-29 - Added: Configuration file can now be in Jsonnet format if the config file name ends with `.jsonnet` and it will get automatically evaluated. diff --git a/docs/validations.md b/docs/validations.md index 1f44395..f3908a4 100644 --- a/docs/validations.md +++ b/docs/validations.md @@ -268,11 +268,9 @@ params: #### `expressionUsesOnlyAllowedLabelsForMetricRegexp` -Fails if the rule uses any labels beside those listed in `allowedLabels`, in combination with given metric regexp in its `expr` label matchers, aggregations or joins. -Different metric name matchers are handled as follows: -* `{__name__="foo",..}, foo{...}` - `foo` is matched literally against given `metricNameRegexp`, if matches, expr is validated against `allowedLabels` -* `{__name__=~"foo",..}` - skipped -* `{__name__!="foo",}, {__name__!~"foo"}` - skipped +Fails if the rule uses any labels beside those listed in `allowedLabels`, in combination with given metric regexp in its `expr` label matchers, aggregations or joins. If the metric name is omitted in the query, or matched using regexp or any negative matcher on the `__name__` label, the rule will be skipped. + +The check rather ignores validation of labels, where it cannot be sure if they are targeting only the metric in question, like aggregations by labels on top of vector matching expression where the labels might come from the other part of the expr. > If using kube-state-metrics for exposing labels information about K8S objects (kube_*_labels) only those labels whitelisted by kube-state-metrics admin will be available. > Might be useful to check that users does not use any other in their expressions. diff --git a/pkg/validator/config.go b/pkg/validator/config.go index 76b472f..3875533 100644 --- a/pkg/validator/config.go +++ b/pkg/validator/config.go @@ -25,7 +25,7 @@ var registeredUniversalRuleValidators = map[string]validatorCreator{ "validFunctionsOnCounters": newValidFunctionsOnCounters, "rateBeforeAggregation": newRateBeforeAggregation, "expressionDoesNotUseLabels": newExpressionDoesNotUseLabels, - "expressionUsesOnlyAllowedLabelsForMetricRegexp": newExpressionUseOnlyWhitelistedLabelsForMetric, + "expressionUsesOnlyAllowedLabelsForMetricRegexp": newExpressionUsesOnlyAllowedLabelsForMetricRegexp, "expressionDoesNotUseOlderDataThan": newExpressionDoesNotUseOlderDataThan, "expressionDoesNotUseRangeShorterThan": newExpressionDoesNotUseRangeShorterThan, "expressionDoesNotUseMetrics": newExpressionDoesNotUseMetrics, diff --git a/pkg/validator/promql_expression.go b/pkg/validator/promql_expression.go index 929d2d6..93175f5 100644 --- a/pkg/validator/promql_expression.go +++ b/pkg/validator/promql_expression.go @@ -127,7 +127,7 @@ type expressionUsesOnlyAllowedLabelsForMetricRegexp struct { metricNameRegexp *regexp.Regexp } -func newExpressionUseOnlyWhitelistedLabelsForMetric(paramsConfig yaml.Node) (Validator, error) { +func newExpressionUsesOnlyAllowedLabelsForMetricRegexp(paramsConfig yaml.Node) (Validator, error) { params := struct { AllowedLabels []string `yaml:"allowedLabels"` MetricNameRegexp string `yaml:"metricNameRegexp"` diff --git a/pkg/validator/promql_expression_helpers.go b/pkg/validator/promql_expression_helpers.go index ced96ac..be786cb 100644 --- a/pkg/validator/promql_expression_helpers.go +++ b/pkg/validator/promql_expression_helpers.go @@ -2,11 +2,12 @@ package validator import ( "fmt" + "maps" "regexp" + "slices" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" - "golang.org/x/exp/slices" ) func init() { @@ -37,9 +38,18 @@ func labelsUsedInSelectorForMetric(selector *parser.VectorSelector, metricRegexp return usedLabels, metricUsed } +func parserCallStringArgValue(e parser.Expr) string { + val, ok := e.(*parser.StringLiteral) + if !ok { + return "" // Ignore the error, this shouldn't happen anyway, parser should already catch this. + } + return val.Val +} + // Returns a list of labels which are used in given expr in relation to given metric. -// Beside labels within vector selector itself, it adds labels used in Aggregate expressions and labels used in Binary expression. -// For Binary expressions it may report false positives as the current implementation does not consider on which side of group_left/group_right is the given metric. +// It traverses the whole expression tree top to bottom and collects all labels used in selectors, operators, functions etc. +// In case of vector matching, it also collects labels used in vector matching only relevant to the part of the expression where the metric is used. +// If the vector matching uses grouping, any labels used on top of the expression are not validated, since they might come from the other side of the expression. func getExpressionUsedLabelsForMetric(expr string, metricRegexp *regexp.Regexp) ([]string, error) { promQl, err := parser.ParseExpr(expr) if err != nil { @@ -49,29 +59,72 @@ func getExpressionUsedLabelsForMetric(expr string, metricRegexp *regexp.Regexp) var usedLabels []string labelsUpInExpr := func(path []parser.Node) []string { - usedLabels := []string{} - for _, n := range path { + usedLabels := map[string]struct{}{} + for i, n := range path { switch v := n.(type) { case *parser.AggregateExpr: - usedLabels = append(usedLabels, v.Grouping...) + for _, l := range v.Grouping { + usedLabels[l] = struct{}{} + } case *parser.BinaryExpr: - if v.VectorMatching != nil { - usedLabels = append(usedLabels, v.VectorMatching.Include...) - usedLabels = append(usedLabels, v.VectorMatching.MatchingLabels...) + if v.VectorMatching == nil { + continue + } + // If any group_left/group_right is used, we need to reset the used labels, since any labels used on top of this expression might come from the other side of the expression. + if v.VectorMatching.Include != nil { + usedLabels = map[string]struct{}{} + } + // Validate only the on(...) labels. The ignoring(...) might target the other side of the binary expression. + if v.VectorMatching.On { + for _, l := range v.VectorMatching.MatchingLabels { + usedLabels[l] = struct{}{} + } + } + // We want to validate the group_left/group_right labels only if the validated metric is on the "one" of the many-one/one-to.many side. + nextExpr := path[i+1].String() + if (v.VectorMatching.Card == parser.CardManyToOne && v.RHS.String() == nextExpr) || (v.VectorMatching.Card == parser.CardOneToMany && v.LHS.String() == nextExpr) { + for _, l := range v.VectorMatching.Include { + usedLabels[l] = struct{}{} + } + } + case *parser.Call: + switch v.Func.Name { + case "label_replace": + // Any PromQL "above" this label_replace can use the destination synthetic label, so drop it from the list of already used labels. + delete(usedLabels, parserCallStringArgValue(v.Args[1])) + usedLabels[parserCallStringArgValue(v.Args[3])] = struct{}{} // The source_label is interesting for us + case "label_join": + delete(usedLabels, parserCallStringArgValue(v.Args[1])) + // label_join is variadic, so we need to iterate over all labels that are used in the expression + for _, l := range v.Args[3:] { + usedLabels[parserCallStringArgValue(l)] = struct{}{} + } + case "sort_by_label": + for _, l := range v.Args[1:] { + usedLabels[parserCallStringArgValue(l)] = struct{}{} + } + case "sort_by_label_desc": + for _, l := range v.Args[1:] { + usedLabels[parserCallStringArgValue(l)] = struct{}{} + } } } } - return usedLabels + delete(usedLabels, "") // Used in case of errors so just drop it + return slices.Collect(maps.Keys(usedLabels)) } parser.Inspect(promQl, func(n parser.Node, path []parser.Node) error { - if v, isVectorSelector := n.(*parser.VectorSelector); isVectorSelector { - selectorUsedLabels, ok := labelsUsedInSelectorForMetric(v, metricRegexp) - if ok { - metricInExpr = true - usedLabels = append(usedLabels, selectorUsedLabels...) - usedLabels = append(usedLabels, labelsUpInExpr(path)...) - } + v, isVectorSelector := n.(*parser.VectorSelector) + if !isVectorSelector { + return nil + } + selectorUsedLabels, ok := labelsUsedInSelectorForMetric(v, metricRegexp) + if ok { + metricInExpr = true + usedLabels = append(usedLabels, selectorUsedLabels...) + // The path does not contain the current node, so we need to append it since some cases need also the last node. + usedLabels = append(usedLabels, labelsUpInExpr(append(path, n))...) } return nil }) diff --git a/pkg/validator/promql_expression_helpers_test.go b/pkg/validator/promql_expression_helpers_test.go index b079543..f15f9b1 100644 --- a/pkg/validator/promql_expression_helpers_test.go +++ b/pkg/validator/promql_expression_helpers_test.go @@ -2,6 +2,7 @@ package validator import ( "errors" + "fmt" "reflect" "regexp" "testing" @@ -48,16 +49,28 @@ func TestGetExpressionUsedLabelsForMetric(t *testing.T) { {expr: "kube_pod_labels{label_app!='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app"}}, {expr: "kube_pod_labels{label_app='foo'} * on(pod) kube_pod_info{}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}}, {expr: "kube_pod_info{} * on(pod) group_left(label_workload) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "label_workload", "pod"}}, - {expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod", "pod_ip"}}, - {expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'} offset 1h", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod", "pod_ip"}}, + {expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'}", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}}, + {expr: "kube_pod_info{} * on(pod) group_right(pod_ip) kube_pod_labels{label_app='foo'} offset 1h", metric: "kube_pod_labels", expected: []string{metricNameLabel, "label_app", "pod"}}, + {expr: "sum(kube_pod_info * kube_pod_labels) by (foo)", metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo"}}, + {expr: "label_replace(kube_pod_labels, 'bar', '$1', 'foo', '.*')", metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo"}}, + {expr: `sum(label_join(kube_pod_labels, "foo", ",", "l1", "l2", "l3")) by (foo)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "l1", "l2", "l3"}}, + {expr: `sum(label_join(kube_pod_labels, "foo", ",", "l1", "l2", "l3")) by (bar)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "l1", "l2", "l3", "bar"}}, + {expr: `sum(kube_pod_labels * on (foo, bar) group_right(baz) kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}}, + {expr: `sum(kube_pod_labels * on (foo, bar) group_left(baz) kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar"}}, + {expr: `sum(kube_pod_labels * ignoring (foo, bar) group_left() kube_pod_info) by (to_be_dropped)`, metric: "kube_pod_labels", expected: []string{metricNameLabel}}, + {expr: `sum(kube_pod_labels * on (foo, bar) kube_pod_info) by (baz)`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}}, + {expr: `sort_by_label(kube_pod_labels, "foo", "bar", "baz")`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}}, + {expr: `sort_by_label_desc(kube_pod_labels, "foo", "bar", "baz")`, metric: "kube_pod_labels", expected: []string{metricNameLabel, "foo", "bar", "baz"}}, } - for _, test := range tests { - labels, err := getExpressionUsedLabelsForMetric(test.expr, regexp.MustCompile(test.metric)) - assert.ElementsMatch(t, labels, test.expected, "Expected labels %v, but got %v", test.expected, labels) - if !errors.Is(err, test.expectedErr) { - t.Errorf("Expected error %v, but got %v", test.expectedErr, err) - } + for i, test := range tests { + t.Run(fmt.Sprintf("test_case_%d", i), func(t *testing.T) { + labels, err := getExpressionUsedLabelsForMetric(test.expr, regexp.MustCompile(test.metric)) + assert.ElementsMatch(t, labels, test.expected, "Expected labels %v, but got %v", test.expected, labels) + if !errors.Is(err, test.expectedErr) { + t.Errorf("Expected error %v, but got %v", test.expectedErr, err) + } + }) } } diff --git a/pkg/validator/validator_test.go b/pkg/validator/validator_test.go index 2b40d1d..ffcf3ff 100644 --- a/pkg/validator/validator_test.go +++ b/pkg/validator/validator_test.go @@ -105,10 +105,8 @@ var testCases = []struct { {name: "ruleExprDoesUseForbiddenLabelInBy", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "sum(kube_pod_labels) by (app)"}, expectedErrors: 1}, {name: "ruleExprDoesUseForbiddenLabelInOn", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) up"}, expectedErrors: 1}, {name: "ruleExprDoesUseForbiddenLabelInGroup", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "group(kube_pod_labels) by (label_app)"}, expectedErrors: 1}, - {name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 2}, - {name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 2}, - {name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{"app"}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 1}, - {name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{"app"}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 1}, + {name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer1", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_left(foo) up"}, expectedErrors: 1}, + {name: "ruleExprDoesUseForbiddenLabelInBinaryExprWithLabelTransfer2", validator: expressionUsesOnlyAllowedLabelsForMetricRegexp{allowedLabels: allowedLabelsMap([]string{}), metricNameRegexp: mustCompileAnchoredRegexp("kube_pod_labels")}, rule: rulefmt.Rule{Expr: "kube_pod_labels * on(app) group_right(foo) up"}, expectedErrors: 2}, // expressionDoesNotUseOlderDataThan {name: "ruleExprDoesNotUseOlderData", validator: expressionDoesNotUseOlderDataThan{limit: time.Hour}, rule: rulefmt.Rule{Expr: "up{xxx='yyy'}"}, expectedErrors: 0},