feat: query split (#301)

go-graphite · Feb 11, 2025 · 19f8ccb · 19f8ccb
1 parent 7327752
commit 19f8ccb
Show file tree

Hide file tree

Showing 8 changed files with 764 additions and 46 deletions.
diff --git a/config/config.go b/config/config.go
@@ -221,9 +221,11 @@ type ClickHouse struct {
 	TagsAdaptiveQueries   int                   `toml:"tags-adaptive-queries" json:"tags-adaptive-queries" comment:"Tags adaptive queries (based on load average) for increase/decrease concurrent queries"`
 	TagsLimiter           limiter.ServerLimiter `toml:"-"                        json:"-"`
 
-	WildcardMinDistance   int `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."`
-	TagsMinInQuery        int `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"`
-	TagsMinInAutocomplete int `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"`
+	WildcardMinDistance   int  `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."`
+	TrySplitQuery         bool `toml:"try-split-query" json:"try-split-query" comment:"Plain queries like '{first,second}.custom.metric.*' are also a subject to wildcard-min-distance restriction. But can be split into 2 queries: 'first.custom.metric.*', 'second.custom.metric.*'. Note that: only one list will be split; if there are wildcard in query before (after) list then reverse (direct) notation will be preferred; if there are wildcards before and after list, then query will not be split"`
+	MaxNodeToSplitIndex   int  `toml:"max-node-to-split-index" json:"max-node-to-split-index" comment:"Used only if try-split-query is true. Query that contains list will be split if its (list) node index is less or equal to max-node-to-split-index. By default is 0. It is recommended to have this value set to 2 or 3 and increase it very carefully, because 3 or 4 plain nodes without wildcards have good selectivity"`
+	TagsMinInQuery        int  `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"`
+	TagsMinInAutocomplete int  `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"`
 
 	UserLimits           map[string]UserLimits `toml:"user-limits"              json:"user-limits"              comment:"customized query limiter for some users"                                                                                        commented:"true"`
 	DateFormat           string                `toml:"date-format"              json:"date-format"              comment:"Date format (default, utc, both)"`

diff --git a/doc/config.md b/doc/config.md
@@ -313,6 +313,10 @@ Only one tag used as filter for index field Tag1, see graphite_tagged table [str
  tags-adaptive-queries = 0
  # If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries.
  wildcard-min-distance = 0
+ # Plain queries like '{first,second}.custom.metric.*' are also subject to the wildcard-min-distance restriction, but they can be split into 2 queries: 'first.custom.metric.*' and 'second.custom.metric.*'. Note that: only one list will be split; if there is a wildcard in the query before (after) the list, then reverse (direct) notation will be preferred; if there are wildcards both before and after the list, then the query will not be split.
+ try-split-query = false
+ # Used only if try-split-query is true. A query that contains a list will be split if the list's node index is less than or equal to max-node-to-split-index. The default is 0. It is recommended to set this value to 2 or 3 and to increase it very carefully, because 3 or 4 plain nodes without wildcards have good selectivity.
+ max-node-to-split-index = 0
  # Minimum tags in seriesByTag query
  tags-min-in-query = 0
  # Minimum tags in autocomplete query

diff --git a/finder/finder.go b/finder/finder.go
@@ -66,6 +66,20 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr
 			opts,
 			useCache,
 		)
+
+		if config.ClickHouse.TrySplitQuery {
+			f = WrapSplitIndex(
+				f,
+				config.ClickHouse.WildcardMinDistance,
+				config.ClickHouse.URL,
+				config.ClickHouse.IndexTable,
+				config.ClickHouse.IndexUseDaily,
+				config.ClickHouse.IndexReverse,
+				config.ClickHouse.IndexReverses,
+				opts,
+				useCache,
+			)
+		}
 	} else {
 		if from > 0 && until > 0 && config.ClickHouse.DateTreeTable != "" {
 			f = NewDateFinder(config.ClickHouse.URL, config.ClickHouse.DateTreeTable, config.ClickHouse.DateTreeTableVersion, opts)

diff --git a/finder/index.go b/finder/index.go
@@ -119,20 +119,13 @@ func (idx *IndexFinder) useReverse(query string) bool {
 	return idx.useReverse(query)
 }
 
-func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *where.Where {
-	reverse := idx.useReverse(query)
-	if reverse {
-		query = ReverseString(query)
-	}
-
-	if idx.dailyEnabled && from > 0 && until > 0 {
-		idx.useDaily = true
-	} else {
-		idx.useDaily = false
-	}
+func useDaily(dailyEnabled bool, from, until int64) bool {
+	return dailyEnabled && from > 0 && until > 0
+}
 
+func calculateIndexLevelOffset(useDaily, reverse bool) int {
 	var levelOffset int
-	if idx.useDaily {
+	if useDaily {
 		if reverse {
 			levelOffset = ReverseLevelOffset
 		}
@@ -142,8 +135,11 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher
 		levelOffset = TreeLevelOffset
 	}
 
-	w := idx.where(query, levelOffset)
-	if idx.useDaily {
+	return levelOffset
+}
+
+func addDatesToWhere(w *where.Where, useDaily bool, from, until int64) {
+	if useDaily {
 		w.Andf(
 			"Date >='%s' AND Date <= '%s'",
 			date.FromTimestampToDaysFormat(from),
@@ -152,10 +148,24 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher
 	} else {
 		w.And(where.Eq("Date", DefaultTreeDate))
 	}
+}
+
+func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *where.Where {
+	reverse := idx.useReverse(query)
+	if reverse {
+		query = ReverseString(query)
+	}
+
+	idx.useDaily = useDaily(idx.dailyEnabled, from, until)
+
+	levelOffset := calculateIndexLevelOffset(idx.useDaily, reverse)
+
+	w := idx.where(query, levelOffset)
+	addDatesToWhere(w, idx.useDaily, from, until)
 	return w
 }
 
-func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int) error {
+func validatePlainQuery(query string, wildcardMinDistance int) error {
 	if where.HasUnmatchedBrackets(query) {
 		return errs.NewErrorWithCode("query has unmatched brackets", http.StatusBadRequest)
 	}
@@ -175,7 +185,7 @@ func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int
 }
 
 func (idx *IndexFinder) Execute(ctx context.Context, config *config.Config, query string, from int64, until int64, stat *FinderStat) (err error) {
-	err = idx.validatePlainQuery(query, config.ClickHouse.WildcardMinDistance)
+	err = validatePlainQuery(query, config.ClickHouse.WildcardMinDistance)
 	if err != nil {
 		return err
 	}
@@ -202,45 +212,61 @@ func (idx *IndexFinder) Abs(v []byte) []byte {
 	return v
 }
 
-func (idx *IndexFinder) bodySplit() {
-	if len(idx.body) == 0 {
-		return
+func splitIndexBody(body []byte, useReverse, useCache bool) ([]byte, [][]byte, bool) {
+	if len(body) == 0 {
+		return body, [][]byte{}, false
 	}
 
-	idx.rows = bytes.Split(bytes.TrimSuffix(idx.body, []byte{'\n'}), []byte{'\n'})
+	rows := bytes.Split(bytes.TrimSuffix(body, []byte{'\n'}), []byte{'\n'})
+	setDirect := false
 
-	if idx.useReverse("") {
-		// rotate names for reduce
+	if useReverse {
 		var buf bytes.Buffer
-		if idx.useCache {
-			buf.Grow(len(idx.body))
+		if useCache {
+			buf.Grow(len(body))
 		}
-		for i := 0; i < len(idx.rows); i++ {
-			idx.rows[i] = ReverseBytes(idx.rows[i])
-			if idx.useCache {
-				buf.Write(idx.rows[i])
+
+		for i := range rows {
+			rows[i] = ReverseBytes(rows[i])
+			if useCache {
+				buf.Write(rows[i])
 				buf.WriteByte('\n')
 			}
 		}
-		if idx.useCache {
-			idx.body = buf.Bytes()
-			idx.reverse = queryDirect
+
+		if useCache {
+			body = buf.Bytes()
+			setDirect = true
 		}
 	}
+
+	return body, rows, setDirect
+}
+
+func (idx *IndexFinder) bodySplit() {
+	setDirect := false
+	idx.body, idx.rows, setDirect = splitIndexBody(idx.body, idx.useReverse(""), idx.useCache)
+	if setDirect {
+		idx.reverse = queryDirect
+	}
 }
 
-func (idx *IndexFinder) makeList(onlySeries bool) [][]byte {
-	if len(idx.rows) == 0 {
+func makeList(rows [][]byte, onlySeries bool) [][]byte {
+	if len(rows) == 0 {
 		return [][]byte{}
 	}
 
-	rows := make([][]byte, len(idx.rows))
+	resRows := make([][]byte, len(rows))
 
-	for i := 0; i < len(idx.rows); i++ {
-		rows[i] = idx.rows[i]
+	for i := 0; i < len(rows); i++ {
+		resRows[i] = rows[i]
 	}
 
-	return rows
+	return resRows
+}
+
+func (idx *IndexFinder) makeList(onlySeries bool) [][]byte {
+	return makeList(idx.rows, onlySeries)
 }
 
 func (idx *IndexFinder) List() [][]byte {