Skip to content

Commit

Permalink
feat: query split (#301)
Browse files Browse the repository at this point in the history
  • Loading branch information
AleksandrMatsko authored Feb 11, 2025
1 parent 7327752 commit 19f8ccb
Show file tree
Hide file tree
Showing 8 changed files with 764 additions and 46 deletions.
8 changes: 5 additions & 3 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,11 @@ type ClickHouse struct {
TagsAdaptiveQueries int `toml:"tags-adaptive-queries" json:"tags-adaptive-queries" comment:"Tags adaptive queries (based on load average) for increase/decrease concurrent queries"`
TagsLimiter limiter.ServerLimiter `toml:"-" json:"-"`

WildcardMinDistance int `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."`
TagsMinInQuery int `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"`
TagsMinInAutocomplete int `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"`
WildcardMinDistance int `toml:"wildcard-min-distance" json:"wildcard-min-distance" comment:"If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries."`
TrySplitQuery bool `toml:"try-split-query" json:"try-split-query" comment:"Plain queries like '{first,second}.custom.metric.*' are also a subject to wildcard-min-distance restriction. But can be split into 2 queries: 'first.custom.metric.*', 'second.custom.metric.*'. Note that: only one list will be split; if there are wildcard in query before (after) list then reverse (direct) notation will be preferred; if there are wildcards before and after list, then query will not be split"`
MaxNodeToSplitIndex int `toml:"max-node-to-split-index" json:"max-node-to-split-index" comment:"Used only if try-split-query is true. Query that contains list will be split if its (list) node index is less or equal to max-node-to-split-index. By default is 0. It is recommended to have this value set to 2 or 3 and increase it very carefully, because 3 or 4 plain nodes without wildcards have good selectivity"`
TagsMinInQuery int `toml:"tags-min-in-query" json:"tags-min-in-query" comment:"Minimum tags in seriesByTag query"`
TagsMinInAutocomplete int `toml:"tags-min-in-autocomplete" json:"tags-min-in-autocomplete" comment:"Minimum tags in autocomplete query"`

UserLimits map[string]UserLimits `toml:"user-limits" json:"user-limits" comment:"customized query limiter for some users" commented:"true"`
DateFormat string `toml:"date-format" json:"date-format" comment:"Date format (default, utc, both)"`
Expand Down
4 changes: 4 additions & 0 deletions doc/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ Only one tag used as filter for index field Tag1, see graphite_tagged table [str
tags-adaptive-queries = 0
# If a wildcard appears both at the start and the end of a plain query at a distance (in terms of nodes) less than wildcard-min-distance, then it will be discarded. This parameter can be used to discard expensive queries.
wildcard-min-distance = 0
# Plain queries like '{first,second}.custom.metric.*' are also a subject to wildcard-min-distance restriction. But can be split into 2 queries: 'first.custom.metric.*', 'second.custom.metric.*'. Note that: only one list will be split; if there are wildcard in query before (after) list then reverse (direct) notation will be preferred; if there are wildcards before and after list, then query will not be split
try-split-query = false
# Used only if try-split-query is true. Query that contains list will be split if its (list) node index is less or equal to max-node-to-split-index. By default is 0. It is recommended to have this value set to 2 or 3 and increase it very carefully, because 3 or 4 plain nodes without wildcards have good selectivity
max-node-to-split-index = 0
# Minimum tags in seriesByTag query
tags-min-in-query = 0
# Minimum tags in autocomplete query
Expand Down
14 changes: 14 additions & 0 deletions finder/finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr
opts,
useCache,
)

if config.ClickHouse.TrySplitQuery {
f = WrapSplitIndex(
f,
config.ClickHouse.WildcardMinDistance,
config.ClickHouse.URL,
config.ClickHouse.IndexTable,
config.ClickHouse.IndexUseDaily,
config.ClickHouse.IndexReverse,
config.ClickHouse.IndexReverses,
opts,
useCache,
)
}
} else {
if from > 0 && until > 0 && config.ClickHouse.DateTreeTable != "" {
f = NewDateFinder(config.ClickHouse.URL, config.ClickHouse.DateTreeTable, config.ClickHouse.DateTreeTableVersion, opts)
Expand Down
100 changes: 63 additions & 37 deletions finder/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,13 @@ func (idx *IndexFinder) useReverse(query string) bool {
return idx.useReverse(query)
}

func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *where.Where {
reverse := idx.useReverse(query)
if reverse {
query = ReverseString(query)
}

if idx.dailyEnabled && from > 0 && until > 0 {
idx.useDaily = true
} else {
idx.useDaily = false
}
// useDaily reports whether the daily index should be used for a request.
// It is true only when the daily index is enabled and both ends of the
// time range (from and until) are positive.
func useDaily(dailyEnabled bool, from, until int64) bool {
	if !dailyEnabled {
		return false
	}

	hasRange := from > 0 && until > 0

	return hasRange
}

func calculateIndexLevelOffset(useDaily, reverse bool) int {
var levelOffset int
if idx.useDaily {
if useDaily {
if reverse {
levelOffset = ReverseLevelOffset
}
Expand All @@ -142,8 +135,11 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher
levelOffset = TreeLevelOffset
}

w := idx.where(query, levelOffset)
if idx.useDaily {
return levelOffset
}

func addDatesToWhere(w *where.Where, useDaily bool, from, until int64) {
if useDaily {
w.Andf(
"Date >='%s' AND Date <= '%s'",
date.FromTimestampToDaysFormat(from),
Expand All @@ -152,10 +148,24 @@ func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *wher
} else {
w.And(where.Eq("Date", DefaultTreeDate))
}
}

// whereFilter builds the WHERE clause used to look up query in the index.
//
// The query is reversed first when idx.useReverse reports that the reversed
// notation should be used. The decision whether the daily index applies is
// stored in idx.useDaily, and is then used both to pick the level offset and
// to choose which Date condition is appended to the clause.
func (idx *IndexFinder) whereFilter(query string, from int64, until int64) *where.Where {
	reversed := idx.useReverse(query)
	if reversed {
		query = ReverseString(query)
	}

	// Remember the choice on the finder: it is read again below.
	idx.useDaily = useDaily(idx.dailyEnabled, from, until)

	filter := idx.where(query, calculateIndexLevelOffset(idx.useDaily, reversed))
	addDatesToWhere(filter, idx.useDaily, from, until)

	return filter
}

func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int) error {
func validatePlainQuery(query string, wildcardMinDistance int) error {
if where.HasUnmatchedBrackets(query) {
return errs.NewErrorWithCode("query has unmatched brackets", http.StatusBadRequest)
}
Expand All @@ -175,7 +185,7 @@ func (idx *IndexFinder) validatePlainQuery(query string, wildcardMinDistance int
}

func (idx *IndexFinder) Execute(ctx context.Context, config *config.Config, query string, from int64, until int64, stat *FinderStat) (err error) {
err = idx.validatePlainQuery(query, config.ClickHouse.WildcardMinDistance)
err = validatePlainQuery(query, config.ClickHouse.WildcardMinDistance)
if err != nil {
return err
}
Expand All @@ -202,45 +212,61 @@ func (idx *IndexFinder) Abs(v []byte) []byte {
return v
}

func (idx *IndexFinder) bodySplit() {
if len(idx.body) == 0 {
return
func splitIndexBody(body []byte, useReverse, useCache bool) ([]byte, [][]byte, bool) {
if len(body) == 0 {
return body, [][]byte{}, false
}

idx.rows = bytes.Split(bytes.TrimSuffix(idx.body, []byte{'\n'}), []byte{'\n'})
rows := bytes.Split(bytes.TrimSuffix(body, []byte{'\n'}), []byte{'\n'})
setDirect := false

if idx.useReverse("") {
// rotate names for reduce
if useReverse {
var buf bytes.Buffer
if idx.useCache {
buf.Grow(len(idx.body))
if useCache {
buf.Grow(len(body))
}
for i := 0; i < len(idx.rows); i++ {
idx.rows[i] = ReverseBytes(idx.rows[i])
if idx.useCache {
buf.Write(idx.rows[i])

for i := range rows {
rows[i] = ReverseBytes(rows[i])
if useCache {
buf.Write(rows[i])
buf.WriteByte('\n')
}
}
if idx.useCache {
idx.body = buf.Bytes()
idx.reverse = queryDirect

if useCache {
body = buf.Bytes()
setDirect = true
}
}

return body, rows, setDirect
}

// bodySplit splits the raw response stored in idx.body into idx.rows by
// delegating to splitIndexBody, passing the finder's reverse and cache
// settings. When the helper reports setDirect, idx.reverse is switched to
// queryDirect.
func (idx *IndexFinder) bodySplit() {
	var setDirect bool

	idx.body, idx.rows, setDirect = splitIndexBody(idx.body, idx.useReverse(""), idx.useCache)
	if !setDirect {
		return
	}

	idx.reverse = queryDirect
}

func (idx *IndexFinder) makeList(onlySeries bool) [][]byte {
if len(idx.rows) == 0 {
func makeList(rows [][]byte, onlySeries bool) [][]byte {
if len(rows) == 0 {
return [][]byte{}
}

rows := make([][]byte, len(idx.rows))
resRows := make([][]byte, len(rows))

for i := 0; i < len(idx.rows); i++ {
rows[i] = idx.rows[i]
for i := 0; i < len(rows); i++ {
resRows[i] = rows[i]
}

return rows
return resRows
}

// makeList returns the list built from the finder's current rows.
// It is a thin method wrapper that forwards idx.rows and onlySeries
// unchanged to the package-level makeList function.
func (idx *IndexFinder) makeList(onlySeries bool) [][]byte {
return makeList(idx.rows, onlySeries)
}

func (idx *IndexFinder) List() [][]byte {
Expand Down
Loading

0 comments on commit 19f8ccb

Please sign in to comment.