Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve query complexity calculations #586

Merged
merged 6 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 36 additions & 16 deletions cmd/api/src/queries/graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ func (s *GraphQuery) PrepareCypherQuery(rawCypher string) (PreparedQuery, error)
// log query details if it is rejected due to high complexity
highComplexityLog := log.WithLevel(log.LevelError)
highComplexityLog.Str("query", strippedQueryBuffer.String())
highComplexityLog.Msg(fmt.Sprintf("Query rejected. Query weight: %.2f. Maximum allowed weight: %d", complexityMeasure.Weight, MaxQueryComplexityWeightAllowed))
highComplexityLog.Msg(fmt.Sprintf("Query rejected. Query weight: %d. Maximum allowed weight: %d", complexityMeasure.Weight, MaxQueryComplexityWeightAllowed))

return graphQuery, newQueryError(ErrCypherQueryTooComplex)
}
Expand Down Expand Up @@ -470,35 +470,32 @@ func (s *GraphQuery) RawCypherSearch(ctx context.Context, pQuery PreparedQuery,
)

if bhCtxInst.Timeout > maxTimeout {
log.Debugf("Custom timeout is too large, using the maximum allowable timeout of %.2f seconds instead", maxTimeout.Seconds())
log.Debugf("Custom timeout is too large, using the maximum allowable timeout of %d minutes instead", maxTimeout.Minutes())
bhCtxInst.Timeout = maxTimeout
}

availableRuntime := bhCtxInst.Timeout
if availableRuntime > 0 {
log.Debugf("Available timeout for query is set to: %.2f seconds", availableRuntime.Seconds())
log.Debugf("Available timeout for query is set to: %d seconds", availableRuntime.Seconds())
} else {
availableRuntime = defaultTimeout

if !s.DisableCypherComplexityLimit {
// The weight of the query is divided by 5 to get a runtime reduction factor. This means that query weights
// of 5 or less will get the full runtime duration.
if reductionFactor := time.Duration(pQuery.complexity.Weight) / 5; reductionFactor > 0 {
availableRuntime /= reductionFactor

log.Infof("Cypher query cost is: %.2f. Reduction factor for query is: %d. Available timeout for query is now set to: %.2f minutes", pQuery.complexity.Weight, reductionFactor, availableRuntime.Minutes())
}
var reductionFactor int64
availableRuntime, reductionFactor = applyTimeoutReduction(pQuery.complexity.Weight, availableRuntime)

logEvent := log.WithLevel(log.LevelInfo)
logEvent.Str("query", pQuery.strippedQuery)
logEvent.Str("query cost", fmt.Sprintf("%d", pQuery.complexity.Weight))
logEvent.Str("reduction factor", strconv.FormatInt(reductionFactor, 10))
logEvent.Msg(fmt.Sprintf("Available timeout for query is set to: %.2f seconds", availableRuntime.Seconds()))
}
}

// Set the timeout for this DB interaction
config.Timeout = availableRuntime
}

logEvent := log.WithLevel(log.LevelInfo)
logEvent.Str("query", pQuery.strippedQuery)
logEvent.Str("query cost", fmt.Sprintf("%.2f", pQuery.complexity.Weight))
logEvent.Msg("Executing user cypher query")
start := time.Now()

// TODO: verify write vs read tx need differentiation after PG migration
if pQuery.HasMutation {
Expand All @@ -507,12 +504,19 @@ func (s *GraphQuery) RawCypherSearch(ctx context.Context, pQuery PreparedQuery,
err = s.Graph.ReadTransaction(ctx, txDelegate, txOptions)
}

runtime := time.Since(start)

logEvent := log.WithLevel(log.LevelInfo)
logEvent.Str("query", pQuery.strippedQuery)
logEvent.Str("query cost", fmt.Sprintf("%d", pQuery.complexity.Weight))
logEvent.Msg(fmt.Sprintf("Executed user cypher query with cost %d in %.2f seconds", pQuery.complexity.Weight, runtime.Seconds()))

if err != nil {
// Log query details if neo4j times out
if util.IsNeoTimeoutError(err) {
timeoutLog := log.WithLevel(log.LevelError)
timeoutLog.Str("query", pQuery.strippedQuery)
timeoutLog.Str("query cost", fmt.Sprintf("%.2f", pQuery.complexity.Weight))
timeoutLog.Str("query cost", fmt.Sprintf("%d", pQuery.complexity.Weight))
timeoutLog.Msg("Neo4j timed out while executing cypher query")
} else {
log.Errorf("RawCypherSearch failed: %v", err)
Expand All @@ -523,6 +527,22 @@ func (s *GraphQuery) RawCypherSearch(ctx context.Context, pQuery PreparedQuery,
return graphResponse, nil
}

// applyTimeoutReduction shrinks availableRuntime in proportion to the complexity weight of a query. It returns the
// reduced runtime together with the reduction factor that was applied.
//
// The reduction factor is 1 + (queryWeight / 5) using integer division, so that:
//
//	weights of 0-4 get the full runtime duration
//	weights of 5-9 get 1/2 the runtime duration
//	weights of 10-14 get 1/3 the runtime duration
//	and so on until a weight of 50 gets 1/11 the runtime duration
//
// The returned runtime is always truncated to a whole number of seconds.
func applyTimeoutReduction(queryWeight int64, availableRuntime time.Duration) (time.Duration, int64) {
	reductionFactor := 1 + (queryWeight / 5)

	// A negative weight would otherwise yield a zero factor (integer divide-by-zero panic) or a negative factor
	// (negative timeout). Treat any such weight as "no reduction".
	if reductionFactor < 1 {
		reductionFactor = 1
	}

	// Integer division floors the result, so the reduced runtime is a whole number of seconds.
	reducedSeconds := int64(availableRuntime.Seconds()) / reductionFactor

	return time.Duration(reducedSeconds) * time.Second, reductionFactor
}

func nodeToSearchResult(node *graph.Node) model.SearchResult {
var (
name, _ = node.Properties.GetOrDefault(common.Name.String(), "NO NAME").String()
Expand Down
35 changes: 35 additions & 0 deletions cmd/api/src/queries/graph_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,41 @@ import (
"go.uber.org/mock/gomock"
)

// Test_ApplyTimeoutReduction checks one representative weight from every permissible band against the reduction
// factor and reduced runtime that applyTimeoutReduction is expected to return for it.
func Test_ApplyTimeoutReduction(t *testing.T) {
	// Query Weight    Reduction Factor    Runtime
	// 0-4             1                   x
	// 5-9             2                   x/2
	// 10-14           3                   x/3
	// 15-19           4                   x/4
	// 20-24           5                   x/5
	// 25-29           6                   x/6
	// 30-34           7                   x/7
	// 35-39           8                   x/8
	// 40-44           9                   x/9
	// 45-49           10                  x/10
	// 50              11                  x/11
	// >50             Too complex

	const (
		firstWeight  = int64(2)  // sits inside the 0-4 band
		weightStep   = int64(5)  // moves to the same offset within the next band
		maxReduction = int64(11) // factor applied at the heaviest band
	)

	baseRuntime := 15 * time.Minute
	baseSeconds := int64(baseRuntime.Seconds())

	// Walk the bands in order: the expected factor grows by one each time the weight grows by weightStep.
	for wantFactor, queryWeight := int64(1), firstWeight; wantFactor <= maxReduction; wantFactor, queryWeight = wantFactor+1, queryWeight+weightStep {
		gotRuntime, gotFactor := applyTimeoutReduction(queryWeight, baseRuntime)

		require.Equal(t, wantFactor, gotFactor)
		require.Equal(t, baseSeconds/wantFactor, int64(gotRuntime.Seconds()))
	}
}

const cacheKey = "ad-entity-query_queryName_objectID_1"

func Test_runMaybeCachedEntityQuery(t *testing.T) {
Expand Down
35 changes: 26 additions & 9 deletions cmd/api/src/queries/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ func TestGraphQuery_RawCypherSearch(t *testing.T) {

t.Run("RawCypherSearch query complexity controls", func(t *testing.T) {
// Validate that query complexity controls are working
// Scenario 1:
mockGraphDB.EXPECT().ReadTransaction(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, txDelegate graph.TransactionDelegate, options ...graph.TransactionOption) error {
// Validate that the options are being set correctly
if len(options) != 1 {
Expand All @@ -141,28 +142,44 @@ func TestGraphQuery_RawCypherSearch(t *testing.T) {
txConfig := &graph.TransactionConfig{}
options[0](txConfig)

require.Equal(t, time.Minute*5, txConfig.Timeout)
require.Equal(t, time.Second*225, txConfig.Timeout)

return nil
}).Times(2)
}).Times(1)

// Unset the user-set timeout in the BH context to validate QC runtime reduction of a complex query
// This will be set to a default of 15 min, with a reduction factor of 3, so we should have a 5 min config timeout
// Scenario 2:
mockGraphDB.EXPECT().ReadTransaction(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, txDelegate graph.TransactionDelegate, options ...graph.TransactionOption) error {
// Validate that the options are being set correctly
if len(options) != 1 {
t.Fatalf("Expected only one transaction option for RawCypherSearch but saw: %d", len(options))
}

// availableRuntime = 15min (default), query cost = 15
// Then reductionFactor = 15/5 = 3
// Therefore actual timeout = availableRuntime/reductionFactor : 15/3 = 5min
// Create a new transaction config to capture the query timeout logic
txConfig := &graph.TransactionConfig{}
options[0](txConfig)

outerBHCtxInst.Timeout = 0
require.Equal(t, time.Second*5, txConfig.Timeout)

return nil
}).Times(1)

// Scenario 1:
// Unset the user-set timeout in the BH context to validate QC runtime reduction of a complex query
// This will be set to a default of 15 min or 900 sec
// availableRuntime = 900 sec, query cost = 15
// reductionFactor = 1 + (15/5) = 4
// Therefore actual timeout = availableRuntime/reductionFactor : 900/4 = 225sec

outerBHCtxInst.Timeout = 0
preparedQuery, err := gq.PrepareCypherQuery("match ()-[:HasSession*..]->()-[:MemberOf*..]->() return n;")
require.Nil(t, err)
_, err = gq.RawCypherSearch(outerBHCtxInst.ConstructGoContext(), preparedQuery, false)
require.Nil(t, err)

// Scenario 2:
// Prove that overriding QC with a user-preference works
// This will be directly used as the config timeout, without any reduction factor
outerBHCtxInst.Timeout = time.Minute * 5
outerBHCtxInst.Timeout = time.Second * 5

preparedQuery, err = gq.PrepareCypherQuery("match ()-[:HasSession*..]->()-[:MemberOf*..]->() return n;")
require.Nil(t, err)
Expand Down
8 changes: 4 additions & 4 deletions packages/go/cypher/analyzer/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,16 @@ func WithVisitor[T model.Expression](analyzer *Analyzer, visitorFunc TypedVisito
// Weight constants aren't well named for right now. These are just dumb values to assign heuristic weight to certain
// query elements
const (
Weight1 float64 = iota + 1
Weight1 int64 = iota + 1
Weight2
Weight3
)

type ComplexityMeasure struct {
Weight float64
Weight int64

numPatterns float64
numProjections float64
numPatterns int64
numProjections int64
nodeLookupKinds map[string]graph.Kinds
}

Expand Down
Loading
Loading