diff --git a/go.mod b/go.mod index d579ca06..4a828163 100644 --- a/go.mod +++ b/go.mod @@ -3,9 +3,9 @@ module github.com/ardanlabs/gotour go 1.22 require ( - github.com/blevesearch/bleve/v2 v2.4.0 - golang.org/x/net v0.26.0 - golang.org/x/sys v0.21.0 + github.com/blevesearch/bleve/v2 v2.4.1 + golang.org/x/net v0.27.0 + golang.org/x/sys v0.22.0 golang.org/x/tools v0.22.0 ) @@ -38,6 +38,6 @@ require ( github.com/mschoch/smat v0.2.0 // indirect github.com/yuin/goldmark v1.7.4 // indirect go.etcd.io/bbolt v1.3.10 // indirect - golang.org/x/mod v0.18.0 // indirect + golang.org/x/mod v0.19.0 // indirect google.golang.org/protobuf v1.34.2 // indirect ) diff --git a/go.sum b/go.sum index 4df51694..b80e5cdb 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhve github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bits-and-blooms/bitset v1.13.0 h1:bAQ9OPNFYbGHV6Nez0tmNI0RiEu7/hxlYJRUA0wFAVE= github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= -github.com/blevesearch/bleve/v2 v2.4.0 h1:2xyg+Wv60CFHYccXc+moGxbL+8QKT/dZK09AewHgKsg= -github.com/blevesearch/bleve/v2 v2.4.0/go.mod h1:IhQHoFAbHgWKYavb9rQgQEJJVMuY99cKdQ0wPpst2aY= +github.com/blevesearch/bleve/v2 v2.4.1 h1:8QWqsifq693mN3h6cSigKqkKUsUfv5hu0FDgz/4bFuA= +github.com/blevesearch/bleve/v2 v2.4.1/go.mod h1:Ezmvsouspi+uVwnDzjIsCeUIT0WuBKlicP5JZnExWzo= github.com/blevesearch/bleve_index_api v1.1.9 h1:Cpq0Lp3As0Gfk3+PmcoNDRKeI50C5yuFNpj0YlN/bOE= github.com/blevesearch/bleve_index_api v1.1.9/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8= github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM= @@ -71,15 +71,15 @@ github.com/yuin/goldmark v1.7.4 h1:BDXOHExt+A7gwPCJgPIIq7ENvceR7we7rOS9TNoLZeg= github.com/yuin/goldmark v1.7.4/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E= go.etcd.io/bbolt v1.3.10 
h1:+BqfJTcCzTItrop8mq/lbzL8wSGtj94UO/3U31shqG0= go.etcd.io/bbolt v1.3.10/go.mod h1:bK3UQLPJZly7IlNmV7uVHJDxfe5aK9Ll93e/74Y9oEQ= -golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= -golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= -golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= +golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= +golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= diff --git a/vendor/github.com/blevesearch/bleve/v2/README.md b/vendor/github.com/blevesearch/bleve/v2/README.md index dbe5b789..e113536f 100644 --- a/vendor/github.com/blevesearch/bleve/v2/README.md +++ b/vendor/github.com/blevesearch/bleve/v2/README.md @@ -16,10 +16,10 @@ A modern text indexing library in go * Index any go data structure (including JSON) * Intelligent defaults backed up by powerful configuration * Supported field 
types: - * Text, Numeric, Datetime, Boolean + * `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector` * Supported query types: * Term, Phrase, Match, Match Phrase, Prefix, Fuzzy - * Conjunction, Disjunction, Boolean (must/should/must_not) + * Conjunction, Disjunction, Boolean (`must`/`should`/`must_not`) * Term Range, Numeric Range, Date Range * [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md) * Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) diff --git a/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds/microseconds.go b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds/microseconds.go new file mode 100644 index 00000000..a0e2c949 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds/microseconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package microseconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_micro" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 / 1000 +var maxBound int64 = math.MaxInt64 / 1000 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // unix timestamp is microseconds since UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.UnixMicro(timestamp), Name, nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds/milliseconds.go b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds/milliseconds.go new file mode 100644 index 00000000..63826b45 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds/milliseconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package milliseconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_milli" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 / 1000000 +var maxBound int64 = math.MaxInt64 / 1000000 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // unix timestamp is milliseconds since UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.UnixMilli(timestamp), Name, nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds/nanoseconds.go b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds/nanoseconds.go new file mode 100644 index 00000000..8bb1ab1b --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds/nanoseconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2023 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package nanoseconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_nano" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 +var maxBound int64 = math.MaxInt64 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // unix timestamp is nanoseconds since UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.Unix(0, timestamp), Name, nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds/seconds.go b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds/seconds.go new file mode 100644 index 00000000..58e947c8 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds/seconds.go @@ -0,0 +1,52 @@ +// Copyright (c) 2014 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package seconds + +import ( + "math" + "strconv" + "time" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/registry" +) + +const Name = "unix_sec" + +type DateTimeParser struct { +} + +var minBound int64 = math.MinInt64 / 1000000000 +var maxBound int64 = math.MaxInt64 / 1000000000 + +func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) { + // unix timestamp is seconds since UNIX epoch + timestamp, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return time.Time{}, "", analysis.ErrInvalidTimestampString + } + if timestamp < minBound || timestamp > maxBound { + return time.Time{}, "", analysis.ErrInvalidTimestampRange + } + return time.Unix(timestamp, 0), Name, nil +} + +func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { + return &DateTimeParser{}, nil +} + +func init() { + registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) +} diff --git a/vendor/github.com/blevesearch/bleve/v2/document/field_vector_base64.go b/vendor/github.com/blevesearch/bleve/v2/document/field_vector_base64.go new file mode 100644 index 00000000..67825960 --- /dev/null +++ b/vendor/github.com/blevesearch/bleve/v2/document/field_vector_base64.go @@ -0,0 +1,149 @@ +// Copyright (c) 2024 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//go:build vectors +// +build vectors + +package document + +import ( + "encoding/base64" + "encoding/binary" + "fmt" + "math" + + "github.com/blevesearch/bleve/v2/size" + "github.com/blevesearch/bleve/v2/util" + index "github.com/blevesearch/bleve_index_api" +) + +type VectorBase64Field struct { + vectorField *VectorField + base64Encoding string +} + +func (n *VectorBase64Field) Size() int { + return n.vectorField.Size() +} + +func (n *VectorBase64Field) Name() string { + return n.vectorField.Name() +} + +func (n *VectorBase64Field) ArrayPositions() []uint64 { + return n.vectorField.ArrayPositions() +} + +func (n *VectorBase64Field) Options() index.FieldIndexingOptions { + return n.vectorField.Options() +} + +func (n *VectorBase64Field) NumPlainTextBytes() uint64 { + return n.vectorField.NumPlainTextBytes() +} + +func (n *VectorBase64Field) AnalyzedLength() int { + return n.vectorField.AnalyzedLength() +} + +func (n *VectorBase64Field) EncodedFieldType() byte { + return 'e' +} + +func (n *VectorBase64Field) AnalyzedTokenFrequencies() index.TokenFrequencies { + return n.vectorField.AnalyzedTokenFrequencies() +} + +func (n *VectorBase64Field) Analyze() { +} + +func (n *VectorBase64Field) Value() []byte { + return n.vectorField.Value() +} + +func (n *VectorBase64Field) GoString() string { + return fmt.Sprintf("&document.vectorFieldBase64Field{Name:%s, Options: %s, "+ + "Value: %+v}", n.vectorField.Name(), n.vectorField.Options(), n.vectorField.Value()) +} + +// For the sake of not polluting the API, we are keeping arrayPositions as a +// parameter, but it is not used. 
+func NewVectorBase64Field(name string, arrayPositions []uint64, vectorBase64 string, + dims int, similarity, vectorIndexOptimizedFor string) (*VectorBase64Field, error) { + + decodedVector, err := DecodeVector(vectorBase64) + if err != nil { + return nil, err + } + + return &VectorBase64Field{ + vectorField: NewVectorFieldWithIndexingOptions(name, arrayPositions, + decodedVector, dims, similarity, + vectorIndexOptimizedFor, DefaultVectorIndexingOptions), + + base64Encoding: vectorBase64, + }, nil +} + +// This function takes a base64 encoded string and decodes it into +// a vector. +func DecodeVector(encodedValue string) ([]float32, error) { + // We first decode the encoded string into a byte array. + decodedString, err := base64.StdEncoding.DecodeString(encodedValue) + if err != nil { + return nil, err + } + + // The array is expected to be divisible by 4 because each float32 + // should occupy 4 bytes + if len(decodedString)%size.SizeOfFloat32 != 0 { + return nil, fmt.Errorf("decoded byte array not divisible by %d", size.SizeOfFloat32) + } + dims := int(len(decodedString) / size.SizeOfFloat32) + + if dims <= 0 { + return nil, fmt.Errorf("unable to decode encoded vector") + } + + decodedVector := make([]float32, dims) + + // We iterate through the array 4 bytes at a time and convert each of + // them to a float32 value by reading them in a little endian notation + for i := 0; i < dims; i++ { + bytes := decodedString[i*size.SizeOfFloat32 : (i+1)*size.SizeOfFloat32] + entry := math.Float32frombits(binary.LittleEndian.Uint32(bytes)) + if !util.IsValidFloat32(float64(entry)) { + return nil, fmt.Errorf("invalid float32 value: %f", entry) + } + decodedVector[i] = entry + } + + return decodedVector, nil +} + +func (n *VectorBase64Field) Vector() []float32 { + return n.vectorField.Vector() +} + +func (n *VectorBase64Field) Dims() int { + return n.vectorField.Dims() +} + +func (n *VectorBase64Field) Similarity() string { + return n.vectorField.Similarity() +} + +func (n 
*VectorBase64Field) IndexOptimizedFor() string { + return n.vectorField.IndexOptimizedFor() +} diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md b/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md index 9794aed7..fe2abde5 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/README.md @@ -218,7 +218,7 @@ A term search for term T in field F will look something like this: } ``` -The searchResultPostings will be a new implementation of the TermFieldReader inteface. +The searchResultPostings will be a new implementation of the TermFieldReader interface. As a reminder this interface is: diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/event.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/event.go index 31c9e80c..0f653ccf 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/event.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/event.go @@ -22,7 +22,8 @@ var RegistryAsyncErrorCallbacks = map[string]func(error, string){} // RegistryEventCallbacks should be treated as read-only after // process init()'ialization. -var RegistryEventCallbacks = map[string]func(Event){} +// In the event of not having a callback, these return true. +var RegistryEventCallbacks = map[string]func(Event) bool{} // Event represents the information provided in an OnEvent() callback. type Event struct { @@ -62,3 +63,7 @@ var EventKindMergeTaskIntroductionStart = EventKind(7) // EventKindMergeTaskIntroduction is fired when the merger has completed // the introduction of merged segment from a single merge task. var EventKindMergeTaskIntroduction = EventKind(8) + +// EventKindPreMergeCheck is fired before the merge begins to check if +// the caller should proceed with the merge. 
+var EventKindPreMergeCheck = EventKind(9) diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go index 339ec596..b74504ca 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/merge.go @@ -72,6 +72,17 @@ OUTER: ctrlMsg = ctrlMsgDflt } if ctrlMsg != nil { + continueMerge := s.fireEvent(EventKindPreMergeCheck, 0) + // The default, if there's no handler, is to continue the merge. + if !continueMerge { + // If it's decided that this merge can't take place now, + // begin the merge process all over again. + // Retry instead of blocking/waiting here since a long wait + // can result in more segments introduced i.e. s.root will + // be updated. + continue OUTER + } + startTime := time.Now() // lets get started diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go index 75235066..b4e7a372 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/mergeplan/merge_plan.go @@ -37,6 +37,11 @@ type Segment interface { // Size of the live data of the segment; i.e., FullSize() minus // any logical deletions. LiveSize() int64 + + HasVector() bool + + // Size of the persisted segment file. + FileSize() int64 } // Plan() will functionally compute a merge plan. A segment will be @@ -76,6 +81,11 @@ type MergePlanOptions struct { // planner’s predicted sizes. MaxSegmentSize int64 + // Max size (in bytes) of the persisted segment file that contains the + // vectors. This is used to prevent merging of segments that + // contain vectors that are too large. + MaxSegmentFileSize int64 + // The growth factor for each tier in a staircase of idealized // segments computed by CalcBudget(). 
TierGrowth float64 @@ -128,6 +138,7 @@ var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limi var DefaultMergePlanOptions = MergePlanOptions{ MaxSegmentsPerTier: 10, MaxSegmentSize: 5000000, + MaxSegmentFileSize: 4000000000, // 4GB TierGrowth: 10.0, SegmentsPerMergeTask: 10, FloorSegmentSize: 2000, @@ -170,8 +181,17 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { minLiveSize = segment.LiveSize() } + isEligible := segment.LiveSize() < o.MaxSegmentSize/2 + // An eligible segment (based on #documents) may be too large + // and thus need a stricter check based on the file size. + // This is particularly important for segments that contain + // vectors. + if isEligible && segment.HasVector() && o.MaxSegmentFileSize > 0 { + isEligible = segment.FileSize() < o.MaxSegmentFileSize/2 + } + // Only small-enough segments are eligible. - if segment.LiveSize() < o.MaxSegmentSize/2 { + if isEligible { eligibles = append(eligibles, segment) eligiblesLiveSize += segment.LiveSize() } @@ -215,14 +235,25 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { for startIdx := 0; startIdx < len(eligibles); startIdx++ { var roster []Segment var rosterLiveSize int64 + var rosterFileSize int64 // tracked for segments with vectors; the limit is only enforced when MaxSegmentFileSize > 0, matching the eligibility check for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ { eligible := eligibles[idx] - if rosterLiveSize+eligible.LiveSize() < o.MaxSegmentSize { - roster = append(roster, eligible) - rosterLiveSize += eligible.LiveSize() + if rosterLiveSize+eligible.LiveSize() >= o.MaxSegmentSize { + continue } + + if eligible.HasVector() { + efs := eligible.FileSize() + if o.MaxSegmentFileSize > 0 && rosterFileSize+efs >= o.MaxSegmentFileSize { + continue + } + rosterFileSize += efs + } + + roster = append(roster, eligible) + rosterLiveSize += eligible.LiveSize() } if len(roster) > 0 { diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize_knn.go 
b/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize_knn.go index 330e214f..b93be465 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize_knn.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/optimize_knn.go @@ -77,7 +77,7 @@ func (o *OptimizeVR) Finish() error { wg.Done() }() for field, vrs := range o.vrs { - vecIndex, err := segment.InterpretVectorIndex(field) + vecIndex, err := segment.InterpretVectorIndex(field, origSeg.deleted) if err != nil { errorsM.Lock() errors = append(errors, err) @@ -91,7 +91,7 @@ func (o *OptimizeVR) Finish() error { for _, vr := range vrs { // for each VR, populate postings list and iterators // by passing the obtained vector index and getting similar vectors. - pl, err := vecIndex.Search(vr.vector, vr.k, origSeg.deleted) + pl, err := vecIndex.Search(vr.vector, vr.k) if err != nil { errorsM.Lock() errors = append(errors, err) diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go index afd518dd..ed0fc40e 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/persister.go @@ -549,11 +549,14 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string, val := make([]byte, 8) bytesWritten := atomic.LoadUint64(&snapshot.parent.stats.TotBytesWrittenAtIndexTime) binary.LittleEndian.PutUint64(val, bytesWritten) - internalBucket.Put(TotBytesWrittenKey, val) + err = internalBucket.Put(TotBytesWrittenKey, val) + if err != nil { + return nil, nil, err + } } - var filenames []string - newSegmentPaths := make(map[uint64]string) + filenames := make([]string, 0, len(snapshot.segment)) + newSegmentPaths := make(map[uint64]string, len(snapshot.segment)) // first ensure that each segment in this snapshot has been persisted for _, segmentSnapshot := range snapshot.segment { @@ -982,7 +985,7 @@ func 
getTimeSeriesSnapshots(maxDataPoints int, interval time.Duration, return ptr, rv } -// getProtectedEpochs aims to fetch the epochs keep based on a timestamp basis. +// getProtectedSnapshots aims to fetch the epochs keep based on a timestamp basis. // It tries to get NumSnapshotsToKeep snapshots, each of which are separated // by a time duration of RollbackSamplingInterval. func getProtectedSnapshots(rollbackSamplingInterval time.Duration, @@ -1133,7 +1136,7 @@ func (s *Scorch) removeOldZapFiles() error { for _, f := range files { fname := f.Name() if filepath.Ext(fname) == ".zap" { - if _, exists := liveFileNames[fname]; !exists && !s.ineligibleForRemoval[fname] { + if _, exists := liveFileNames[fname]; !exists && !s.ineligibleForRemoval[fname] && (s.copyScheduled[fname] <= 0) { err := os.Remove(s.path + string(os.PathSeparator) + fname) if err != nil { log.Printf("got err removing file: %s, err: %v", fname, err) diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go index 2e6435ee..7966d844 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/scorch.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "os" + "path/filepath" "sync" "sync/atomic" "time" @@ -49,6 +50,7 @@ type Scorch struct { unsafeBatch bool rootLock sync.RWMutex + root *IndexSnapshot // holds 1 ref-count on the root rootPersisted []chan error // closed when root is persisted persistedCallbacks []index.BatchCallback @@ -56,6 +58,12 @@ type Scorch struct { eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC. ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. + // keeps track of segments scheduled for online copy/backup operation. Each segment's filename maps to + // the count of copy schedules. Segments with non-zero counts are protected from removal by the cleanup + // operation. 
Counts decrement upon successful copy, allowing removal of segments with zero or absent counts. + // must be accessed within the rootLock as it is accessed by the asynchronous cleanup routine. + copyScheduled map[string]int + numSnapshotsToKeep int rollbackRetentionFactor float64 checkPoints []*snapshotMetaData @@ -69,7 +77,7 @@ type Scorch struct { rootBolt *bolt.DB asyncTasks sync.WaitGroup - onEvent func(event Event) + onEvent func(event Event) bool onAsyncError func(err error, path string) forceMergeRequestCh chan *mergerCtrl @@ -112,6 +120,7 @@ func NewScorch(storeName string, ineligibleForRemoval: map[string]bool{}, forceMergeRequestCh: make(chan *mergerCtrl, 1), segPlugin: defaultSegmentPlugin, + copyScheduled: map[string]int{}, } forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config) @@ -175,12 +184,14 @@ func (s *Scorch) NumEventsBlocking() uint64 { return eventsStarted - eventsCompleted } -func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { +func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) bool { + res := true if s.onEvent != nil { atomic.AddUint64(&s.stats.TotEventTriggerStarted, 1) - s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) + res = s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) atomic.AddUint64(&s.stats.TotEventTriggerCompleted, 1) } + return res } func (s *Scorch) fireAsyncError(err error) { @@ -822,6 +833,10 @@ func (fs *fieldStats) Aggregate(stats segment.FieldStats) { // Returns the stats map func (fs *fieldStats) Fetch() map[string]map[string]uint64 { + if fs == nil { + return nil + } + return fs.statMap } @@ -832,3 +847,34 @@ func newFieldStats() *fieldStats { } return rv } + +// CopyReader returns a low-level accessor for index data, ensuring persisted segments +// remain on disk for backup, preventing race conditions with the persister/merger cleanup. +// Close the reader after backup to allow segment removal by the persister/merger. 
+func (s *Scorch) CopyReader() index.CopyReader { + s.rootLock.Lock() + defer s.rootLock.Unlock() + rv := s.root + if rv == nil { + // return an untyped nil so that a caller's nil check on the interface works + return nil + } + rv.AddRef() + var fileName string + // schedule a backup for all the segments from the root. Note that both + // the unpersisted and persisted segments are scheduled for backup, because + // during the backup the unpersisted segments may get persisted and hence both + // need to be protected from removal by the cleanup routine during the online backup + for _, seg := range rv.segment { + if perSeg, ok := seg.segment.(segment.PersistedSegment); ok { + // segment is persisted + fileName = filepath.Base(perSeg.Path()) + } else { + // segment is not persisted: use the name of the segment file + // that is generated if the segment is persisted in the future. + fileName = zapFileName(seg.id) + } + rv.parent.copyScheduled[fileName]++ + } + return rv +} diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go index 59828e87..f0e7ae1c 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_index.go @@ -905,3 +905,26 @@ func (is *IndexSnapshot) GetSpatialAnalyzerPlugin(typ string) ( } return rv, nil } + +func (is *IndexSnapshot) CloseCopyReader() error { + // first unmark the segments that were marked for backup by this index snapshot + is.parent.rootLock.Lock() + for _, seg := range is.segment { + var fileName string + if perSeg, ok := seg.segment.(segment.PersistedSegment); ok { + // segment is persisted + fileName = filepath.Base(perSeg.Path()) + } else { + // segment is not persisted + // the name of the segment file that is generated if the + // segment is persisted in the future. 
+ fileName = zapFileName(seg.id) + } + if is.parent.copyScheduled[fileName]--; is.parent.copyScheduled[fileName] <= 0 { + delete(is.parent.copyScheduled, fileName) + } + } + is.parent.rootLock.Unlock() + // close the index snapshot normally + return is.Close() +} diff --git a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go index 1c14af72..96e59a31 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go +++ b/vendor/github.com/blevesearch/bleve/v2/index/scorch/snapshot_segment.go @@ -16,6 +16,7 @@ package scorch import ( "bytes" + "os" "sync" "sync/atomic" @@ -66,6 +67,31 @@ func (s *SegmentSnapshot) LiveSize() int64 { return int64(s.Count()) } +func (s *SegmentSnapshot) HasVector() bool { + // number of vectors, for each vector field in the segment + numVecs := s.stats.Fetch()["num_vectors"] + return len(numVecs) > 0 +} + +func (s *SegmentSnapshot) FileSize() int64 { + ps, ok := s.segment.(segment.PersistedSegment) + if !ok { + return 0 + } + + path := ps.Path() + if path == "" { + return 0 + } + + fi, err := os.Stat(path) + if err != nil { + return 0 + } + + return fi.Size() +} + func (s *SegmentSnapshot) Close() error { return s.segment.Close() } diff --git a/vendor/github.com/blevesearch/bleve/v2/index_impl.go b/vendor/github.com/blevesearch/bleve/v2/index_impl.go index a5254735..55212e3e 100644 --- a/vendor/github.com/blevesearch/bleve/v2/index_impl.go +++ b/vendor/github.com/blevesearch/bleve/v2/index_impl.go @@ -25,6 +25,10 @@ import ( "sync/atomic" "time" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds" + "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds" "github.com/blevesearch/bleve/v2/document" 
"github.com/blevesearch/bleve/v2/index/scorch" "github.com/blevesearch/bleve/v2/index/upsidedown" @@ -738,10 +742,28 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest, datetime, layout, err := docF.DateTime() if err == nil { if layout == "" { - // layout not set probably means it was indexed as a timestamp - value = strconv.FormatInt(datetime.UnixNano(), 10) + // missing layout means we fallback to + // the default layout which is RFC3339 + value = datetime.Format(time.RFC3339) } else { - value = datetime.Format(layout) + // the layout here can now either be representative + // of an actual datetime layout or a timestamp + switch layout { + case seconds.Name: + value = strconv.FormatInt(datetime.Unix(), 10) + case milliseconds.Name: + value = strconv.FormatInt(datetime.UnixMilli(), 10) + case microseconds.Name: + value = strconv.FormatInt(datetime.UnixMicro(), 10) + case nanoseconds.Name: + value = strconv.FormatInt(datetime.UnixNano(), 10) + default: + // the layout for formatting the date to a string + // is provided by a datetime parser which is not + // handling the timestamp case, hence the layout + // can be directly used to format the date + value = datetime.Format(layout) + } } } case index.BooleanField: @@ -1052,22 +1074,23 @@ func (i *indexImpl) CopyTo(d index.Directory) (err error) { return ErrorIndexClosed } - indexReader, err := i.i.Reader() - if err != nil { - return err + copyIndex, ok := i.i.(index.CopyIndex) + if !ok { + return fmt.Errorf("index implementation does not support copy reader") + } + + copyReader := copyIndex.CopyReader() + if copyReader == nil { + return fmt.Errorf("index's copyReader is nil") } + defer func() { - if cerr := indexReader.Close(); err == nil && cerr != nil { + if cerr := copyReader.CloseCopyReader(); err == nil && cerr != nil { err = cerr } }() - irc, ok := indexReader.(IndexCopyable) - if !ok { - return fmt.Errorf("index implementation does not support copy") - } - - err = irc.CopyTo(d) + err 
= copyReader.CopyTo(d) if err != nil { return fmt.Errorf("error copying index metadata: %v", err) } diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/document.go b/vendor/github.com/blevesearch/bleve/v2/mapping/document.go index 73bb124d..3131f33b 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/document.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/document.go @@ -443,6 +443,8 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, fieldMapping.processGeoShape(property, pathString, path, indexes, context) } else if fieldMapping.Type == "geopoint" { fieldMapping.processGeoPoint(property, pathString, path, indexes, context) + } else if fieldMapping.Type == "vector_base64" { + fieldMapping.processVectorBase64(property, pathString, path, indexes, context) } else { fieldMapping.processString(propertyValueString, pathString, path, indexes, context) } diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/field.go b/vendor/github.com/blevesearch/bleve/v2/mapping/field.go index f4339b38..5c064fdd 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/field.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/field.go @@ -102,7 +102,7 @@ func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { return rv } -// NewKeyworFieldMapping returns a default field mapping for text with analyzer "keyword". +// NewKeywordFieldMapping returns a default field mapping for text with analyzer "keyword". 
func NewKeywordFieldMapping() *FieldMapping { return &FieldMapping{ Type: "text", diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/index.go b/vendor/github.com/blevesearch/bleve/v2/mapping/index.go index 171ee1a7..fe8c9671 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/index.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/index.go @@ -437,24 +437,16 @@ func (im *IndexMappingImpl) FieldAnalyzer(field string) string { func (im *IndexMappingImpl) FieldMappingForPath(path string) FieldMapping { if im.TypeMapping != nil { for _, v := range im.TypeMapping { - for field, property := range v.Properties { - for _, v1 := range property.Fields { - if field == path { - // Return field mapping if the name matches the path param. - return *v1 - } - } + fm := v.fieldDescribedByPath(path) + if fm != nil { + return *fm } } } - for field, property := range im.DefaultMapping.Properties { - for _, v1 := range property.Fields { - if field == path { - // Return field mapping if the name matches the path param. 
- return *v1 - } - } + fm := im.DefaultMapping.fieldDescribedByPath(path) + if fm != nil { + return *fm } return FieldMapping{} diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_no_vectors.go b/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_no_vectors.go index f9f35f57..90cb1e22 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_no_vectors.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_no_vectors.go @@ -21,11 +21,20 @@ func NewVectorFieldMapping() *FieldMapping { return nil } +func NewVectorBase64FieldMapping() *FieldMapping { + return nil +} + func (fm *FieldMapping) processVector(propertyMightBeVector interface{}, pathString string, path []string, indexes []uint64, context *walkContext) bool { return false } +func (fm *FieldMapping) processVectorBase64(propertyMightBeVector interface{}, + pathString string, path []string, indexes []uint64, context *walkContext) { + +} + // ----------------------------------------------------------------------------- // document validation functions diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_vectors.go b/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_vectors.go index a0b71260..a3879c4b 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_vectors.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping/mapping_vectors.go @@ -26,10 +26,11 @@ import ( index "github.com/blevesearch/bleve_index_api" ) -// Min and Max allowed dimensions for a vector field -const ( +// Min and Max allowed dimensions for a vector field; +// p.s must be set/updated at process init() _only_ +var ( MinVectorDims = 1 - MaxVectorDims = 2048 + MaxVectorDims = 4096 ) func NewVectorFieldMapping() *FieldMapping { @@ -43,6 +44,17 @@ func NewVectorFieldMapping() *FieldMapping { } } +func NewVectorBase64FieldMapping() *FieldMapping { + return &FieldMapping{ + Type: "vector_base64", + Store: false, + Index: true, + IncludeInAll: false, + DocValues: false, + 
SkipFreqNorm: true, + } +} + // validate and process a flat vector func processFlatVector(vecV reflect.Value, dims int) ([]float32, bool) { if vecV.Len() != dims { @@ -140,13 +152,35 @@ func (fm *FieldMapping) processVector(propertyMightBeVector interface{}, return true } +func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interface{}, + pathString string, path []string, indexes []uint64, context *walkContext) { + encodedString, ok := propertyMightBeVectorBase64.(string) + if !ok { + return + } + + decodedVector, err := document.DecodeVector(encodedString) + if err != nil || len(decodedVector) != fm.Dims { + return + } + + fieldName := getFieldName(pathString, path, fm) + options := fm.Options() + field := document.NewVectorFieldWithIndexingOptions(fieldName, indexes, decodedVector, + fm.Dims, fm.Similarity, fm.VectorIndexOptimizedFor, options) + context.doc.AddField(field) + + // "_all" composite field is not applicable for vector_base64 field + context.excludedFromAll = append(context.excludedFromAll, fieldName) +} + // ----------------------------------------------------------------------------- // document validation functions func validateFieldMapping(field *FieldMapping, parentName string, fieldAliasCtx map[string]*FieldMapping) error { switch field.Type { - case "vector": + case "vector", "vector_base64": return validateVectorFieldAlias(field, parentName, fieldAliasCtx) default: // non-vector field return validateFieldType(field) diff --git a/vendor/github.com/blevesearch/bleve/v2/mapping_vector.go b/vendor/github.com/blevesearch/bleve/v2/mapping_vector.go index 59431386..c73dac9e 100644 --- a/vendor/github.com/blevesearch/bleve/v2/mapping_vector.go +++ b/vendor/github.com/blevesearch/bleve/v2/mapping_vector.go @@ -22,3 +22,7 @@ import "github.com/blevesearch/bleve/v2/mapping" func NewVectorFieldMapping() *mapping.FieldMapping { return mapping.NewVectorFieldMapping() } + +func NewVectorBase64FieldMapping() *mapping.FieldMapping { + 
return mapping.NewVectorBase64FieldMapping() +} diff --git a/vendor/github.com/blevesearch/bleve/v2/query.go b/vendor/github.com/blevesearch/bleve/v2/query.go index 3af750a0..93e662b9 100644 --- a/vendor/github.com/blevesearch/bleve/v2/query.go +++ b/vendor/github.com/blevesearch/bleve/v2/query.go @@ -83,7 +83,7 @@ func NewDateRangeStringQuery(start, end string) *query.DateRangeStringQuery { return query.NewDateRangeStringQuery(start, end) } -// NewDateRangeStringQuery creates a new Query for ranges +// NewDateRangeInclusiveStringQuery creates a new Query for ranges // of date values. // Date strings are parsed using the DateTimeParser set using // diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/date_range_string.go b/vendor/github.com/blevesearch/bleve/v2/search/query/date_range_string.go index b5e5c170..ac107195 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/date_range_string.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/date_range_string.go @@ -53,7 +53,7 @@ func NewDateRangeStringQuery(start, end string) *DateRangeStringQuery { return NewDateRangeStringInclusiveQuery(start, end, nil, nil) } -// NewDateRangeStringQuery creates a new Query for ranges +// NewDateRangeStringInclusiveQuery creates a new Query for ranges // of date values. // Date strings are parsed using the DateTimeParser field of the query struct, // which is a custom date time parser defined in the index mapping. diff --git a/vendor/github.com/blevesearch/bleve/v2/search/query/query.go b/vendor/github.com/blevesearch/bleve/v2/search/query/query.go index 26ab656e..d263a0e5 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/query/query.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/query/query.go @@ -65,7 +65,7 @@ type ValidatableQuery interface { Validate() error } -// ParseQuery deserializes a JSON representation of +// ParsePreSearchData deserializes a JSON representation of // a PreSearchData object. 
func ParsePreSearchData(input []byte) (map[string]interface{}, error) { var rv map[string]interface{} diff --git a/vendor/github.com/blevesearch/bleve/v2/search/search.go b/vendor/github.com/blevesearch/bleve/v2/search/search.go index 515a320f..8cc5115d 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search/search.go +++ b/vendor/github.com/blevesearch/bleve/v2/search/search.go @@ -147,7 +147,7 @@ type DocumentMatch struct { Index string `json:"index,omitempty"` ID string `json:"id"` IndexInternalID index.IndexInternalID `json:"-"` - Score float64 `json:"score,omitempty"` + Score float64 `json:"score"` Expl *Explanation `json:"explanation,omitempty"` Locations FieldTermLocationMap `json:"locations,omitempty"` Fragments FieldFragmentMap `json:"fragments,omitempty"` diff --git a/vendor/github.com/blevesearch/bleve/v2/search_knn.go b/vendor/github.com/blevesearch/bleve/v2/search_knn.go index 68377141..7ea21d37 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search_knn.go +++ b/vendor/github.com/blevesearch/bleve/v2/search_knn.go @@ -23,18 +23,22 @@ import ( "fmt" "sort" + "github.com/blevesearch/bleve/v2/document" "github.com/blevesearch/bleve/v2/search" "github.com/blevesearch/bleve/v2/search/collector" "github.com/blevesearch/bleve/v2/search/query" index "github.com/blevesearch/bleve_index_api" ) +const supportForVectorSearch = true + type knnOperator string // Must be updated only at init var BleveMaxK = int64(10000) type SearchRequest struct { + ClientContextID string `json:"client_context_id,omitempty"` Query query.Query `json:"query"` Size int `json:"size"` From int `json:"from"` @@ -66,11 +70,13 @@ type SearchRequest struct { sortFunc func(sort.Interface) } +// Vector takes precedence over vectorBase64 in case both fields are given type KNNRequest struct { - Field string `json:"field"` - Vector []float32 `json:"vector"` - K int64 `json:"k"` - Boost *query.Boost `json:"boost,omitempty"` + Field string `json:"field"` + Vector []float32 `json:"vector"` + 
VectorBase64 string `json:"vector_base64"` + K int64 `json:"k"` + Boost *query.Boost `json:"boost,omitempty"` } func (r *SearchRequest) AddKNN(field string, vector []float32, k int64, boost float64) { @@ -230,6 +236,15 @@ func validateKNN(req *SearchRequest) error { if q == nil { return fmt.Errorf("knn query cannot be nil") } + if len(q.Vector) == 0 && q.VectorBase64 != "" { + // consider vector_base64 only if vector is not provided + decodedVector, err := document.DecodeVector(q.VectorBase64) + if err != nil { + return err + } + + q.Vector = decodedVector + } if q.K <= 0 || len(q.Vector) == 0 { return fmt.Errorf("k must be greater than 0 and vector must be non-empty") } diff --git a/vendor/github.com/blevesearch/bleve/v2/search_no_knn.go b/vendor/github.com/blevesearch/bleve/v2/search_no_knn.go index aff82611..bb72e15a 100644 --- a/vendor/github.com/blevesearch/bleve/v2/search_no_knn.go +++ b/vendor/github.com/blevesearch/bleve/v2/search_no_knn.go @@ -28,6 +28,8 @@ import ( index "github.com/blevesearch/bleve_index_api" ) +const supportForVectorSearch = false + // A SearchRequest describes all the parameters // needed to search the index. // Query is required. 
diff --git a/vendor/github.com/blevesearch/bleve/v2/util/extract.go b/vendor/github.com/blevesearch/bleve/v2/util/extract.go index e963d0c3..0d3decfa 100644 --- a/vendor/github.com/blevesearch/bleve/v2/util/extract.go +++ b/vendor/github.com/blevesearch/bleve/v2/util/extract.go @@ -48,7 +48,7 @@ func ExtractNumericValFloat32(v interface{}) (float32, bool) { switch { case val.CanFloat(): floatVal := val.Float() - if floatVal > math.MaxFloat32 { + if !IsValidFloat32(floatVal) { return 0, false } return float32(floatVal), true @@ -60,3 +60,7 @@ func ExtractNumericValFloat32(v interface{}) (float32, bool) { return 0, false } + +func IsValidFloat32(val float64) bool { + return !math.IsNaN(val) && !math.IsInf(val, 0) && val <= math.MaxFloat32 +} diff --git a/vendor/golang.org/x/mod/module/module.go b/vendor/golang.org/x/mod/module/module.go index cac1a899..2a364b22 100644 --- a/vendor/golang.org/x/mod/module/module.go +++ b/vendor/golang.org/x/mod/module/module.go @@ -506,7 +506,6 @@ var badWindowsNames = []string{ "PRN", "AUX", "NUL", - "COM0", "COM1", "COM2", "COM3", @@ -516,7 +515,6 @@ var badWindowsNames = []string{ "COM7", "COM8", "COM9", - "LPT0", "LPT1", "LPT2", "LPT3", diff --git a/vendor/golang.org/x/sys/unix/mremap.go b/vendor/golang.org/x/sys/unix/mremap.go index fd45fe52..3a5e776f 100644 --- a/vendor/golang.org/x/sys/unix/mremap.go +++ b/vendor/golang.org/x/sys/unix/mremap.go @@ -50,3 +50,8 @@ func (m *mremapMmapper) Mremap(oldData []byte, newLength int, flags int) (data [ func Mremap(oldData []byte, newLength int, flags int) (data []byte, err error) { return mapper.Mremap(oldData, newLength, flags) } + +func MremapPtr(oldAddr unsafe.Pointer, oldSize uintptr, newAddr unsafe.Pointer, newSize uintptr, flags int) (ret unsafe.Pointer, err error) { + xaddr, err := mapper.mremap(uintptr(oldAddr), oldSize, newSize, flags, uintptr(newAddr)) + return unsafe.Pointer(xaddr), err +} diff --git a/vendor/golang.org/x/sys/unix/syscall_darwin.go 
b/vendor/golang.org/x/sys/unix/syscall_darwin.go index 59542a89..4cc7b005 100644 --- a/vendor/golang.org/x/sys/unix/syscall_darwin.go +++ b/vendor/golang.org/x/sys/unix/syscall_darwin.go @@ -542,6 +542,18 @@ func SysctlKinfoProcSlice(name string, args ...int) ([]KinfoProc, error) { } } +//sys pthread_chdir_np(path string) (err error) + +func PthreadChdir(path string) (err error) { + return pthread_chdir_np(path) +} + +//sys pthread_fchdir_np(fd int) (err error) + +func PthreadFchdir(fd int) (err error) { + return pthread_fchdir_np(fd) +} + //sys sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) //sys shmat(id int, addr uintptr, flag int) (ret uintptr, err error) diff --git a/vendor/golang.org/x/sys/unix/syscall_unix.go b/vendor/golang.org/x/sys/unix/syscall_unix.go index 77081de8..4e92e5aa 100644 --- a/vendor/golang.org/x/sys/unix/syscall_unix.go +++ b/vendor/golang.org/x/sys/unix/syscall_unix.go @@ -154,6 +154,15 @@ func Munmap(b []byte) (err error) { return mapper.Munmap(b) } +func MmapPtr(fd int, offset int64, addr unsafe.Pointer, length uintptr, prot int, flags int) (ret unsafe.Pointer, err error) { + xaddr, err := mapper.mmap(uintptr(addr), length, prot, flags, fd, offset) + return unsafe.Pointer(xaddr), err +} + +func MunmapPtr(addr unsafe.Pointer, length uintptr) (err error) { + return mapper.munmap(uintptr(addr), length) +} + func Read(fd int, p []byte) (n int, err error) { n, err = read(fd, p) if raceenabled { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go index ccb02f24..07642c30 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.go @@ -760,6 +760,39 @@ var libc_sysctl_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func pthread_chdir_np(path string) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(path) + 
if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_pthread_chdir_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_chdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_chdir_np pthread_chdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func pthread_fchdir_np(fd int) (err error) { + _, _, e1 := syscall_syscall(libc_pthread_fchdir_np_trampoline_addr, uintptr(fd), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_fchdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_fchdir_np pthread_fchdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s index 8b8bb284..923e08cb 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_amd64.s @@ -228,6 +228,16 @@ TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) +TEXT libc_pthread_chdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_chdir_np(SB) +GLOBL ·libc_pthread_chdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_chdir_np_trampoline_addr(SB)/8, $libc_pthread_chdir_np_trampoline<>(SB) + +TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_fchdir_np(SB) +GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 +DATA 
·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go index 1b40b997..7d73dda6 100644 --- a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.go @@ -760,6 +760,39 @@ var libc_sysctl_trampoline_addr uintptr // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT +func pthread_chdir_np(path string) (err error) { + var _p0 *byte + _p0, err = BytePtrFromString(path) + if err != nil { + return + } + _, _, e1 := syscall_syscall(libc_pthread_chdir_np_trampoline_addr, uintptr(unsafe.Pointer(_p0)), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_chdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_chdir_np pthread_chdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + +func pthread_fchdir_np(fd int) (err error) { + _, _, e1 := syscall_syscall(libc_pthread_fchdir_np_trampoline_addr, uintptr(fd), 0, 0) + if e1 != 0 { + err = errnoErr(e1) + } + return +} + +var libc_pthread_fchdir_np_trampoline_addr uintptr + +//go:cgo_import_dynamic libc_pthread_fchdir_np pthread_fchdir_np "/usr/lib/libSystem.B.dylib" + +// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT + func sendfile(infd int, outfd int, offset int64, len *int64, hdtr unsafe.Pointer, flags int) (err error) { _, _, e1 := syscall_syscall6(libc_sendfile_trampoline_addr, uintptr(infd), uintptr(outfd), uintptr(offset), uintptr(unsafe.Pointer(len)), uintptr(hdtr), uintptr(flags)) if e1 != 0 { diff --git a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s index 08362c1a..05770011 100644 --- 
a/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s +++ b/vendor/golang.org/x/sys/unix/zsyscall_darwin_arm64.s @@ -228,6 +228,16 @@ TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) +TEXT libc_pthread_chdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_chdir_np(SB) +GLOBL ·libc_pthread_chdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_chdir_np_trampoline_addr(SB)/8, $libc_pthread_chdir_np_trampoline<>(SB) + +TEXT libc_pthread_fchdir_np_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_pthread_fchdir_np(SB) +GLOBL ·libc_pthread_fchdir_np_trampoline_addr(SB), RODATA, $8 +DATA ·libc_pthread_fchdir_np_trampoline_addr(SB)/8, $libc_pthread_fchdir_np_trampoline<>(SB) + TEXT libc_sendfile_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_sendfile(SB) GLOBL ·libc_sendfile_trampoline_addr(SB), RODATA, $8 diff --git a/vendor/golang.org/x/sys/windows/security_windows.go b/vendor/golang.org/x/sys/windows/security_windows.go index 6f7d2ac7..97651b5b 100644 --- a/vendor/golang.org/x/sys/windows/security_windows.go +++ b/vendor/golang.org/x/sys/windows/security_windows.go @@ -894,7 +894,7 @@ type ACL struct { aclRevision byte sbz1 byte aclSize uint16 - aceCount uint16 + AceCount uint16 sbz2 uint16 } @@ -1087,6 +1087,27 @@ type EXPLICIT_ACCESS struct { Trustee TRUSTEE } +// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-ace_header +type ACE_HEADER struct { + AceType uint8 + AceFlags uint8 + AceSize uint16 +} + +// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-access_allowed_ace +type ACCESS_ALLOWED_ACE struct { + Header ACE_HEADER + Mask ACCESS_MASK + SidStart uint32 +} + +const ( + // Constants for AceType + // https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-ace_header + ACCESS_ALLOWED_ACE_TYPE = 0 + ACCESS_DENIED_ACE_TYPE = 1 +) + // This type is the union inside of TRUSTEE and must be created using one 
of the TrusteeValueFrom* functions. type TrusteeValue uintptr @@ -1158,6 +1179,7 @@ type OBJECTS_AND_NAME struct { //sys makeSelfRelativeSD(absoluteSD *SECURITY_DESCRIPTOR, selfRelativeSD *SECURITY_DESCRIPTOR, selfRelativeSDSize *uint32) (err error) = advapi32.MakeSelfRelativeSD //sys setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCESS, oldACL *ACL, newACL **ACL) (ret error) = advapi32.SetEntriesInAclW +//sys GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) = advapi32.GetAce // Control returns the security descriptor control bits. func (sd *SECURITY_DESCRIPTOR) Control() (control SECURITY_DESCRIPTOR_CONTROL, revision uint32, err error) { diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go index 9f73df75..eba76101 100644 --- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go +++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go @@ -91,6 +91,7 @@ var ( procEnumServicesStatusExW = modadvapi32.NewProc("EnumServicesStatusExW") procEqualSid = modadvapi32.NewProc("EqualSid") procFreeSid = modadvapi32.NewProc("FreeSid") + procGetAce = modadvapi32.NewProc("GetAce") procGetLengthSid = modadvapi32.NewProc("GetLengthSid") procGetNamedSecurityInfoW = modadvapi32.NewProc("GetNamedSecurityInfoW") procGetSecurityDescriptorControl = modadvapi32.NewProc("GetSecurityDescriptorControl") @@ -1224,6 +1225,14 @@ func setEntriesInAcl(countExplicitEntries uint32, explicitEntries *EXPLICIT_ACCE return } +func GetAce(acl *ACL, aceIndex uint32, pAce **ACCESS_ALLOWED_ACE) (ret error) { + r0, _, _ := syscall.Syscall(procGetAce.Addr(), 3, uintptr(unsafe.Pointer(acl)), uintptr(aceIndex), uintptr(unsafe.Pointer(pAce))) + if r0 == 0 { + ret = GetLastError() + } + return +} + func SetKernelObjectSecurity(handle Handle, securityInformation SECURITY_INFORMATION, securityDescriptor *SECURITY_DESCRIPTOR) (err error) { r1, _, e1 := syscall.Syscall(procSetKernelObjectSecurity.Addr(), 
3, uintptr(handle), uintptr(securityInformation), uintptr(unsafe.Pointer(securityDescriptor))) if r1 == 0 { diff --git a/vendor/modules.txt b/vendor/modules.txt index ae0aea27..355fbc59 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -6,14 +6,18 @@ github.com/RoaringBitmap/roaring/roaring64 # github.com/bits-and-blooms/bitset v1.13.0 ## explicit; go 1.16 github.com/bits-and-blooms/bitset -# github.com/blevesearch/bleve/v2 v2.4.0 -## explicit; go 1.20 +# github.com/blevesearch/bleve/v2 v2.4.1 +## explicit; go 1.21 github.com/blevesearch/bleve/v2 github.com/blevesearch/bleve/v2/analysis github.com/blevesearch/bleve/v2/analysis/analyzer/keyword github.com/blevesearch/bleve/v2/analysis/analyzer/standard github.com/blevesearch/bleve/v2/analysis/datetime/flexible github.com/blevesearch/bleve/v2/analysis/datetime/optional +github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds +github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds +github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds +github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds github.com/blevesearch/bleve/v2/analysis/lang/en github.com/blevesearch/bleve/v2/analysis/token/lowercase github.com/blevesearch/bleve/v2/analysis/token/porter @@ -135,15 +139,15 @@ github.com/yuin/goldmark/util # go.etcd.io/bbolt v1.3.10 ## explicit; go 1.21 go.etcd.io/bbolt -# golang.org/x/mod v0.18.0 +# golang.org/x/mod v0.19.0 ## explicit; go 1.18 golang.org/x/mod/internal/lazyregexp golang.org/x/mod/module golang.org/x/mod/semver -# golang.org/x/net v0.26.0 +# golang.org/x/net v0.27.0 ## explicit; go 1.18 golang.org/x/net/websocket -# golang.org/x/sys v0.21.0 +# golang.org/x/sys v0.22.0 ## explicit; go 1.18 golang.org/x/sys/execabs golang.org/x/sys/unix