diff --git a/go/go.mod b/go/go.mod
index 22588da0edb..6b55810ec71 100644
--- a/go/go.mod
+++ b/go/go.mod
@@ -63,6 +63,7 @@ require (
 	github.com/google/btree v1.1.2
 	github.com/google/go-github/v57 v57.0.0
 	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
+	github.com/hashicorp/go-uuid v1.0.1
 	github.com/hashicorp/golang-lru/v2 v2.0.2
 	github.com/jmoiron/sqlx v1.3.4
 	github.com/kch42/buzhash v0.0.0-20160816060738-9bdec3dec7c6
diff --git a/go/go.sum b/go/go.sum
index 55f9e7af7fa..878d843c29c 100644
--- a/go/go.sum
+++ b/go/go.sum
@@ -389,6 +389,7 @@ github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerX
 github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
 github.com/hashicorp/go-uuid v0.0.0-20180228145832-27454136f036/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
 github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
+github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1BE=
 github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
 github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
 github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90=
diff --git a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go
index a4e592401e2..7197c182af0 100644
--- a/go/libraries/doltcore/sqle/enginetest/dolt_queries.go
+++ b/go/libraries/doltcore/sqle/enginetest/dolt_queries.go
@@ -4786,11 +4786,8 @@ var LargeJsonObjectScriptTests = []queries.ScriptTest{
 				Expected: []sql.Row{{types.OkResult{RowsAffected: 1}}},
 			},
 			{
-				Skip: true,
-				// TODO: The JSON is coming back truncated for some reason and failing this test.
-				// When that's fixed, unskip this test, and fix the length value below.
-				Query:    `SELECT pk, length(j1) from t;`,
-				Expected: []sql.Row{{1, 123}},
+				Query:    `SELECT pk, length(j1->>"$.large_value") from t;`,
+				Expected: []sql.Row{{1, 1024 * 1024 * 3}},
 			},
 		},
 	},
diff --git a/go/store/prolly/tree/indexed_json_diff.go b/go/store/prolly/tree/indexed_json_diff.go
index 6773b3f679a..e23ead39ff7 100644
--- a/go/store/prolly/tree/indexed_json_diff.go
+++ b/go/store/prolly/tree/indexed_json_diff.go
@@ -23,7 +23,7 @@ import (
 )
 
 type IndexedJsonDiffer struct {
-	differ                             Differ[jsonLocationKey, jsonLocationOrdering]
+	differ                             Differ[jsonLocationKey, *jsonLocationOrdering]
 	currentFromCursor, currentToCursor *JsonCursor
 	from, to                           IndexedJsonDocument
 	started                            bool
@@ -32,10 +32,14 @@ type IndexedJsonDiffer struct {
 var _ IJsonDiffer = &IndexedJsonDiffer{}
 
 func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*IndexedJsonDiffer, error) {
-	differ, err := DifferFromRoots[jsonLocationKey, jsonLocationOrdering](ctx, from.m.NodeStore, to.m.NodeStore, from.m.Root, to.m.Root, jsonLocationOrdering{}, false)
+	ordering := jsonLocationOrdering{}
+	differ, err := DifferFromRoots[jsonLocationKey, *jsonLocationOrdering](ctx, from.m.NodeStore, to.m.NodeStore, from.m.Root, to.m.Root, &ordering, false)
 	if err != nil {
 		return nil, err
 	}
+	if ordering.err != nil {
+		return nil, ordering.err
+	}
 	// We want to diff the prolly tree as if it was an address map pointing to the individual blob fragments, rather
 	// than diffing the blob fragments themselves. We can accomplish this by just replacing the cursors in the differ
 	// with their parents.
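A note on the `jsonLocationOrdering` change threaded through this file: the tree's `Ordering` interface requires `Compare(left, right K) int`, so a failed comparison has no direct way to report an error. This PR's approach is to record the first error on the ordering struct (hence the switch from a value to a pointer) and have callers check the field once the tree operation completes, as in the `ordering.err` check above. A minimal, self-contained sketch of that pattern, assuming a simplified `Ordering` interface; the names here are illustrative, not Dolt's actual API:

```go
package main

import (
	"errors"
	"fmt"
)

// Simplified stand-in for the tree's Ordering[K] interface: Compare can
// only return an int, so it cannot propagate an error directly.
type Ordering[K any] interface {
	Compare(left, right K) int
}

var errBadKey = errors.New("uncomparable key")

// errCapturingOrdering mirrors the jsonLocationOrdering pattern: the first
// comparison error is stashed on the struct, and the caller checks the
// field after the traversal that used the ordering has finished.
type errCapturingOrdering struct {
	err error
}

func (o *errCapturingOrdering) Compare(left, right []byte) int {
	if len(left) == 0 || len(right) == 0 {
		if o.err == nil {
			o.err = errBadKey // remember the error; we can't return it here
		}
		return 0
	}
	return int(left[0]) - int(right[0])
}

func main() {
	ordering := &errCapturingOrdering{}
	var _ Ordering[[]byte] = ordering // pointer receiver satisfies the interface

	fmt.Println(ordering.Compare([]byte{2}, []byte{1})) // 1
	ordering.Compare(nil, []byte{1})                    // comparison fails silently...
	fmt.Println(ordering.err)                           // ...but the error is recoverable here
}
```

This is also why the diff checks `ordering.err` immediately after `DifferFromRoots` (and after the seek and cursor helpers later in the patch) rather than relying on the returned `err` alone.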
@@ -225,7 +229,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
 			// Neither cursor points to the start of a value.
 			// This should only be possible if they're at the same location.
 			// Do a sanity check, then continue.
-			if compareJsonLocations(fromCurrentLocation, toCurrentLocation) != 0 {
+			cmp, err := compareJsonLocations(fromCurrentLocation, toCurrentLocation)
+			if err != nil {
+				return JsonDiff{}, err
+			}
+			if cmp != 0 {
 				return JsonDiff{}, jsonParseError
 			}
 			err = advanceCursor(ctx, &jd.currentFromCursor)
@@ -240,7 +248,10 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
 		}
 
 		if fromScannerAtStartOfValue && toScannerAtStartOfValue {
-			cmp := compareJsonLocations(fromCurrentLocation, toCurrentLocation)
+			cmp, err := compareJsonLocations(fromCurrentLocation, toCurrentLocation)
+			if err != nil {
+				return JsonDiff{}, err
+			}
 			switch cmp {
 			case 0:
 				key := fromCurrentLocation.Clone().key
diff --git a/go/store/prolly/tree/json_chunker.go b/go/store/prolly/tree/json_chunker.go
index d825a906c64..104be871fd9 100644
--- a/go/store/prolly/tree/json_chunker.go
+++ b/go/store/prolly/tree/json_chunker.go
@@ -56,6 +56,13 @@ func SerializeJsonToAddr(ctx context.Context, ns NodeStore, j sql.JSONWrapper) (
 	}
 	jsonChunker.appendJsonToBuffer(jsonBytes)
 	err = jsonChunker.processBuffer(ctx)
+	if largeJsonStringError.Is(err) {
+		// Due to current limits on chunk sizes, and an inability of older clients to read
+		// string keys and values split across multiple chunks, we can't use the JSON chunker for documents
+		// with extremely long strings.
+		node, _, err := serializeJsonToBlob(ctx, ns, j)
+		return node, err
+	}
 	if err != nil {
 		return Node{}, err
 	}
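The `SerializeJsonToAddr` change above is the heart of the fix: when chunking fails because a string is too long, the document is quietly re-serialized as a plain blob, which older clients can always read. A condensed sketch of that control flow, with hypothetical stand-ins for the chunker and blob writer (this is not Dolt's real API):

```go
package main

import (
	"errors"
	"fmt"
)

// Stand-in for largeJsonStringError: the chunker reports that a JSON
// string is too long to keep within a safe chunk size.
var errLargeString = errors.New("json string exceeds max allowed length")

// chunkedEncode is a hypothetical stand-in for the JSON chunker, which
// fails on documents containing oversized strings.
func chunkedEncode(doc string) ([]byte, error) {
	if len(doc) > 16 {
		return nil, fmt.Errorf("%w: %d bytes", errLargeString, len(doc))
	}
	return []byte(doc), nil
}

// blobEncode is a hypothetical stand-in for the non-indexed blob writer,
// which accepts any document.
func blobEncode(doc string) []byte { return []byte(doc) }

// serialize mirrors the fallback added to SerializeJsonToAddr: prefer the
// indexed (chunked) encoding, but fall back to a blob when the chunker
// reports an oversized string. Any other error still propagates.
func serialize(doc string) ([]byte, error) {
	b, err := chunkedEncode(doc)
	if errors.Is(err, errLargeString) {
		return blobEncode(doc), nil
	}
	if err != nil {
		return nil, err
	}
	return b, nil
}

func main() {
	out, err := serialize(`{"k": "a very, very long string value..."}`)
	fmt.Println(len(out), err) // falls back to the blob encoding, no error
}
```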
diff --git a/go/store/prolly/tree/json_cursor.go b/go/store/prolly/tree/json_cursor.go
index 61ef2995aec..ee83b761f86 100644
--- a/go/store/prolly/tree/json_cursor.go
+++ b/go/store/prolly/tree/json_cursor.go
@@ -65,10 +65,14 @@ func newJsonCursor(ctx context.Context, ns NodeStore, root Node, startKey jsonLo
 }
 
 func newJsonCursorAtStartOfChunk(ctx context.Context, ns NodeStore, root Node, startKey []byte) (jCur *JsonCursor, err error) {
-	cur, err := newCursorAtKey(ctx, ns, root, startKey, jsonLocationOrdering{})
+	ordering := jsonLocationOrdering{}
+	cur, err := newCursorAtKey(ctx, ns, root, startKey, &ordering)
 	if err != nil {
 		return nil, err
 	}
+	if ordering.err != nil {
+		return nil, ordering.err
+	}
 	return newJsonCursorFromCursor(ctx, cur)
 }
 
@@ -125,7 +129,15 @@ func (j *JsonCursor) NextValue(ctx context.Context) (result []byte, err error) {
 		return
 	}
-	for compareJsonLocations(j.jsonScanner.currentPath, path) < 0 {
+	for {
+		var cmp int
+		cmp, err = compareJsonLocations(j.jsonScanner.currentPath, path)
+		if err != nil {
+			return
+		}
+		if cmp >= 0 {
+			break
+		}
 		parseChunk()
 		if err != nil {
 			return
@@ -135,13 +147,14 @@ func (j *JsonCursor) NextValue(ctx context.Context) (result []byte, err error) {
 	}
 	return
 }
-func (j *JsonCursor) isKeyInChunk(path jsonLocation) bool {
+func (j *JsonCursor) isKeyInChunk(path jsonLocation) (bool, error) {
 	if j.cur.parent == nil {
 		// This is the only chunk, so the path must refer to this chunk.
-		return true
+		return true, nil
 	}
 	nodeEndPosition := jsonPathFromKey(j.cur.parent.CurrentKey())
-	return compareJsonLocations(path, nodeEndPosition) <= 0
+	cmp, err := compareJsonLocations(path, nodeEndPosition)
+	return cmp <= 0, err
 }
 
 // AdvanceToLocation causes the cursor to advance to the specified position. This function returns a boolean indicating
@@ -151,12 +164,20 @@ func (j *JsonCursor) isKeyInChunk(path jsonLocation) bool {
 // the cursor advances to the end of the previous value, prior to the object key. This allows the key to be removed along
 // with the value.
 func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation, forRemoval bool) (found bool, err error) {
-	if !j.isKeyInChunk(path) {
+	isInChunk, err := j.isKeyInChunk(path)
+	if err != nil {
+		return false, err
+	}
+	if !isInChunk {
 		// Our destination is in another chunk, load it.
-		err := Seek(ctx, j.cur.parent, path.key, jsonLocationOrdering{})
+		ordering := jsonLocationOrdering{}
+		err := Seek(ctx, j.cur.parent, path.key, &ordering)
 		if err != nil {
 			return false, err
 		}
+		if ordering.err != nil {
+			return false, ordering.err
+		}
 		j.cur.nd, err = fetchChild(ctx, j.cur.nrw, j.cur.parent.currentRef())
 		if err != nil {
 			return false, err
@@ -169,7 +190,10 @@ func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation, f
 	}
 
 	previousScanner := j.jsonScanner
-	cmp := compareJsonLocations(j.jsonScanner.currentPath, path)
+	cmp, err := compareJsonLocations(j.jsonScanner.currentPath, path)
+	if err != nil {
+		return false, err
+	}
 	for cmp < 0 {
 		previousScanner = j.jsonScanner.Clone()
 		err := j.jsonScanner.AdvanceToNextLocation()
@@ -180,7 +204,10 @@ func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation, f
 		} else if err != nil {
 			return false, err
 		}
-		cmp = compareJsonLocations(j.jsonScanner.currentPath, path)
+		cmp, err = compareJsonLocations(j.jsonScanner.currentPath, path)
+		if err != nil {
+			return false, err
+		}
 	}
 	// If the supplied path doesn't exist in the document, then we want to stop the cursor at the start of the point
 	// where it would appear. This may mean that we've gone too far and need to rewind one location.
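Most of the json_cursor.go churn above is one mechanical consequence of the new comparison signature: a loop like `for compareJsonLocations(a, b) < 0 { ... }` can no longer keep the comparison in the loop condition once it returns `(int, error)`. A sketch of the before/after loop shape in isolation, using a hypothetical compare function:

```go
package main

import (
	"errors"
	"fmt"
)

// compare is a hypothetical stand-in for compareJsonLocations after the
// change: it returns an error alongside the comparison result.
func compare(a, b int) (int, error) {
	if a < 0 || b < 0 {
		return 0, errors.New("invalid location")
	}
	switch {
	case a < b:
		return -1, nil
	case a > b:
		return 1, nil
	}
	return 0, nil
}

// advance mimics the rewritten NextValue loop: an infinite loop that
// checks the error, then breaks once the cursor has caught up (cmp >= 0),
// replacing the old `for compare(cur, target) < 0 { ... }` form.
func advance(cur, target int) (int, error) {
	for {
		cmp, err := compare(cur, target)
		if err != nil {
			return cur, err
		}
		if cmp >= 0 {
			break
		}
		cur++ // stand-in for parseChunk()/cursor advancement
	}
	return cur, nil
}

func main() {
	fmt.Println(advance(3, 7))  // 7 <nil>
	fmt.Println(advance(-1, 7)) // -1 invalid location
}
```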
diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go
index b0ac82f43b1..9dd1ab2ba4e 100644
--- a/go/store/prolly/tree/json_indexed_document.go
+++ b/go/store/prolly/tree/json_indexed_document.go
@@ -31,7 +31,7 @@ type jsonLocationKey = []byte
 
 type address = []byte
 
-type StaticJsonMap = StaticMap[jsonLocationKey, address, jsonLocationOrdering]
+type StaticJsonMap = StaticMap[jsonLocationKey, address, *jsonLocationOrdering]
 
 // IndexedJsonDocument is an implementation of sql.JSONWrapper that stores the document in a prolly tree.
 // Every leaf node in the tree is a blob containing a substring of the original document. This allows the document
@@ -51,10 +51,10 @@ var _ fmt.Stringer = IndexedJsonDocument{}
 var _ driver.Valuer = IndexedJsonDocument{}
 
 func NewIndexedJsonDocument(ctx context.Context, root Node, ns NodeStore) IndexedJsonDocument {
-	m := StaticMap[jsonLocationKey, address, jsonLocationOrdering]{
+	m := StaticMap[jsonLocationKey, address, *jsonLocationOrdering]{
 		Root:      root,
 		NodeStore: ns,
-		Order:     jsonLocationOrdering{},
+		Order:     &jsonLocationOrdering{},
 	}
 	return IndexedJsonDocument{
 		m:   m,
@@ -114,8 +114,8 @@ func tryWithFallback(
 	tryFunc func() error,
 	fallbackFunc func(document types.JSONDocument) error) error {
 	err := tryFunc()
-	if err == unknownLocationKeyError || err == unsupportedPathError {
-		if err == unknownLocationKeyError {
+	if err == unknownLocationKeyError || err == unsupportedPathError || err == jsonParseError || largeJsonStringError.Is(err) {
+		if err != unsupportedPathError {
 			if sqlCtx, ok := ctx.(*sql.Context); ok {
 				sqlCtx.GetLogger().Warn(err)
 			}
@@ -282,7 +282,10 @@ func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonL
 	// For example, attempting to insert into the path "$.a.b" in the document {"a": 1}
 	// We can detect this by checking to see if the insertion point in the original document comes before the inserted path.
 	// (For example, the insertion point occurs at $.a.START, which is before $.a.b)
-	cmp := compareJsonLocations(cursorPath, keyPath)
+	cmp, err := compareJsonLocations(cursorPath, keyPath)
+	if err != nil {
+		return IndexedJsonDocument{}, false, err
+	}
 	if cmp < 0 && cursorPath.getScannerState() == startOfValue {
 		// We just attempted to insert into a scalar.
 		return i, false, nil
@@ -444,7 +447,11 @@ func (i IndexedJsonDocument) setWithLocation(ctx context.Context, keyPath jsonLo
 		}
 
 		keyPath.pop()
-		found = compareJsonLocations(keyPath, jsonCursor.jsonScanner.currentPath) == 0
+		cmp, err := compareJsonLocations(keyPath, jsonCursor.jsonScanner.currentPath)
+		if err != nil {
+			return IndexedJsonDocument{}, false, err
+		}
+		found = cmp == 0
 	}
 
 	if found {
@@ -491,7 +498,11 @@ func (i IndexedJsonDocument) tryReplace(ctx context.Context, path string, val sq
 		}
 
 		keyPath.pop()
-		found = compareJsonLocations(keyPath, jsonCursor.jsonScanner.currentPath) == 0
+		cmp, err := compareJsonLocations(keyPath, jsonCursor.jsonScanner.currentPath)
+		if err != nil {
+			return nil, false, err
+		}
+		found = cmp == 0
 	}
 
 	if !found {
diff --git a/go/store/prolly/tree/json_indexed_document_test.go b/go/store/prolly/tree/json_indexed_document_test.go
index 7a7adc5637b..dade1fc917d 100644
--- a/go/store/prolly/tree/json_indexed_document_test.go
+++ b/go/store/prolly/tree/json_indexed_document_test.go
@@ -455,3 +455,23 @@ func TestJsonCompare(t *testing.T) {
 		})
 	})
 }
+
+// Test that we can write a JSON document with a multi-MB string value into storage and read it back.
+func TestIndexedJsonDocument_CreateLargeStringValues(t *testing.T) {
+	ctx := sql.NewEmptyContext()
+	ns := NewTestNodeStore()
+	docMap := make(map[string]interface{})
+	value := strings.Repeat("x", 2097152)
+	docMap["key"] = value
+	doc, _, err := types.JSON.Convert(docMap)
+	require.NoError(t, err)
+	root, err := SerializeJsonToAddr(ctx, ns, doc.(sql.JSONWrapper))
+	require.NoError(t, err)
+	indexedDoc, err := NewJSONDoc(root.HashOf(), ns).ToIndexedJSONDocument(ctx)
+	require.NoError(t, err)
+	lookup, err := types.LookupJSONValue(indexedDoc, "$.key")
+	require.NoError(t, err)
+	extractedValue, _, err := types.LongText.Convert(lookup)
+	require.NoError(t, err)
+	require.Equal(t, value, extractedValue)
+}
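The json_location.go section below introduces a new scanner state (`middleOfStringValue`) plus a `jsonPathTypeNumElements` sentinel, and replaces the old numeric `cmp.Compare` on scanner states with an explicit comparison. The point of the sentinel is forward compatibility: a location key written by a newer Dolt may carry a state this version has never seen, and the comparison must fail loudly rather than silently mis-order keys. A standalone sketch of that ordering, mirroring the diff's logic but with local names:

```go
package main

import (
	"errors"
	"fmt"
)

type pathType uint8

const (
	startOfValue pathType = iota
	objectInitialElement
	arrayInitialElement
	endOfValue
	middleOfStringValue
	numPathTypes // sentinel: anything at or beyond this came from a newer version
)

var errUnknownKey = errors.New("unknown location key")

// comparePathTypes orders scanner states: startOfValue sorts first,
// endOfValue sorts last, and every other state compares equal among
// themselves. Unknown (future) states produce an error instead of a
// wrong answer.
func comparePathTypes(left, right pathType) (int, error) {
	if left >= numPathTypes || right >= numPathTypes {
		return 0, errUnknownKey
	}
	if left == startOfValue && right != startOfValue {
		return -1, nil
	}
	if left == endOfValue && right != endOfValue {
		return 1, nil
	}
	if right == startOfValue && left != startOfValue {
		return 1, nil
	}
	if right == endOfValue && left != endOfValue {
		return -1, nil
	}
	return 0, nil
}

func main() {
	fmt.Println(comparePathTypes(middleOfStringValue, endOfValue)) // -1 <nil>
	fmt.Println(comparePathTypes(numPathTypes, startOfValue))      // 0 unknown location key
}
```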
diff --git a/go/store/prolly/tree/json_location.go b/go/store/prolly/tree/json_location.go
index c532f07ac9b..2d270efad2c 100644
--- a/go/store/prolly/tree/json_location.go
+++ b/go/store/prolly/tree/json_location.go
@@ -16,7 +16,6 @@ package tree
 
 import (
 	"bytes"
-	"cmp"
 	"fmt"
 	"slices"
 	"strconv"
@@ -69,8 +68,30 @@ const (
 	objectInitialElement
 	arrayInitialElement
 	endOfValue
+	middleOfStringValue
+	jsonPathTypeNumElements
 )
 
+func compareJsonPathTypes(left, right jsonPathType) (int, error) {
+	if left >= jsonPathTypeNumElements || right >= jsonPathTypeNumElements {
+		// These paths were written by a future version of Dolt. We can't assume anything about them.
+		return 0, unknownLocationKeyError
+	}
+	if left == startOfValue && right != startOfValue {
+		return -1, nil
+	}
+	if left == endOfValue && right != endOfValue {
+		return 1, nil
+	}
+	if right == startOfValue && left != startOfValue {
+		return 1, nil
+	}
+	if right == endOfValue && left != endOfValue {
+		return -1, nil
+	}
+	return 0, nil
+}
+
 func (t jsonPathType) isInitialElement() bool {
 	return t == objectInitialElement || t == arrayInitialElement
 }
@@ -170,7 +191,7 @@ func isUnsupportedJsonArrayIndex(index []byte) bool {
 }
 
 func errorIfNotSupportedLocation(key []byte) error {
-	if jsonPathType(key[0]) > endOfValue {
+	if jsonPathType(key[0]) >= jsonPathTypeNumElements {
 		return unknownLocationKeyError
 	}
 	return nil
@@ -336,6 +357,10 @@ func (p *jsonLocation) getScannerState() jsonPathType {
 	return jsonPathType(p.key[0])
 }
 
+func (p jsonLocation) IsMiddleOfString() bool {
+	return p.getScannerState() == middleOfStringValue
+}
+
 type jsonPathElement struct {
 	key          []byte
 	isArrayIndex bool
@@ -378,17 +403,17 @@ func (p *jsonLocation) Clone() jsonLocation {
 
 // compareJsonLocations creates an ordering on locations by determining which one would come first in a normalized JSON
 // document where all keys are sorted lexicographically.
-func compareJsonLocations(left, right jsonLocation) int {
+func compareJsonLocations(left, right jsonLocation) (int, error) {
 	minLength := min(left.size(), right.size())
 	for i := 0; i < minLength; i++ {
 		leftPathElement := left.getPathElement(i)
 		rightPathElement := right.getPathElement(i)
 		c := bytes.Compare(leftPathElement.key, rightPathElement.key)
 		if c < 0 {
-			return -1
+			return -1, nil
 		}
 		if c > 0 {
-			return 1
+			return 1, nil
 		}
 	}
 	if left.size() < right.size() {
@@ -400,14 +425,14 @@ func compareJsonLocations(left, right jsonLocation) int {
 			if left.getScannerState() == objectInitialElement {
 				rightIsArray := right.getLastPathElement().isArrayIndex
 				if rightIsArray {
-					return 1
+					return 1, nil
 				}
 			}
 		}
 		if left.getScannerState() != endOfValue {
-			return -1
+			return -1, nil
 		}
-		return 1
+		return 1, nil
 	}
 	if left.size() > right.size() {
 		// right is a parent of left
@@ -418,26 +443,28 @@ func compareJsonLocations(left, right jsonLocation) int {
 			if right.getScannerState() == objectInitialElement {
 				leftIsArray := left.getLastPathElement().isArrayIndex
 				if leftIsArray {
-					return -1
+					return -1, nil
 				}
 			}
 		}
 		if right.getScannerState() != endOfValue {
-			return 1
+			return 1, nil
 		}
-		return -1
+		return -1, nil
 	}
 	// left and right have the exact same key elements
-	return cmp.Compare(left.getScannerState(), right.getScannerState())
+	return compareJsonPathTypes(left.getScannerState(), right.getScannerState())
 }
 
-type jsonLocationOrdering struct{}
+type jsonLocationOrdering struct {
+	err error // store errors created during comparisons
+}
 
-var _ Ordering[[]byte] = jsonLocationOrdering{}
+var _ Ordering[[]byte] = &jsonLocationOrdering{}
 
-func (jsonLocationOrdering) Compare(left, right []byte) int {
+func (o *jsonLocationOrdering) Compare(left, right []byte) int {
 	// A JSON document that fits entirely in a single chunk has no keys.
 	if len(left) == 0 && len(right) == 0 {
 		return 0
 	}
@@ -448,5 +475,9 @@ func (jsonLocationOrdering) Compare(left, right []byte) int {
 	}
 	leftPath := jsonPathFromKey(left)
 	rightPath := jsonPathFromKey(right)
-	return compareJsonLocations(leftPath, rightPath)
+	cmp, err := compareJsonLocations(leftPath, rightPath)
+	if err != nil {
+		o.err = err
+	}
+	return cmp
 }
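Before the json_scanner.go section below: the new `maxJsonStringLength` bound is derived from the two limits named in its comment. Chunk boundaries are only drawn after a chunk exceeds `maxChunkSize` (16KB per the comment), and chunks have been observed to be written incorrectly past 48KB, so the largest string that can safely be appended to an in-progress chunk is the difference. A back-of-envelope check, with the constant values assumed from the PR comment rather than read from Dolt's source:

```go
package main

import "fmt"

func main() {
	const maxChunkSize = 16 * 1024   // assumed: a boundary is drawn once a chunk exceeds this
	const hardChunkLimit = 48 * 1024 // assumed: observed corruption threshold for a chunk

	maxJsonStringLength := hardChunkLimit - maxChunkSize
	fmt.Println(maxJsonStringLength) // 32768

	// Worst case: a chunk sits right at maxChunkSize (about to close) when a
	// string of maximal length arrives. The chunk then ends at
	// 16KiB + 32KiB = 48KiB, exactly at the limit.
	fmt.Println(maxChunkSize+maxJsonStringLength == hardChunkLimit) // true
}
```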
diff --git a/go/store/prolly/tree/json_scanner.go b/go/store/prolly/tree/json_scanner.go
index 9478d93a378..f6f00cf7bc9 100644
--- a/go/store/prolly/tree/json_scanner.go
+++ b/go/store/prolly/tree/json_scanner.go
@@ -17,6 +17,8 @@ package tree
 import (
 	"fmt"
 	"io"
+
+	errorkinds "gopkg.in/src-d/go-errors.v1"
 )
 
 // JsonScanner is a state machine that parses already-normalized JSON while keeping track of the path to the current value.
@@ -38,7 +40,15 @@ type JsonScanner struct {
 	valueOffset int
 }
 
+// The JSONChunker can't draw a chunk boundary within an object key.
+// This can lead to large chunks that may cause problems.
+// We've observed chunks getting written incorrectly if they exceed 48KB.
+// Since boundaries are always drawn once a chunk exceeds maxChunkSize (16KB),
+// this is the largest length that can be appended to a chunk without exceeding 48KB.
+var maxJsonStringLength = 48*1024 - maxChunkSize
+
 var jsonParseError = fmt.Errorf("encountered invalid JSON while reading JSON from the database, or while preparing to write JSON to the database. This is most likely a bug in JSON diffing")
+var largeJsonStringError = errorkinds.NewKind("encountered JSON string with length %d, larger than max allowed length %d")
 
 func (j JsonScanner) Clone() JsonScanner {
 	return JsonScanner{
@@ -118,6 +128,14 @@ func (s *JsonScanner) AdvanceToNextLocation() error {
 		} else {
 			return s.acceptNextKeyValue()
 		}
+	case middleOfStringValue:
+		finishedString, err := s.acceptRestOfValueString()
+		if finishedString {
+			s.currentPath.setScannerState(endOfValue)
+		} else {
+			s.currentPath.setScannerState(middleOfStringValue)
+		}
+		return err
 	default:
 		return jsonParseError
 	}
@@ -127,11 +145,16 @@ func (s *JsonScanner) acceptValue() error {
 	current := s.current()
 	switch current {
 	case '"':
-		_, err := s.acceptString()
+		finishedString, err := s.acceptValueString()
 		if err != nil {
 			return err
 		}
-		s.currentPath.setScannerState(endOfValue)
+		if finishedString {
+			s.currentPath.setScannerState(endOfValue)
+		} else {
+			s.currentPath.setScannerState(middleOfStringValue)
+		}
+		return nil
 	case '[':
 		s.valueOffset++
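The `middleOfStringValue` state handled above lets the scanner stop inside a string value at a chunk boundary and resume in the next chunk via `acceptRestOfValueString` (even though, per the comments in the next hunk, oversized strings currently trigger the blob fallback rather than actually being split). A toy sketch of the resume mechanic, independent of the real scanner:

```go
package main

import "fmt"

type scanState int

const (
	startOfValue scanState = iota
	middleOfStringValue
	endOfValue
)

// scanStringChunk consumes one chunk's worth of a JSON string value. When
// called in startOfValue it expects the opening quote; when resuming in
// middleOfStringValue it starts directly inside the string, which is the
// role acceptRestOfValueString plays in the diff.
func scanStringChunk(chunk []byte, state scanState) scanState {
	i := 0
	if state == startOfValue {
		i++ // skip the opening quote
	}
	for ; i < len(chunk); i++ {
		switch chunk[i] {
		case '\\':
			i++ // skip the escaped byte, as the scanner's loops do
		case '"':
			return endOfValue
		}
	}
	return middleOfStringValue // chunk ended before the closing quote
}

func main() {
	state := scanStringChunk([]byte(`"split ac`), startOfValue)
	fmt.Println(state == middleOfStringValue) // true
	state = scanStringChunk([]byte(`ross chunks"`), state)
	fmt.Println(state == endOfValue) // true
}
```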
@@ -177,24 +200,58 @@ func (s *JsonScanner) accept(b byte) error {
 	return nil
 }
 
-func (s *JsonScanner) acceptString() ([]byte, error) {
-	err := s.accept('"')
+func (s *JsonScanner) acceptKeyString() (stringBytes []byte, err error) {
+	err = s.accept('"')
 	if err != nil {
 		return nil, err
 	}
 	stringStart := s.valueOffset
 	for s.current() != '"' {
 		switch s.current() {
+		case endOfFile:
+			return nil, jsonParseError
 		case '\\':
 			s.valueOffset++
 		}
 		s.valueOffset++
 	}
+	if s.valueOffset-stringStart > maxJsonStringLength {
+		return nil, largeJsonStringError.New(s.valueOffset-stringStart, maxJsonStringLength)
+	}
 	result := s.jsonBuffer[stringStart:s.valueOffset]
+	// Advance past the ending quotes
 	s.valueOffset++
 	return result, nil
 }
 
+func (s *JsonScanner) acceptValueString() (finishedString bool, err error) {
+	err = s.accept('"')
+	if err != nil {
+		return false, err
+	}
+	return s.acceptRestOfValueString()
+}
+
+func (s *JsonScanner) acceptRestOfValueString() (finishedString bool, err error) {
+	stringStart := s.valueOffset
+	for s.current() != '"' {
+		switch s.current() {
+		case '\\':
+			s.valueOffset++
+		}
+		s.valueOffset++
+	}
+	// We don't currently split value strings across chunks because it causes issues being read by older clients.
+	// Instead, by returning largeJsonStringError, we trigger the fallback behavior where the JSON document
+	// gets treated as a non-indexed blob.
+	if s.valueOffset-stringStart > maxJsonStringLength {
+		return false, largeJsonStringError.New(s.valueOffset-stringStart, maxJsonStringLength)
+	}
+	// Advance past the ending quotes
+	s.valueOffset++
+	return true, nil
+}
+
 func (s *JsonScanner) acceptKeyValue() error {
 	current := s.current()
 	switch current {
@@ -228,7 +285,7 @@ func (s *JsonScanner) acceptNextKeyValue() error {
 }
 
 func (s *JsonScanner) acceptObjectKey() error {
-	objectKey, err := s.acceptString()
+	objectKey, err := s.acceptKeyString()
 	if err != nil {
 		return err
 	}
diff --git a/go/store/prolly/tree/prolly_fields.go b/go/store/prolly/tree/prolly_fields.go
index dfc7c1c516f..9ebeddc896c 100644
--- a/go/store/prolly/tree/prolly_fields.go
+++ b/go/store/prolly/tree/prolly_fields.go
@@ -236,14 +236,14 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v
 	// TODO: eventually remove GeometryEnc, but in the meantime write them as GeomAddrEnc
 	case val.GeometryEnc:
 		geo := serializeGeometry(v)
-		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
+		_, h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
 		if err != nil {
 			return err
 		}
 		tb.PutGeometryAddr(i, h)
 	case val.GeomAddrEnc:
 		geo := serializeGeometry(v)
-		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
+		_, h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
 		if err != nil {
 			return err
 		}
@@ -255,14 +255,14 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v
 		}
 		tb.PutJSONAddr(i, h)
 	case val.BytesAddrEnc:
-		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(v.([]byte)), len(v.([]byte)))
+		_, h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(v.([]byte)), len(v.([]byte)))
 		if err != nil {
 			return err
 		}
 		tb.PutBytesAddr(i, h)
 	case val.StringAddrEnc:
 		//todo: v will be []byte after daylon's changes
-		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader([]byte(v.(string))), len(v.(string)))
+		_, h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader([]byte(v.(string))), len(v.(string)))
 		if err != nil {
 			return err
 		}
@@ -292,7 +292,7 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v
 		if err != nil {
 			return err
 		}
-		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(b), len(b))
+		_, h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(b), len(b))
 		if err != nil {
 			return err
 		}
@@ -315,11 +315,8 @@ func getJSONAddrHash(ctx context.Context, ns NodeStore, v interface{}) (hash.Has
 			return hash.Hash{}, err
 		}
 		if optimizeJson == int8(0) {
-			buf, err := types.MarshallJson(j)
-			if err != nil {
-				return hash.Hash{}, err
-			}
-			return SerializeBytesToAddr(ctx, ns, bytes.NewReader(buf), len(buf))
+			_, h, err := serializeJsonToBlob(ctx, ns, j)
+			return h, err
 		}
 	}
 	root, err := SerializeJsonToAddr(ctx, ns, j)
@@ -329,6 +326,14 @@ func getJSONAddrHash(ctx context.Context, ns NodeStore, v interface{}) (hash.Has
 	return root.HashOf(), nil
 }
 
+func serializeJsonToBlob(ctx context.Context, ns NodeStore, j sql.JSONWrapper) (Node, hash.Hash, error) {
+	buf, err := types.MarshallJson(j)
+	if err != nil {
+		return Node{}, hash.Hash{}, err
+	}
+	return SerializeBytesToAddr(ctx, ns, bytes.NewReader(buf), len(buf))
+}
+
 func convInt(v interface{}) int {
 	switch i := v.(type) {
 	case int:
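The reason for the `_, h, err :=` churn above: the hunk below widens `SerializeBytesToAddr` to also return the serialized `Node`, because the new blob fallback in `SerializeJsonToAddr` needs the root node itself, while existing callers only want the address. A small sketch of that refactor shape with stub types (not Dolt's real ones):

```go
package main

import "fmt"

// Stub stand-ins for tree.Node and hash.Hash.
type Node struct{ data []byte }
type Hash [2]byte

// writeBlob returns both the node and its address, like the widened
// SerializeBytesToAddr. Callers that only need the address discard the
// node with `_`; the fallback path keeps it.
func writeBlob(data []byte) (Node, Hash, error) {
	var h Hash
	copy(h[:], data) // toy "hash" of the content
	return Node{data: data}, h, nil
}

func main() {
	// Existing call sites (the PutField cases): only the address matters.
	_, addr, err := writeBlob([]byte(`{"k":1}`))
	fmt.Println(addr, err)

	// The fallback path (SerializeJsonToAddr): the Node itself is returned
	// to the caller, so the helper must not throw it away.
	node, _, err := writeBlob([]byte(`{"k":1}`))
	fmt.Println(len(node.data), err)
}
```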
@@ -417,15 +422,15 @@ func serializeGeometry(v interface{}) []byte {
 	}
 }
 
-func SerializeBytesToAddr(ctx context.Context, ns NodeStore, r io.Reader, dataSize int) (hash.Hash, error) {
+func SerializeBytesToAddr(ctx context.Context, ns NodeStore, r io.Reader, dataSize int) (Node, hash.Hash, error) {
 	bb := ns.BlobBuilder()
 	defer ns.PutBlobBuilder(bb)
 	bb.Init(dataSize)
-	_, addr, err := bb.Chunk(ctx, r)
+	node, addr, err := bb.Chunk(ctx, r)
 	if err != nil {
-		return hash.Hash{}, err
+		return Node{}, hash.Hash{}, err
 	}
-	return addr, nil
+	return node, addr, nil
 }
 
 func convJson(v interface{}) (res sql.JSONWrapper, err error) {
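The bats changes below lean on content addressing: a value's address is a hash of its serialized bytes, so storing the same document the same way always yields the same address. That is what lets the test pin an exact hash for the stored blob, and why removing the stray `dolt show WORKING` debug line is safe. A toy illustration of the property (Dolt's real hash function and base32 alphabet differ; this only demonstrates the determinism):

```go
package main

import (
	"crypto/sha512"
	"encoding/base32"
	"fmt"
	"strings"
)

// addr derives a 32-character address from content, loosely imitating a
// content-addressed store: same bytes in, same address out.
func addr(content []byte) string {
	sum := sha512.Sum512(content)
	// 20 bytes -> 32 base32 characters, the same address length seen in
	// the test's pinned hash.
	return strings.ToLower(base32.StdEncoding.EncodeToString(sum[:20]))
}

func main() {
	a := addr([]byte(`{"key": "value"}`))
	b := addr([]byte(`{"key": "value"}`))
	fmt.Println(a == b) // true: identical content, identical address
	fmt.Println(len(a)) // 32
}
```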
diff --git a/integration-tests/bats/json.bats b/integration-tests/bats/json.bats
index f8724bd6a6c..f7c619dbfb6 100644
--- a/integration-tests/bats/json.bats
+++ b/integration-tests/bats/json.bats
@@ -294,8 +294,28 @@
     insert into js values (1, "[[[[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]],[[[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]],[[[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]],[[[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]]]");
SQL
     # If the document isn't put into an IndexedJsonDocument, it will have the following hash.
-    dolt show WORKING
     run dolt show qivuleqpbin1eise78h5u8k1hqe4f07g
     [ "$status" -eq 0 ]
     [[ "$output" =~ "Blob" ]] || false
 }
+
+# Older clients have a bug that prevents them from reading indexed JSON documents with strings split across chunks,
+# and making one large chunk can cause issues with the journal.
+# Thus, we expect that the document gets stored as a blob.
+@test "json: document with large string value doesn't get indexed" {
+    run dolt sql <