diff --git a/go/store/prolly/tree/json_chunker.go b/go/store/prolly/tree/json_chunker.go
index 75b9f1591fc..10c8707b5f3 100644
--- a/go/store/prolly/tree/json_chunker.go
+++ b/go/store/prolly/tree/json_chunker.go
@@ -130,7 +130,7 @@ func (j *JsonChunker) Done(ctx context.Context) (Node, error) {
 	// When inserting into the beginning of an object or array, we need to add an extra comma.
 	// We could track then in the chunker, but it's easier to just check the next part of JSON to determine
 	// whether we need the comma.
-	if jsonBytes[0] != '}' && jsonBytes[0] != ']' && jsonBytes[0] != ',' {
+	if j.jScanner.currentPath.getScannerState() == endOfValue && jsonBytes[0] != '}' && jsonBytes[0] != ']' && jsonBytes[0] != ',' {
 		j.appendJsonToBuffer([]byte(","))
 	}
 	// Append the rest of the JsonCursor, then continue until we either exhaust the cursor, or we coincide with a boundary from the original tree.
diff --git a/go/store/prolly/tree/json_cursor.go b/go/store/prolly/tree/json_cursor.go
index 619be39ae28..e7d6be61c28 100644
--- a/go/store/prolly/tree/json_cursor.go
+++ b/go/store/prolly/tree/json_cursor.go
@@ -49,25 +49,28 @@ func getPreviousKey(ctx context.Context, cur *cursor) ([]byte, error) {
 }
 
 // newJsonCursor takes the root node of a prolly tree representing a JSON document, and creates a new JsonCursor for reading
-// JSON, starting at the specified location in the document.
-func newJsonCursor(ctx context.Context, ns NodeStore, root Node, startKey jsonLocation) (*JsonCursor, error) {
+// JSON, starting at the specified location in the document. Returns a boolean indicating whether the location already exists
+// in the document. If the location does not exist in the document, the resulting JsonCursor
+// will be at the location where the value would be if it was inserted.
+func newJsonCursor(ctx context.Context, ns NodeStore, root Node, startKey jsonLocation, forRemoval bool) (jCur *JsonCursor, found bool, err error) {
 	cur, err := newCursorAtKey(ctx, ns, root, startKey.key, jsonLocationOrdering{})
 	if err != nil {
-		return nil, err
+		return nil, false, err
 	}
 	previousKey, err := getPreviousKey(ctx, cur)
 	if err != nil {
-		return nil, err
+		return nil, false, err
 	}
 	jsonBytes := cur.currentValue()
 	jsonDecoder := ScanJsonFromMiddleWithKey(jsonBytes, previousKey)
 	jcur := JsonCursor{cur: cur, jsonScanner: jsonDecoder}
-	err = jcur.AdvanceToLocation(ctx, startKey)
+
+	found, err = jcur.AdvanceToLocation(ctx, startKey, forRemoval)
 	if err != nil {
-		return nil, err
+		return nil, found, err
 	}
-	return &jcur, nil
+	return &jcur, found, nil
 }
 
 func (j JsonCursor) Valid() bool {
@@ -122,20 +125,26 @@ func (j *JsonCursor) isKeyInChunk(path jsonLocation) bool {
 	return compareJsonLocations(path, nodeEndPosition) <= 0
 }
 
-func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation) error {
+// AdvanceToLocation causes the cursor to advance to the specified position. This function returns a boolean indicating
+// whether the position already exists. If it doesn't, the cursor stops at the location where the value would be if it
+// were inserted.
+// The `forRemoval` parameter changes the behavior when advancing to the start of an object key. When this parameter is true,
+// the cursor advances to the end of the previous value, prior to the object key. This allows the key to be removed along
+// with the value.
+func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation, forRemoval bool) (found bool, err error) {
 	if !j.isKeyInChunk(path) {
 		// Our destination is in another chunk, load it.
 		err := Seek(ctx, j.cur.parent, path.key, jsonLocationOrdering{})
 		if err != nil {
-			return err
+			return false, err
 		}
 		j.cur.nd, err = fetchChild(ctx, j.cur.nrw, j.cur.parent.currentRef())
 		if err != nil {
-			return err
+			return false, err
 		}
 		previousKey, err := getPreviousKey(ctx, j.cur)
 		if err != nil {
-			return err
+			return false, err
 		}
 		j.jsonScanner = ScanJsonFromMiddleWithKey(j.cur.currentValue(), previousKey)
 	}
@@ -150,7 +159,7 @@ func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation) e
 			// there is no path greater than the end-of-document path, which is always the last key.
 			panic("Reached the end of the JSON document while advancing. This should not be possible. Is the document corrupt?")
 		} else if err != nil {
-			return err
+			return false, err
 		}
 		cmp = compareJsonLocations(j.jsonScanner.currentPath, path)
 	}
@@ -158,8 +167,13 @@ func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation) e
 	// were it would appear. This may mean that we've gone too far and need to rewind one location.
 	if cmp > 0 {
 		j.jsonScanner = previousScanner
+		return false, nil
+	}
+	// If the element is going to be removed, rewind the scanner one location, to before the key of the removed object.
+	if forRemoval {
+		j.jsonScanner = previousScanner
 	}
-	return nil
+	return true, nil
 }
 
 func (j *JsonCursor) AdvanceToNextLocation(ctx context.Context) (crossedBoundary bool, err error) {
diff --git a/go/store/prolly/tree/json_indexed_document.go b/go/store/prolly/tree/json_indexed_document.go
index 065f18ed33f..d41b4683676 100644
--- a/go/store/prolly/tree/json_indexed_document.go
+++ b/go/store/prolly/tree/json_indexed_document.go
@@ -150,14 +150,16 @@ func (i IndexedJsonDocument) tryLookup(ctx context.Context, pathString string) (
 		return nil, err
 	}
 
-	jCur, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, path)
+	return i.lookupByLocation(ctx, path)
+}
+
+func (i IndexedJsonDocument) lookupByLocation(ctx context.Context, path jsonLocation) (sql.JSONWrapper, error) {
+	jCur, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, path, false)
 	if err != nil {
 		return nil, err
 	}
 
-	cursorPath := jCur.GetCurrentPath()
-	cmp := compareJsonLocations(cursorPath, path)
-	if cmp != 0 {
+	if !found {
 		// The key doesn't exist in the document.
 		return nil, nil
 	}
@@ -191,18 +193,22 @@ func (i IndexedJsonDocument) tryInsert(ctx context.Context, path string, val sql
 		return nil, false, err
 	}
 
-	jsonCursor, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath)
+	jsonCursor, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath, false)
 	if err != nil {
 		return nil, false, err
 	}
-	cursorPath := jsonCursor.GetCurrentPath()
-	cmp := compareJsonLocations(cursorPath, keyPath)
-	if cmp == 0 {
+	if found {
 		// The key already exists in the document.
 		return i, false, nil
 	}
 
+	return i.insertIntoCursor(ctx, keyPath, jsonCursor, val)
+}
+
+func (i IndexedJsonDocument) insertIntoCursor(ctx context.Context, keyPath jsonLocation, jsonCursor *JsonCursor, val sql.JSONWrapper) (types.MutableJSON, bool, error) {
+	cursorPath := jsonCursor.GetCurrentPath()
+	// If the inserted path is equivalent to "$" (which also includes "$[0]" on non-arrays), do nothing.
 	if cursorPath.size() == 0 && cursorPath.getScannerState() == startOfValue {
 		return i, false, nil
@@ -272,6 +278,7 @@ func (i IndexedJsonDocument) tryInsert(ctx context.Context, path string, val sql
 	// For example, attempting to insert into the path "$.a.b" in the document {"a": 1}
 	// We can detect this by checking to see if the insertion point in the original document comes before the inserted path.
 	// (For example, the insertion point occurs at $.a.START, which is before $.a.b)
+	cmp := compareJsonLocations(cursorPath, keyPath)
 	if cmp < 0 && cursorPath.getScannerState() == startOfValue {
 		// We just attempted to insert into a scalar.
 		return i, false, nil
@@ -313,13 +320,70 @@ func (i IndexedJsonDocument) tryInsert(ctx context.Context, path string, val sql
 	return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil
 }
 
-// Remove is not yet implemented, so we call it on a types.JSONDocument instead.
-func (i IndexedJsonDocument) Remove(ctx context.Context, path string) (types.MutableJSON, bool, error) {
-	v, err := i.ToInterface()
+// Remove implements types.MutableJSON
+func (i IndexedJsonDocument) Remove(ctx context.Context, path string) (result types.MutableJSON, changed bool, err error) {
+	if path == "$" {
+		return nil, false, fmt.Errorf("The path expression '$' is not allowed in this context.")
+	}
+	err = tryWithFallback(
+		ctx,
+		i,
+		func() error {
+			result, changed, err = i.tryRemove(ctx, path)
+			return err
+		},
+		func(jsonDocument types.JSONDocument) error {
+			result, changed, err = jsonDocument.Remove(ctx, path)
+			return err
+		})
+	return result, changed, err
+}
+
+func (i IndexedJsonDocument) tryRemove(ctx context.Context, path string) (types.MutableJSON, bool, error) {
+	keyPath, err := jsonPathElementsFromMySQLJsonPath([]byte(path))
 	if err != nil {
 		return nil, false, err
 	}
-	return types.JSONDocument{Val: v}.Remove(ctx, path)
+
+	jsonCursor, found, err := newJsonCursor(ctx, i.m.NodeStore, i.m.Root, keyPath, true)
+	if err != nil {
+		return nil, false, err
+	}
+	if !found {
+		// The key does not exist in the document.
+		return i, false, nil
+	}
+
+	// The cursor is now pointing to the end of the value prior to the one being removed.
+	jsonChunker, err := newJsonChunker(ctx, jsonCursor, i.m.NodeStore)
+	if err != nil {
+		return nil, false, err
+	}
+
+	startofRemovedLocation := jsonCursor.GetCurrentPath()
+	startofRemovedLocation = startofRemovedLocation.Clone()
+	isInitialElement := startofRemovedLocation.getScannerState().isInitialElement()
+
+	// Advance the cursor to the end of the value being removed.
+	keyPath.setScannerState(endOfValue)
+	_, err = jsonCursor.AdvanceToLocation(ctx, keyPath, false)
+	if err != nil {
+		return nil, false, err
+	}
+
+	// If removing the first element of an object/array, skip past the comma, and set the chunker as if it's
+	// at the start of the object/array.
+	if isInitialElement && jsonCursor.jsonScanner.current() == ',' {
+		jsonCursor.jsonScanner.valueOffset++
+		jsonChunker.jScanner.currentPath = startofRemovedLocation
+	}
+
+	newRoot, err := jsonChunker.Done(ctx)
+	if err != nil {
+		return nil, false, err
+	}
+
+	return NewIndexedJsonDocument(ctx, newRoot, i.m.NodeStore), true, nil
 }
 
 // Set is not yet implemented, so we call it on a types.JSONDocument instead.
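
A rough usage sketch of the new Remove entry point, based only on the signatures added above (Remove, Lookup, and the found/changed semantics of tryRemove). It is not code from this PR: `removeSketch` is a hypothetical helper, and document construction is left to the caller because the constructor helpers' exact return types aren't shown in this diff.

```go
// Sketch only (not part of the PR): exercises the Remove/Lookup behavior added in
// json_indexed_document.go. Assumes the caller supplies an IndexedJsonDocument
// that contains a key "b".
package tree

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
)

func removeSketch(t *testing.T, ctx context.Context, doc IndexedJsonDocument) {
	// Removing the root path is rejected outright, mirroring MySQL's JSON_REMOVE.
	_, _, err := doc.Remove(ctx, "$")
	require.Error(t, err)

	// Removing an existing key returns a new document and changed == true.
	newDoc, changed, err := doc.Remove(ctx, "$.b")
	require.NoError(t, err)
	require.True(t, changed)

	// Removing a missing key is a no-op: the same document comes back, changed == false.
	_, changed, err = newDoc.(IndexedJsonDocument).Remove(ctx, "$.doesNotExist")
	require.NoError(t, err)
	require.False(t, changed)

	// The removed key no longer resolves.
	res, err := newDoc.(IndexedJsonDocument).Lookup(ctx, "$.b")
	require.NoError(t, err)
	require.Nil(t, res)
}
```
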
diff --git a/go/store/prolly/tree/json_indexed_document_test.go b/go/store/prolly/tree/json_indexed_document_test.go
index f6b94aa5e15..143857bc731 100644
--- a/go/store/prolly/tree/json_indexed_document_test.go
+++ b/go/store/prolly/tree/json_indexed_document_test.go
@@ -17,6 +17,7 @@ package tree
 import (
 	"context"
 	"fmt"
+	"slices"
 	"strings"
 	"testing"
 
@@ -177,6 +178,76 @@ func TestIndexedJsonDocument_Insert(t *testing.T) {
 }
 
+func TestIndexedJsonDocument_Remove(t *testing.T) {
+	ctx := sql.NewEmptyContext()
+	ns := NewTestNodeStore()
+	convertToIndexedJsonDocument := func(t *testing.T, s interface{}) interface{} {
+		return newIndexedJsonDocumentFromValue(t, ctx, ns, s)
+	}
+
+	testCases := jsontests.JsonRemoveTestCases(t, convertToIndexedJsonDocument)
+	jsontests.RunJsonTests(t, testCases)
+
+	t.Run("large document removals", func(t *testing.T) {
+
+		largeDoc := createLargeDocumentForTesting(t, ctx, ns)
+
+		for _, chunkBoundary := range largeDocumentChunkBoundaries {
+			t.Run(jsonPathTypeNames[chunkBoundary.pathType], func(t *testing.T) {
+				// For each tested chunk boundary, get the path to the object containing the value that crosses that boundary, and remove it.
+				removalPointString := chunkBoundary.path[:strings.LastIndex(chunkBoundary.path, ".")]
+
+				removalPointLocation, err := jsonPathElementsFromMySQLJsonPath([]byte(removalPointString))
+				require.NoError(t, err)
+
+				// Test that the value exists prior to removal
+				result, err := largeDoc.Lookup(ctx, removalPointString)
+				require.NoError(t, err)
+				require.NotNil(t, result)
+
+				newDoc, changed, err := largeDoc.Remove(ctx, removalPointString)
+				require.NoError(t, err)
+				require.True(t, changed)
+
+				// Test that the new value is valid by calling ToInterface
+				v, err := newDoc.ToInterface()
+				require.NoError(t, err)
+
+				// If the removed value was an object key, check that the key no longer exists.
+				// If the removed value was an array element, check that the parent array has shrunk.
+				if removalPointLocation.getLastPathElement().isArrayIndex {
+					// Check that the parent array has shrunk.
+					arrayIndex := int(removalPointLocation.getLastPathElement().getArrayIndex())
+					parentLocation := removalPointLocation.Clone()
+					parentLocation.pop()
+
+					getParentArray := func(doc IndexedJsonDocument) []interface{} {
+						arrayWrapper, err := doc.lookupByLocation(ctx, parentLocation)
+						require.NoError(t, err)
+						arrayInterface, err := arrayWrapper.ToInterface()
+						require.NoError(t, err)
+						require.IsType(t, []interface{}{}, arrayInterface)
+						return arrayInterface.([]interface{})
+					}
+
+					oldArray := getParentArray(largeDoc)
+					newArray := getParentArray(newDoc.(IndexedJsonDocument))
+
+					oldArray = slices.Delete(oldArray, arrayIndex, arrayIndex+1)
+
+					require.Equal(t, oldArray, newArray)
+				} else {
+					// Check that the key no longer exists.
+					newJsonDocument := types.JSONDocument{Val: v}
+					result, err = newJsonDocument.Lookup(ctx, removalPointString)
+					require.NoError(t, err)
+					require.Nil(t, result)
+				}
+			})
+		}
+	})
+}
+
 func TestIndexedJsonDocument_Extract(t *testing.T) {
 	ctx := context.Background()
 	ns := NewTestNodeStore()
diff --git a/go/store/prolly/tree/json_location.go b/go/store/prolly/tree/json_location.go
index 48c5681a8ff..b6c8a413be7 100644
--- a/go/store/prolly/tree/json_location.go
+++ b/go/store/prolly/tree/json_location.go
@@ -71,6 +71,10 @@ const (
 	endOfValue
 )
 
+func (t jsonPathType) isInitialElement() bool {
+	return t == objectInitialElement || t == arrayInitialElement
+}
+
 var unknownLocationKeyError = fmt.Errorf("A JSON document was written with a future version of Dolt, and the index metadata cannot be read. This will impact performance for large documents.")
 var unsupportedPathError = fmt.Errorf("The supplied JSON path is not supported for optimized lookup, falling back on unoptimized implementation.")
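
The comma handling that `isInitialElement` supports in tryRemove (and the matching endOfValue check in JsonChunker.Done) is easiest to see on a plain string. The following is a standalone illustration, not Dolt code: removing a non-initial member splices out the leading comma along with the key/value pair, while removing the initial member has no previous value to rewind to and must consume the trailing comma instead.

```go
// Standalone sketch: shows why removal splices need different comma handling for
// the initial element of an object versus later elements.
package main

import "fmt"

// spliceRemove drops doc[start:end] and returns the remaining text.
func spliceRemove(doc string, start, end int) string {
	return doc[:start] + doc[end:]
}

func main() {
	doc := `{"a":1,"b":2,"c":3}`

	// Removing "b" (not the initial element): cut from the end of the previous value
	// (just after the 1) through the end of b's value, taking the leading comma with it.
	fmt.Println(spliceRemove(doc, 6, 12)) // {"a":1,"c":3}

	// Removing "a" (the initial element): there is no previous value to rewind to, so
	// the cut starts just inside the opening brace and must consume the trailing comma.
	fmt.Println(spliceRemove(doc, 1, 7)) // {"b":2,"c":3}
}
```
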