From e21bd65650407b0ad2aa1eecfa2346a7f8b1a310 Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Tue, 7 Jan 2025 17:42:42 -0800 Subject: [PATCH] Add "dolt_dont_optimize_json" system variable. When set, Dolt will write Json documents to storage as simple blobs instead of path-indexed trees. --- .../doltcore/sqle/system_variables.go | 7 ++++ go/store/prolly/tree/prolly_fields.go | 33 +++++++++++++++---- integration-tests/bats/json.bats | 18 ++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/go/libraries/doltcore/sqle/system_variables.go b/go/libraries/doltcore/sqle/system_variables.go index ae5cd34e8b..61c406cf9a 100644 --- a/go/libraries/doltcore/sqle/system_variables.go +++ b/go/libraries/doltcore/sqle/system_variables.go @@ -211,6 +211,13 @@ var DoltSystemVariables = []sql.SystemVariable{ Type: types.NewSystemBoolType("dolt_dont_merge_json"), Default: int8(0), }, + &sql.MysqlSystemVariable{ + Name: "dolt_dont_optimize_json", + Dynamic: true, + Scope: sql.GetMysqlScope(sql.SystemVariableScope_Both), + Type: types.NewSystemBoolType("dolt_dont_optimize_json"), + Default: int8(0), + }, &sql.MysqlSystemVariable{ Name: dsess.DoltStatsAutoRefreshEnabled, Dynamic: true, diff --git a/go/store/prolly/tree/prolly_fields.go b/go/store/prolly/tree/prolly_fields.go index ac1face0de..e593239876 100644 --- a/go/store/prolly/tree/prolly_fields.go +++ b/go/store/prolly/tree/prolly_fields.go @@ -249,15 +249,10 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v } tb.PutGeometryAddr(i, h) case val.JSONAddrEnc: - j, err := convJson(v) + h, err := getJSONAddrHash(ctx, ns, v) if err != nil { return err } - root, err := SerializeJsonToAddr(ctx, ns, j) - if err != nil { - return err - } - h := root.HashOf() tb.PutJSONAddr(i, h) case val.BytesAddrEnc: h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(v.([]byte)), len(v.([]byte))) @@ -308,6 +303,32 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v return nil } +func getJSONAddrHash(ctx context.Context, ns NodeStore, v interface{}) (hash.Hash, error) { + j, err := convJson(v) + if err != nil { + return hash.Hash{}, err + } + sqlCtx, isSqlCtx := ctx.(*sql.Context) + if isSqlCtx { + dontOptimizeJson, err := sqlCtx.Session.GetSessionVariable(sqlCtx, "dolt_dont_optimize_json") + if err != nil { + return hash.Hash{}, err + } + if dontOptimizeJson != 0 { + buf, err := types.MarshallJson(j) + if err != nil { + return hash.Hash{}, err + } + return SerializeBytesToAddr(ctx, ns, bytes.NewReader(buf), len(buf)) + } + } + root, err := SerializeJsonToAddr(ctx, ns, j) + if err != nil { + return hash.Hash{}, err + } + return root.HashOf(), nil +} + func convInt(v interface{}) int { switch i := v.(type) { case int: diff --git a/integration-tests/bats/json.bats b/integration-tests/bats/json.bats index 2708d0671e..f580016690 100644 --- a/integration-tests/bats/json.bats +++ b/integration-tests/bats/json.bats @@ -261,3 +261,21 @@ SQL [ "$status" -eq 0 ] [ "${lines[1]}" = '1,"[{""a"":""<>&""}]"' ] } + +# This test inserts a large JSON document with the `dolt_dont_optimize_json` flag set. +# We expect that the document gets stored as a blob. +@test "json: Test dolt_dont_optimize_json system variable" { + run dolt sql <