Skip to content

Commit

Permalink
[store] row length guards for prolly serial messages (#7533)
Browse files Browse the repository at this point in the history
* Add row length guards

* tidy

* bump

* more check to integrator

* default panic

* comments

* bump

* better names

* more edits

* merge main

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

* smaller default noms string type

* bats tests valid row lengths

* more tests

* more row len tests

* [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/update.sh

---------

Co-authored-by: max-hoffman <[email protected]>
  • Loading branch information
max-hoffman and max-hoffman authored Feb 27, 2024
1 parent 2fbcb38 commit 6f61ac5
Show file tree
Hide file tree
Showing 19 changed files with 144 additions and 64 deletions.
4 changes: 2 additions & 2 deletions go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/dolthub/dolt/go/gen/proto/dolt/services/eventsapi v0.0.0-20201005193433-3ee972b1d078
github.com/dolthub/fslock v0.0.3
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488
github.com/dolthub/sqllogictest/go v0.0.0-20240222182842-573e6a164de8
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81
github.com/dolthub/vitess v0.0.0-20240209125211-6c93b0341608
github.com/dustin/go-humanize v1.0.1
github.com/fatih/color v1.13.0
Expand Down Expand Up @@ -57,7 +57,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.17.1-0.20240224070646-d75d49c4ec20
github.com/dolthub/go-mysql-server v0.17.1-0.20240226201640-a9f896c0352d
github.com/dolthub/swiss v0.1.0
github.com/goccy/go-json v0.10.2
github.com/google/go-github/v57 v57.0.0
Expand Down
8 changes: 4 additions & 4 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -183,16 +183,16 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
github.com/dolthub/go-mysql-server v0.17.1-0.20240224070646-d75d49c4ec20 h1:CAiVQ94yGzGiB4hVchI0exzzgndIYOIQ6JsTISQ4DVA=
github.com/dolthub/go-mysql-server v0.17.1-0.20240224070646-d75d49c4ec20/go.mod h1:282SPNGdGzoUa1GfBr/kKNMzMDSn9+eKIUPvYxlxIdg=
github.com/dolthub/go-mysql-server v0.17.1-0.20240226201640-a9f896c0352d h1:V5VPAZa6A2A9bCNZx7NiSGB8tb2PqHOh0gUHUgNxXN4=
github.com/dolthub/go-mysql-server v0.17.1-0.20240226201640-a9f896c0352d/go.mod h1:282SPNGdGzoUa1GfBr/kKNMzMDSn9+eKIUPvYxlxIdg=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514=
github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto=
github.com/dolthub/jsonpath v0.0.2-0.20240201003050-392940944c15 h1:sfTETOpsrNJPDn2KydiCtDgVu6Xopq8k3JP8PjFT22s=
github.com/dolthub/jsonpath v0.0.2-0.20240201003050-392940944c15/go.mod h1:2/2zjLQ/JOOSbbSboojeg+cAwcRV0fDLzIiWch/lhqI=
github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577 h1:SegEguMxToBn045KRHLIUlF2/jR7Y2qD6fF+3tdOfvI=
github.com/dolthub/maphash v0.0.0-20221220182448-74e1e1ea1577/go.mod h1:gkg4Ch4CdCDu5h6PMriVLawB7koZ+5ijb9puGMV50a4=
github.com/dolthub/sqllogictest/go v0.0.0-20240222182842-573e6a164de8 h1:nFOhaKpwXfts8HgCcHA43KvuYwosXTnk4y4VVxNEgvs=
github.com/dolthub/sqllogictest/go v0.0.0-20240222182842-573e6a164de8/go.mod h1:e/FIZVvT2IR53HBCAo41NjqgtEnjMJGKca3Y/dAmZaA=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9XGFa6q5Ap4Z/OhNkAMBaK5YeuEzwJt+NZdhiE=
github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY=
github.com/dolthub/swiss v0.1.0 h1:EaGQct3AqeP/MjASHLiH6i4TAmgbG/c4rA6a1bzCOPc=
github.com/dolthub/swiss v0.1.0/go.mod h1:BeucyB08Vb1G9tumVN3Vp/pyY4AMUnr9p7Rz7wJ7kAQ=
github.com/dolthub/vitess v0.0.0-20240209125211-6c93b0341608 h1:jnInva1KcJJf/QQsxbN9tTJckOZf73EzUen8rrik0Yw=
Expand Down
2 changes: 2 additions & 0 deletions go/libraries/doltcore/env/actions/infer_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
return typeinfo.TextType
} else if setHasType(ts, typeinfo.StringDefaultType) {
return typeinfo.StringDefaultType
} else if setHasType(ts, typeinfo.StringDefaultType) {
return typeinfo.StringDefaultType
}

hasNumeric := false
Expand Down
44 changes: 44 additions & 0 deletions go/libraries/doltcore/schema/schema_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"strings"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors"
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
"github.com/dolthub/vitess/go/vt/proto/query"

Expand Down Expand Up @@ -178,6 +179,13 @@ func ValidateForInsert(allCols *ColCollection) error {
seenPkCol = true
break
}
c.TypeInfo.ToSqlType()
}

if rowLen := MaxRowStorageSize(allCols); rowLen > int64(val.MaxTupleDataSize) {
// |val.MaxTupleDataSize| is less than |types.MaxRowLength| to account for
// serial message metadata
return analyzererrors.ErrInvalidRowLength.New(val.MaxTupleDataSize, rowLen)
}

if !seenPkCol && !FeatureFlagKeylessSchema {
Expand Down Expand Up @@ -208,6 +216,42 @@ func ValidateForInsert(allCols *ColCollection) error {
return err
}

// MaxRowStorageSize returns the storage length for Dolt types.
func MaxRowStorageSize(cols *ColCollection) int64 {
var numBytesPerRow int64 = 0
for _, col := range cols.cols {
switch n := col.TypeInfo.ToSqlType().(type) {
case sql.NumberType:
numBytesPerRow += 8
case sql.StringType:
if gmstypes.IsTextBlob(n) {
numBytesPerRow += 20
} else {
numBytesPerRow += n.MaxByteLength()
}
case gmstypes.BitType:
numBytesPerRow += 8
case sql.DatetimeType:
numBytesPerRow += 8
case sql.DecimalType:
numBytesPerRow += int64(n.MaximumScale())
case sql.EnumType:
numBytesPerRow += 2
case gmstypes.JsonType:
numBytesPerRow += 20
case sql.NullType:
numBytesPerRow += 1
case gmstypes.TimeType:
numBytesPerRow += 16
case sql.YearType:
numBytesPerRow += 8
default:
panic(fmt.Sprintf("unknown type in create table: %s", n.String()))
}
}
return numBytesPerRow
}

// isAutoIncrementKind returns true is |k| is a numeric kind.
func isAutoIncrementKind(k types.NomsKind) bool {
return k == types.IntKind || k == types.UintKind || k == types.FloatKind
Expand Down
3 changes: 1 addition & 2 deletions go/libraries/doltcore/schema/typeinfo/typeinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package typeinfo
import (
"context"
"fmt"
"math"

"github.com/dolthub/go-mysql-server/sql"
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
Expand Down Expand Up @@ -363,7 +362,7 @@ func FromKind(kind types.NomsKind) TypeInfo {
case types.FloatKind:
return Float64Type
case types.InlineBlobKind:
return &inlineBlobType{gmstypes.MustCreateBinary(sqltypes.VarBinary, math.MaxUint16)}
return &inlineBlobType{gmstypes.MustCreateBinary(sqltypes.VarBinary, MaxVarcharLength/16)}
case types.IntKind:
return Int64Type
case types.JSONKind:
Expand Down
5 changes: 3 additions & 2 deletions go/libraries/doltcore/schema/typeinfo/varstring.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ type varStringType struct {
var _ TypeInfo = (*varStringType)(nil)

var (
MaxVarcharLength = int64(16383)
StringDefaultType = &varStringType{gmstypes.MustCreateStringWithDefaults(sqltypes.VarChar, MaxVarcharLength)}
MaxVarcharLength = int64(16383)
// StringDefaultType is sized to 1k, which allows up to 16 fields per row
StringDefaultType = &varStringType{gmstypes.MustCreateStringWithDefaults(sqltypes.VarChar, MaxVarcharLength/16)}
)

func CreateVarStringTypeFromSqlType(stringType sql.StringType) TypeInfo {
Expand Down
4 changes: 3 additions & 1 deletion go/libraries/doltcore/sqle/dtables/docs_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,12 @@ func (dt *DocsTable) String() string {
return doltdb.DocTableName
}

const defaultStringsLen = 16383 / 16

// Schema is a sql.Table interface function that gets the sql.Schema of the dolt_docs system table.
func (dt *DocsTable) Schema() sql.Schema {
return []*sql.Column{
{Name: doltdb.DocPkColumnName, Type: sqlTypes.MustCreateString(sqltypes.VarChar, 16383, sql.Collation_Default), Source: doltdb.DocTableName, PrimaryKey: true, Nullable: false},
{Name: doltdb.DocPkColumnName, Type: sqlTypes.MustCreateString(sqltypes.VarChar, defaultStringsLen, sql.Collation_Default), Source: doltdb.DocTableName, PrimaryKey: true, Nullable: false},
{Name: doltdb.DocTextColumnName, Type: sqlTypes.LongText, Source: doltdb.DocTableName, PrimaryKey: false},
}
}
Expand Down
24 changes: 12 additions & 12 deletions go/libraries/doltcore/sqle/enginetest/ddl_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ var ModifyAndChangeColumnScripts = []queries.ScriptTest{
SetUpScript: SimpsonsSetup,
Assertions: []queries.ScriptTestAssertion{
{
Query: "alter table people modify column first_name varchar(16383) not null after last_name",
Query: "alter table people modify column first_name varchar(163) not null after last_name",
SkipResultsCheck: true,
},
{
Query: "show create table people",
Expected: []sql.Row{sql.Row{"people", "CREATE TABLE `people` (\n" +
" `id` int NOT NULL,\n" +
" `last_name` varchar(100) NOT NULL,\n" +
" `first_name` varchar(16383) NOT NULL,\n" +
" `first_name` varchar(163) NOT NULL,\n" +
" `is_married` tinyint,\n" +
" `age` int,\n" +
" `rating` float,\n" +
Expand All @@ -121,13 +121,13 @@ var ModifyAndChangeColumnScripts = []queries.ScriptTest{
SetUpScript: SimpsonsSetup,
Assertions: []queries.ScriptTestAssertion{
{
Query: "alter table people modify column first_name varchar(16383) not null first",
Query: "alter table people modify column first_name varchar(163) not null first",
SkipResultsCheck: true,
},
{
Query: "show create table people",
Expected: []sql.Row{sql.Row{"people", "CREATE TABLE `people` (\n" +
" `first_name` varchar(16383) NOT NULL,\n" +
" `first_name` varchar(163) NOT NULL,\n" +
" `id` int NOT NULL,\n" +
" `last_name` varchar(100) NOT NULL,\n" +
" `is_married` tinyint,\n" +
Expand Down Expand Up @@ -156,14 +156,14 @@ var ModifyAndChangeColumnScripts = []queries.ScriptTest{
SetUpScript: SimpsonsSetup,
Assertions: []queries.ScriptTestAssertion{
{
Query: "alter table people modify column first_name varchar(16383) null",
Query: "alter table people modify column first_name varchar(163) null",
SkipResultsCheck: true,
},
{
Query: "show create table people",
Expected: []sql.Row{sql.Row{"people", "CREATE TABLE `people` (\n" +
" `id` int NOT NULL,\n" +
" `first_name` varchar(16383),\n" +
" `first_name` varchar(163),\n" +
" `last_name` varchar(100) NOT NULL,\n" +
" `is_married` tinyint,\n" +
" `age` int,\n" +
Expand All @@ -184,15 +184,15 @@ var ModifyAndChangeColumnScripts = []queries.ScriptTest{
SetUpScript: SimpsonsSetup,
Assertions: []queries.ScriptTestAssertion{
{
Query: "alter table people change first_name christian_name varchar(16383) not null after last_name",
Query: "alter table people change first_name christian_name varchar(163) not null after last_name",
SkipResultsCheck: true,
},
{
Query: "show create table people",
Expected: []sql.Row{sql.Row{"people", "CREATE TABLE `people` (\n" +
" `id` int NOT NULL,\n" +
" `last_name` varchar(100) NOT NULL,\n" +
" `christian_name` varchar(16383) NOT NULL,\n" +
" `christian_name` varchar(163) NOT NULL,\n" +
" `is_married` tinyint,\n" +
" `age` int,\n" +
" `rating` float,\n" +
Expand All @@ -219,13 +219,13 @@ var ModifyAndChangeColumnScripts = []queries.ScriptTest{
SetUpScript: SimpsonsSetup,
Assertions: []queries.ScriptTestAssertion{
{
Query: "alter table people change column first_name christian_name varchar(16383) not null first",
Query: "alter table people change column first_name christian_name varchar(163) not null first",
SkipResultsCheck: true,
},
{
Query: "show create table people",
Expected: []sql.Row{sql.Row{"people", "CREATE TABLE `people` (\n" +
" `christian_name` varchar(16383) NOT NULL,\n" +
" `christian_name` varchar(163) NOT NULL,\n" +
" `id` int NOT NULL,\n" +
" `last_name` varchar(100) NOT NULL,\n" +
" `is_married` tinyint,\n" +
Expand Down Expand Up @@ -254,14 +254,14 @@ var ModifyAndChangeColumnScripts = []queries.ScriptTest{
SetUpScript: SimpsonsSetup,
Assertions: []queries.ScriptTestAssertion{
{
Query: "alter table people change column first_name first_name varchar(16383) null",
Query: "alter table people change column first_name first_name varchar(163) null",
SkipResultsCheck: true,
},
{
Query: "show create table people",
Expected: []sql.Row{sql.Row{"people", "CREATE TABLE `people` (\n" +
" `id` int NOT NULL,\n" +
" `first_name` varchar(16383),\n" +
" `first_name` varchar(163),\n" +
" `last_name` varchar(100) NOT NULL,\n" +
" `is_married` tinyint,\n" +
" `age` int,\n" +
Expand Down
14 changes: 14 additions & 0 deletions go/libraries/doltcore/sqle/enginetest/dolt_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,10 @@ func TestIgnoreIntoWithDuplicateUniqueKeyKeylessPrepared(t *testing.T) {
func TestInsertIntoErrors(t *testing.T) {
h := newDoltHarness(t)
defer h.Close()
h = h.WithSkippedQueries([]string{
"create table bad (vb varbinary(65535))",
"insert into bad values (repeat('0', 65536))",
})
enginetest.TestInsertIntoErrors(t, h)
}

Expand Down Expand Up @@ -880,6 +884,12 @@ func TestCreateTable(t *testing.T) {
enginetest.TestCreateTable(t, h)
}

func TestRowLimit(t *testing.T) {
h := newDoltHarness(t)
defer h.Close()
enginetest.TestRowLimit(t, h)
}

func TestBranchDdl(t *testing.T) {
for _, script := range DdlBranchTests {
func() {
Expand Down Expand Up @@ -2719,6 +2729,10 @@ func TestInsertErrorScriptsPrepared(t *testing.T) {
skipPreparedTests(t)
h := newDoltHarness(t)
defer h.Close()
h = h.WithSkippedQueries([]string{
"create table bad (vb varbinary(65535))",
"insert into bad values (repeat('0', 65536))",
})
enginetest.TestInsertErrorScriptsPrepared(t, h)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3414,7 +3414,7 @@ var MergeArtifactsScripts = []queries.ScriptTest{
{
Name: "regression test for bad column ordering in schema",
SetUpScript: []string{
"CREATE TABLE t (col1 enum ('A', 'B'), col2 varchar(max), primary key (col2));",
"CREATE TABLE t (col1 enum ('A', 'B'), col2 varchar(100), primary key (col2));",
"ALTER TABLE t add unique index (col1);",
"call DOLT_COMMIT('-Am', 'initial');",

Expand Down
2 changes: 1 addition & 1 deletion go/libraries/doltcore/sqle/sqlinsert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ var BasicInsertTests = []InsertTest{
{
Name: "insert partial columns existing pk",
AdditionalSetup: ExecuteSetupSQL(context.Background(), `
CREATE TABLE temppeople (id bigint primary key, first_name varchar(16383), last_name varchar(16383));
CREATE TABLE temppeople (id bigint primary key, first_name varchar(1023), last_name varchar(1023));
INSERT INTO temppeople VALUES (2, 'Bart', 'Simpson');`),
InsertQuery: "insert into temppeople (id, first_name, last_name) values (2, 'Bart', 'Simpson')",
ExpectedErr: "duplicate primary key",
Expand Down
2 changes: 1 addition & 1 deletion go/libraries/doltcore/sqle/sqlreplace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ var BasicReplaceTests = []ReplaceTest{
{
Name: "replace partial columns existing pk",
AdditionalSetup: ExecuteSetupSQL(context.Background(), `
CREATE TABLE temppeople (id bigint primary key, first_name varchar(16383), last_name varchar(16383), num bigint);
CREATE TABLE temppeople (id bigint primary key, first_name varchar(1023), last_name varchar(1023), num bigint);
INSERT INTO temppeople VALUES (2, 'Bart', 'Simpson', 44);`),
ReplaceQuery: "replace into temppeople (id, first_name, last_name, num) values (2, 'Bart', 'Simpson', 88)",
SelectQuery: "select id, first_name, last_name, num from temppeople where id = 2 ORDER BY id",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ func TestEndToEnd(t *testing.T) {
tableName := "people"
dropCreateStatement := sqlfmt.DropTableIfExistsStmt(tableName) + "\n" +
"CREATE TABLE `people` (\n" +
" `id` varchar(16383) NOT NULL,\n" +
" `name` varchar(16383) NOT NULL,\n" +
" `id` varchar(1023) NOT NULL,\n" +
" `name` varchar(1023) NOT NULL,\n" +
" `age` bigint unsigned NOT NULL,\n" +
" `is_married` bigint NOT NULL,\n" +
" `title` varchar(16383),\n" +
" `title` varchar(1023),\n" +
" PRIMARY KEY (`id`),\n" +
" KEY `idx_name` (`name`),\n" +
" CONSTRAINT `test-check` CHECK ((`age` < 123))\n" +
Expand Down
2 changes: 1 addition & 1 deletion go/store/prolly/tree/prolly_fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v
case val.SetEnc:
tb.PutSet(i, v.(uint64))
case val.StringEnc:
tb.PutString(i, v.(string))
return tb.PutString(i, v.(string))
case val.ByteStringEnc:
if s, ok := v.(string); ok {
if len(s) > math.MaxUint16 {
Expand Down
17 changes: 13 additions & 4 deletions go/store/val/tuple.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,23 @@ package val
import (
"math"

"github.com/dolthub/dolt/go/store/hash"

"github.com/dolthub/dolt/go/store/pool"
)

const (
MaxTupleFields = 4096
MaxTupleDataSize ByteSize = math.MaxUint16

countSize ByteSize = 2
MaxTupleFields = 4096
countSize ByteSize = 2
nodeCountSize = uint64Size
treeLevelSize = uint8Size

// MaxTupleDataSize is the maximum KV length considering the extra
// flatbuffer metadata required to serialize the message. This number
// implicitly checks the "last row" size that will append chunk level
// metadata. Key and value offsets per field are excluded from this number.
// (uint16) - (field count) - (content hash) - (node count) - (tree level)
MaxTupleDataSize ByteSize = math.MaxUint16 - countSize - hash.ByteLen - nodeCountSize - treeLevelSize
)

// A Tuple is a vector of fields encoded as a byte slice. Key-Value Tuple pairs
Expand Down
Loading

0 comments on commit 6f61ac5

Please sign in to comment.