diff --git a/go/gen/fb/serial/encoding.go b/go/gen/fb/serial/encoding.go index 350aeb3814d..8d3eaa33d41 100644 --- a/go/gen/fb/serial/encoding.go +++ b/go/gen/fb/serial/encoding.go @@ -23,102 +23,108 @@ import ( type Encoding byte const ( - EncodingNull Encoding = 0 - EncodingInt8 Encoding = 1 - EncodingUint8 Encoding = 2 - EncodingInt16 Encoding = 3 - EncodingUint16 Encoding = 4 - EncodingInt32 Encoding = 7 - EncodingUint32 Encoding = 8 - EncodingInt64 Encoding = 9 - EncodingUint64 Encoding = 10 - EncodingFloat32 Encoding = 11 - EncodingFloat64 Encoding = 12 - EncodingBit64 Encoding = 13 - EncodingHash128 Encoding = 14 - EncodingYear Encoding = 15 - EncodingDate Encoding = 16 - EncodingTime Encoding = 17 - EncodingDatetime Encoding = 18 - EncodingEnum Encoding = 19 - EncodingSet Encoding = 20 - EncodingBytesAddr Encoding = 21 - EncodingCommitAddr Encoding = 22 - EncodingStringAddr Encoding = 23 - EncodingJSONAddr Encoding = 24 - EncodingCell Encoding = 25 - EncodingGeomAddr Encoding = 26 - EncodingString Encoding = 128 - EncodingBytes Encoding = 129 - EncodingDecimal Encoding = 130 - EncodingJSON Encoding = 131 - EncodingGeometry Encoding = 133 + EncodingNull Encoding = 0 + EncodingInt8 Encoding = 1 + EncodingUint8 Encoding = 2 + EncodingInt16 Encoding = 3 + EncodingUint16 Encoding = 4 + EncodingInt32 Encoding = 7 + EncodingUint32 Encoding = 8 + EncodingInt64 Encoding = 9 + EncodingUint64 Encoding = 10 + EncodingFloat32 Encoding = 11 + EncodingFloat64 Encoding = 12 + EncodingBit64 Encoding = 13 + EncodingHash128 Encoding = 14 + EncodingYear Encoding = 15 + EncodingDate Encoding = 16 + EncodingTime Encoding = 17 + EncodingDatetime Encoding = 18 + EncodingEnum Encoding = 19 + EncodingSet Encoding = 20 + EncodingBytesAddr Encoding = 21 + EncodingCommitAddr Encoding = 22 + EncodingStringAddr Encoding = 23 + EncodingJSONAddr Encoding = 24 + EncodingCell Encoding = 25 + EncodingGeomAddr Encoding = 26 + EncodingExtendedAddr Encoding = 27 + EncodingString Encoding = 128 + 
EncodingBytes Encoding = 129 + EncodingDecimal Encoding = 130 + EncodingJSON Encoding = 131 + EncodingGeometry Encoding = 133 + EncodingExtended Encoding = 134 ) var EnumNamesEncoding = map[Encoding]string{ - EncodingNull: "Null", - EncodingInt8: "Int8", - EncodingUint8: "Uint8", - EncodingInt16: "Int16", - EncodingUint16: "Uint16", - EncodingInt32: "Int32", - EncodingUint32: "Uint32", - EncodingInt64: "Int64", - EncodingUint64: "Uint64", - EncodingFloat32: "Float32", - EncodingFloat64: "Float64", - EncodingBit64: "Bit64", - EncodingHash128: "Hash128", - EncodingYear: "Year", - EncodingDate: "Date", - EncodingTime: "Time", - EncodingDatetime: "Datetime", - EncodingEnum: "Enum", - EncodingSet: "Set", - EncodingBytesAddr: "BytesAddr", - EncodingCommitAddr: "CommitAddr", - EncodingStringAddr: "StringAddr", - EncodingJSONAddr: "JSONAddr", - EncodingCell: "Cell", - EncodingGeomAddr: "GeomAddr", - EncodingString: "String", - EncodingBytes: "Bytes", - EncodingDecimal: "Decimal", - EncodingJSON: "JSON", - EncodingGeometry: "Geometry", + EncodingNull: "Null", + EncodingInt8: "Int8", + EncodingUint8: "Uint8", + EncodingInt16: "Int16", + EncodingUint16: "Uint16", + EncodingInt32: "Int32", + EncodingUint32: "Uint32", + EncodingInt64: "Int64", + EncodingUint64: "Uint64", + EncodingFloat32: "Float32", + EncodingFloat64: "Float64", + EncodingBit64: "Bit64", + EncodingHash128: "Hash128", + EncodingYear: "Year", + EncodingDate: "Date", + EncodingTime: "Time", + EncodingDatetime: "Datetime", + EncodingEnum: "Enum", + EncodingSet: "Set", + EncodingBytesAddr: "BytesAddr", + EncodingCommitAddr: "CommitAddr", + EncodingStringAddr: "StringAddr", + EncodingJSONAddr: "JSONAddr", + EncodingCell: "Cell", + EncodingGeomAddr: "GeomAddr", + EncodingExtendedAddr: "ExtendedAddr", + EncodingString: "String", + EncodingBytes: "Bytes", + EncodingDecimal: "Decimal", + EncodingJSON: "JSON", + EncodingGeometry: "Geometry", + EncodingExtended: "Extended", } var EnumValuesEncoding = map[string]Encoding{ 
- "Null": EncodingNull, - "Int8": EncodingInt8, - "Uint8": EncodingUint8, - "Int16": EncodingInt16, - "Uint16": EncodingUint16, - "Int32": EncodingInt32, - "Uint32": EncodingUint32, - "Int64": EncodingInt64, - "Uint64": EncodingUint64, - "Float32": EncodingFloat32, - "Float64": EncodingFloat64, - "Bit64": EncodingBit64, - "Hash128": EncodingHash128, - "Year": EncodingYear, - "Date": EncodingDate, - "Time": EncodingTime, - "Datetime": EncodingDatetime, - "Enum": EncodingEnum, - "Set": EncodingSet, - "BytesAddr": EncodingBytesAddr, - "CommitAddr": EncodingCommitAddr, - "StringAddr": EncodingStringAddr, - "JSONAddr": EncodingJSONAddr, - "Cell": EncodingCell, - "GeomAddr": EncodingGeomAddr, - "String": EncodingString, - "Bytes": EncodingBytes, - "Decimal": EncodingDecimal, - "JSON": EncodingJSON, - "Geometry": EncodingGeometry, + "Null": EncodingNull, + "Int8": EncodingInt8, + "Uint8": EncodingUint8, + "Int16": EncodingInt16, + "Uint16": EncodingUint16, + "Int32": EncodingInt32, + "Uint32": EncodingUint32, + "Int64": EncodingInt64, + "Uint64": EncodingUint64, + "Float32": EncodingFloat32, + "Float64": EncodingFloat64, + "Bit64": EncodingBit64, + "Hash128": EncodingHash128, + "Year": EncodingYear, + "Date": EncodingDate, + "Time": EncodingTime, + "Datetime": EncodingDatetime, + "Enum": EncodingEnum, + "Set": EncodingSet, + "BytesAddr": EncodingBytesAddr, + "CommitAddr": EncodingCommitAddr, + "StringAddr": EncodingStringAddr, + "JSONAddr": EncodingJSONAddr, + "Cell": EncodingCell, + "GeomAddr": EncodingGeomAddr, + "ExtendedAddr": EncodingExtendedAddr, + "String": EncodingString, + "Bytes": EncodingBytes, + "Decimal": EncodingDecimal, + "JSON": EncodingJSON, + "Geometry": EncodingGeometry, + "Extended": EncodingExtended, } func (v Encoding) String() string { diff --git a/go/go.mod b/go/go.mod index bda87abc7c1..2fe1e57b448 100644 --- a/go/go.mod +++ b/go/go.mod @@ -15,7 +15,7 @@ require ( github.com/dolthub/fslock v0.0.3 github.com/dolthub/ishell 
v0.0.0-20221214210346-d7db0b066488 github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 - github.com/dolthub/vitess v0.0.0-20240206204925-6acf16fa777c + github.com/dolthub/vitess v0.0.0-20240207121055-c057d2347007 github.com/dustin/go-humanize v1.0.1 github.com/fatih/color v1.13.0 github.com/flynn-archive/go-shlex v0.0.0-20150515145356-3f9db97f8568 @@ -57,7 +57,7 @@ require ( github.com/cespare/xxhash v1.1.0 github.com/creasty/defaults v1.6.0 github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2 - github.com/dolthub/go-mysql-server v0.17.1-0.20240207001922-0aaa10ab846e + github.com/dolthub/go-mysql-server v0.17.1-0.20240207124505-c0f397a6aaca github.com/dolthub/swiss v0.1.0 github.com/goccy/go-json v0.10.2 github.com/google/go-github/v57 v57.0.0 diff --git a/go/go.sum b/go/go.sum index c0284f81a81..aec27389a53 100644 --- a/go/go.sum +++ b/go/go.sum @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U= github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y= github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168= -github.com/dolthub/go-mysql-server v0.17.1-0.20240207001922-0aaa10ab846e h1:adkEQion8xhowSQSk9NenvrpGt0U6sG6Yr1OaOA7Xjs= -github.com/dolthub/go-mysql-server v0.17.1-0.20240207001922-0aaa10ab846e/go.mod h1:Kjfapc/1feJ3/WgTgiY0LoR5x/3ax5PJ26brPnvYUO8= +github.com/dolthub/go-mysql-server v0.17.1-0.20240207124505-c0f397a6aaca h1:tI3X4fIUTOT0N8n+GYkPNa384WlJoOBcztK5c5mBzjU= +github.com/dolthub/go-mysql-server v0.17.1-0.20240207124505-c0f397a6aaca/go.mod h1:ANK0a6tyjrZ2cOzDJT3nFsDp80xksI4UfeijFlvnjwE= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488 h1:0HHu0GWJH0N6a6keStrHhUAK5/o9LVfkh44pvsV4514= github.com/dolthub/ishell v0.0.0-20221214210346-d7db0b066488/go.mod 
h1:ehexgi1mPxRTk0Mok/pADALuHbvATulTh6gzr7NzZto= github.com/dolthub/jsonpath v0.0.2-0.20240201003050-392940944c15 h1:sfTETOpsrNJPDn2KydiCtDgVu6Xopq8k3JP8PjFT22s= @@ -195,10 +195,8 @@ github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81 h1:7/v8q9X github.com/dolthub/sqllogictest/go v0.0.0-20201107003712-816f3ae12d81/go.mod h1:siLfyv2c92W1eN/R4QqG/+RjjX5W2+gCTRjZxBjI3TY= github.com/dolthub/swiss v0.1.0 h1:EaGQct3AqeP/MjASHLiH6i4TAmgbG/c4rA6a1bzCOPc= github.com/dolthub/swiss v0.1.0/go.mod h1:BeucyB08Vb1G9tumVN3Vp/pyY4AMUnr9p7Rz7wJ7kAQ= -github.com/dolthub/vitess v0.0.0-20240205203605-9e6c6d650813 h1:tGwsoLAMFQ+7FDEyIWOIJ1Vc/nptbFi0Fh7SQahB8ro= -github.com/dolthub/vitess v0.0.0-20240205203605-9e6c6d650813/go.mod h1:IwjNXSQPymrja5pVqmfnYdcy7Uv7eNJNBPK/MEh9OOw= -github.com/dolthub/vitess v0.0.0-20240206204925-6acf16fa777c h1:Zt23BHsxvPHGfpHV9k/FcsHqWZjfybyQQux2OLpRni8= -github.com/dolthub/vitess v0.0.0-20240206204925-6acf16fa777c/go.mod h1:IwjNXSQPymrja5pVqmfnYdcy7Uv7eNJNBPK/MEh9OOw= +github.com/dolthub/vitess v0.0.0-20240207121055-c057d2347007 h1:MvFoe0FnHhxQLyp4Ldw0HRj1yu83YErbtbr7XxhaIFk= +github.com/dolthub/vitess v0.0.0-20240207121055-c057d2347007/go.mod h1:IwjNXSQPymrja5pVqmfnYdcy7Uv7eNJNBPK/MEh9OOw= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= diff --git a/go/libraries/doltcore/migrate/tuples.go b/go/libraries/doltcore/migrate/tuples.go index d73bbb9a24d..9bedc637a48 100644 --- a/go/libraries/doltcore/migrate/tuples.go +++ b/go/libraries/doltcore/migrate/tuples.go @@ -175,6 +175,9 @@ func translateNomsField(ctx context.Context, ns tree.NodeStore, value types.Valu case types.BlobKind: return translateBlobField(ctx, ns, value.(types.Blob), idx, b) + case types.ExtendedKind: + return fmt.Errorf("extended 
types are invalid during migration") + default: return fmt.Errorf("encountered unexpected NomsKind %s", types.KindToString[nk]) diff --git a/go/libraries/doltcore/schema/encoding/serialization.go b/go/libraries/doltcore/schema/encoding/serialization.go index 5379b100523..3eb53784b85 100644 --- a/go/libraries/doltcore/schema/encoding/serialization.go +++ b/go/libraries/doltcore/schema/encoding/serialization.go @@ -638,6 +638,15 @@ func sqlTypeString(t typeinfo.TypeInfo) string { return typ.String() } + // Extended types are string serializable, so we'll just prepend a tag + if extendedType, ok := typ.(sqltypes.ExtendedType); ok { + serializedType, err := sqltypes.SerializeTypeToString(extendedType) + if err != nil { + panic(err) + } + return planbuilder.ExtendedTypeTag + serializedType + } + return typ.String() } @@ -650,7 +659,7 @@ func typeinfoFromSqlType(s string) (typeinfo.TypeInfo, error) { } func encodingFromTypeinfo(t typeinfo.TypeInfo) serial.Encoding { - return schema.EncodingFromSqlType(t.ToSqlType().Type()) + return schema.EncodingFromSqlType(t.ToSqlType()) } func constraintsFromSerialColumn(col *serial.Column) (cc []schema.ColConstraint) { diff --git a/go/libraries/doltcore/schema/schema_impl.go b/go/libraries/doltcore/schema/schema_impl.go index 5553e4e6bca..93ea61fcf45 100644 --- a/go/libraries/doltcore/schema/schema_impl.go +++ b/go/libraries/doltcore/schema/schema_impl.go @@ -21,6 +21,7 @@ import ( "strings" "github.com/dolthub/go-mysql-server/sql" + gmstypes "github.com/dolthub/go-mysql-server/sql/types" "github.com/dolthub/vitess/go/vt/proto/query" "github.com/dolthub/dolt/go/gen/fb/serial" @@ -438,6 +439,7 @@ func (si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.Tu } var tt []val.Type + var handlers []val.TupleTypeHandler useCollations := false // We only use collations if a string exists var collations []sql.CollationID _ = si.GetPKCols().Iter(func(tag uint64, col Column) (stop bool, err error) { @@ -452,12 +454,12 @@ func 
(si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.Tu if convertAddressColumns && !contentHashedField && queryType == query.Type_BLOB { t = val.Type{ - Enc: val.Encoding(EncodingFromSqlType(query.Type_VARBINARY)), + Enc: val.Encoding(EncodingFromQueryType(query.Type_VARBINARY)), Nullable: columnMissingNotNullConstraint(col), } } else if convertAddressColumns && !contentHashedField && queryType == query.Type_TEXT { t = val.Type{ - Enc: val.Encoding(EncodingFromSqlType(query.Type_VARCHAR)), + Enc: val.Encoding(EncodingFromQueryType(query.Type_VARCHAR)), Nullable: columnMissingNotNullConstraint(col), } } else if convertAddressColumns && !contentHashedField && queryType == query.Type_GEOMETRY { @@ -467,7 +469,7 @@ func (si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.Tu } } else { t = val.Type{ - Enc: val.Encoding(EncodingFromSqlType(queryType)), + Enc: val.Encoding(EncodingFromSqlType(sqlType)), Nullable: columnMissingNotNullConstraint(col), } } @@ -478,6 +480,12 @@ func (si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.Tu } else { collations = append(collations, sql.Collation_Unspecified) } + + if extendedType, ok := sqlType.(gmstypes.ExtendedType); ok { + handlers = append(handlers, extendedType) + } else { + handlers = append(handlers, nil) + } return }) @@ -486,18 +494,20 @@ func (si *schemaImpl) getKeyColumnsDescriptor(convertAddressColumns bool) val.Tu panic(fmt.Errorf("cannot create tuple descriptor from %d collations and %d types", len(collations), len(tt))) } cmp := CollationTupleComparator{Collations: collations} - return val.NewTupleDescriptorWithComparator(cmp, tt...) + return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Comparator: cmp, Handlers: handlers}, tt...) } else { - return val.NewTupleDescriptor(tt...) + return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Handlers: handlers}, tt...) } } // GetValueDescriptor implements the Schema interface. 
func (si *schemaImpl) GetValueDescriptor() val.TupleDesc { var tt []val.Type + var handlers []val.TupleTypeHandler var collations []sql.CollationID if IsKeyless(si) { tt = []val.Type{val.KeylessCardType} + handlers = []val.TupleTypeHandler{nil} collations = []sql.CollationID{sql.Collation_Unspecified} } @@ -510,7 +520,7 @@ func (si *schemaImpl) GetValueDescriptor() val.TupleDesc { sqlType := col.TypeInfo.ToSqlType() queryType := sqlType.Type() tt = append(tt, val.Type{ - Enc: val.Encoding(EncodingFromSqlType(queryType)), + Enc: val.Encoding(EncodingFromSqlType(sqlType)), Nullable: col.IsNullable(), }) if queryType == query.Type_CHAR || queryType == query.Type_VARCHAR { @@ -519,6 +529,12 @@ func (si *schemaImpl) GetValueDescriptor() val.TupleDesc { } else { collations = append(collations, sql.Collation_Unspecified) } + + if extendedType, ok := sqlType.(gmstypes.ExtendedType); ok { + handlers = append(handlers, extendedType) + } else { + handlers = append(handlers, nil) + } return }) @@ -527,9 +543,9 @@ func (si *schemaImpl) GetValueDescriptor() val.TupleDesc { panic(fmt.Errorf("cannot create tuple descriptor from %d collations and %d types", len(collations), len(tt))) } cmp := CollationTupleComparator{Collations: collations} - return val.NewTupleDescriptorWithComparator(cmp, tt...) + return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Comparator: cmp, Handlers: handlers}, tt...) } else { - return val.NewTupleDescriptor(tt...) + return val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{Handlers: handlers}, tt...) 
} }
diff --git a/go/libraries/doltcore/schema/serial_encoding.go b/go/libraries/doltcore/schema/serial_encoding.go
index 271e670a501..e2b16296257 100644
--- a/go/libraries/doltcore/schema/serial_encoding.go
+++ b/go/libraries/doltcore/schema/serial_encoding.go
@@ -17,13 +17,30 @@ package schema
 import (
 	"fmt"
 
+	"github.com/dolthub/go-mysql-server/sql"
+	"github.com/dolthub/go-mysql-server/sql/types"
 	"github.com/dolthub/vitess/go/vt/proto/query"
 
 	"github.com/dolthub/dolt/go/gen/fb/serial"
 )
 
-// EncodingFromSqlType returns a serial.Encoding for a query.Type.
-func EncodingFromSqlType(typ query.Type) serial.Encoding {
+// EncodingFromSqlType returns a serial.Encoding for a sql.Type.
+func EncodingFromSqlType(typ sql.Type) serial.Encoding {
+	if extendedType, ok := typ.(types.ExtendedType); ok {
+		switch extendedType.MaxSerializedWidth() {
+		case types.ExtendedTypeSerializedWidth_64K:
+			return serial.EncodingExtended
+		case types.ExtendedTypeSerializedWidth_Unbounded:
+			return serial.EncodingExtendedAddr
+		default:
+			panic(fmt.Errorf("unknown serialization width"))
+		}
+	}
+	return EncodingFromQueryType(typ.Type())
+}
+
+// EncodingFromQueryType returns a serial.Encoding for a query.Type.
+func EncodingFromQueryType(typ query.Type) serial.Encoding {
 	switch typ {
 	case query.Type_INT8:
 		return serial.EncodingInt8
diff --git a/go/libraries/doltcore/schema/typeinfo/extended.go b/go/libraries/doltcore/schema/typeinfo/extended.go
new file mode 100644
index 00000000000..48327015671
--- /dev/null
+++ b/go/libraries/doltcore/schema/typeinfo/extended.go
@@ -0,0 +1,129 @@
+// Copyright 2024 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package typeinfo
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/dolthub/go-mysql-server/sql"
+	gmstypes "github.com/dolthub/go-mysql-server/sql/types"
+
+	"github.com/dolthub/dolt/go/store/types"
+)
+
+const (
+	extendedTypeParams_string_encoded = "string_encoded"
+)
+
+// extendedType is a type that refers to an ExtendedType in GMS. These are only supported in the new format, and have many
+// more limitations than traditional types (for now).
+type extendedType struct {
+	sqlExtendedType gmstypes.ExtendedType
+}
+
+var _ TypeInfo = (*extendedType)(nil)
+
+// CreateExtendedTypeFromParams creates a TypeInfo from the given parameter map.
+func CreateExtendedTypeFromParams(params map[string]string) (TypeInfo, error) {
+	if encodedString, ok := params[extendedTypeParams_string_encoded]; ok {
+		t, err := gmstypes.DeserializeTypeFromString(encodedString)
+		if err != nil {
+			return nil, err
+		}
+		return &extendedType{t}, nil
+	}
+	return nil, fmt.Errorf(`create extended type info is missing "%v" param`, extendedTypeParams_string_encoded)
+}
+
+// CreateExtendedTypeFromSqlType creates a TypeInfo from the given extended type.
+func CreateExtendedTypeFromSqlType(typ gmstypes.ExtendedType) TypeInfo {
+	return &extendedType{typ}
+}
+
+// ConvertNomsValueToValue implements the TypeInfo interface.
+func (ti *extendedType) ConvertNomsValueToValue(v types.Value) (interface{}, error) {
+	return nil, fmt.Errorf(`"%v" is not valid in the old format`, ti.String())
+}
+
+// ReadFrom always returns an error, as extended types cannot be read from a noms types.CodecReader (old format).
+func (ti *extendedType) ReadFrom(_ *types.NomsBinFormat, reader types.CodecReader) (interface{}, error) {
+	return nil, fmt.Errorf(`"%v" is not valid in the old format`, ti.String())
+}
+
+// ConvertValueToNomsValue implements the TypeInfo interface.
+func (ti *extendedType) ConvertValueToNomsValue(ctx context.Context, vrw types.ValueReadWriter, v interface{}) (types.Value, error) {
+	return nil, fmt.Errorf(`"%v" is not valid in the old format`, ti.String())
+}
+
+// Equals implements the TypeInfo interface.
+func (ti *extendedType) Equals(other TypeInfo) bool {
+	if other == nil {
+		return false
+	}
+	if ti2, ok := other.(*extendedType); ok {
+		return ti.sqlExtendedType.Equals(ti2.sqlExtendedType)
+	}
+	return false
+}
+
+// FormatValue implements the TypeInfo interface.
+func (ti *extendedType) FormatValue(v types.Value) (*string, error) {
+	return nil, fmt.Errorf(`"%v" is not valid in the old format`, ti.String())
+}
+
+// GetTypeIdentifier implements the TypeInfo interface.
+func (ti *extendedType) GetTypeIdentifier() Identifier {
+	return ExtendedTypeIdentifier
+}
+
+// GetTypeParams implements the TypeInfo interface.
+func (ti *extendedType) GetTypeParams() map[string]string {
+	serializedString, err := gmstypes.SerializeTypeToString(ti.sqlExtendedType)
+	if err != nil {
+		panic(err)
+	}
+	return map[string]string{extendedTypeParams_string_encoded: serializedString}
+}
+
+// IsValid implements the TypeInfo interface.
+func (ti *extendedType) IsValid(v types.Value) bool {
+	return true
+}
+
+// NomsKind implements the TypeInfo interface.
+func (ti *extendedType) NomsKind() types.NomsKind {
+	return types.ExtendedKind
+}
+
+// Promote implements the TypeInfo interface.
+func (ti *extendedType) Promote() TypeInfo {
+	// A checked assertion keeps the receiver when the promoted SQL type is not an ExtendedType, instead of panicking.
+	promoted, ok := ti.sqlExtendedType.Promote().(gmstypes.ExtendedType)
+	if !ok {
+		return ti
+	}
+	return &extendedType{promoted}
+}
+
+// String implements the TypeInfo interface.
+func (ti *extendedType) String() string {
+	return ti.sqlExtendedType.String()
+}
+
+// ToSqlType implements the TypeInfo interface.
+func (ti *extendedType) ToSqlType() sql.Type {
+	return ti.sqlExtendedType
+}
diff --git a/go/libraries/doltcore/schema/typeinfo/typeconverter.go b/go/libraries/doltcore/schema/typeinfo/typeconverter.go
index 5df9171e540..38e7b6fd643 100644
--- a/go/libraries/doltcore/schema/typeinfo/typeconverter.go
+++ b/go/libraries/doltcore/schema/typeinfo/typeconverter.go
@@ -56,6 +56,8 @@ func GetTypeConverter(ctx context.Context, srcTi TypeInfo, destTi TypeInfo) (tc
 		return blobStringTypeConverter(ctx, src, destTi)
 	case *boolType:
 		return boolTypeConverter(ctx, src, destTi)
+	case *extendedType:
+		return nil, false, fmt.Errorf("extended types require conversion at a different layer")
 	case *datetimeType:
 		return datetimeTypeConverter(ctx, src, destTi)
 	case *decimalType:
@@ -132,6 +134,8 @@ func wrapConvertValueToNomsValue(
 			vInt = string(str)
 		case types.Bool:
 			vInt = bool(val)
+		case types.Extended:
+			return nil, fmt.Errorf("cannot convert to a custom type")
 		case types.Decimal:
 			vInt = decimal.Decimal(val).String()
 		case types.Float:
diff --git a/go/libraries/doltcore/schema/typeinfo/typeinfo.go b/go/libraries/doltcore/schema/typeinfo/typeinfo.go
index bca2437edcf..29e45ba7b02 100644
--- a/go/libraries/doltcore/schema/typeinfo/typeinfo.go
+++ b/go/libraries/doltcore/schema/typeinfo/typeinfo.go
@@ -56,6 +56,7 @@ const (
 	MultiLineStringTypeIdentifier    Identifier = "multilinestring"
 	MultiPolygonTypeIdentifier       Identifier = "multipolygon"
 	GeometryCollectionTypeIdentifier Identifier = "geometrycollection"
+	ExtendedTypeIdentifier           Identifier = "extended"
 )
 
 var Identifiers = map[Identifier]struct{}{
@@ -86,6 +87,7 @@ var Identifiers = map[Identifier]struct{}{
MultiLineStringTypeIdentifier: {}, MultiPolygonTypeIdentifier: {}, GeometryCollectionTypeIdentifier: {}, + ExtendedTypeIdentifier: {}, } // TypeInfo is an interface used for encoding type information. @@ -135,6 +137,9 @@ type TypeInfo interface { // FromSqlType takes in a sql.Type and returns the most relevant TypeInfo. func FromSqlType(sqlType sql.Type) (TypeInfo, error) { + if customType, ok := sqlType.(gmstypes.ExtendedType); ok { + return CreateExtendedTypeFromSqlType(customType), nil + } sqlType, err := fillInCollationWithDefault(sqlType) if err != nil { return nil, err @@ -293,6 +298,8 @@ func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) { return CreateBlobStringTypeFromParams(params) case BoolTypeIdentifier: return BoolType, nil + case ExtendedTypeIdentifier: + return CreateExtendedTypeFromParams(params) case DatetimeTypeIdentifier: return CreateDatetimeTypeFromParams(params) case DecimalTypeIdentifier: @@ -301,28 +308,28 @@ func FromTypeParams(id Identifier, params map[string]string) (TypeInfo, error) { return CreateEnumTypeFromParams(params) case FloatTypeIdentifier: return CreateFloatTypeFromParams(params) + case GeometryCollectionTypeIdentifier: + return CreateGeomCollTypeFromParams(params) + case GeometryTypeIdentifier: + return CreateGeometryTypeFromParams(params) case InlineBlobTypeIdentifier: return CreateInlineBlobTypeFromParams(params) case IntTypeIdentifier: return CreateIntTypeFromParams(params) case JSONTypeIdentifier: return JSONType, nil - case GeometryTypeIdentifier: - return CreateGeometryTypeFromParams(params) - case PointTypeIdentifier: - return CreatePointTypeFromParams(params) case LineStringTypeIdentifier: return CreateLineStringTypeFromParams(params) - case PolygonTypeIdentifier: - return CreatePolygonTypeFromParams(params) case MultiPointTypeIdentifier: return CreateMultiPointTypeFromParams(params) case MultiLineStringTypeIdentifier: return CreateMultiLineStringTypeFromParams(params) case 
MultiPolygonTypeIdentifier: return CreateMultiPolygonTypeFromParams(params) - case GeometryCollectionTypeIdentifier: - return CreateGeomCollTypeFromParams(params) + case PointTypeIdentifier: + return CreatePointTypeFromParams(params) + case PolygonTypeIdentifier: + return CreatePolygonTypeFromParams(params) case SetTypeIdentifier: return CreateSetTypeFromParams(params) case TimeTypeIdentifier: @@ -351,6 +358,8 @@ func FromKind(kind types.NomsKind) TypeInfo { return &varBinaryType{gmstypes.LongBlob} case types.BoolKind: return BoolType + case types.ExtendedKind: + panic(fmt.Errorf(`type not supported by the old format "%v"`, kind.String())) case types.FloatKind: return Float64Type case types.InlineBlobKind: diff --git a/go/serial/encoding.fbs b/go/serial/encoding.fbs index cd3cc895bac..77d0ab394c6 100644 --- a/go/serial/encoding.fbs +++ b/go/serial/encoding.fbs @@ -16,31 +16,32 @@ namespace serial; enum Encoding : uint8 { // fixed width - Null = 0, - Int8 = 1, - Uint8 = 2, - Int16 = 3, - Uint16 = 4, - Int32 = 7, - Uint32 = 8, - Int64 = 9, - Uint64 = 10, - Float32 = 11, - Float64 = 12, - Bit64 = 13, - Hash128 = 14, - Year = 15, - Date = 16, - Time = 17, - Datetime = 18, - Enum = 19, - Set = 20, - BytesAddr = 21, - CommitAddr = 22, - StringAddr = 23, - JSONAddr = 24, - Cell = 25, - GeomAddr = 26, + Null = 0, + Int8 = 1, + Uint8 = 2, + Int16 = 3, + Uint16 = 4, + Int32 = 7, + Uint32 = 8, + Int64 = 9, + Uint64 = 10, + Float32 = 11, + Float64 = 12, + Bit64 = 13, + Hash128 = 14, + Year = 15, + Date = 16, + Time = 17, + Datetime = 18, + Enum = 19, + Set = 20, + BytesAddr = 21, + CommitAddr = 22, + StringAddr = 23, + JSONAddr = 24, + Cell = 25, + GeomAddr = 26, + ExtendedAddr = 27, // variable width String = 128, @@ -48,4 +49,5 @@ enum Encoding : uint8 { Decimal = 130, JSON = 131, Geometry = 133, + Extended = 134, } diff --git a/go/store/prolly/tree/prolly_fields.go b/go/store/prolly/tree/prolly_fields.go index 77cff647359..99e9f332f4b 100644 --- 
a/go/store/prolly/tree/prolly_fields.go
+++ b/go/store/prolly/tree/prolly_fields.go
@@ -139,6 +139,22 @@ func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns No
 		v, ok = td.GetCommitAddr(i, tup)
 	case val.CellEnc:
 		v, ok = td.GetCell(i, tup)
+	case val.ExtendedEnc:
+		var b []byte
+		b, ok = td.GetExtended(i, tup)
+		if ok {
+			v, err = td.Handlers[i].DeserializeValue(b)
+		}
+	case val.ExtendedAddrEnc:
+		var h hash.Hash
+		h, ok = td.GetExtendedAddr(i, tup)
+		if ok {
+			var b []byte
+			b, err = NewByteArray(h, ns).ToBytes(ctx)
+			if err == nil {
+				v, err = td.Handlers[i].DeserializeValue(b)
+			}
+		}
 	default:
 		panic("unknown val.encoding")
 	}
@@ -265,6 +281,25 @@ func PutField(ctx context.Context, ns NodeStore, tb *val.TupleBuilder, i int, v
 			}
 		}
 		tb.PutCell(i, ZCell(v.(types.GeometryValue)))
+	case val.ExtendedEnc:
+		b, err := tb.Desc.Handlers[i].SerializeValue(v)
+		if err != nil {
+			return err
+		}
+		if len(b) > math.MaxUint16 {
+			return ErrValueExceededMaxFieldSize
+		}
+		tb.PutExtended(i, b)
+	case val.ExtendedAddrEnc:
+		b, err := tb.Desc.Handlers[i].SerializeValue(v)
+		if err != nil {
+			return err
+		}
+		h, err := SerializeBytesToAddr(ctx, ns, bytes.NewReader(b), len(b))
+		if err != nil {
+			return err
+		}
+		tb.PutExtendedAddr(i, h)
 	default:
 		panic(fmt.Sprintf("unknown encoding %v %v", enc, v))
 	}
diff --git a/go/store/prolly/tuple_map.go b/go/store/prolly/tuple_map.go
index 7e30439041d..647ab6e8460 100644
--- a/go/store/prolly/tuple_map.go
+++ b/go/store/prolly/tuple_map.go
@@ -437,7 +437,14 @@ func ConvertToSecondaryKeylessIndex(m Map) Map {
 	newTypes := make([]val.Type, len(keyDesc.Types)+1)
 	copy(newTypes, keyDesc.Types)
 	newTypes[len(newTypes)-1] = val.Type{Enc: val.Hash128Enc}
-	newKeyDesc := val.NewTupleDescriptorWithComparator(keyDesc.Comparator(), newTypes...)
+	// Carry the existing key handlers over (the appended hash column gets a nil handler); without them,
+	// extended-encoded key fields cannot be serialized or deserialized through the new descriptor.
+	newHandlers := make([]val.TupleTypeHandler, len(newTypes))
+	copy(newHandlers, keyDesc.Handlers)
+	newKeyDesc := val.NewTupleDescriptorWithArgs(val.TupleDescriptorArgs{
+		Comparator: keyDesc.Comparator(),
+		Handlers:   newHandlers,
+	}, newTypes...)
newTuples := m.tuples newTuples.Order = newKeyDesc return Map{
diff --git a/go/store/types/extended.go b/go/store/types/extended.go
new file mode 100644
index 00000000000..0208d1547fb
--- /dev/null
+++ b/go/store/types/extended.go
@@ -0,0 +1,96 @@
+// Copyright 2024 Dolthub, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package types
+
+import (
+	"bytes"
+	"context"
+	"errors"
+
+	"github.com/dolthub/dolt/go/store/hash"
+)
+
+// Extended is the Value used for ExtendedKind. Extended types are not valid in the old format, so every operation
+// that would store, order, or hash the value returns an error.
+type Extended []byte
+
+// Value returns v along with an error, as extended values are invalid in the old format.
+func (v Extended) Value(ctx context.Context) (Value, error) {
+	return v, errors.New("extended is invalid in the old format")
+}
+
+// Equals reports whether other is an Extended value holding the same bytes.
+func (v Extended) Equals(other Value) bool {
+	o, ok := other.(Extended)
+	return ok && bytes.Equal(v, o)
+}
+
+// Less always returns an error, as extended values are invalid in the old format.
+func (v Extended) Less(ctx context.Context, nbf *NomsBinFormat, other LesserValuable) (bool, error) {
+	return false, errors.New("extended is invalid in the old format")
+}
+
+// Hash always returns an error, as extended values are invalid in the old format.
+func (v Extended) Hash(nbf *NomsBinFormat) (hash.Hash, error) {
+	return hash.Hash{}, errors.New("extended is invalid in the old format")
+}
+
+// Kind returns ExtendedKind.
+func (v Extended) Kind() NomsKind {
+	return ExtendedKind
+}
+
+// HumanReadableString returns a fixed marker string rather than a rendering of the value.
+func (v Extended) HumanReadableString() string {
+	return "INVALID"
+}
+
+// Compare always returns an error, as extended values are invalid in the old format.
+func (v Extended) Compare(other LesserValuable) (int, error) {
+	return 0, errors.New("extended is invalid in the old format")
+}
+
+// isPrimitive reports that Extended is treated as a primitive value.
+func (v Extended) isPrimitive() bool {
+	return true
+}
+
+// walkRefs always returns an error, as extended values are invalid in the old format.
+func (v Extended) walkRefs(nbf *NomsBinFormat, cb RefCallback) error {
+	return errors.New("extended is invalid in the old format")
+}
+
+// typeOf returns the entry registered for ExtendedKind in PrimitiveTypeMap.
+func (v Extended) typeOf() (*Type, error) {
+	return PrimitiveTypeMap[ExtendedKind], nil
+}
+
+// valueReadWriter returns nil, as an Extended value carries no ValueReadWriter.
+func (v Extended) valueReadWriter() ValueReadWriter {
+	return nil
+}
+
+// writeTo always returns an error, as extended values are invalid in the old format.
+func (v Extended) writeTo(w nomsWriter, nbf *NomsBinFormat) error {
+	return errors.New("extended is invalid in the old format")
+}
+
+// readFrom always returns an error, as extended values are invalid in the old format.
+func (v Extended) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
+	return Extended{}, errors.New("extended is invalid in the old format")
+}
+
+// skip is a no-op.
+func (v Extended) skip(nbf *NomsBinFormat, b *binaryNomsReader) {}
diff --git a/go/store/types/noms_kind.go b/go/store/types/noms_kind.go
index b009c84c459..51d32337d26 100644
--- a/go/store/types/noms_kind.go
+++ b/go/store/types/noms_kind.go
@@ -71,6 +71,7 @@ const (
 	MultiLineStringKind
 	MultiPolygonKind
 	GeometryCollectionKind
+	ExtendedKind
 
 	UnknownKind NomsKind = 255
 )
@@ -107,6 +108,7 @@ func init() {
 	KindToType[MultiLineStringKind] = MultiLineString{}
 	KindToType[MultiPolygonKind] = MultiPolygon{}
 	KindToType[GeometryCollectionKind] = GeomColl{}
+	KindToType[ExtendedKind] = Extended{}
 
 	SupportedKinds[BlobKind] = true
 	SupportedKinds[BoolKind] = true
@@ -139,6 +141,7 @@ func init() {
 	SupportedKinds[MultiLineStringKind] = true
 	SupportedKinds[MultiPolygonKind] = true
 	SupportedKinds[GeometryCollectionKind] = true
+	SupportedKinds[ExtendedKind] = true
 
 	if serial.MessageTypesKind != int(SerialMessageKind) {
 		panic("internal error: serial.MessageTypesKind != SerialMessageKind")
@@ -180,6 +183,7 @@ var KindToString = map[NomsKind]string{
 	MultiLineStringKind:    "MultiLineString",
 	MultiPolygonKind:       "MultiPolygon",
 	GeometryCollectionKind: "GeometryCollection",
+	ExtendedKind:           "ExtendedType",
 }
 
 // String returns the name of the kind.
diff --git a/go/store/types/tuple.go b/go/store/types/tuple.go index 08d05466444..46a7b7556ed 100644 --- a/go/store/types/tuple.go +++ b/go/store/types/tuple.go @@ -25,6 +25,7 @@ import ( "bytes" "context" "errors" + "fmt" "io" "strings" "sync" @@ -829,6 +830,9 @@ func (t Tuple) TupleCompare(ctx context.Context, nbf *NomsBinFormat, otherTuple return 0, err } + case ExtendedKind: + return 0, fmt.Errorf("extended types are not valid in the old format") + default: v, err := dec.readValue(nbf) diff --git a/go/store/val/codec.go b/go/store/val/codec.go index bc10dc2e9a3..2fa2df923ec 100644 --- a/go/store/val/codec.go +++ b/go/store/val/codec.go @@ -41,61 +41,63 @@ const ( type ByteSize uint16 const ( - int8Size ByteSize = 1 - uint8Size ByteSize = 1 - int16Size ByteSize = 2 - uint16Size ByteSize = 2 - int32Size ByteSize = 4 - uint32Size ByteSize = 4 - int64Size ByteSize = 8 - uint64Size ByteSize = 8 - float32Size ByteSize = 4 - float64Size ByteSize = 8 - bit64Size ByteSize = 8 - hash128Size ByteSize = 16 - yearSize ByteSize = 1 - dateSize ByteSize = 4 - timeSize ByteSize = 8 - datetimeSize ByteSize = 8 - enumSize ByteSize = 2 - setSize ByteSize = 8 - bytesAddrEnc ByteSize = hash.ByteLen - commitAddrEnc ByteSize = hash.ByteLen - stringAddrEnc ByteSize = hash.ByteLen - jsonAddrEnc ByteSize = hash.ByteLen - cellSize ByteSize = 17 - geomAddrEnc ByteSize = hash.ByteLen + int8Size ByteSize = 1 + uint8Size ByteSize = 1 + int16Size ByteSize = 2 + uint16Size ByteSize = 2 + int32Size ByteSize = 4 + uint32Size ByteSize = 4 + int64Size ByteSize = 8 + uint64Size ByteSize = 8 + float32Size ByteSize = 4 + float64Size ByteSize = 8 + bit64Size ByteSize = 8 + hash128Size ByteSize = 16 + yearSize ByteSize = 1 + dateSize ByteSize = 4 + timeSize ByteSize = 8 + datetimeSize ByteSize = 8 + enumSize ByteSize = 2 + setSize ByteSize = 8 + bytesAddrEnc ByteSize = hash.ByteLen + commitAddrEnc ByteSize = hash.ByteLen + stringAddrEnc ByteSize = hash.ByteLen + jsonAddrEnc ByteSize = hash.ByteLen + 
cellSize ByteSize = 17 + geomAddrEnc ByteSize = hash.ByteLen + extendedAddrSize ByteSize = hash.ByteLen ) type Encoding byte // Fixed Width Encodings const ( - NullEnc = Encoding(serial.EncodingNull) - Int8Enc = Encoding(serial.EncodingInt8) - Uint8Enc = Encoding(serial.EncodingUint8) - Int16Enc = Encoding(serial.EncodingInt16) - Uint16Enc = Encoding(serial.EncodingUint16) - Int32Enc = Encoding(serial.EncodingInt32) - Uint32Enc = Encoding(serial.EncodingUint32) - Int64Enc = Encoding(serial.EncodingInt64) - Uint64Enc = Encoding(serial.EncodingUint64) - Float32Enc = Encoding(serial.EncodingFloat32) - Float64Enc = Encoding(serial.EncodingFloat64) - Bit64Enc = Encoding(serial.EncodingBit64) - Hash128Enc = Encoding(serial.EncodingHash128) - YearEnc = Encoding(serial.EncodingYear) - DateEnc = Encoding(serial.EncodingDate) - TimeEnc = Encoding(serial.EncodingTime) - DatetimeEnc = Encoding(serial.EncodingDatetime) - EnumEnc = Encoding(serial.EncodingEnum) - SetEnc = Encoding(serial.EncodingSet) - BytesAddrEnc = Encoding(serial.EncodingBytesAddr) - CommitAddrEnc = Encoding(serial.EncodingCommitAddr) - StringAddrEnc = Encoding(serial.EncodingStringAddr) - JSONAddrEnc = Encoding(serial.EncodingJSONAddr) - CellEnc = Encoding(serial.EncodingCell) - GeomAddrEnc = Encoding(serial.EncodingGeomAddr) + NullEnc = Encoding(serial.EncodingNull) + Int8Enc = Encoding(serial.EncodingInt8) + Uint8Enc = Encoding(serial.EncodingUint8) + Int16Enc = Encoding(serial.EncodingInt16) + Uint16Enc = Encoding(serial.EncodingUint16) + Int32Enc = Encoding(serial.EncodingInt32) + Uint32Enc = Encoding(serial.EncodingUint32) + Int64Enc = Encoding(serial.EncodingInt64) + Uint64Enc = Encoding(serial.EncodingUint64) + Float32Enc = Encoding(serial.EncodingFloat32) + Float64Enc = Encoding(serial.EncodingFloat64) + Bit64Enc = Encoding(serial.EncodingBit64) + Hash128Enc = Encoding(serial.EncodingHash128) + YearEnc = Encoding(serial.EncodingYear) + DateEnc = Encoding(serial.EncodingDate) + TimeEnc = 
Encoding(serial.EncodingTime) + DatetimeEnc = Encoding(serial.EncodingDatetime) + EnumEnc = Encoding(serial.EncodingEnum) + SetEnc = Encoding(serial.EncodingSet) + BytesAddrEnc = Encoding(serial.EncodingBytesAddr) + CommitAddrEnc = Encoding(serial.EncodingCommitAddr) + StringAddrEnc = Encoding(serial.EncodingStringAddr) + JSONAddrEnc = Encoding(serial.EncodingJSONAddr) + CellEnc = Encoding(serial.EncodingCell) + GeomAddrEnc = Encoding(serial.EncodingGeomAddr) + ExtendedAddrEnc = Encoding(serial.EncodingExtendedAddr) sentinel Encoding = 127 ) @@ -106,7 +108,8 @@ func IsAddrEncoding(enc Encoding) bool { CommitAddrEnc, StringAddrEnc, JSONAddrEnc, - GeomAddrEnc: + GeomAddrEnc, + ExtendedAddrEnc: return true default: return false @@ -120,6 +123,7 @@ const ( DecimalEnc = Encoding(serial.EncodingDecimal) JSONEnc = Encoding(serial.EncodingJSON) GeometryEnc = Encoding(serial.EncodingGeometry) + ExtendedEnc = Encoding(serial.EncodingExtended) ) func sizeFromType(t Type) (ByteSize, bool) { @@ -170,6 +174,8 @@ func sizeFromType(t Type) (ByteSize, bool) { return jsonAddrEnc, true case GeomAddrEnc: return geomAddrEnc, true + case ExtendedAddrEnc: + return extendedAddrSize, true default: return 0, false } @@ -589,6 +595,19 @@ func compareByteString(l, r []byte) int { return bytes.Compare(l, r) } +func readExtended(handler TupleTypeHandler, val []byte) any { + v, err := handler.DeserializeValue(val) + if err != nil { + panic(err) + } + return v +} + +func writeExtended(handler TupleTypeHandler, buf []byte, val []byte) { + expectSize(buf, ByteSize(len(val))) + copy(buf, val) +} + func readHash128(val []byte) []byte { expectSize(val, hash128Size) return val diff --git a/go/store/val/codec_test.go b/go/store/val/codec_test.go index 581d59832c8..c9078b269d1 100644 --- a/go/store/val/codec_test.go +++ b/go/store/val/codec_test.go @@ -252,9 +252,9 @@ func TestCompare(t *testing.T) { for _, test := range tests { act := compare(test.typ, test.l, test.r) assert.Equal(t, test.cmp, act, 
"expected %s %s %s ", - formatValue(test.typ.Enc, test.l), + TupleDesc{}.formatValue(test.typ.Enc, 0, test.l), fmtComparator(test.cmp), - formatValue(test.typ.Enc, test.r)) + TupleDesc{}.formatValue(test.typ.Enc, 0, test.r)) } } diff --git a/go/store/val/extended_comparator.go b/go/store/val/extended_comparator.go new file mode 100644 index 00000000000..773efea8244 --- /dev/null +++ b/go/store/val/extended_comparator.go @@ -0,0 +1,95 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package val + +// ExtendedTupleComparator is a comparator that properly handles extended types. +type ExtendedTupleComparator struct { + innerCmp TupleComparator + handlers []TupleTypeHandler +} + +//TODO: compare performance of rolling this logic into the DefaultTupleComparator (nil check or generic handlers that call compare) +var _ TupleComparator = ExtendedTupleComparator{} + +// Compare implements the TupleComparator interface. 
+func (c ExtendedTupleComparator) Compare(left, right Tuple, desc TupleDesc) (cmp int) { + fast := desc.GetFixedAccess() + for i := range fast { + start, stop := fast[i][0], fast[i][1] + cmp = c.CompareValues(i, left[start:stop], right[start:stop], desc.Types[i]) + if cmp != 0 { + return cmp + } + } + + off := len(fast) + for i, typ := range desc.Types[off:] { + j := i + off + cmp = c.CompareValues(j, left.GetField(j), right.GetField(j), typ) + if cmp != 0 { + return cmp + } + } + return +} + +// CompareValues implements the TupleComparator interface. +func (c ExtendedTupleComparator) CompareValues(index int, left, right []byte, typ Type) int { + switch typ.Enc { + case ExtendedEnc, ExtendedAddrEnc: + cmp, err := c.handlers[index].SerializedCompare(left, right) + if err != nil { + panic(err) + } + return cmp + default: + return compare(typ, left, right) + } +} + +// Prefix implements the TupleComparator interface. +func (c ExtendedTupleComparator) Prefix(n int) TupleComparator { + return ExtendedTupleComparator{c.innerCmp.Prefix(n), c.handlers[:n]} +} + +// Suffix implements the TupleComparator interface. +func (c ExtendedTupleComparator) Suffix(n int) TupleComparator { + return ExtendedTupleComparator{c.innerCmp.Suffix(n), c.handlers[n:]} +} + +// Validated implements the TupleComparator interface. 
+func (c ExtendedTupleComparator) Validated(types []Type) TupleComparator { + innerCmp := c.innerCmp.Validated(types) + if len(c.handlers) == 0 { + c.handlers = make([]TupleTypeHandler, len(types)) + } else if len(c.handlers) != len(types) { + panic("invalid handler count compared to types") + } + hasHandler := false + for i, handler := range c.handlers { + switch types[i].Enc { + case ExtendedEnc, ExtendedAddrEnc: + if handler == nil { + panic("extende encoding requires a handler") + } else { + hasHandler = true + } + } + } + if !hasHandler { + return innerCmp + } + return ExtendedTupleComparator{innerCmp, c.handlers} +} diff --git a/go/store/val/tuple_builder.go b/go/store/val/tuple_builder.go index fada50015e7..3cee278f071 100644 --- a/go/store/val/tuple_builder.go +++ b/go/store/val/tuple_builder.go @@ -347,6 +347,23 @@ func (tb *TupleBuilder) PutHash128(i int, v []byte) { tb.pos += hash128Size } +// PutExtended writes a []byte to the ith field of the Tuple being built. +func (tb *TupleBuilder) PutExtended(i int, v []byte) { + tb.Desc.expectEncoding(i, ExtendedEnc) + sz := ByteSize(len(v)) + tb.ensureCapacity(sz) + tb.fields[i] = tb.buf[tb.pos : tb.pos+sz] + writeExtended(tb.Desc.Handlers[i], tb.fields[i], v) + tb.pos += sz +} + +// PutExtendedAddr writes a []byte to the ith field of the Tuple being built. +func (tb *TupleBuilder) PutExtendedAddr(i int, v hash.Hash) { + tb.Desc.expectEncoding(i, ExtendedAddrEnc) + tb.ensureCapacity(hash.ByteLen) + tb.putAddr(i, v) +} + // PutRaw writes a []byte to the ith field of the Tuple being built. 
func (tb *TupleBuilder) PutRaw(i int, buf []byte) { if buf == nil { diff --git a/go/store/val/tuple_descriptor.go b/go/store/val/tuple_descriptor.go index 3d8a7382cd2..3b01b74b6b6 100644 --- a/go/store/val/tuple_descriptor.go +++ b/go/store/val/tuple_descriptor.go @@ -42,18 +42,39 @@ var disableFixedAccess = false // Data structures that contain Tuples and algorithms that process Tuples // use a TupleDesc's types to interpret the fields of a Tuple. type TupleDesc struct { - Types []Type - cmp TupleComparator - fast FixedAccess + Types []Type + Handlers []TupleTypeHandler + cmp TupleComparator + fast FixedAccess +} + +// TupleTypeHandler is used to specifically handle types that use extended encoding. Such types are declared by GMS, and +// this is a forward reference for the interface functions that are necessary here. +type TupleTypeHandler interface { + // SerializedCompare compares two byte slices that each represent a serialized value, without first deserializing + // the value. + SerializedCompare(v1 []byte, v2 []byte) (int, error) + // SerializeValue converts the given value into a binary representation. + SerializeValue(val any) ([]byte, error) + // DeserializeValue converts a binary representation of a value into its canonical type. + DeserializeValue(val []byte) (any, error) + // FormatValue returns a string version of the value. Primarily intended for display. + FormatValue(val any) (string, error) +} + +// TupleDescriptorArgs are a set of optional arguments for TupleDesc creation. +type TupleDescriptorArgs struct { + Comparator TupleComparator + Handlers []TupleTypeHandler } // NewTupleDescriptor makes a TupleDescriptor from |types|. func NewTupleDescriptor(types ...Type) TupleDesc { - return NewTupleDescriptorWithComparator(DefaultTupleComparator{}, types...) + return NewTupleDescriptorWithArgs(TupleDescriptorArgs{}, types...) } -// NewTupleDescriptorWithComparator returns a TupleDesc from a slice of Types. 
-func NewTupleDescriptorWithComparator(cmp TupleComparator, types ...Type) (td TupleDesc) { +// NewTupleDescriptorWithArgs returns a TupleDesc based on the given arguments. +func NewTupleDescriptorWithArgs(args TupleDescriptorArgs, types ...Type) (td TupleDesc) { if len(types) > MaxTupleFields { panic("tuple field maxIdx exceeds maximum") } @@ -62,12 +83,16 @@ func NewTupleDescriptorWithComparator(cmp TupleComparator, types ...Type) (td Tu panic("invalid encoding") } } - cmp = cmp.Validated(types) + if args.Comparator == nil { + args.Comparator = DefaultTupleComparator{} + } + args.Comparator = ExtendedTupleComparator{args.Comparator, args.Handlers}.Validated(types) td = TupleDesc{ - Types: types, - cmp: cmp, - fast: makeFixedAccess(types), + Types: types, + Handlers: args.Handlers, + cmp: args.Comparator, + fast: makeFixedAccess(types), } return } @@ -115,7 +140,10 @@ func (td TupleDesc) AddressFieldCount() (n int) { // PrefixDesc returns a descriptor for the first n types. func (td TupleDesc) PrefixDesc(n int) TupleDesc { - return NewTupleDescriptorWithComparator(td.cmp.Prefix(n), td.Types[:n]...) + if len(td.Handlers) == 0 { + return NewTupleDescriptorWithArgs(TupleDescriptorArgs{Comparator: td.cmp.Prefix(n)}, td.Types[:n]...) + } + return NewTupleDescriptorWithArgs(TupleDescriptorArgs{Comparator: td.cmp.Prefix(n), Handlers: td.Handlers[:n]}, td.Types[:n]...) } // GetField returns the ith field of |tup|. @@ -179,7 +207,7 @@ func (td TupleDesc) GetFixedAccess() FixedAccess { // WithoutFixedAccess returns a copy of |td| without fixed access metadata. func (td TupleDesc) WithoutFixedAccess() TupleDesc { - return TupleDesc{Types: td.Types, cmp: td.cmp} + return TupleDesc{Types: td.Types, Handlers: td.Handlers, cmp: td.cmp} } // GetBool reads a bool from the ith field of the Tuple. @@ -456,6 +484,19 @@ func (td TupleDesc) GetHash128(i int, tup Tuple) (v []byte, ok bool) { return } +// GetExtended reads a byte slice from the ith field of the Tuple. 
+func (td TupleDesc) GetExtended(i int, tup Tuple) ([]byte, bool) { + td.expectEncoding(i, ExtendedEnc) + v := td.GetField(i, tup) + return v, v != nil +} + +// GetExtendedAddr reads a hash from the ith field of the Tuple. +func (td TupleDesc) GetExtendedAddr(i int, tup Tuple) (hash.Hash, bool) { + td.expectEncoding(i, ExtendedAddrEnc) + return td.getAddr(i, tup) +} + func (td TupleDesc) GetJSONAddr(i int, tup Tuple) (hash.Hash, bool) { td.expectEncoding(i, JSONAddrEnc) return td.getAddr(i, tup) @@ -528,9 +569,10 @@ func (td TupleDesc) FormatValue(i int, value []byte) string { if value == nil { return "NULL" } - return formatValue(td.Types[i].Enc, value) + return td.formatValue(td.Types[i].Enc, i, value) } -func formatValue(enc Encoding, value []byte) string { + +func (td TupleDesc) formatValue(enc Encoding, i int, value []byte) string { switch enc { case Int8Enc: v := readInt8(value) @@ -598,6 +640,16 @@ func formatValue(enc Encoding, value []byte) string { return hex.EncodeToString(value) case CellEnc: return hex.EncodeToString(value) + case ExtendedEnc: + handler := td.Handlers[i] + v := readExtended(handler, value) + str, err := handler.FormatValue(v) + if err != nil { + panic(err) + } + return str + case ExtendedAddrEnc: + return hex.EncodeToString(value) default: return string(value) } diff --git a/go/store/val/tuple_descriptor_test.go b/go/store/val/tuple_descriptor_test.go index a6baeb222fb..c78c2067a08 100644 --- a/go/store/val/tuple_descriptor_test.go +++ b/go/store/val/tuple_descriptor_test.go @@ -23,7 +23,7 @@ import ( func TestTupleDescriptorSize(t *testing.T) { sz := unsafe.Sizeof(TupleDesc{}) - assert.Equal(t, 64, int(sz)) + assert.Equal(t, 88, int(sz)) } func TestTupleDescriptorAddressTypes(t *testing.T) {