Skip to content

Commit

Permalink
store GEOMETRY types as BLOBs (#6933)
Browse files Browse the repository at this point in the history
  • Loading branch information
jycor authored Nov 7, 2023
1 parent 9d1562e commit c5710b4
Show file tree
Hide file tree
Showing 14 changed files with 230 additions and 77 deletions.
3 changes: 3 additions & 0 deletions go/gen/fb/serial/encoding.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion go/libraries/doltcore/doltdb/root_val.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ type FeatureVersion int64

// DoltFeatureVersion is described in feature_version.md.
// It is a variable (rather than a constant) only for testing.
var DoltFeatureVersion FeatureVersion = 5 // last bumped when adding virtual columns to schema storage
var DoltFeatureVersion FeatureVersion = 6 // last bumped when changing geometry types to be stored as BLOBs

// RootValue is the value of the Database and is the committed value in every Dolt commit.
type RootValue struct {
Expand Down
2 changes: 1 addition & 1 deletion go/libraries/doltcore/schema/serial_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func EncodingFromSqlType(typ query.Type) serial.Encoding {
case query.Type_VARCHAR:
return serial.EncodingString
case query.Type_GEOMETRY:
return serial.EncodingGeometry
return serial.EncodingGeomAddr
case query.Type_JSON:
return serial.EncodingJSONAddr
case query.Type_BLOB:
Expand Down
41 changes: 37 additions & 4 deletions go/libraries/doltcore/sqle/enginetest/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,13 @@ func validateKeylessIndex(ctx context.Context, sch schema.Schema, def schema.Ind
return err
}
} else if def.IsSpatial() {
geom, _, err := sqltypes.GeometryType{}.Convert(field[:len(field)-1])
geom, err := dereferenceGeometry(ctx, vd, j+1, value, secondary.NodeStore())
if err != nil {
panic(err)
return err
}
geom, _, err = sqltypes.GeometryType{}.Convert(geom)
if err != nil {
return err
}
cell := index.ZCell(geom.(sqltypes.GeometryValue))
field = cell[:]
Expand Down Expand Up @@ -298,9 +302,13 @@ func validatePkIndex(ctx context.Context, sch schema.Schema, def schema.Index, p
return err
}
} else if def.IsSpatial() {
geom, _, err := sqltypes.GeometryType{}.Convert(field[:len(field)-1])
geom, err := dereferenceGeometry(ctx, vd, j-pkSize, value, secondary.NodeStore())
if err != nil {
panic(err)
return err
}
geom, _, err = sqltypes.GeometryType{}.Convert(geom)
if err != nil {
return err
}
cell := index.ZCell(geom.(sqltypes.GeometryValue))
field = cell[:]
Expand Down Expand Up @@ -379,6 +387,31 @@ func dereferenceContent(ctx context.Context, tableValueDescriptor val.TupleDesc,
}
}

// dereferenceGeometry loads the geometry field at |tablePos| of |tuple| via index.GetField and
// hands it back in a form the caller can pass on for conversion. |tableValueDescriptor| is the
// tuple descriptor for the value tuple of the main table, |tuple| is that value tuple, and |ns|
// is the node store passed through to index.GetField.
//
// The result is one of:
//   - nil, when the field itself is nil;
//   - a []byte, when the field was read as a string or []byte;
//   - the geometry value unchanged, when the field was already one of the sqltypes geometry types.
//
// Any other field type is reported as an error.
func dereferenceGeometry(ctx context.Context, tableValueDescriptor val.TupleDesc, tablePos int, tuple val.Tuple, ns tree.NodeStore) (interface{}, error) {
	field, err := index.GetField(ctx, tableValueDescriptor, tablePos, tuple, ns)
	if err != nil {
		return nil, err
	}

	switch geom := field.(type) {
	case nil:
		// A nil field is not an error; the caller receives a nil geometry.
		return nil, nil
	case []byte:
		return geom, nil
	case string:
		return []byte(geom), nil
	case sqltypes.Point, sqltypes.LineString, sqltypes.Polygon, sqltypes.MultiPoint, sqltypes.MultiLineString, sqltypes.MultiPolygon, sqltypes.GeometryType, sqltypes.GeomColl:
		// Already a geometry value: pass it through untouched.
		return field, nil
	}

	return nil, fmt.Errorf("unexpected type for address encoded content: %T", field)
}

// trimValueToPrefixLength trims |value| by truncating the bytes after |prefixLength|. If |prefixLength|
// is zero or if |value| is nil, then no trimming is done and |value| is directly returned. The
// |encoding| param indicates the original encoding of |value| in the source table.
Expand Down
65 changes: 50 additions & 15 deletions go/libraries/doltcore/sqle/index/prolly_fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,30 @@ func GetField(ctx context.Context, td val.TupleDesc, i int, tup val.Tuple, ns tr
err = json.Unmarshal(buf, &doc.Val)
v = doc
}
// TODO: eventually remove this, and only read GeomAddrEnc
case val.GeometryEnc:
var buf []byte
buf, ok = td.GetGeometry(i, tup)
if ok {
v = deserializeGeometry(buf)
v, err = deserializeGeometry(buf)
}
case val.GeomAddrEnc:
// TODO: until GeometryEnc is removed, we must check if GeomAddrEnc is a GeometryEnc
var buf []byte
buf, ok = td.GetGeometry(i, tup)
if ok {
v, err = deserializeGeometry(buf)
}
if !ok || err != nil {
var h hash.Hash
h, ok = td.GetGeometryAddr(i, tup)
if ok {
buf, err = tree.NewByteArray(h, ns).ToBytes(ctx)
if err != nil {
return nil, err
}
v, err = deserializeGeometry(buf)
}
}
case val.Hash128Enc:
v, ok = td.GetHash128(i, tup)
Expand Down Expand Up @@ -198,12 +217,21 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
tb.PutByteString(i, v.([]byte))
case val.Hash128Enc:
tb.PutHash128(i, v.([]byte))
// TODO: eventually remove GeometryEnc, but in the meantime write them as GeomAddrEnc
case val.GeometryEnc:
geo := serializeGeometry(v)
if len(geo) > math.MaxUint16 {
return ErrValueExceededMaxFieldSize
h, err := serializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
if err != nil {
return err
}
tb.PutGeometryAddr(i, h)
case val.GeomAddrEnc:
geo := serializeGeometry(v)
h, err := serializeBytesToAddr(ctx, ns, bytes.NewReader(geo), len(geo))
if err != nil {
return err
}
tb.PutGeometry(i, geo)
tb.PutGeometryAddr(i, h)
case val.JSONAddrEnc:
buf, err := convJson(v)
if err != nil {
Expand Down Expand Up @@ -231,7 +259,11 @@ func PutField(ctx context.Context, ns tree.NodeStore, tb *val.TupleBuilder, i in
tb.PutCommitAddr(i, v.(hash.Hash))
case val.CellEnc:
if _, ok := v.([]byte); ok {
v = deserializeGeometry(v.([]byte))
var err error
v, err = deserializeGeometry(v.([]byte))
if err != nil {
return err
}
}
tb.PutCell(i, ZCell(v.(types.GeometryValue)))
default:
Expand Down Expand Up @@ -292,26 +324,29 @@ func convUint(v interface{}) uint {
}
}

func deserializeGeometry(buf []byte) (v interface{}) {
srid, _, typ, _ := types.DeserializeEWKBHeader(buf)
func deserializeGeometry(buf []byte) (v interface{}, err error) {
srid, _, typ, err := types.DeserializeEWKBHeader(buf)
if err != nil {
return nil, err
}
buf = buf[types.EWKBHeaderSize:]
switch typ {
case types.WKBPointID:
v, _, _ = types.DeserializePoint(buf, false, srid)
v, _, err = types.DeserializePoint(buf, false, srid)
case types.WKBLineID:
v, _, _ = types.DeserializeLine(buf, false, srid)
v, _, err = types.DeserializeLine(buf, false, srid)
case types.WKBPolyID:
v, _, _ = types.DeserializePoly(buf, false, srid)
v, _, err = types.DeserializePoly(buf, false, srid)
case types.WKBMultiPointID:
v, _, _ = types.DeserializeMPoint(buf, false, srid)
v, _, err = types.DeserializeMPoint(buf, false, srid)
case types.WKBMultiLineID:
v, _, _ = types.DeserializeMLine(buf, false, srid)
v, _, err = types.DeserializeMLine(buf, false, srid)
case types.WKBMultiPolyID:
v, _, _ = types.DeserializeMPoly(buf, false, srid)
v, _, err = types.DeserializeMPoly(buf, false, srid)
case types.WKBGeomCollID:
v, _, _ = types.DeserializeGeomColl(buf, false, srid)
v, _, err = types.DeserializeGeomColl(buf, false, srid)
default:
panic(fmt.Sprintf("unknown geometry type %d", typ))
return nil, fmt.Errorf("unknown geometry type %d", typ)
}
return
}
Expand Down
52 changes: 49 additions & 3 deletions go/libraries/doltcore/sqle/index/prolly_fields_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,17 @@ func TestRoundTripProllyFields(t *testing.T) {
},
{
name: "point",
typ: val.Type{Enc: val.GeometryEnc},
typ: val.Type{Enc: val.GeomAddrEnc},
value: mustParseGeometryType(t, "POINT(1 2)"),
},
{
name: "linestring",
typ: val.Type{Enc: val.GeometryEnc},
typ: val.Type{Enc: val.GeomAddrEnc},
value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)"),
},
{
name: "polygon",
typ: val.Type{Enc: val.GeometryEnc},
typ: val.Type{Enc: val.GeomAddrEnc},
value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))"),
},
{
Expand Down Expand Up @@ -244,3 +244,49 @@ func dateFromTime(t time.Time) time.Time {
y, m, d := t.Year(), t.Month(), t.Day()
return time.Date(y, m, d, 0, 0, 0, 0, time.UTC)
}

// TestGeometryEncoding checks that geometries written with the legacy inline encoding
// (val.GeometryEnc) can still be read back through GetField.
//
// Geometries were originally stored inline in the tuple; they are now stored out of band as BLOBs.
func TestGeometryEncoding(t *testing.T) {
	type testCase struct {
		name  string
		value interface{}
	}

	cases := []testCase{
		{name: "point", value: mustParseGeometryType(t, "POINT(1 2)")},
		{name: "linestring", value: mustParseGeometryType(t, "LINESTRING(1 2,3 4)")},
		{name: "polygon", value: mustParseGeometryType(t, "POLYGON((0 0,1 1,1 0,0 0))")},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ns := tree.NewTestNodeStore()

			// Build a single-field tuple that carries the serialized geometry inline,
			// using a descriptor with the legacy encoding.
			oldDesc := val.NewTupleDescriptor(val.Type{Enc: val.GeometryEnc})
			builder := val.NewTupleBuilder(oldDesc)
			builder.PutGeometry(0, serializeGeometry(tc.value))
			tup := builder.Build(testPool)

			// Reading through the legacy descriptor must round-trip the value.
			got, err := GetField(context.Background(), oldDesc, 0, tup, ns)
			assert.NoError(t, err)
			assert.Equal(t, tc.value, got)

			// NOTE(review): newDesc is built with the same val.GeometryEnc encoding as oldDesc,
			// so this read repeats the one above. Presumably val.GeomAddrEnc was intended in
			// order to exercise the new encoding's read path — confirm before changing.
			newDesc := val.NewTupleDescriptor(val.Type{Enc: val.GeometryEnc})
			got, err = GetField(context.Background(), newDesc, 0, tup, ns)
			assert.NoError(t, err)
			assert.Equal(t, tc.value, got)
		})
	}
}
2 changes: 1 addition & 1 deletion go/libraries/doltcore/sqle/sqlselect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ func BasicSelectTests() []SelectTest {
var headCommitHash string
switch types.Format_Default {
case types.Format_DOLT:
headCommitHash = "li3mp6hml1bctgon5hptfh9b8rqc1i6a"
headCommitHash = "6665g1bg08efo1sr2ui23iulsc7h22hd"
case types.Format_LD_1:
headCommitHash = "73hc2robs4v0kt9taoe3m5hd49dmrgun"
}
Expand Down
101 changes: 51 additions & 50 deletions go/serial/encoding.fbs
Original file line number Diff line number Diff line change
@@ -1,50 +1,51 @@
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace serial;

enum Encoding : uint8 {
// fixed width
Null = 0,
Int8 = 1,
Uint8 = 2,
Int16 = 3,
Uint16 = 4,
Int32 = 7,
Uint32 = 8,
Int64 = 9,
Uint64 = 10,
Float32 = 11,
Float64 = 12,
Bit64 = 13,
Hash128 = 14,
Year = 15,
Date = 16,
Time = 17,
Datetime = 18,
Enum = 19,
Set = 20,
BytesAddr = 21,
CommitAddr = 22,
StringAddr = 23,
JSONAddr = 24,
Cell = 25,

// variable width
String = 128,
Bytes = 129,
Decimal = 130,
JSON = 131,
Geometry = 133,
}
// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace serial;

// Encoding enumerates the field encodings in the serial namespace, stored as a uint8.
// The values are split into two groups: a fixed-width group starting at 0 and a
// variable-width group starting at 128.
// NOTE(review): 5, 6 and 132 are not assigned in this listing — presumably retired or
// reserved; confirm before reusing them.
enum Encoding : uint8 {
// fixed width
Null = 0,
Int8 = 1,
Uint8 = 2,
Int16 = 3,
Uint16 = 4,
Int32 = 7,
Uint32 = 8,
Int64 = 9,
Uint64 = 10,
Float32 = 11,
Float64 = 12,
Bit64 = 13,
Hash128 = 14,
Year = 15,
Date = 16,
Time = 17,
Datetime = 18,
Enum = 19,
Set = 20,
BytesAddr = 21,
CommitAddr = 22,
StringAddr = 23,
JSONAddr = 24,
Cell = 25,
// GeomAddr sits with the other *Addr encodings in the fixed-width group.
// NOTE(review): presumably an address of geometry bytes stored out of line, by analogy
// with BytesAddr/StringAddr/JSONAddr — confirm.
GeomAddr = 26,

// variable width
String = 128,
Bytes = 129,
Decimal = 130,
JSON = 131,
// Geometry is the variable-width geometry encoding; it remains defined alongside GeomAddr.
Geometry = 133,
}
Loading

0 comments on commit c5710b4

Please sign in to comment.